def test_get_majority_vote(self):
     point1 = DataPoint([1.0, 1.0], 'a')
     point2 = DataPoint([0.0, 0.0], 'b')
     point3 = DataPoint([1.0, 0.0], 'a')
     points = [point1, point2, point3]
     majority = get_majority_vote(points)
     self.assertEqual(majority, 'a', 'Majority label should be "a"')
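For reference, a minimal get_majority_vote that would satisfy this test could look like the sketch below. This is an assumption, not the original implementation; it only relies on DataPoint exposing a label attribute, as the test construction suggests.

from collections import Counter

def get_majority_vote(points):
    # Tally the labels and return the most common one.
    labels = [point.label for point in points]
    return Counter(labels).most_common(1)[0][0]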
 def test_random_centroids(self):
     '''Test that random centroids are created
     when no initial centroids are passed in.'''
     point1: DataPoint = DataPoint([2.0, 1.0, 1.0])
     point2: DataPoint = DataPoint([2.0, 2.0, 5.0])
     point3: DataPoint = DataPoint([3.0, 1.5, 2.5])
     kmeans: KMeans = KMeans(2, [point1, point2, point3])
     self.assertIsNotNone(kmeans._centroids)
 def test_euclidean_distance(self):
     point1 = DataPoint([1.0, 1.0], 'a')
     point2 = DataPoint([0.0, 0.0], 'b')
     d = 2**0.5
     result = euclidean(point1, point2)
     self.assertEqual(
         result, d,
         'Euclidean distance between these points should be sqrt(2)')
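A matching euclidean sketch, again hedged: the tested function is assumed to compare the dimensions lists of two DataPoint instances.

from math import sqrt

def euclidean(p1, p2):
    # Square root of the sum of squared per-dimension differences.
    return sqrt(sum((a - b) ** 2 for a, b in zip(p1.dimensions, p2.dimensions)))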
Example #4
def basic_kmeans_test():
    p1 = DataPoint((2, 1, 1))
    p2 = DataPoint((2, 2, 5))
    p3 = DataPoint((3, 1.5, 2.5))
    test = KMeans(2, (p1, p2, p3))
    test_result = test.run()
    for i, cluster in enumerate(test_result):
        print("Cluster {}: {}".format(i, cluster.points))
 def test_wrong_centroids_number(self):
     '''Test that a ValueError is raised when the number
     of initial centroids passed in does not match k.'''
     point1: DataPoint = DataPoint([2.0, 1.0, 1.0])
     point2: DataPoint = DataPoint([2.0, 2.0, 5.0])
     point3: DataPoint = DataPoint([3.0, 1.5, 2.5])
     centroid: DataPoint = DataPoint([1.0, 1.0, 1.0])
     self.assertRaises(ValueError, KMeans, 2, [point1, point2, point3],
                       [centroid])
 def test_given_right_centroids(self):
     '''Test that the initial centroids are set correctly
     when the right number of them is passed in.'''
     point1: DataPoint = DataPoint([2.0, 1.0, 1.0])
     point2: DataPoint = DataPoint([2.0, 2.0, 5.0])
     point3: DataPoint = DataPoint([3.0, 1.5, 2.5])
     centroid1: DataPoint = DataPoint([1.0, 1.0, 1.0])
     centroid2: DataPoint = DataPoint([2.0, 2.0, 2.0])
     kmeans: KMeans = KMeans(2, [point1, point2, point3],
                             [centroid1, centroid2])
     self.assertEqual(kmeans._centroids, [centroid1, centroid2])
 def test_get_neighbours(self):
     point1 = DataPoint([1.0, 1.0], 'a')
     point2 = DataPoint([0.0, 0.0], 'b')
     point3 = DataPoint([1.0, 0.0], 'a')
     points = [point3, point1, point2]
     new_point = DataPoint([0.0, 1.0], None)
     result = [point1, point2]
     neighbours = get_neighbours(points, new_point, 2)
     self.assertEqual(
         neighbours, result,
         'Should return 2 closest points: point1[1.0, 1.0] and point2[0.0, 0.0]'
     )
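A get_neighbours sketch consistent with this test (an assumption, not the original; it reuses the euclidean function tested above):

def get_neighbours(points, new_point, k):
    # Sort candidates by distance to new_point and keep the k nearest.
    return sorted(points, key=lambda p: euclidean(p, new_point))[:k]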
Example #8
 def _random_point(self) -> DataPoint:
     rand_dimensions: List[float] = []
     for dimension in range(self._points[0].num_dimensions):
         values: List[float] = self._dimension_slice(dimension)
         rand_value: float = uniform(min(values), max(values))
         rand_dimensions.append(rand_value)
     return DataPoint(rand_dimensions)
Example #9
 def __query_by_group(self, col: str, **kwargs) -> List[DataPoint]:
   """
   Query that returns counts grouped by input column.
   Optional filter args can be passed in.
   """
   results = []
   query_addon = []
   query_params = []
   for key, val in kwargs.items():
     if val:
       query_addon.append(f"{key} LIKE ?")
       query_params += ['%'+val+'%']
   if query_addon:
     query_addon = " AND " + " AND ".join(query_addon)
   else:
     query_addon = ""
   db_conn = sqlite3.connect(self.db_file)
   with db_conn:
     cur = db_conn.cursor()
     query_str = f"SELECT {col} AS name, COUNT({col}) AS value \
       FROM titles \
       WHERE {col} IS NOT NULL {query_addon} \
       GROUP BY {col}"
     cur.execute(query_str, query_params)
     for row in cur.fetchall():
       try:
          results.append(DataPoint(name=row[0], value=row[1]))
       except ValidationError as ve:
         self.logger.error(ve.errors())
   return results
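To illustrate what the method builds, a call such as __query_by_group('genre', country='UK') (both column names are hypothetical) would execute roughly:

# SELECT genre AS name, COUNT(genre) AS value
# FROM titles
# WHERE genre IS NOT NULL AND country LIKE ?
# GROUP BY genre
# with query_params == ['%UK%']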
Example #10
 def _random_point(self):
     rand_dims = []
     for dim in range(self._points[0].num_dimensions):
         dim_slice = self._dimension_slice(dim)  
         rand_val = uniform(min(dim_slice), max(dim_slice))
         rand_dims.append(rand_val)
     return DataPoint(rand_dims)
 def _generate_centroids(self) -> None:
     for cluster in self._clusters:
         if len(cluster.points) == 0:
             continue  # keep the same centroid if no points
         means: List[float] = []
         for dimension in range(self._points[0].num_dimensions):
             dimension_slice: List[float] = [x.dimensions[dimension] for x in cluster.points]
             means.append(mean(dimension_slice))
         cluster.centroid = DataPoint(means)
Example #12
def send_values(omf_client):
	value = 0
	while True:
		value += 1
		data_point = DataPoint(datetime.datetime.now(), value)
		data_message = DataMessage(stream_id, [data_point])
		payload = jsonpickle.encode([data_message], unpicklable=False)
		omf_client.send_omf_message("data", "create", payload)
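Note that the loop above sends without any pause, so it emits values as fast as the client accepts them. A hedged tweak (time.sleep and the one-second interval are assumptions, not part of the original):

# import time
# ... then, at the end of each loop iteration:
# time.sleep(1.0)  # throttle to roughly one value per second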
Example #13
 def _generate_centroids(self) -> None:
     for cluster in self._clusters:
         if len(cluster.points) == 0:
             continue  # keep the same centroid if there are no points
         means: List[float] = []
         for dimension in range(cluster.points[0].num_dimensions):
             dimension_slice: List[float] = [p.dimensions[dimension] for p in cluster.points]
             means.append(mean(dimension_slice))
         cluster.centroid = DataPoint(means)
Example #14
 def _generate_cluster_centroids(self):
     for cluster in self._clusters:
         if len(cluster.points) == 0:
             continue
         means = []
         for dim in range(self._points[0].num_dimensions):
             dim_slice = [p.dimensions[dim] for p in cluster.points]
             means.append(mean(dim_slice))
         cluster.centroid = DataPoint(means)
Example #15
    def _random_point(self):
        random_dimensions = []

        for dimension in range(self._points[0].num_dimensions):
            values = self._dimension_slice(dimension)
            random_value = uniform(min(values), max(values))
            random_dimensions.append(random_value)

        return DataPoint(random_dimensions)
Example #16
 def __read_gyroscope(self):
     gyr_reading = DataPoint()
     gyr_reading.sensor_type = 'gyr'
     gyr = self.gyroscope.read_data()
     gyr_reading.x = gyr[0]
     gyr_reading.y = gyr[1]
     gyr_reading.z = gyr[2]
     gyr_reading.time = self.current_millis_frac() - self.started_ms
     return gyr_reading
Example #17
 def __read_compass(self):
     comp_reading = DataPoint()
     comp_reading.sensor_type = 'comp'
     comp = self.compass.read_data()
     comp_reading.x = comp[0]
     comp_reading.y = comp[1]
     comp_reading.z = comp[2]
     comp_reading.time = self.current_millis_frac() - self.started_ms
     return comp_reading
Example #18
 def __read_accelerometer(self):
     acc_reading = DataPoint()
     acc_reading.sensor_type = 'acc'
     acc = self.accelerometer.read_data()
     acc_reading.x = acc[0]
     acc_reading.y = acc[1]
     acc_reading.z = acc[2]
     acc_reading.time = self.current_millis_frac() - self.started_ms
     return acc_reading
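The three readers above differ only in the sensor object and the sensor_type tag. A hedged refactor sketch (the method and parameter names are hypothetical):

def _read_sensor(self, sensor, sensor_type):
    # Shared logic for the gyroscope, compass and accelerometer readers.
    reading = DataPoint()
    reading.sensor_type = sensor_type
    reading.x, reading.y, reading.z = sensor.read_data()
    reading.time = self.current_millis_frac() - self.started_ms
    return reading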
 def start_classification_phase(self):
     """
     Starts Classification Phase where points are sent to Slave nodes for classification.
     :return: Classification results
     """
     print('CLASSIFICATION PHASE')
     for i, connection in enumerate(self.connections):
         data_batch = self.points
         data = connection[0].recv(1024)
         if data and data == messages.ClientMessages.SEND_DATA_REQUEST:
             connection[0].send(str.encode(str(len(data_batch))))
             data = connection[0].recv(1024)
             if data and data == messages.ClientMessages.READY:
                 for d in data_batch:
                     connection[0].send(str.encode(str(d)))
     for connection in self.connections:
         data = connection[0].recv(1024)
         if data and data == messages.ClientMessages.REQUEST_FOR_K:
             connection[0].send(str.encode(str(self.k)))
             connection[0].send(str.encode(str(len(self.connections))))
     neighbouring_points = {}
     for point in self.points:
         neighbouring_points[str(point.id)] = []
     for connection in self.connections:
         data = connection[0].recv(1024)
         if data and data == messages.ClientMessages.SEND_CLASSIFICATION_DATA_REQUEST:
             connection[0].send(messages.ServerMessages.ALLOW_PROCEED)
             data = connection[0].recv(1024)
             if data:
                 points_to_receive = int(data.decode())
                 for _ in range(points_to_receive):
                     data = connection[0].recv(1024)
                     if data:
                         neighbours_to_receive = int(data.decode())
                         data = connection[0].recv(1024)
                         if data:
                             point_id = str(data.decode())
                             for _ in range(neighbours_to_receive):
                                 data = connection[0].recv(1024)
                                 if data:
                                     neighbour_point = parse_data_point(
                                         data.decode())
                                     neighbouring_points[point_id].append(
                                         neighbour_point)
     final_classified_points = []
     for point in self.points:
         neighbours = neighbouring_points[str(point.id)]
         final_label = get_majority_vote(neighbours)
         print('Received all neighbours for', point.data,
               '(' + str(point.id) + '):', [p.label for p in neighbours])
         final_classified_points.append(
             DataPoint(point.data, final_label, point.id))
     self.points = final_classified_points
     print('CLASSIFICATION PHASE IS FINISHED')
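Read top to bottom, the phase runs three request/response rounds per connection; summarized as comments (a reading of the code above, not part of the original):

# Round 1: on SEND_DATA_REQUEST, send the batch size, wait for READY, stream the points.
# Round 2: on REQUEST_FOR_K, send k and the number of slave connections.
# Round 3: on SEND_CLASSIFICATION_DATA_REQUEST, receive each point's neighbours.
# Finally, each point's label is decided by a majority vote over its neighbours.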
 def _random_point(self) -> DataPoint:
     # empty list to hold the dimensions of the random point, which
     # this method will return
     rand_dimensions: List[float] = []
     # .num_dimensions is a property of DataPoint class
     for dimension in range(self._points[0].num_dimensions):
         # get the list of values for the dimension
         values: List[float] = self._dimension_slice(dimension)
         # calculate a random value that's within the bound of values
         rand_value: float = uniform(min(values), max(values))
         rand_dimensions.append(rand_value)
     return DataPoint(rand_dimensions)
  def to_data_points(self, timeframe=30):
    """
    You only need to run this once; the data points are saved to a new csv
    named "[csv_file]_[timeframe]min.csv".
    """
    df = pd.read_csv(self.csv_file, error_bad_lines=False)
    origin = pd.to_datetime(df['created_at'][0])
    datapoints = []
    current_data_point = DataPoint(origin) # pandas recommendation for datetime -> timestamp 

    widgets = [
        '\x1b[ Converting to Datapoints\x1b[39m',
        progressbar.Percentage(),
        progressbar.Bar(marker='\x1b[32m#\x1b[39m'),
    ]
    progress = progressbar.ProgressBar(widgets=widgets, max_value=len(df.index)).start()
    progress.update(0)
  
    for index, row in df.iterrows():
      current_time = pd.to_datetime(row['created_at'], errors='coerce')
      if (current_time - origin).total_seconds() > (timeframe * 60):
        origin = current_time
        datapoints.append(current_data_point)
        current_data_point = DataPoint(current_time)
      current_data_point.add_entry(row['sentiment'])
      progress.update(index)
    progress.finish()
    temp = pd.DataFrame([x.as_dict() for x in datapoints])
    temp.to_csv(self.csv_file + "_{}min.csv".format(timeframe))
    return datapoints
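A hedged usage sketch (the owning class and file names are hypothetical):

# series = SentimentSeries('tweets.csv')
# points = series.to_data_points(timeframe=60)  # bucket entries into 60-minute windows
# # also writes 'tweets.csv_60min.csv' next to the input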
 def _generate_centroids(self) -> None:
     for cluster in self._clusters:
         if len(cluster.points) == 0:  # keep the same centroid if no points
             continue
         means: List[float] = []
         for dimension in range(cluster.points[0].num_dimensions):
             dimension_slice: List[float] = [
                 p.dimensions[dimension] for p in cluster.points
             ]
             # add the mean of a particular dimension to the means list
             means.append(mean(dimension_slice))
         # the means list is now new centroid of the cluster
         cluster.centroid = DataPoint(means)
Example #23
    def _generate_centroids(self):
        for cluster in self._clusters:
            if len(cluster.points) == 0:  # Keep the same centroid if no points
                continue

            means = []

            for dimension in range(cluster.points[0].num_dimensions):
                dimension_slice = [
                    p.dimensions[dimension] for p in cluster.points
                ]
                means.append(mean(dimension_slice))

            cluster.centroid = DataPoint(means)
def load_dataset(filename):
    """
    loads the csv from a passed filename and puts it into a list of DataPoint
    """
    pickle_name = filename + ".pickle"
    try:
        print("trying to load " + filename + " from pickle")
        dataset = pickle.load(open(pickle_name, "rb"))
    except (FileNotFoundError, pickle.UnpicklingError):
        # csv.reader expects a text-mode file handle in Python 3
        with open(filename, 'r', newline='') as csv_file:
            print("no pickle exists. parsing file " + filename)
            dataset = [
                DataPoint(item[1:], item[0])
                for item in csv.reader(csv_file, delimiter=',')
            ]
            pickle.dump(dataset, open(pickle_name, "wb"))
    print("loaded " + filename)
    return dataset
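A hedged usage example (the file name is hypothetical): the first call parses the csv and writes a cache; later calls load the pickle instead.

# dataset = load_dataset('iris.csv')  # creates iris.csv.pickle on first run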
 def map_component_points(self, obj, points):
     for key, attribute_name in obj.point_mapping.items():
         matches = [x for x in points if x['mesa_name'] == key]
         point = matches[0] if matches else None
         if point:
             buffer_length = point.get('buffer_length')
             if buffer_length and buffer_length != getattr(obj, attribute_name).maxlen:
                 setattr(obj, attribute_name, DataPoint(maxlen=buffer_length, rpc_wait=self.agent.default_rpc_wait))
             attribute = getattr(obj, attribute_name)
             attribute.agent = self.agent
             attribute.point_name = point.get('driver_point_name', attribute.point_name)
             attribute.topic = point.get('topic_prefix', attribute.topic)
             # TODO: Deal with sunspec_sf registers.
             attribute.scale_factor = float(point.get('scale_factor') or attribute.scale_factor)
             attribute.offset = float(point.get('offset') or attribute.offset)
             attribute.unit = point.get('unit', attribute.unit)
             attribute.rpc_attempts = int(point.get('rpc_attempts') or attribute.rpc_attempts)
             attribute.rpc_wait = float(point.get('rpc_wait') or attribute.rpc_wait)
             attribute.max_data_age = float(point.get('max_data_age') or attribute.max_data_age)
             if attribute.topic and attribute.point_name:
                 self.subscriptions['devices/' + attribute.topic + '/all'].append(attribute)
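One design detail worth noting in the mapping above (an observation, not original text):

# `point.get('scale_factor') or attribute.scale_factor` falls back whenever the
# configured value is falsy, so an explicit 0 or 0.0 in the point config is
# treated as unset and silently replaced by the existing default.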
Example #26
 def __query_and_post_process(self, col: str, **kwargs) -> List[DataPoint]:
   """
   Query that returns counts grouped by comma separated values in input column.
   Post processing is required for columns that have comma separated lists
   rather than single string values.
   Optional filter args can be passed in.
   """
   results = []
   query_addon = []
   query_params = []
   for key, val in kwargs.items():
     if val:
       query_addon.append(f"{key} LIKE ?")
       query_params += ['%'+val+'%']
   if query_addon:
     query_addon = " AND " + " AND ".join(query_addon)
   else:
     query_addon = ""
   db_conn = sqlite3.connect(self.db_file)
   with db_conn:
     cur = db_conn.cursor()
     query_str = f"SELECT {col} FROM titles WHERE {col} IS NOT NULL {query_addon}"
     cur.execute(query_str, query_params)
     result_dict = {}
     for row in cur.fetchall():
       row_vals = row[0].split(',')
       for val in row_vals:
         val = val.strip()
         if val not in result_dict:
           result_dict[val] = 1
         else:
           result_dict[val] += 1
    for name, value in result_dict.items():
      try:
        results.append(DataPoint(name=name, value=value))
      except ValidationError as ve:
        self.logger.error(ve.errors())
   return results
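A worked example of the post-processing (derived from the code, not original output): two rows holding "Drama, Comedy" and "Drama" in the column yield result_dict == {'Drama': 2, 'Comedy': 1}, which becomes:

# DataPoint(name='Drama', value=2)
# DataPoint(name='Comedy', value=1)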
 def test_knn_classification(self):
     point1 = DataPoint([1.0, 1.0], 'a')
     point2 = DataPoint([0.0, 0.0], 'b')
     point3 = DataPoint([1.0, 0.0], 'a')
     point4 = DataPoint([-1.0, 1.0], 'b')
     point5 = DataPoint([-1.0, 0.0], 'a')
     points = [point3, point1, point2, point5, point4]
     new_point = DataPoint([0.0, 1.0], None)
     k1 = 3
     label1 = 'b'
     result1 = classify(points, new_point, k1)
     self.assertEqual(
         result1, label1,
         'With 3 neighbours classification should result in "b" label')
     k2 = 5
     label2 = 'a'
     result2 = classify(points, new_point, k2)
     self.assertEqual(
         result2, label2,
         'With 5 neighbours classification should result in "a" label')
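Why the two assertions hold (arithmetic from the coordinates above):

# Distances from [0.0, 1.0]:
#   point1 -> 1.0 (a), point2 -> 1.0 (b), point4 -> 1.0 (b),
#   point3 -> sqrt(2) (a), point5 -> sqrt(2) (a)
# k=3 takes the three distance-1.0 points: votes a, b, b -> 'b'
# k=5 takes all five: votes a, b, a, b, a -> 'a'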
Example #28
    def _generate_centroids(self) -> None:
        for cluster in self._clusters:
            if len(cluster.points) == 0: # keep the same centroid if no points
                continue
            means: List[float] = []
            for dimension in range(cluster.points[0].num_dimensions):
                dimension_slice: List[float] = [p.dimensions[dimension] for p in cluster.points]
                means.append(mean(dimension_slice))
            cluster.centroid = DataPoint(means)

    def run(self, max_iterations: int = 100) -> List[KMeans.Cluster]:
        for iteration in range(max_iterations):
            for cluster in self._clusters: # clear all clusters
                cluster.points.clear()
            self._assign_clusters() # find cluster each point is closest to
            old_centroids: List[DataPoint] = deepcopy(self._centroids) # record centroids
            self._generate_centroids() # find new centroids
            if old_centroids == self._centroids: # have centroids moved?
                print(f"Converged after {iteration} iterations")
                return self._clusters
        return self._clusters


if __name__ == "__main__":
    point1: DataPoint = DataPoint([2.0, 1.0, 1.0])
    point2: DataPoint = DataPoint([2.0, 2.0, 5.0])
    point3: DataPoint = DataPoint([3.0, 1.5, 2.5])
    kmeans_test: KMeans[DataPoint] = KMeans(2, [point1, point2, point3])
    test_clusters: List[KMeans.Cluster] = kmeans_test.run()
    for index, cluster in enumerate(test_clusters):
        print(f"Cluster {index}: {cluster.points}")
Example #29
    def _generate_centroids(self):
        for cluster in self._clusters:
            if len(cluster.points) == 0:  # Keep the same centroid if no points
                continue

            means = []

            for dimension in range(cluster.points[0].num_dimensions):
                dimension_slice = [
                    p.dimensions[dimension] for p in cluster.points
                ]
                means.append(mean(dimension_slice))

            cluster.centroid = DataPoint(means)

    def run(self, max_iterations=100):
        for iteration in range(max_iterations):
            for cluster in self._clusters:
                cluster.points.clear()

            self._assign_clusters()  # Find cluster each point is closest to
            old_centroids = deepcopy(self._centroids)  # Record old centroids
            self._generate_centroids()  # Find new centroids

            if old_centroids == self._centroids:  # Have centroids moved?
                print(f'Converged after {iteration} iterations')
                return self._clusters

        return self._clusters


if __name__ == '__main__':
    point_0 = DataPoint([2.0, 1.0, 1.0])
    point_1 = DataPoint([2.0, 2.0, 5.0])
    point_2 = DataPoint([3.0, 1.5, 2.5])
    k_means_test = K_Means(2, [point_0, point_1, point_2])
    test_clusters = k_means_test.run()

    for i, cluster in enumerate(test_clusters):
        print(f'Cluster {i}: {cluster.points}')
Example #30
 def __read_accelerometer(self):
     acc_reading = DataPoint()
     acc_reading.sensor_type = 'acc'
     acc = self.accelerometer.read_data()
     if acc:
         acc_reading.x = acc[0]
         acc_reading.y = acc[1]
         acc_reading.z = acc[2]
     try:
         acc_reading.YZ = math.degrees(
             math.atan(acc_reading.y / acc_reading.z))
     except ZeroDivisionError:
         acc_reading.YZ = 0
     try:
         acc_reading.XZ = math.degrees(
             math.atan(acc_reading.x / acc_reading.z))
     except ZeroDivisionError:
         acc_reading.XZ = 0
     acc_reading.time = self.current_millis_frac() - self.started_ms
     return acc_reading
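Two hedged observations on this reader (suggestions, not the original code). First, the `if acc:` guard can leave x, y and z unset, so the angle math implicitly assumes DataPoint initializes them. Second, math.atan2 keeps the quadrant and never divides by zero, which would remove both try/except blocks:

# acc_reading.YZ = math.degrees(math.atan2(acc_reading.y, acc_reading.z))
# acc_reading.XZ = math.degrees(math.atan2(acc_reading.x, acc_reading.z))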