Esempio n. 1
0
    def test_cluster_points_two_cluster(self):
        """Check the clusters returned by ``KMeans.cluster_points``.

        A ``KMeans`` instance is created from the fixture vector and the
        value 2. Two query points are collected into a ``DataVector`` and
        handed to ``cluster_points``; the coordinates stored in the returned
        clusters are then compared with the expected fixture coordinates.
        """
        kmeans = KMeans(self.create_test_data_vector(), 2)

        # Build the two query points, adding dimensions in the listed order.
        query_points = []
        for coordinates in ((1.1, 2.1, 3.1), (3.1, 1.1, 2.1)):
            query_point = datapoint.DataPoint()
            for coordinate in coordinates:
                query_point.add_dimension(coordinate)
            query_points.append(query_point)

        test_cluster = datapoint.DataVector()
        for query_point in query_points:
            test_cluster.add_point(query_point)

        # (expected coordinates, cluster index, point index within cluster)
        expectations = (
            ([1.0, 2.0, 3.0], 0, 0),
            ([2.0, 3.0, 1.0], 0, 1),
            ([3.0, 1.0, 2.0], 1, 0),
        )
        for expected, cluster_index, point_index in expectations:
            # cluster_points is called once per assertion.
            clusters = kmeans.cluster_points(test_cluster)
            self.assertEqual(
                expected,
                clusters[cluster_index].data_points[point_index].coordinates)
 def read_data(self, filename, p, train):
     """Load labelled points from a CSV file and split them train/test.

     Every row yields one ``datapoint.DataPoint`` built from columns 1 and
     2 (converted to float) and a label taken from column 3 (converted to
     int). A row whose column 3 is the string ``'1'`` counts as positive;
     every other row counts as negative. All points are shuffled, the
     first ``train`` of them form the training split and the rest form
     the test split.

     Args:
         filename: Path of the comma-delimited file to read.
         p: When exactly ``True``, negative rows receive the label ``-1``
             instead of the value found in the file.
         train: Number of shuffled points placed in the training split.

     Side effects:
         Sets ``self.positive``, ``self.negative``, ``self.positive_test``
         and ``self.negative_test``. The lists come from set
         intersections, so ``DataPoint`` must be hashable and the order
         of the resulting lists is not defined.
     """
     positive_temp = []
     negative_temp = []
     all_temp = []
     # newline='' leaves newline handling to the csv module, as the csv
     # documentation recommends for file objects given to csv.reader.
     with open(filename, newline='') as csv_file:
         for row in csv.reader(csv_file, delimiter=','):
             is_positive = row[3] == '1'
             if not is_positive and p is True:
                 label = -1
             else:
                 label = int(row[3])
             point = datapoint.DataPoint(float(row[1]), float(row[2]),
                                         label)
             if is_positive:
                 positive_temp.append(point)
             else:
                 negative_temp.append(point)
             all_temp.append(point)

     shuffle(all_temp)

     # Build each membership set once instead of once per intersection.
     train_set = set(all_temp[:train])
     test_set = set(all_temp[train:])
     positive_set = set(positive_temp)
     negative_set = set(negative_temp)

     self.positive = list(positive_set & train_set)
     self.positive_test = list(positive_set & test_set)
     self.negative = list(negative_set & train_set)
     self.negative_test = list(negative_set & test_set)
Esempio n. 3
0
    def get_rds_metric(self, starttime, endtime, metricname, namespace,
                       dbclusterid, statistic, period):
        """Fetch one metric statistic for a DB cluster, sorted by timestamp.

        Calls ``self.cloudwatch.get_metric_statistics`` filtered on the
        ``DBClusterIdentifier`` dimension and returns the data points in
        ascending timestamp order.

        Args:
            starttime: Start of the query window, a string formatted as
                ``%Y-%m-%dT%H:%M:%S``.
            endtime: End of the query window, same format as ``starttime``.
            metricname: Value passed as ``MetricName``.
            namespace: Value passed as ``Namespace``.
            dbclusterid: Value of the ``DBClusterIdentifier`` dimension.
            statistic: The single statistic to request; also the key read
                from every returned data point.
            period: Value passed as ``Period``.

        Returns:
            A ``(timestamps, values)`` tuple of two parallel lists:
            timestamps as ``%Y-%m-%dT%H:%M:%S`` strings and the matching
            statistic values as floats.

        Raises:
            ValueError: If ``starttime`` or ``endtime`` does not match the
                expected format.
        """
        time_format = "%Y-%m-%dT%H:%M:%S"

        datapoints = self.cloudwatch.get_metric_statistics(
            Period=period,
            StartTime=datetime.datetime.strptime(starttime, time_format),
            EndTime=datetime.datetime.strptime(endtime, time_format),
            MetricName=metricname,
            Namespace=namespace,
            Dimensions=[{
                'Name': 'DBClusterIdentifier',
                'Value': dbclusterid
            }],
            Statistics=[statistic])['Datapoints']

        # Read the value under the requested statistic's own key. A
        # hard-coded "Average" key fails with KeyError for any other
        # statistic, since only requested statistics are returned.
        datapoint_list = [
            dp.DataPoint(item["Timestamp"].strftime(time_format),
                         float(item[statistic]))
            for item in datapoints
        ]

        # The timestamp strings are zero-padded, so sorting them as text
        # orders them chronologically.
        datapoint_list.sort(key=operator.attrgetter('timestamp'))

        timestamp = [point.timestamp for point in datapoint_list]
        value = [point.value for point in datapoint_list]

        return timestamp, value
Esempio n. 4
0
    def create_test_data_vector():
        """Return a ``DataVector`` holding three fixed 3-dimensional points.

        The points are added in this order: (1.0, 2.0, 3.0),
        (3.0, 1.0, 2.0), (2.0, 3.0, 1.0).

        NOTE(review): the function takes no ``self``; presumably a
        ``@staticmethod`` decorator sits just above this definition --
        confirm.
        """
        fixture_rows = (
            (1.0, 2.0, 3.0),
            (3.0, 1.0, 2.0),
            (2.0, 3.0, 1.0),
        )

        vector = datapoint.DataVector()
        for fixture_row in fixture_rows:
            new_point = datapoint.DataPoint()
            for coordinate in fixture_row:
                new_point.add_dimension(coordinate)
            vector.add_point(new_point)
        return vector
Esempio n. 5
0
    def test_closest_cluster_index(self):
        """Check that ``closest_cluster_index`` returns 0 for the probe point.

        The probe point (1.1, 2.1, 3.1) is looked up against the fixture
        vector, which is also the vector the ``KMeans`` instance was
        created from.
        """
        fixture_vector = self.create_test_data_vector()
        kmeans = KMeans(fixture_vector, 2)

        probe = datapoint.DataPoint()
        for coordinate in (1.1, 2.1, 3.1):
            probe.add_dimension(coordinate)

        found_index = kmeans.closest_cluster_index(fixture_vector, probe)
        self.assertEqual(0, found_index)
Esempio n. 6
0
 def read_file_to_data_points(self) -> "datapoint.DataVector":
     """Parse the delimited file at ``self.file_path`` into a DataVector.

     Each row becomes one ``datapoint.DataPoint`` whose ``coordinates``
     receive the row's fields converted to float, in order. The file is
     split on ``self.delimiter``. After reading, the number of processed
     rows is printed.

     Returns:
         The ``datapoint.DataVector`` containing one point per row.

     Raises:
         OSError: If the file cannot be opened.
         ValueError: If a field cannot be converted to ``float``.
     """
     data_vector = datapoint.DataVector()
     # Stays 0 when the file has no rows, so the summary still prints.
     line_count = 0
     # newline='' leaves newline handling to the csv module, as the csv
     # documentation recommends for file objects given to csv.reader.
     with open(self.file_path, mode='r', newline='') as open_file:
         csv_reader = csv.reader(open_file, delimiter=self.delimiter)
         for line_count, row in enumerate(csv_reader, start=1):
             point = datapoint.DataPoint()
             for item in row:
                 point.coordinates.append(float(item))
             data_vector.add_point(point)
     print(f'Processed {line_count} lines.')
     return data_vector