Example #1
    def __init__(self, use_scikit=True):
        self.dc = DataClass()
        self.data = []
        self.node_data = []
        self.assignments_series = []
        self.min_final = None
        self.max_final = None
        self.files = [
            f for f in listdir("data/sensors")
            if isfile(join("data/sensors", f))
        ]
        print(self.files)
        self.n_nodes = len(self.files)
        self.n_series_disp = 10
        # self.i_run = int(self.n_nodes/2)
        self.i_run2 = 1
        # self.use_previous_cluster_data = False
        self.centroids = None
        self.final_centroids = None
        self.final_clusters = None
        self.clusters = []
        self.node_centroids = []

        self.partial_sample_index = 0
        self.use_scikit = use_scikit
Example #2
class ML_Export_TS:
    def __init__(self, use_scikit=True):
        self.dc = DataClass()
        self.data = []
        self.node_data = []
        self.assignments_series = []
        self.min_final = None
        self.max_final = None
        self.files = [
            f for f in listdir("data/sensors")
            if isfile(join("data/sensors", f))
        ]
        print(self.files)
        self.n_nodes = len(self.files)
        self.n_series_disp = 10
        # self.i_run = int(self.n_nodes/2)
        self.i_run2 = 1
        # self.use_previous_cluster_data = False
        self.centroids = None
        self.final_centroids = None
        self.final_clusters = None
        self.clusters = []
        self.node_centroids = []

        self.partial_sample_index = 0
        self.use_scikit = use_scikit

    def set_lib(self, use_scikit):
        self.use_scikit = use_scikit

    def init(self):
        self.final_centroids = None
        self.centroids = None
        self.read_data()

    def read_data(self):
        """
        Read data from files. Each file holds the data for one measurement
        node over a time frame of n days, sampled every hour.
        :return:
        """
        self.data = []
        self.node_data = []
        for i, f in enumerate(self.files[0:self.n_nodes]):
            # print(str(i) + ". reading: " + f)
            fdata = self.dc.read_data(join("data/sensors/", f))
            data = copy.copy(fdata)
            self.data.append(data)
            # deep-copy the template first so the shared Constants.NODE_MODEL
            # dict is not mutated in place
            node = copy.deepcopy(Constants.NODE_MODEL)
            node["id"] = i
            self.node_data.append(node)

    def export_ts(self, node):
        self.dc.write_data("data/output/" + str(node) + ".csv",
                           self.data[node]["series"], 2)
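A minimal usage sketch for ML_Export_TS (hedged: it assumes DataClass, Constants and the data/sensors/ layout referenced above are importable and present):

# hypothetical driver for the class above
exporter = ML_Export_TS()
exporter.init()        # reads every file under data/sensors/ into self.data
exporter.export_ts(0)  # writes node 0's series to data/output/0.csv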
Example #3
def MakePlan(Parameters):
	
	STARTTIME = time.time()
	#makedb()
	#print "dbCreated: ", time.time()-STARTTIME
	Data = DataClass(Parameters)

	#print time.time()-STARTTIME
	emptyRoute = np.empty((Data.DAYS, 2), dtype=int)
	for i in range(Parameters.days):  # assumes Parameters.days == Data.DAYS
		for j in range(2):
			emptyRoute[i, j] = Data.n + 2*i + j

	rmvd = [[] for _ in range(Data.DAYS)]  # independent lists; [[]]*n would alias a single list

	Plan = PlanVariables(emptyRoute, Data)
	heuristicResponse = Heuristic(Plan, rmvd, Data, Parameters.timeMultiplier)
	newPlan = heuristicResponse[0]
	bestPlan = newPlan
	bestObjective = heuristicResponse[1]

	iterations = 0
	
	#print time.time()-STARTTIME
	while iterations < MAXITERATIONS and (time.time() - STARTTIME) < 25:
		metaheu = 1

		while metaheu <= 3:
			if metaheu == 1:
				meta = MetaH1(newPlan.route)
			elif metaheu == 2:
				meta = MetaH2(newPlan.route)
			else:
				meta = MetaH3(newPlan.route)

			Plan = PlanVariables(meta[0], Data)
			rmvd = meta[1]
			heuristicResponse = Heuristic(Plan, rmvd, Data, Parameters.timeMultiplier)
			newPlan = heuristicResponse[0]
			newObjective = heuristicResponse[1]

			# keep an improvement, restart from the first metaheuristic and
			# reset the stop counter
			if newObjective > bestObjective:
				bestObjective = newObjective
				bestPlan = newPlan
				metaheu = 1
				iterations = 0
			else:
				metaheu = metaheu + 1
		iterations = iterations + 1
		
	
	#print time.time()-STARTTIME
	#print bestObjective
	#print bestPlan.route
	#return (no_of_locations,names,latitudes,longitudes,start,end,free)
	return [bestPlan,Data]
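For context, a hedged invocation sketch (it assumes a Parameters object exposing the days and timeMultiplier attributes used above, and that MAXITERATIONS is a module-level constant):

# hypothetical call to the planner
bestPlan, data = MakePlan(parameters)
print(bestPlan.route)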
Example #4
    def __init__(self, use_scikit=True):
        self.dc = DataClass()
        self.data = []
        self.node_data = []
        self.assignments_series = []
        self.min_final = None
        self.max_final = None
        self.files = []
        self.n_nodes = 0

        self.n_series_disp = 10
        # self.i_run = int(self.n_nodes/2)
        self.i_run2 = 1
        # self.use_previous_cluster_data = False
        self.centroids = None
        self.final_centroids = None
        self.final_clusters = None
        self.clusters = []
        self.node_centroids = []

        self.partial_sample_index = 0
        self.use_scikit = use_scikit
Example #5
    def __init__(self, parent=None):
        super(mainWindow, self).__init__(parent)

        ## ---------------------- TAB 1--------------------------------------

        self.timer = QTimer()
        self.timer.timeout.connect(self.tick)
        self.timer.start(1000)

        self.RunPlots = False
        self.NoOfPlots = 8
        self.PlotNo = {}
        self.unitNo = {}
        self.comboColor2 = {}
        self.comboStyle2 = {}
        self.sensorData = {}
        self.checkboxShow = {}
        self.comboCOMport = 1
        self.comboBaudRate = 9600
        self.filled = {}

        self.time_var = 0
        self.data1_arr = []

        self.plotColor = ['k', 'k', 'b', 'r', 'y', 'g', 'b', 'r', 'y', 'g']
        self.plotStyle = ['-', '-', '-', '-', '-', '-', '-', '-', '-', '-']
        self.usePlot = ['1', '1', '1', '1', '1', '1', '1', '1', '1', '1']

        # Tab 1
        self.tab1 = QtGui.QWidget()
        self.addTab(self.tab1, "Incoming Data")

        self.figure = plt.figure(figsize=(30, 15))
        self.resize(1200, 700)
        self.canvas = FigureCanvas(self.figure)

        ## Create grid for the plot-configuration rows; the first row holds
        ## the column header labels
        grid = QGridLayout()

        header_texts = ("Show", "Unit No", "Data", "Plot No",
                        "Offset", "Plot Color", "Plot Style")
        for col, text in enumerate(header_texts, start=1):
            label = QLabel(text)
            label.setAlignment(Qt.AlignLeft)
            grid.addWidget(label, 1, col)

        for i in range(2, self.NoOfPlots + 1):
            # Checkboxes
            self.checkboxShow[i] = QtGui.QCheckBox('', self)
            self.checkboxShow[i].setChecked(True)

            # Combo box 1 - Plot nr
            self.PlotNo[i] = QtGui.QComboBox(self)
            self.PlotNo[i].addItem("1")
            self.PlotNo[i].addItem("2")
            self.PlotNo[i].setFixedWidth(50)

            # Combo box 2 - Slave nr
            self.unitNo[i] = QtGui.QComboBox(self)
            self.unitNo[i].addItems([str(n) for n in range(1, 8)])
            self.unitNo[i].setFixedWidth(50)

            # Combo box 3 - Sensor Data
            self.sensorData[i] = QtGui.QComboBox(self)
            self.sensorData[i].addItems(["Temperature 1", "Temperature 2",
                                         "Temperature 3", "Humidity 1",
                                         "Light 1"])
            self.sensorData[i].setFixedWidth(150)

            # Offset
            line = QtGui.QLineEdit(self)
            line.setFixedWidth(50)

            # Plot Color
            colorPath = "C:/Users/KWFO/Desktop/Python_GUI/plot_colors/"
            self.comboColor2[i] = QtGui.QComboBox(self)
            for png in ("black.png", "blue.png", "red1.png",
                        # "yellow1.png",
                        "green.png", "orange.png", "magenta.png", "cyan2.png"):
                self.comboColor2[i].addItem(QIcon(colorPath + png), "")
            self.comboColor2[i].setFixedWidth(50)
            self.comboColor2[i].setCurrentIndex(
                i - 2)  # Set different color for all at startup

            # Plot Style
            self.comboStyle2[i] = QtGui.QComboBox(self)
            self.comboStyle2[i].addItem("solid")
            self.comboStyle2[i].addItem("dashed")
            self.comboStyle2[i].addItem("dots")
            self.comboStyle2[i].addItem("solid + dots")
            self.comboStyle2[i].setFixedWidth(90)

            grid.addWidget(self.checkboxShow[i], i, 1)
            grid.addWidget(self.unitNo[i], i, 2)
            grid.addWidget(self.sensorData[i], i, 3)
            grid.addWidget(self.PlotNo[i], i, 4)
            grid.addWidget(line, i, 5)
            grid.addWidget(self.comboColor2[i], i, 6)
            grid.addWidget(self.comboStyle2[i], i, 7)

        b1 = QPushButton("Plot incoming data")
        b1.clicked.connect(self.b1_clicked)
        b1.setFixedHeight(40)
        b1.setFixedWidth(125)

        b2 = QPushButton("Stop plotting")
        b2.clicked.connect(self.b2_clicked)
        b2.setFixedHeight(40)
        b2.setFixedWidth(125)

        serial_Setup = QGridLayout()
        com_port = QLabel("COM Port")
        baudrate = QLabel("Baud Rate")
        self.comboCOMport = QtGui.QComboBox(self)
        self.comboCOMport.addItems(["COM%d" % n for n in range(1, 10)])
        self.comboBaudRate = QtGui.QComboBox(self)
        # standard serial baud rates; the listed values 18200, 36400, 72800 and
        # 115600 appear to be typos for these
        self.comboBaudRate.addItems(["9600", "19200", "38400",
                                     "57600", "115200"])
        serial_Setup.addWidget(com_port, 1, 1)
        serial_Setup.addWidget(self.comboCOMport, 2, 1)
        serial_Setup.addWidget(baudrate, 1, 2)
        serial_Setup.addWidget(self.comboBaudRate, 2, 2)

        buttons = QtGui.QHBoxLayout()
        buttons.addWidget(b1)
        buttons.addWidget(b2)
        buttons.addSpacing(100)
        buttons.addLayout(serial_Setup)
        buttons.addStretch()

        self.show_plot_1 = QtGui.QCheckBox('Show Plot 1', self)
        self.show_plot_1.setChecked(True)
        self.show_plot_2 = QtGui.QCheckBox('Show Plot 2', self)
        self.show_plot_2.setChecked(True)

        # Input Data on Left Side Of Screen
        input_data = QtGui.QVBoxLayout()
        input_data.addLayout(buttons)
        input_data.addSpacing(20)
        input_data.addWidget(self.show_plot_1)
        input_data.addWidget(self.show_plot_2)
        input_data.addSpacing(40)
        input_data.addLayout(grid)
        input_data.addStretch()

        hbox = QtGui.QHBoxLayout()
        hbox.addLayout(input_data)
        hbox.addWidget(self.canvas)

        self.tab1.setLayout(hbox)
        ## ---------------------------------------------------

        # Tab 2
        self.txt_data = {}
        self.txt_data2 = {}
        self.unit_1 = {}
        self.unit_2 = {}
        #self.lineEdit = ''

        self.tab2 = QtGui.QWidget()
        self.addTab(self.tab2, "Load Saved Data")

        buttonLoadData = QPushButton("Load Data")
        buttonLoadData.clicked.connect(self.loadData_clicked)
        buttonLoadData.setFixedHeight(40)
        buttonLoadData.setFixedWidth(125)

        self.figure2 = plt.figure(figsize=(30, 15))
        self.canvas2 = FigureCanvas(self.figure2)

        tab2_hbox = QtGui.QHBoxLayout()
        tab2_hbox.addWidget(buttonLoadData)
        tab2_hbox.addWidget(self.canvas2)

        self.tab2.setLayout(tab2_hbox)

        # --------------Tab 3
        self.tab3 = QtGui.QWidget()
        self.addTab(self.tab3, "Tab3 Test")

        btnLoadData = QPushButton("Load Data")
        btnLoadData.clicked.connect(self.loadData_clicked)
        btnLoadData.setFixedHeight(40)
        btnLoadData.setFixedWidth(125)

        load_table = QTableWidget()
        load_table.setWindowTitle("Loaded Data")
        #load_table.resize(800, 800)
        load_table.setRowCount(5)
        load_table.setColumnCount(2)

        view_table = QTableWidget()
        view_table.setWindowTitle("Data to View")
        #view_table.resize(800, 800)
        view_table.setRowCount(5)
        view_table.setColumnCount(2)

        table_hbox = QtGui.QHBoxLayout()
        table_hbox.addWidget(load_table)
        table_hbox.addSpacing(50)
        table_hbox.addWidget(view_table)

        myData = DataClass(6)
        myObj = ActionClass(self.tab3, myData)

        input_data2 = QtGui.QVBoxLayout()
        input_data2.addWidget(btnLoadData)
        input_data2.addLayout(myObj.grid)
        input_data2.addLayout(table_hbox)
        input_data2.addStretch()

        self.figure3 = plt.figure(figsize=(30, 15))
        self.canvas3 = FigureCanvas(self.figure3)

        tab3_hbox = QtGui.QHBoxLayout()
        tab3_hbox.addLayout(input_data2)
        tab3_hbox.addWidget(self.canvas3)

        self.tab3.setLayout(tab3_hbox)

        # Plot example
        tab3_data = [1, 2, 3, 4, 6, 8]
        self.figure3.clf()
        tab3_ax = self.figure3.add_subplot(111)
        tab3_ax.plot(tab3_data, '--', color='blue')

        self.canvas3.draw()

        # Plot First Time
        self.plot()
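A minimal launcher sketch for this widget (an assumption: since the constructor calls self.addTab, mainWindow is taken to subclass QtGui.QTabWidget, and sys plus the PyQt4-era imports mirror those used above):

import sys

if __name__ == "__main__":
    app = QtGui.QApplication(sys.argv)  # hypothetical entry point
    window = mainWindow()
    window.show()
    sys.exit(app.exec_())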
Example #6
# from sklearn.metrics import classification_report
from sklearn.metrics.pairwise import pairwise_distances_argmin
from sklearn.cluster import KMeans

from os import listdir
from os.path import isfile, join
from math import sqrt
import math
import random

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import style

style.use('ggplot')

dc = DataClass()

files = [f for f in listdir("data") if isfile(join("data", f))]
print(files)

centroids = []

plotdata = 1
plot_original_data = 0

n_series = len(files)
n_series = 1  # debug override: process a single series

experiment_id = 2
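A short sketch of the clustering step these imports prepare for, on synthetic data rather than the project's sensor files:

# toy example: 100 daily profiles of 24 hourly values
X = np.random.rand(100, 24)
km = KMeans(n_clusters=3).fit(X)
# index of the nearest centroid for each profile
labels = pairwise_distances_argmin(X, km.cluster_centers_)
print(labels[:10])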
Example #7
class MachineLearningMain:
    def __init__(self, use_scikit=True):
        self.dc = DataClass()
        self.data = []
        self.node_data = []
        self.assignments_series = []
        self.min_final = None
        self.max_final = None
        self.files = [
            f for f in listdir("data/sensors")
            if isfile(join("data/sensors", f))
        ]
        print(self.files)
        self.n_nodes = len(self.files)
        self.n_series_disp = 10
        # self.i_run = int(self.n_nodes/2)
        self.i_run2 = 1
        # self.use_previous_cluster_data = False
        self.centroids = None
        self.final_centroids = None
        self.final_clusters = None
        self.clusters = []
        self.node_centroids = []

        self.partial_sample_index = 0
        self.use_scikit = use_scikit

    def set_lib(self, use_scikit):
        self.use_scikit = use_scikit

    def init(self):
        self.final_centroids = None
        self.centroids = None
        self.read_data()

        # self.assign_class_to_nodes()

    def assign_class_to_nodes(self):
        print("machine learning: assign class to nodes")
        assignment_index = 0
        node_id = 0
        for node in self.node_data:
            cluster = 0
            # get average cluster index for node

            n_series_node = len(self.data[node_id]["series"])

            # get the assignments for the time series corresponding to the node
            node_assignments = [None] * n_series_node
            for i in range(n_series_node):
                # cluster += self.assignments_series[assignment_index]["cluster"]
                if assignment_index < len(self.assignments_series):
                    node_assignments[i] = self.assignments_series[
                        assignment_index]["cluster"]
                    assignment_index += 1

            # node["class"] = int(cluster/n_series_node)
            # get class with max number of occurrences in list
            node["class"] = max(node_assignments, key=node_assignments.count)

            node["demand"] = int(self.clusters[node["class"]]["avg_demand"])
            node["priority"] = int(self.clusters[node["class"]]["priority"])

            # print(node)
            node_id += 1
        return self.node_data
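    # The max(..., key=node_assignments.count) above is a plain majority vote;
    # an equivalent sketch with the standard library (import assumed):
    #   from collections import Counter
    #   node["class"] = Counter(node_assignments).most_common(1)[0][0]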

    def get_info(self, node_id=None):
        if node_id is None:
            info = {"n_nodes": len(self.node_data), "nodes": self.node_data}
        else:
            info = self.node_data[node_id]
        return info

    def read_data(self):
        """
        Read data from files. Each file holds the data for one measurement
        node over a time frame of n days, sampled every hour.
        :return:
        """
        self.data = []
        self.node_data = []
        for i, f in enumerate(self.files[0:self.n_nodes]):
            # print(str(i) + ". reading: " + f)
            fdata = self.dc.read_data(join("data/sensors/", f))
            data = copy.copy(fdata)
            self.data.append(data)
            # deep-copy the template first so the shared Constants.NODE_MODEL
            # dict is not mutated in place
            node = copy.deepcopy(Constants.NODE_MODEL)
            node["id"] = i
            self.node_data.append(node)

    def get_raw_data(self, node=0):
        t_start = time.time()
        # self.read_data()
        data = self.data[node]
        imax = data["series"].shape[0]
        imax_all = 0

        # total number of series across all nodes
        for node_data in self.data:
            imax_all += node_data["series"].shape[0]

        # print('imax: ' + str(imax))
        t_end = time.time()
        vmin = int(np.min(data["series"]))
        vmax = int(np.max(data["series"]))
        dt = t_end - t_start
        info = {
            "description": "Raw data",
            "details": {
                "node": node,
                "n_series": imax,
                "n_nodes": len(self.data),
                "n_series_total": imax_all,
                "dt": int(dt * 1000),
                "min": vmin,
                "max": vmax
            },
            "headers": np.ndarray.tolist(data["headers"]),
            "dt": dt,
            "lines": data["series"].shape[0],
            "columns": data["series"].shape[1]
        }

        return (np.ndarray.tolist(data["series"][:self.n_series_disp]), info)

    def get_array_of_arrays(self, a):
        array = []
        for ag in a:
            for ag1 in ag:
                array.append(ag1)
        return array
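    # get_array_of_arrays is a one-level flatten; an equivalent sketch using
    # the standard library (import assumed):
    #   import itertools
    #   array = list(itertools.chain.from_iterable(a))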

    def get_display_data(self, d, global_scale=False):
        if d is not None:
            # centroids = d[0]
            # info = d[1]
            # return np.ndarray.tolist(centroids[:self.n_series_disp]), info

            ddata = d[0]
            info = d[1]
            start = len(ddata) - self.n_series_disp - 1
            if start < 0:
                start = 0
            end = len(ddata)
            # start = 0
            # end = len(ddata)
            # if end > self.n_series_disp - 1:
            #     end = self.n_series_disp - 1

            ddata = ddata[start:end]

            if global_scale and self.min_final is not None:
                # print("use global scale")
                vmin = self.min_final
                vmax = self.max_final
            else:
                vmin = int(np.min(ddata))
                vmax = int(np.max(ddata))

            info["details"]["min"] = vmin
            info["details"]["max"] = vmax
            return np.ndarray.tolist(ddata), info
        else:
            return None

    def get_centroids(self, data, n_clusters=8, init=None):
        if self.use_scikit:
            if n_clusters is not None:
                if init is not None:
                    kmeans = KMeans(n_clusters=n_clusters, init=init)
                else:
                    kmeans = KMeans(n_clusters=n_clusters)
            else:
                n_clusters_range = range(2, 10)
                max_silhouette_avg = [0] * len(n_clusters_range)
                # data = np.array(data)
                for (i, k) in enumerate(n_clusters_range):
                    kmeans = KMeans(n_clusters=k)
                    a = kmeans.fit_predict(data)
                    # print(data.shape)
                    # print(a)
                    # The silhouette_score gives the average value for all the samples.
                    # This gives a perspective into the density and separation of the formed
                    # clusters
                    silhouette_avg = silhouette_score(data, a)
                    # print("For n_clusters =", k,
                    #       "The average silhouette_score is :", silhouette_avg)
                    max_silhouette_avg[i] = silhouette_avg

                n_clusters = n_clusters_range[max_silhouette_avg.index(
                    max(max_silhouette_avg))]
                kmeans = KMeans(n_clusters=n_clusters)

            a = kmeans.fit(data)
            centroids = a.cluster_centers_
            return centroids, a
        else:
            if n_clusters is None:
                n_clusters = 3
            dcluster.reinit(data, n_clusters)
            # dcluster.add_new_data(data, n_clusters)
            centroids, a = dcluster.k_means_clust_dynamic()
            # print(centroids)
            return centroids, a
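    # The automatic branch above picks the k in range(2, 10) that maximizes the
    # average silhouette score; this assumes the import
    #   from sklearn.metrics import silhouette_score
    # and that dcluster (the non-scikit branch) is a module-level helper.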

    def get_assignments(self, a, data):
        if self.use_scikit:
            return a.predict(data)
        else:
            return a

    def assign_sample_to_cluster(self, node_id, sample_id):
        data = self.data[node_id]["series"]
        data1 = data[sample_id]
        data1 = [data1]
        assignments = self.get_assignments(self.final_clusters, data1)
        return assignments[0]

    def assign_partial_sample_to_cluster(self, node_id, sample_id, init=False):
        data = list(self.data[node_id]["series"][sample_id])

        if init:
            self.partial_sample_index = 0

        index = self.partial_sample_index

        min_dist = 0
        min_index = 0
        for (i, c) in enumerate(self.final_centroids):
            d = ml.euclid_dist(data[0:index], c[0:index])
            if i == 0:
                min_dist = d
            else:
                if d < min_dist:
                    min_dist = d
                    min_index = i

        partial_time_series = [0] * len(data)
        partial_time_series[0:index] = data[0:index]

        assignment = min_index

        if self.partial_sample_index < len(data) - 1:
            self.partial_sample_index += 1
        else:
            self.partial_sample_index = 0

        # # get assignments of time series to the final clusters
        partial_time_series = np.array(partial_time_series)
        return assignment, partial_time_series
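    # ml.euclid_dist is an external helper; it is assumed to be a plain
    # Euclidean distance over the first `index` values, e.g. (sketch):
    #   def euclid_dist(a, b):
    #       return math.sqrt(sum((x - y) ** 2 for x, y in zip(a, b)))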

    def assign_partial_sample_to_cluster_default(self,
                                                 node_id,
                                                 sample_id,
                                                 init=False):
        data = list(self.data[node_id]["series"][sample_id])

        if init:
            self.partial_sample_index = 0

        data1 = [0] * len(data)
        partial_time_series = [0] * len(data)
        # print(data1)
        cluster_mean = list(np.mean(self.final_centroids, axis=0))
        # print(cluster_mean)
        # print(data)
        for i in range(0, len(data)):
            if i <= self.partial_sample_index:
                data1[i] = data[i]
                partial_time_series[i] = data[i]
            else:
                data1[i] = cluster_mean[i]

        assignments = self.get_assignments(self.final_clusters, [data1])

        if self.partial_sample_index < len(data1) - 1:
            self.partial_sample_index += 1
        else:
            self.partial_sample_index = 0

        # # get assignments of time series to the final clusters
        partial_time_series = np.array(partial_time_series)
        return assignments[0], partial_time_series

    def run_clustering_on_partial_sample(self, node_id, sample_id, init=False):
        assignment, partial_time_series = self.assign_partial_sample_to_cluster(
            node_id, sample_id, init)
        vmin = int(np.min(partial_time_series))
        vmax = int(np.max(partial_time_series))
        info = {
            "description": "Partial node data loading vs global clusters",
            "headers": ["new sample"],
            "dt": 0,
            "details": {
                "node_id": node_id,
                "node_sample": sample_id,
                "assignment": int(assignment),
                "min": vmin,
                "max": vmax
            },
            "assignments": None
        }

        # print(partial_time_series)
        partial_time_series = [list(partial_time_series)]
        for (i, c) in enumerate(self.final_centroids):
            partial_time_series.append(list(c))
            info["headers"].append("cluster " + str(i))

        partial_time_series = np.array(partial_time_series)
        return partial_time_series, info

    def update_node_clusters_with_partial_sample(self,
                                                 node_id,
                                                 sample_id,
                                                 init=False):
        data = self.node_centroids[node_id]["centroids"]
        info = {
            "description": "Node clusters loading vs global clusters",
            "headers": ["data"],
            "dt": 0,
            "details": {
                "node_id": node_id,
                "node_sample": sample_id,
                "min": 0,
                "max": 0
            },
            "assignments": None
        }

        # print(partial_time_series)
        # partial_time_series = [list(partial_time_series)]
        # for (i, c) in enumerate(self.final_centroids):
        #     partial_time_series.append(list(c))
        #     info["headers"].append("cluster " + str(i))
        #
        # partial_time_series = np.array(partial_time_series)
        return data, info

    def run_clustering_on_node_id(self,
                                  node_id,
                                  nclusters,
                                  partial_sample_until_id=None,
                                  add_deviation_value=None):
        """
        Run clustering on specified node. The data from the node is an array of arrays
        (for each day there is an array of 24 values)
        The result is the consumer behaviour over the analyzed time frame
        :param node_id:
        :param nclusters:
        :return:
        """
        t_start = time.time()
        # print(self.data)
        data = copy.deepcopy(self.data[node_id]["series"])
        if partial_sample_until_id is not None:
            data = data[0:partial_sample_until_id]
            if add_deviation_value is not None:
                # apply the deviation to the last retained sample; indexing
                # data[partial_sample_until_id] after slicing would be out of range
                data[-1] += add_deviation_value

        if nclusters is not None and nclusters > len(data):
            print("node " + str(node_id) + "nclusters > len(data): " +
                  str(nclusters) + "," + str(len(data)))
            return [], None, data
        res = self.get_centroids(data, nclusters)
        centroids = res[0]
        nc = len(centroids)
        centroids_np = np.array(centroids)
        desc = "Clusters from all data (single clustering)"
        # assign each time series to a cluster
        assignments = []

        headers = []
        for i in range(len(centroids_np)):
            headers.append("cluster " + str(i))

        # the assignments of the data series to the clusters
        assignments_series = [None] * len(assignments)
        for (i, a) in enumerate(assignments):
            assignments_series[i] = {
                "series": i,
                "cluster": int(assignments[i])
            }

        t_end = time.time()
        dt = t_end - t_start
        vmin = int(np.min(centroids_np))
        vmax = int(np.max(centroids_np))

        append = True
        for n in self.node_centroids:
            if n["id"] == node_id:
                n["centroids"] = centroids_np
                append = False
                break

        if append:
            self.node_centroids.append({
                "id": node_id,
                "centroids": centroids_np
            })

        info = {
            "description": desc,
            "headers": headers,
            "dt": dt,
            "details": {
                "node": node_id,
                "new_node": node_id,
                "n_clusters": nc,
                "n_nodes": len(self.data),
                "dt": int(dt * 1000),
                "min": vmin,
                "max": vmax
            },
            "assignments": assignments_series
        }

        return centroids_np, info, data
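    # Hedged usage sketch for the method above (instance name assumed):
    #   ml = MachineLearningMain()
    #   ml.init()
    #   centroids, info, raw = ml.run_clustering_on_node_id(0, 3)
    #   print(info["details"]["n_clusters"])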

    def run_clustering_on_node_range(self, r, nclusters):
        """
        Run clustering on specified node range. The data from a node is an array of arrays
        (for each day there is an array of 24 values). The clusters are calculated
        separately for each node and added to the cluster array (various consumer
        behaviours in the network)
        :param start:
        :param end:
        :param nclusters:
        :return:
        """
        t_start = time.time()
        centroid_vect = []
        raw_data_vect = []

        if r is None:
            r = list(range(0, len(self.data)))

        # run clustering for each node and save clusters into array
        for node_id in r:
            res = self.run_clustering_on_node_id(node_id, nclusters)
            centroid_vect.append(res[0])
            raw_data_vect.append(res[2])

        centroid_vect = self.get_array_of_arrays(centroid_vect)
        # raw_data_vect = self.get_array_of_arrays(raw_data_vect)
        centroids_np = np.array(centroid_vect)

        headers = []
        for i in range(len(centroids_np)):
            headers.append("cluster " + str(i))

        t_end = time.time()
        dt = t_end - t_start
        vmin = int(np.min(centroids_np))
        vmax = int(np.max(centroids_np))
        info = {
            "description": "Clusters from node range (single clustering)",
            "headers": headers,
            "dt": dt,
            "details": {
                "node_range": r,
                "n_clusters": len(centroids_np),
                "n_nodes": len(self.data),
                "dt": int(dt * 1000),
                "min": vmin,
                "max": vmax
            },
            "assignments": None
        }

        return centroids_np, info

    def run_single_clustering_on_node_range(self, r, nclusters, n_data=None):
        """
        Run clustering on specified node. The data from the node is an array of arrays
        (for each day there is an array of 24 values)
        The result is the consumer behaviour over the analyzed time frame
        :param n_data: the number of samples to be used from the data
        :param node_id:
        :param nclusters:
        :return:
        """

        if r is None:
            r = list(range(0, len(self.data)))

        t_start = time.time()
        # print(self.data)
        # data = self.data[node_id]["series"]
        # data = [[series for series in node_series["series"]] for node_series in self.data]
        # data = np.array(data)
        # print(data.shape)
        # data = np.array([])
        data = []
        for node_id in r:
            for (i, s) in enumerate(self.data[node_id]["series"]):
                if n_data is None or i < n_data:
                    data.append(s)
        data = np.array(data)
        # print(data.shape)

        # print(self.data[0]["series"])
        # print(data)
        res = self.get_centroids(data, nclusters)
        centroids = res[0]
        nc = len(centroids)
        centroids_np = np.array(centroids)
        desc = "Clusters from all data from all nodes (single clustering)"
        # assign each time series to a cluster
        assignments = []

        headers = []
        for i in range(len(centroids_np)):
            headers.append("cluster " + str(i))

        # the assignments of the data series to the clusters
        assignments_series = [None] * len(assignments)
        for (i, a) in enumerate(assignments):
            assignments_series[i] = {
                "series": i,
                "cluster": int(assignments[i])
            }

        t_end = time.time()
        dt = t_end - t_start
        vmin = int(np.min(centroids_np))
        vmax = int(np.max(centroids_np))

        info = {
            "description": desc,
            "headers": headers,
            "dt": dt,
            "details": {
                "n_clusters": nc,
                "n_nodes": len(self.data),
                "dt": int(dt * 1000),
                "min": vmin,
                "max": vmax
            },
            "assignments": assignments_series
        }

        return centroids_np, info, data

    def run_dual_clustering_on_node_range(self, r, nclusters, nclusters_final):
        """
         Run dual clustering on specified node range.
         The data from a node is an array of arrays
        (for each day there is an array of 24 values).
        The clusters are calculated separately for each node and added to the cluster array.
        Then, there is another clustering on this cluster array which returns
        the final clusters for all the network (consumer types in the network)
        :param r:
        :param nclusters:
        :param nclusters_final:
        :return:
        """

        t_start = time.time()
        centroid_vect = []
        raw_data_vect = []

        if r is None:
            r = list(range(0, len(self.data)))

        print("node range: ", r)

        # run clustering for each node and save clusters into array
        for node_id in r:
            res = self.run_clustering_on_node_id(node_id, nclusters)
            centroid_vect.append(res[0])
            raw_data_vect.append(res[2])

        centroid_vect = self.get_array_of_arrays(centroid_vect)
        raw_data_vect = self.get_array_of_arrays(raw_data_vect)

        n_clusters_total = len(centroid_vect)
        centroids_np = np.array(centroid_vect)

        # run clustering again for the previous clusters
        res = self.get_centroids(centroids_np, nclusters_final,
                                 self.final_centroids)
        centroids = res[0]
        self.final_centroids = res[0]
        self.final_clusters = res[1]
        nc = len(centroids)
        centroids_np = np.array(centroids)

        # get assignments of time series to the final clusters

        assignments = self.get_assignments(res[1], raw_data_vect)

        n = len(centroids_np)
        headers = [None] * n
        self.clusters = []
        demands = []
        for i in range(n):
            headers[i] = "cluster " + str(i)
            # copy the template first so the shared Constants.CLUSTER_MODEL
            # dict is not mutated in place
            cluster = copy.deepcopy(Constants.CLUSTER_MODEL)
            cluster["id"] = assignments[i]
            avg_demand = np.average(centroids_np[i])
            cluster["avg_demand"] = avg_demand
            demands.append(avg_demand)
            cluster["centroid"] = centroids_np[i]
            self.clusters.append(cluster)

        demands = np.array(demands)
        temp = demands.argsort()
        ranks = np.empty_like(temp)
        ranks[temp] = np.arange(len(demands))

        for i in range(n):
            self.clusters[i]["priority"] = ranks[i]

        # print(self.clusters)
        # the assignments of the data series to the clusters
        self.assignments_series = [None] * len(assignments)
        for (i, a) in enumerate(assignments):
            self.assignments_series[i] = {"series": i, "cluster": int(a)}

        t_end = time.time()
        dt = t_end - t_start
        vmin = int(np.min(centroids_np))
        vmax = int(np.max(centroids_np))

        self.min_final = vmin
        self.max_final = vmax

        info = {
            "description": "Clusters from node range (dual clustering)",
            "headers": headers,
            "dt": dt,
            "details": {
                "node_range": r,
                "n_clusters": nc,
                "n_nodes": len(self.data),
                "dt": int(dt * 1000),
                "min": vmin,
                "max": vmax
            },
            "assignments": self.assignments_series
        }

        return centroids_np, info
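    # Hedged usage sketch for the two-stage pipeline above: 2 clusters per
    # node, then 3 final consumer types for the whole network (names assumed):
    #   centroids, info = ml.run_dual_clustering_on_node_range(None, 2, 3)
    #   nodes = ml.assign_class_to_nodes()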

    def run_clustering_twice(self, node=None):
        """
        NOTE: DEPRECATED
        node == None => run clustering for all nodes and then run clustering again on all clusters
        node > 0 => run clustering for the selected node
        :param plot:
        :return:
        :param node:
        :return:
        """
        t_start = time.time()
        nclusters = 2
        nclusters_final = 3
        centroids = []
        data = []
        desc = ""
        assignments = []
        data_array_for_all = []
        try:
            if node is None:
                # print("consumer nodes: " + str(len(self.data)))

                # for i in range(0, len(self.data)):
                for i in range(0, self.i_run2):
                    data_array = self.data[i]["series"]
                    # data_array_for_all.append([d.tolist() for d in data_array])
                    for data_array1 in data_array:
                        data_array_for_all.append(data_array1)
                    # data_array has multiple time series from the same consumer
                    len_data = len(data_array)
                    data_array1 = data_array
                    # data_array1 = data_array[0:int(len_data / 2)]
                    # if self.centroids is None:
                    #     kmeans = KMeans(n_clusters=nclusters)
                    # else:
                    #     kmeans = KMeans(n_clusters=nclusters, init=self.centroids[i])

                    kmeans = KMeans(n_clusters=nclusters)
                    # print kmeans
                    # Compute cluster centers and predict cluster index for each sample.
                    a = kmeans.fit(data_array1)
                    # print a.cluster_centers_
                    assignments = a.predict(data_array1)

                    centroid = a.cluster_centers_
                    # print(centroid)
                    centroids.append(centroid)

                self.centroids = centroids
                centroids_all = []
                for centroid_group in centroids:
                    for centroid in centroid_group:
                        centroids_all.append(centroid)

                n_clusters_total = len(centroids_all)
                centroids_np = np.array(centroids_all)

                desc = "Final clusters (double clustering)"

                if self.final_centroids is None:
                    kmeans = KMeans(n_clusters=nclusters_final)
                else:
                    kmeans = KMeans(n_clusters=nclusters_final,
                                    init=self.final_centroids)
                # kmeans = KMeans(n_clusters=nclusters_final)
                # print kmeans
                # Compute cluster centers and predict cluster index for each sample.
                a = kmeans.fit(centroids_np)
                assignments = a.predict(data_array_for_all)
                self.final_centroids = a.cluster_centers_
                data = self.final_centroids

            else:
                desc = "Clusters from all data (single clustering)"
                data_array = self.data[node]["series"]
                kmeans = KMeans(n_clusters=nclusters_final)
                # print kmeans
                # Compute cluster centers and predict cluster index for each sample.
                a = kmeans.fit(data_array)
                # print a.cluster_centers_
                assignments = a.predict(data_array)
                centroids = a.cluster_centers_

                n_clusters_total = len(centroids)
                data = centroids

            headers = []
            for i in range(len(data)):
                headers.append("cluster " + str(i))

            assignments_series = [None] * len(assignments)
            for (i, a) in enumerate(assignments):
                assignments_series[i] = {
                    "series": i,
                    "cluster": int(assignments[i])
                }

            t_end = time.time()
            dt = t_end - t_start
            vmin = np.min(data)
            vmax = np.max(data)
            # print("min: " + str(np.min(data)))
            info = {
                "description": desc,
                "headers": headers,
                "dt": dt,
                "details": {
                    "node": node,
                    "new_node": self.i_run2,
                    "n_clusters": n_clusters_total,
                    "n_nodes": len(self.data),
                    "dt": int(dt * 1000),
                    "min": vmin,
                    "max": vmax
                },
                "assignments": assignments_series
            }
        except Exception:
            info = "failed"

        self.i_run2 += 1
        if self.i_run2 >= self.n_nodes:
            self.i_run2 = 1
        return np.ndarray.tolist(data[:self.n_series_disp]), info
Example #8
class ML_dualVsSingleNumberOfClusters:
    def __init__(self, use_scikit=True):
        self.dc = DataClass()
        self.data = []
        self.node_data = []
        self.assignments_series = []
        self.min_final = None
        self.max_final = None
        self.files = [
            f for f in listdir("data/sensors")
            if isfile(join("data/sensors", f))
        ]
        print(self.files)
        self.n_nodes = len(self.files)
        self.n_series_disp = 10
        # self.i_run = int(self.n_nodes/2)
        self.i_run2 = 1
        # self.use_previous_cluster_data = False
        self.centroids = None
        self.final_centroids = None
        self.final_clusters = None
        self.clusters = []
        self.node_centroids = []

        self.partial_sample_index = 0
        self.use_scikit = use_scikit

    def set_lib(self, use_scikit):
        self.use_scikit = use_scikit

    def init(self):
        self.final_centroids = None
        self.centroids = None
        self.read_data()

    def read_data(self):
        """
        Read data from files. Each file holds the data for one measurement
        node over a time frame of n days, sampled every hour.
        :return:
        """
        self.data = []
        self.node_data = []
        for i, f in enumerate(self.files[0:self.n_nodes]):
            # print(str(i) + ". reading: " + f)
            fdata = self.dc.read_data(join("data/sensors/", f))
            data = copy.copy(fdata)
            self.data.append(data)
            # deep-copy the template first so the shared Constants.NODE_MODEL
            # dict is not mutated in place
            node = copy.deepcopy(Constants.NODE_MODEL)
            node["id"] = i
            self.node_data.append(node)

    def test_single_scenario(self):
        """
        test with different number of clusters for stage 2 (2 stage clustering)
        comparing the deviation from single stage clustering
        """
        self.set_lib(True)

        # res_dual = self.run_dual_clustering_on_node_range(None, None, 3)
        # n_data = len(self.data[0])
        n_data = 81
        n_clusters = 3
        # n_clusters_for_nodes = 80
        n_clusters_for_nodes = None

        print("n_data: ", n_data)
        print("n_clusters_for_nodes: ", n_clusters_for_nodes)
        res_dual1 = run_dual_clustering_on_node_range(self.data, None,
                                                      n_clusters_for_nodes,
                                                      n_clusters)
        res_single1 = run_clustering_for_all_nodes_at_once(
            self.data, None, n_clusters, n_data)
        res_all1 = np.concatenate((res_dual1, res_single1), axis=0)
        comp, ca, rd = get_comp(res_dual1, res_single1)
        print("comp_avg: " + str(ca))

        res_all = copy.copy(res_all1)
        res_dual = copy.copy(res_dual1)
        res_single = copy.copy(res_single1)
        comp_avg = ca
        res_diff = rd

        colors = ['b'] * n_clusters
        colors2 = ['g:'] * n_clusters
        cluster_labels1 = ["cd" + str(i + 1) for i in range(n_clusters)]
        cluster_labels2 = ["cs" + str(i + 1) for i in range(n_clusters)]

        plot_from_matrix(res_all, colors + colors2)
        plt.legend(cluster_labels1 + cluster_labels2)

        if n_clusters_for_nodes is None:
            n_clusters_for_nodes = "auto"

        plt.title("number of clusters for nodes: " +
                  str(n_clusters_for_nodes) + ", average deviation: " +
                  str(int(comp_avg)))
        plt.xlabel("Time of day (hours)")
        plt.ylabel("Value of cluster centroids ($m^3 / s$)")
        plt.show()
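    # Note: run_dual_clustering_on_node_range, run_clustering_for_all_nodes_at_once,
    # get_comp and plot_from_matrix are module-level helpers not shown in this
    # excerpt; get_comp is used here as if it returned
    # (per-element deviation, average deviation, difference matrix).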

    def test_full_range(self):
        """
        test with different number of clusters for stage 2 (2 stage clustering)
        comparing the deviation from single stage clustering
        """
        self.set_lib(True)

        # res_dual = self.run_dual_clustering_on_node_range(None, None, 3)
        n_clusters = 3

        nc_max = 81
        n_data = nc_max

        r1 = list(range(2, nc_max))
        # r1 = [2, 10, 82]
        n_clusters_for_nodes_range = [None] + r1
        comp_avg_vect = [0] * len(n_clusters_for_nodes_range)
        # test_index = 0

        test_index = len(n_clusters_for_nodes_range) - 1

        for (i, k) in enumerate(n_clusters_for_nodes_range):
            ncn = k
            print("n_clusters_for_nodes: " + str(k))
            res_dual1 = run_dual_clustering_on_node_range(
                self.data, None, ncn, n_clusters)
            res_single1 = run_clustering_for_all_nodes_at_once(
                self.data, None, n_clusters, n_data)
            res_all1 = np.concatenate((res_dual1, res_single1), axis=0)
            comp, ca, rd = get_comp(res_dual1, res_single1)
            comp_avg_vect[i] = ca
            print("comp_avg: " + str(ca))

            if i == test_index:
                res_all = copy.copy(res_all1)
                res_dual = copy.copy(res_dual1)
                res_single = copy.copy(res_single1)
                n_clusters_for_nodes = k
                comp_avg = ca
                res_diff = rd

        colors = ['b'] * n_clusters
        colors2 = ['g:'] * n_clusters
        cluster_labels1 = ["cd" + str(i + 1) for i in range(n_clusters)]
        cluster_labels2 = ["cs" + str(i + 1) for i in range(n_clusters)]

        plot_from_matrix(res_all, colors + colors2)
        plt.legend(cluster_labels1 + cluster_labels2)

        if n_clusters_for_nodes is None:
            n_clusters_for_nodes = "auto"

        plt.title("number of clusters for nodes: " +
                  str(n_clusters_for_nodes) + ", average deviation: " +
                  str(int(comp_avg)))

        plt.figure()

        comp_avg_dynamic = comp_avg_vect[0]
        comp_avg_vect = comp_avg_vect[1:]
        n_clusters_for_nodes_range = n_clusters_for_nodes_range[1:]

        print("comp_avg_trim")
        print(comp_avg_vect)
        print("comp_dynamic")
        print(comp_avg_dynamic)

        result_obj = [0] * len(n_clusters_for_nodes_range)
        for (i, nc) in enumerate(n_clusters_for_nodes_range):
            result_obj[i] = {"nc": nc, "val": comp_avg_vect[i]}

        result_b_obj = {"nc": None, "val": comp_avg_dynamic}

        print(result_obj)

        vmax = max(node["val"] for node in result_obj)
        vmin = min(node["val"] for node in result_obj)
        imin = result_obj[0]["nc"]
        imax = result_obj[0]["nc"]
        # ncmax = result_obj

        for obj in result_obj:
            if obj["val"] == vmax:
                imax = obj["nc"]
            if obj["val"] == vmin:
                imin = obj["nc"]

        if result_b_obj["val"] < vmin:
            result_b_obj["nc"] = imin
        if result_b_obj["val"] > vmax:
            result_b_obj["nc"] = imax
        # place the dynamic result between the two fixed-k results it falls between
        for i in range(len(result_obj) - 1):
            lo = result_obj[i]["val"]
            hi = result_obj[i + 1]["val"]
            if lo <= result_b_obj["val"] <= hi or lo >= result_b_obj["val"] >= hi:
                result_b_obj["nc"] = result_obj[i]["nc"]
                break

        print(result_b_obj)
        # return True
        width = 0.35  # the width of the bars
        plt.bar(n_clusters_for_nodes_range, comp_avg_vect, width)
        plt.bar(result_b_obj["nc"] - width, result_b_obj["val"], width)
        plt.xlabel("number of clusters for nodes")
        plt.ylabel("average deviation from single clustering")
        plt.show()
Example #9
class ML_Anomaly:
    def __init__(self, use_scikit=True):
        self.dc = DataClass()
        self.data = []
        self.node_data = []
        self.assignments_series = []
        self.min_final = None
        self.max_final = None
        self.files = [
            f for f in listdir("data/sensors")
            if isfile(join("data/sensors", f))
        ]
        print(self.files)
        self.n_nodes = len(self.files)
        self.n_series_disp = 10
        # self.i_run = int(self.n_nodes/2)
        self.i_run2 = 1
        # self.use_previous_cluster_data = False
        self.centroids = None
        self.final_centroids = None
        self.final_clusters = None
        self.clusters = []
        self.node_centroids = []

        self.partial_sample_index = 0
        self.use_scikit = use_scikit

    def set_lib(self, use_scikit):
        self.use_scikit = use_scikit

    def init(self):
        self.final_centroids = None
        self.centroids = None
        self.read_data()

    def read_data(self):
        """
        Read data from files. Each file holds the data for one measurement
        node over a time frame of n days, sampled every hour.
        :return:
        """
        self.data = []
        self.node_data = []
        for i, f in enumerate(self.files[0:self.n_nodes]):
            # print(str(i) + ". reading: " + f)
            fdata = self.dc.read_data(join("data/sensors/", f))
            data = copy.copy(fdata)
            self.data.append(data)
            # deep-copy the template first so the shared Constants.NODE_MODEL
            # dict is not mutated in place
            node = copy.deepcopy(Constants.NODE_MODEL)
            node["id"] = i
            self.node_data.append(node)

    def run_test_3(self):
        """
        adding an anomaly at a certain point (constant additional demand)
        comparing the evolution of clusters (with partial clustering) to the normal evolution of clusters
        observing the velocity of change that gives the time until steady state error
        """

        day_start_deviation = 10
        day_end = 20
        deviation = 200
        # iterations = day_end - day_start_deviation
        iterations = day_end
        deviation_element_vect = [None] * iterations
        deviation_total_vect = [None] * iterations
        res_partial_whole_vect = [None] * iterations
        res_partial_whole_anomaly_vect = [None] * iterations

        data = copy.deepcopy(self.data[0]["series"])
        data_with_constant_anomaly = copy.deepcopy(self.data[0]["series"])

        # add a constant deviation (anomaly) to the second data set, starting
        # at day_start_deviation (index of the first anomalous day)
        for d in range(day_start_deviation, day_end):
            for j in range(len(data_with_constant_anomaly[d])):
                data_with_constant_anomaly[d][j] += deviation

        centroids_init = dcluster.reinit(data[0:day_start_deviation - 1], 2, 5)
        res_partial_whole, a = dcluster.k_means_clust_dynamic()

        # for i, d in enumerate(range(day_start_deviation, day_end)):
        for i, d in enumerate(range(day_end)):
            print(str(i) + "," + str(d))
            res_partial_whole_vect[i] = copy.deepcopy(
                dcluster.k_means_clust_dynamic_partial_update_whole(data[d]))

        # run clustering update for second data set with anomalies
        dcluster.reinit(data_with_constant_anomaly[0:day_start_deviation - 1],
                        2, 5, centroids_init)
        res_partial_whole_anomaly, a = dcluster.k_means_clust_dynamic()

        # for i,d in enumerate(range(day_start_deviation, day_end)):
        for i, d in enumerate(range(day_end)):
            print(str(i) + "," + str(d))
            res_partial_whole_anomaly_vect[i] = copy.deepcopy(
                dcluster.k_means_clust_dynamic_partial_update_whole(
                    data_with_constant_anomaly[d]))

        # plot the results (deviation between the two data sets)
        for i, d in enumerate(range(day_end)):
            total_deviation, average_deviation, deviation_per_element = get_comp(
                res_partial_whole_anomaly_vect[i], res_partial_whole_vect[i])
            print(average_deviation)
            deviation_total_vect[i] = average_deviation
            deviation_element_vect[i] = deviation_per_element

            plt.clf()
            for ts in res_partial_whole_vect[i]:
                plt.plot(ts)
            for ts in res_partial_whole_anomaly_vect[i]:
                plt.plot(ts)

            plt.gca().set_title("deviation " + str(d) +
                                ", with anomaly from day " +
                                str(day_start_deviation))
            plt.pause(0.2)

        print(deviation_total_vect)

        plt.clf()
        # plt.subplot(212)
        plt.plot(deviation_total_vect)
        # plt.ylim([-100, 100])
        plt.gca().set_title("anomaly transient effect on cluster centroids")
        plt.xlabel("time (days)")
        plt.ylabel("standard deviation")
        plt.show()
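For reference, a minimal sketch of a deviation measure consistent with how get_comp is used in these examples; this is an assumption, not the project's actual helper:

import numpy as np

def get_comp_sketch(a, b):
    # element-wise absolute deviation between two centroid matrices of the
    # same shape, returned as (total, average, per-element)
    diff = np.abs(np.array(a) - np.array(b))
    return diff.sum(), diff.mean(), diff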