Exemple #1
0
    def calc(self):
        """Scan 'busi_yyt.csv' with a sliding window and print confirmed drops.

        For every start key between ``starttime`` and ``endtime`` (step 60) a
        29-point window is handed to
        ``skyline_algorithms.run_selected_algorithm``.  When the window is
        flagged, its last value is compared with the mean of the values
        stored 1..9 multiples of 86400 earlier; the point is printed only if
        it is below 70% of that mean.

        NOTE(review): keys look like epoch seconds at one-minute resolution
        (they are passed to ``time.localtime``) -- confirm against the loader.

        Raises:
            KeyError: if a key needed by a window or by the history lookup is
                missing from the loaded series.
        """
        series = dict(ld.load_data().csv('busi_yyt.csv'))

        starttime = 1522080000
        endtime = 1523030340
        history_days = range(1, 10)

        for window_start in range(starttime, endtime, 60):
            window = [(ts, series[ts])
                      for ts in range(window_start, window_start + 60 * 29, 60)]

            anomalous, ensemble, datatime, datapoint = skyline_algorithms.run_selected_algorithm(
                window, 'test')

            if not anomalous:
                continue

            last_ts, last_value = window[-1]
            history = [series[last_ts - 86400 * day] for day in history_days]

            # Divide by the number of samples actually summed.  The previous
            # code summed nine days but divided by ten, biasing the mean low.
            v_mean = sum(history) / len(history)
            print(v_mean)

            if last_value < v_mean * 0.7:
                time_local = time.localtime(datatime)
                print(
                    str(ensemble) + ':' +
                    str(time.strftime("%Y-%m-%d %H:%M:%S", time_local)) +
                    ':' + str(datapoint))
Exemple #2
0
    def calc(self):
        """Run the selected Skyline algorithms over 30-row sliding windows.

        Loads 'busi_yyt.csv', feeds every run of 30 consecutive rows to
        ``skyline_algorithms.run_selected_algorithm`` and prints
        ``<ensemble>:<local time>:<datapoint>`` for each window.  Nothing is
        printed when fewer than 30 rows are loaded.
        """
        window_size = 30
        data = ld.load_data().csv('busi_yyt.csv')

        # Slide a fixed-size window over the rows.  The previous loop used
        # range(i + 30), which restarted at row 0 on every pass (an
        # ever-growing prefix and quadratic work) and never reached the
        # final row.
        for start in range(len(data) - window_size + 1):
            window = list(data[start:start + window_size])

            anomalous, ensemble, datatime, datapoint = skyline_algorithms.run_selected_algorithm(window, 'test')

            time_local = time.localtime(datatime)

            print(str(ensemble) + ':' + str(time.strftime("%Y-%m-%d %H:%M:%S", time_local)) + ':' + str(datapoint))
    def calc(self):
        """Print the Skyline verdict for every 60-point window of 'busi_1.csv'.

        The loaded mapping is flattened into ``(key, value)`` pairs in
        ascending key order.  Each run of 60 consecutive pairs is passed to
        ``skyline_algorithms.run_selected_algorithm`` and one line
        ``<ensemble>:<local time>:<datapoint>`` is printed per window,
        whether or not the window was flagged.
        """
        series = ld.load_data().csv('busi_1.csv')
        points = [(key, series[key]) for key in sorted(series)]

        window_size = 60

        # range() is empty when there are fewer than window_size points.
        for start in range(len(points) - window_size + 1):
            window = points[start:start + window_size]

            anomalous, ensemble, datatime, datapoint = skyline_algorithms.run_selected_algorithm(
                window, 'test')

            stamp = time.strftime("%Y-%m-%d %H:%M:%S",
                                  time.localtime(datatime))

            print(str(ensemble) + ':' + str(stamp) + ':' + str(datapoint))
Exemple #4
0
    def calc(self):
        """Plot one day of 'busi_yyt.csv' with raw and filtered anomaly points.

        For each key from ``starttime + 30 * 60`` up to ``endtime`` (step 60)
        the 30 preceding points are passed to
        ``skyline_algorithms.run_selected_algorithm``.  Flagged points are
        drawn in green; those for which ``anomaly_filter(datatime, data)`` is
        also truthy are drawn in red on top.  The x axis is the offset from
        ``starttime`` divided by 3600.
        """
        data = ld.load_data().csv('busi_yyt.csv')
        lookup = dict(data)

        starttime = 1522598400
        endtime = starttime + 60 * 60 * 24

        def to_hours(ts):
            # Offset from the start of the plotted range, scaled by 3600.
            return float((ts - starttime)) / 3600

        values = []
        hour_axis = []
        flagged_x, flagged_y = [], []
        kept_x, kept_y = [], []

        for current in range(starttime + 30 * 60, endtime, 60):
            values.append(lookup[current])
            hour_axis.append(to_hours(current))

            # Window of the 30 points strictly before `current`.
            window = [(ts, lookup[ts])
                      for ts in range(current - 30 * 60, current, 60)]

            anomalous, ensemble, datatime, datapoint = skyline_algorithms.run_selected_algorithm(window, 'test')

            print(str(datatime) + ':' + str(ensemble))

            if not anomalous:
                continue

            # Anomaly point reported by the ensemble.
            flagged_x.append(to_hours(current))
            flagged_y.append(datapoint)

            # Second-stage filter decides whether the point is kept.
            if anomaly_filter(datatime, data):
                kept_x.append(to_hours(current))
                kept_y.append(datapoint)

        plt.subplot(3, 1, 1)
        plt.plot(hour_axis, values, linewidth=0.5)

        plt.scatter(flagged_x, flagged_y, color='green', s=5)
        plt.scatter(kept_x, kept_y, color='red', s=5)

        plt.ylabel('异动判断')

        plt.xticks(np.arange(0, 24, 1))

        plt.show()
Exemple #5
0
    def calc(self):
        """Plot one day of 'busi_yyt.csv' with ensemble and gradient anomalies.

        For each key in ``[starttime, endtime - 60)`` (step 60) a 60-point
        window starting at that key is passed to
        ``skyline_algorithms.run_selected_algorithm`` and to
        ``tsd_gradient.getGradient``.  All gradients collected so far are
        min-max scaled on every pass; a flagged point is additionally marked
        red when the latest scaled gradient is below 0.3 or above 0.7.

        Panel 1 shows the series with both marker sets; panel 3 shows the
        scaled gradients from the final pass.
        """
        lookup = dict(ld.load_data().csv('busi_yyt.csv'))

        starttime = 1522598400
        endtime = starttime + 60 * 60 * 24

        # Plotted series.
        values = []
        hour_axis = []

        # Coordinates of ensemble anomalies.
        flagged_x, flagged_y = [], []

        # Raw gradient of every window seen so far.
        gradients = []

        # Coordinates of anomalies that also pass the gradient test.
        steep_x, steep_y = [], []

        for current in range(starttime, endtime - 60, 60):

            values.append(lookup[current])
            hour_axis.append(float((current - starttime)) / 3600)

            # Forward-looking window of 60 points beginning at `current`.
            window = [(ts, lookup[ts])
                      for ts in range(current, current + 60 * 60, 60)]

            anomalous, ensemble, datatime, datapoint = skyline_algorithms.run_selected_algorithm(
                window, 'test')

            gradients.append(tsd_gradient.getGradient(window))

            # Re-fit the scaler on the whole history each time, so the
            # scaled value of the newest gradient depends on what came before.
            scaler = preprocessing.MinMaxScaler()
            scaled = scaler.fit_transform(np.array(gradients).reshape(-1, 1))

            if not anomalous:
                continue

            hour = float((datatime - starttime)) / 3600
            flagged_x.append(hour)
            flagged_y.append(datapoint)

            if scaled[-1] < 0.3 or scaled[-1] > 0.7:
                steep_x.append(hour)
                steep_y.append(datapoint)

        plt.subplot(3, 1, 1)
        plt.plot(hour_axis, values, linewidth=0.5)

        plt.scatter(flagged_x, flagged_y, color='green', s=5)
        plt.scatter(steep_x, steep_y, color='red', s=5)

        plt.ylabel('异动判断')

        plt.xticks(np.arange(0, 24, 1))

        plt.subplot(3, 1, 3)

        plt.ylabel('通过首末点计算的斜率')

        plt.plot(hour_axis, scaled, linewidth=0.5)

        plt.xticks(np.arange(0, 24, 1))

        plt.show()
Exemple #6
0
    def calc(self):
        """Plot one day of 'busi_yyt.csv' with its 10-point mean and its change.

        For each key from ``starttime + 60 * 60`` up to ``endtime`` (step 60):

        * the 60 preceding points are passed to
          ``skyline_algorithms.run_selected_algorithm``;
        * the mean of the 10 preceding points is recorded;
        * the difference between this mean and the previous one is recorded
          (0 for the first point).

        Panel 1 shows the raw series, panel 2 the rolling mean and panel 3
        the step-to-step change of that mean.

        Raises:
            KeyError: if a required key is missing from the loaded series.
        """
        lookup = dict(ld.load_data().csv('busi_yyt.csv'))

        # The unused ``np.mean(data_dict.values())`` was removed: on Python 3
        # a dict view is not array-like, so that call raised TypeError before
        # anything was plotted.

        starttime = 1522166400
        endtime = starttime + 60 * 60 * 24

        data_value = []
        data_time = []

        # Never filled in this variant, so the scatter below draws nothing.
        data_ano_x = []
        data_ano_y = []

        meandata_list = []
        risedata_list = []

        for current in range(starttime + 60 * 60, endtime, 60):
            data_value.append(lookup[current])
            data_time.append(float((current - starttime)) / 3600)

            window = [(ts, lookup[ts])
                      for ts in range(current - 60 * 60, current, 60)]

            # The verdict is not used here; the call is kept in case the
            # helper has side effects -- TODO confirm and drop if it is pure.
            skyline_algorithms.run_selected_algorithm(window, 'test')

            rolling_mean = np.mean(
                [lookup[ts] for ts in range(current - 10 * 60, current, 60)])

            # Change relative to the previous rolling mean; 0 for the first.
            if meandata_list:
                risedata_list.append(rolling_mean - meandata_list[-1])
            else:
                risedata_list.append(0)
            meandata_list.append(rolling_mean)

        ticks = np.arange(0, 24, 1)

        plt.subplot(3, 1, 1)
        plt.plot(data_time, data_value, linewidth=0.5)
        plt.scatter(data_ano_x, data_ano_y, color='green', s=5)
        plt.ylabel('异动判断')
        plt.xticks(ticks)

        plt.subplot(3, 1, 2)
        plt.plot(data_time, meandata_list, linewidth=0.5)
        plt.ylabel('滑动平均数')
        plt.xticks(ticks)

        plt.subplot(3, 1, 3)
        plt.plot(data_time, risedata_list, linewidth=0.5)
        plt.ylabel('增长')
        plt.xticks(ticks)

        plt.show()
    def calc(self):
        """Compare four single detectors on one day of 'busi_yyt.csv'.

        For each key from ``starttime + 60 * 60`` up to ``endtime`` (step 60)
        the 30 preceding points are passed, in order, to
        ``median_absolute_deviation``, ``stddev_from_average``,
        ``stddev_from_moving_average`` and ``mean_subtraction_cumulation``.
        When a detector returns a truthy value, the last point of the window
        (key ``i - 60``) is recorded for that detector.

        One panel per detector is drawn: the raw series plus that detector's
        points in green.
        """
        lookup = dict(ld.load_data().csv('busi_yyt.csv'))

        starttime = 1522080000
        endtime = starttime + 60 * 60 * 24

        # Detectors in the order they are evaluated and plotted.
        detectors = (
            median_absolute_deviation,
            stddev_from_average,
            stddev_from_moving_average,
            mean_subtraction_cumulation,
        )
        labels = (
            'median_absolute_deviation',
            'stddev_from_average',
            'stddev_from_moving_average',
            'mean_subtraction_cumulation',
        )

        values = []
        hour_axis = []
        hits_x = [[] for _ in detectors]
        hits_y = [[] for _ in detectors]

        for i in range(starttime + 60 * 60, endtime, 60):
            values.append(lookup[i])
            hour_axis.append(float((i - starttime)) / 3600)

            window = [(ts, lookup[ts]) for ts in range(i - 30 * 60, i, 60)]

            print(window[-1])
            print(lookup[i])

            for slot, detect in enumerate(detectors):
                verdict = detect(window)

                # Only the first detector's verdict is echoed.
                if slot == 0:
                    print(verdict)

                if verdict:
                    # Anomaly point: the last sample of the window.
                    hits_x[slot].append(float((i - 60 - starttime)) / 3600)
                    hits_y[slot].append(lookup[i - 60])

        for slot, label in enumerate(labels):
            plt.subplot(4, 1, slot + 1)
            plt.plot(hour_axis, values, linewidth=0.5)

            plt.scatter(hits_x[slot], hits_y[slot], color='green', s=5)

            plt.ylabel(label)

            plt.xticks(np.arange(0, 24, 1))

        plt.show()
Exemple #8
0
    def IsolationForest(self):
        """Fit an isolation forest on 'busi_qd.csv' and scatter-plot the result.

        The first two fields of every loaded row form one 2-D sample.  The
        model is fitted on all samples and then asked to predict them; of the
        first ``24 * 60`` samples, those predicted as ``-1`` are drawn in red
        and the rest in green.
        """
        rows = ld.load_data().csv('busi_qd.csv')

        # Build the pairs as a real list: np.array() applied to a Python 3
        # zip iterator produces a 0-d object array that cannot be fitted.
        array = np.array([(row[0], row[1]) for row in rows])

        # Inside a method body this name resolves to the module-level
        # IsolationForest, not to this method.
        clf = IsolationForest()  # contamination=0.9
        clf.fit(array)
        y_pred_train = clf.predict(array)

        anomalies = []
        normals = []

        # Plot at most 24 * 60 samples; the bound also prevents an
        # IndexError when fewer rows were loaded.
        limit = min(24 * 60, len(array))

        for point, label in zip(array[:limit], y_pred_train[:limit]):
            if label == -1:
                anomalies.append(point)
            else:
                normals.append(point)

        plt.scatter([p[0] for p in anomalies], [p[1] for p in anomalies],
                    c='red')
        plt.scatter([p[0] for p in normals], [p[1] for p in normals],
                    c='green')

        plt.show()
    def algorithms_test(self):
        """Compare four outlier detectors on the rows of 'data_3.csv'.

        The first two fields of every row form one 2-D sample.  Each
        classifier is fitted on all samples; the first 60 predictions are
        then plotted in a 2x2 grid, one panel per classifier, with ``-1``
        predictions in red and the rest in green.
        """
        rows = ld.load_data().csv('data_3.csv')
        # 'data_2.csv' used to be loaded here too, but its contents were
        # never read, so the load was dropped.

        # A concrete list (not a zip iterator) so that the debug print shows
        # the pairs and np.array() builds a 2-D array on Python 3 as well.
        data = [(row[0], row[1]) for row in rows]

        print(data)

        array = np.array(data)

        classifiers = {"One-Class SVM": svm.OneClassSVM(),"Robust covariance": EllipticEnvelope(),"Isolation Forest": IsolationForest(),"Local Outlier Factor": LocalOutlierFactor()}
        plt.figure(figsize=(150,150))

        # Plot at most 60 samples; the bound also prevents an IndexError
        # when fewer rows were loaded.
        limit = min(60, len(array))

        for n, (clf_name, clf) in enumerate(classifiers.items()):

            # This estimator is driven through fit_predict instead of
            # fit followed by predict.
            if clf_name == "Local Outlier Factor":
                y_pred = clf.fit_predict(array)
            else:
                clf.fit(array)
                y_pred = clf.predict(array)

            anomalies = []
            normals = []

            for point, label in zip(array[:limit], y_pred[:limit]):
                if label == -1:
                    anomalies.append(point)
                else:
                    normals.append(point)

            plt.subplot(2, 2, n + 1)
            plt.title(clf_name)

            plt.scatter([p[0] for p in anomalies], [p[1] for p in anomalies],
                        c='red')
            plt.scatter([p[0] for p in normals], [p[1] for p in normals],
                        c='green')

        plt.show()
    def calc(self):
        """Plot one day of 'busi_yyt.csv' with historical and recent gradients.

        For each key from ``starttime + 60 * 60`` up to ``endtime`` (step 60)
        the 60 preceding points are passed to
        ``skyline_algorithms.run_selected_algorithm``.  The reported
        ``datatime`` is then given to ``tsd_his_gradient.getGradient`` and
        the window to ``tsd_gradient_recent.getGradientRecent``; both results
        are printed and collected.

        Flagged points are drawn in green on panel 1.  Points whose absolute
        recent gradient is more than 1.3x or less than 0.7x the absolute
        historical coefficient are collected separately but not plotted.
        Panels 2 and 3 show the two gradient series.
        """
        lookup = dict(ld.load_data().csv('busi_yyt.csv'))

        starttime = 1522166400
        endtime = starttime + 60 * 60 * 24

        values = []
        hour_axis = []

        flagged_x, flagged_y = [], []
        diverging_x, diverging_y = [], []

        coefs = []
        gradients = []

        for current in range(starttime + 60 * 60, endtime, 60):
            values.append(lookup[current])
            hour_axis.append(float((current - starttime)) / 3600)

            window = [(ts, lookup[ts])
                      for ts in range(current - 60 * 60, current, 60)]

            anomalous, ensemble, datatime, datapoint = skyline_algorithms.run_selected_algorithm(
                window, 'test')

            coef, intercept = tsd_his_gradient.getGradient(datatime, lookup)
            gradient = tsd_gradient_recent.getGradientRecent(window)

            print('coef:' + str(coef))
            print('gradient:' + str(gradient))

            if anomalous:
                hour = float((datatime - starttime)) / 3600
                flagged_x.append(hour)
                flagged_y.append(datapoint)

                recent = abs(gradient)
                reference = abs(coef)
                if recent > reference * 1.3 or recent < reference * 0.7:
                    diverging_x.append(hour)
                    diverging_y.append(datapoint)

            coefs.append(coef)
            gradients.append(gradient)

        plt.subplot(3, 1, 1)
        plt.plot(hour_axis, values, linewidth=0.5)

        plt.scatter(flagged_x, flagged_y, color='green', s=5)

        plt.ylabel('异动判断')

        plt.xticks(np.arange(0, 24, 1))

        plt.subplot(3, 1, 2)
        plt.plot(hour_axis, coefs, linewidth=0.5)

        plt.ylabel('前一日斜率')

        plt.xticks(np.arange(0, 24, 1))

        plt.subplot(3, 1, 3)
        plt.plot(hour_axis, gradients, linewidth=0.5)

        plt.ylabel('当前斜率')

        plt.xticks(np.arange(0, 24, 1))

        plt.show()
Exemple #11
0
    def calc(self):
        """Plot one day of 'busi_yyt.csv' and probe each anomaly's history.

        For each key from ``starttime + 60 * 60`` up to ``endtime`` (step 60)
        the 60 preceding points are passed to
        ``skyline_algorithms.run_selected_algorithm``.  For every flagged
        point, five samples spaced 300 apart are taken on each side of the
        key 86400 before the reported ``datatime``; both sample lists are
        printed and passed to ``isgradiented`` together with the mean of the
        whole series, and the two results are printed.

        The raw series and the flagged points (green) are drawn on the first
        row of a three-row layout.

        Raises:
            KeyError: if a required key is missing from the loaded series.
        """
        data_dict = dict(ld.load_data().csv('busi_yyt.csv'))

        # list() keeps this working on Python 3, where dict.values() is a
        # view that np.mean cannot reduce.
        datamean = np.mean(list(data_dict.values()))

        starttime = 1522166400
        endtime = starttime + 60 * 60 * 24

        data_value = []
        data_time = []

        data_ano_x = []
        data_ano_y = []

        for current in range(starttime + 60 * 60, endtime, 60):
            data_value.append(data_dict[current])
            data_time.append(float((current - starttime)) / 3600)

            window = [(ts, data_dict[ts])
                      for ts in range(current - 60 * 60, current, 60)]

            anomalous, ensemble, datatime, datapoint = skyline_algorithms.run_selected_algorithm(window, 'test')

            if not anomalous:
                continue

            hour = float((datatime - starttime)) / 3600
            data_ano_x.append(hour)
            data_ano_y.append(datapoint)

            data_ano_point = (hour, datapoint)

            # Same key, 86400 earlier.
            tmptime = datatime - 86400

            tmpdata_front = []
            tmpdata_back = []

            # A dedicated loop name: the previous code reused the outer
            # loop variable here.
            for step in range(1, 6):
                offset = step * 5 * 60
                tmpdata_front.append(
                    (tmptime + offset, data_dict[tmptime + offset]))
                tmpdata_back.append(
                    (tmptime - offset, data_dict[tmptime - offset]))

            print(tmpdata_front)
            print(tmpdata_back)

            result_front = isgradiented(data_ano_point, tmpdata_front, datamean, 'front')
            result_back = isgradiented(data_ano_point, tmpdata_back, datamean, 'back')

            print('front:' + str(result_front))
            print('back:' + str(result_back))

        plt.subplot(3, 1, 1)
        plt.plot(data_time, data_value, linewidth=0.5)

        plt.scatter(data_ano_x, data_ano_y, color='green', s=5)

        plt.ylabel('异动判断')

        plt.xticks(np.arange(0, 24, 1))

        # The former '前一日斜率' and '当前斜率' panels were removed: their
        # series were never filled in this variant, so plotting them against
        # the non-empty time axis raised ValueError (length mismatch) before
        # the figure could be shown.

        plt.show()
    def calc(self):
        """Plot one day of 'busi_qs.csv' with downward-deviation anomalies.

        For each key from ``starttime + 30 * 60`` up to ``endtime`` (step 60)
        a 31-point window ending at that key (inclusive) is passed to
        ``median_absolute_deviation_down``, ``median_absolute_deviation`` and
        ``run_selected_algorithm``.  The ensemble, the local time of the key
        and the current value are printed for every step.  Only the verdict
        of ``median_absolute_deviation_down`` is recorded and counted.

        Three panels are drawn over the raw series; only the first receives
        points, the other two are given empty marker and line data.
        """
        lookup = dict(ld.load_data().csv('busi_qs.csv'))

        starttime = 1522857600
        endtime = starttime + 60 * 60 * 24

        values = []
        hour_axis = []

        down_x, down_y = [], []

        ano_count = 0
        ano_count_new = 0  # never incremented in this variant

        for current in range(starttime + 30 * 60, endtime, 60):
            values.append(lookup[current])
            hour_axis.append(float((current - starttime)) / 3600)

            # Window includes the current point (upper bound current + 60).
            window = [(ts, lookup[ts])
                      for ts in range(current - 30 * 60, current + 60, 60)]

            went_down = median_absolute_deviation_down(window)

            # Verdicts of the next two calls are not used; only the ensemble
            # from the second one is printed.
            median_absolute_deviation(window)
            _, ensemble, datatime, datapoint = run_selected_algorithm(
                window, 'test')

            print('ensemble:' + str(ensemble))
            time_local = time.localtime(current)
            print('datatime:' +
                  str(time.strftime("%Y-%m-%d %H:%M:%S", time_local)))
            print('datapoint:' + str(lookup[current]))

            if went_down:
                ano_count += 1
                down_x.append(float((current - starttime)) / 3600)
                down_y.append(lookup[current])

        print(ano_count)
        print(ano_count_new)

        def draw_panel(position, label, points_x, points_y, line=None):
            # One row of the figure: raw series, optional red line, red dots.
            plt.subplot(3, 1, position)
            plt.plot(hour_axis, values, linewidth=0.5)
            if line is not None:
                plt.plot(line[0], line[1], linewidth=0.5, color='red')
            plt.scatter(points_x, points_y, color='red', s=5)
            plt.ylabel(label)
            plt.xticks(np.arange(0, 24, 1))

        draw_panel(1, '仅下降', down_x, down_y)
        draw_panel(2, '全部异动点,带判断线', [], [], line=([], []))
        draw_panel(3, 'ks_test', [], [], line=([], []))

        plt.show()
Exemple #13
0
    def calc(self):
        """Plot one day of 'busi_yyt.csv' against its history moving average.

        The same key range shifted back by 1..5 multiples of 86400 is read
        from the series; the first two shifted ranges are passed as history
        to ``history_movingaverage`` together with each 31-point window
        ending at the current key (inclusive).  The returned moving-average
        value is plotted over the raw series.  Flagged points are collected
        but not drawn.
        """
        lookup = dict(ld.load_data().csv('busi_yyt.csv'))

        starttime = 1522166400
        endtime = starttime + 60 * 60 * 24
        day = 60 * 60 * 24

        # Shifted copies of the plotted range, most recent first.  All five
        # are read even though only the first two are used below.
        history = [
            [(ts, lookup[ts])
             for ts in range(starttime - day * back, endtime - day * back, 60)]
            for back in range(1, 6)
        ]
        data_list = history[:2]

        values = []
        hour_axis = []

        mv_values = []
        mv_hours = []

        flagged_x, flagged_y = [], []

        for current in range(starttime + 30 * 60, endtime, 60):

            values.append(lookup[current])
            hour_axis.append(float((current - starttime)) / 3600)

            # Window includes the current point (upper bound current + 60).
            window = [(ts, lookup[ts])
                      for ts in range(current - 30 * 60, current + 60, 60)]

            anomalous, mv_value = history_movingaverage(window, data_list)

            mv_values.append(mv_value)
            mv_hours.append(float((current - starttime)) / 3600)

            if anomalous:
                flagged_x.append(float((current - starttime)) / 3600)
                flagged_y.append(lookup[current])

        plt.subplot(3, 1, 1)
        plt.plot(hour_axis, values, linewidth=0.5)
        plt.plot(mv_hours, mv_values, linewidth=0.5)

        plt.ylabel('4月1日-处理前')

        plt.xticks(np.arange(0, 24, 1))

        plt.show()
    def calc(self):
        """Plot one day of 'busi_yyt.csv' inside a +/-30% historical band.

        For each key in ``[starttime, endtime)`` (step 60) a 29-point window
        starting at that key is passed to
        ``skyline_algorithms.run_selected_algorithm``.  The mean of the
        values stored 1..9 multiples of 86400 before the window's last key
        defines an upper (x1.3) and a lower (x0.7) band.  The raw series,
        both bands and the flagged points (green) are drawn.

        Raises:
            KeyError: if a key needed by a window or by the history lookup is
                missing from the loaded series.
        """
        lookup = dict(ld.load_data().csv('busi_yyt.csv'))

        starttime = 1522252800
        endtime = starttime + 60 * 60 * 24
        history_days = range(1, 10)

        data_value = []
        data_time = []

        data_ano_x = []
        data_ano_y = []

        data_up = []
        data_down = []

        for current in range(starttime, endtime, 60):
            data_value.append(lookup[current])
            data_time.append(float((current - starttime)) / 3600)

            window = [(ts, lookup[ts])
                      for ts in range(current, current + 60 * 29, 60)]

            anomalous, ensemble, datatime, datapoint = skyline_algorithms.run_selected_algorithm(window, 'test')

            last_ts = window[-1][0]
            history = [lookup[last_ts - 86400 * day] for day in history_days]

            # Divide by the number of samples actually summed.  The previous
            # code summed nine days but divided by ten, which pulled both
            # bands about 10% too low.
            v_mean = sum(history) / len(history)

            data_up.append(v_mean * 1.3)
            data_down.append(v_mean * 0.7)

            if anomalous:
                data_ano_x.append(float((datatime - starttime)) / 3600)
                data_ano_y.append(datapoint)

        plt.subplot(3, 1, 1)
        plt.plot(data_time, data_value, linewidth=0.5)
        plt.plot(data_time, data_up, linewidth=0.5)
        plt.plot(data_time, data_down, linewidth=0.5)

        plt.scatter(data_ano_x, data_ano_y, color='green', s=5)

        plt.ylabel('异动判断')

        plt.xticks(np.arange(0, 24, 1))

        plt.show()