def split_data_plot(filename, chunk_size=30):
    """Load the data for ``filename`` and split it into flattened chunks.

    The first value returned by ``readdata.get_network_data`` (called
    ``gps_data`` here) is cut into consecutive, non-overlapping groups of
    ``chunk_size`` entries. Each group is turned into a NumPy array,
    flattened to one dimension and stored as a plain list. Trailing entries
    that do not fill a complete group are dropped.

    Args:
        filename: Path handed unchanged to ``readdata.get_network_data``.
        chunk_size: Number of entries per chunk. Defaults to 30, the value
            that used to be hard-coded.

    Returns:
        A list with one flat list per complete chunk; empty when fewer than
        ``chunk_size`` entries are available.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
    """
    if chunk_size < 1:
        raise ValueError('chunk_size must be a positive integer')

    # The second return value (named ``timestamp`` by the caller code) is
    # not used when building the chunks.
    gps_data, _ = readdata.get_network_data(filename)

    # Restrict iteration to the prefix that forms complete chunks.
    usable = len(gps_data) - len(gps_data) % chunk_size
    return [
        list(numpy.array(gps_data[start:start + chunk_size]).flatten())
        for start in range(0, usable, chunk_size)
    ]
filenames = os.listdir(path) for name in filenames: current = split_data_plot(path + '\\' + name) plot = plot + current gps_num.append(len(current)) return plot,gps_num def calculate_pca(data): p = sklearn.decomposition.pca.PCA(n_components = 5) p.fit(data) feature = p.transform(data) return feature if __name__ == '__main__': # ''' gps_data,timestamp = readdata.get_network_data('.\\ubiqlog\\log_5-21-2014.txt') # gps_data,timestamp = readdata.get_network_data('.\\log_10-31-2014.txt') print len(gps_data) # find_N_num(gps_data) # cluster_centers,labels = k_means_cluster(gps_data,n_clusters = 2) # labels = DBSCANJoint.dbscan_joint(gps_data,0.000005,30) cluster_centers,labels = science_cluster(gps_data,30,show = False) # draw_result(gps_data,labels) print cluster_centers ''' f = open('labels.txt','w') for i in range(len(labels)): f.write(str(i + 1)) f.write('\t') f.write(str(labels[i])) f.write('\n')