def main_():
    if len(argv) < 2:
        print('The input format is\nmain train/test [tcp/b] [filename]')
        return 0
    t = argv[1]
    mode = 'tcp'
    data = []
    if len(argv) == 4:
        mode = argv[2]
    if mode == 'tcp':
        getdata = getdata_tcp
    else:
        getdata = getdata_b
    if t == 'test':
        if len(argv) == 2:
            test()
        else:
            fname = argv[3]
            data = getdata_file(fname)
            data = filter_xyz(data)
            data14 = cluster(data, 14)
            simulate(data14)
    elif t == 'train':
        train()
    else:
        while True:
            data = getdata()
            data = filter_xyz(data)
            data14 = cluster(data, 14)
            simulate(data14)

def cluster_mesh(mesh_filename, epsilon, function, output_filename):
    """
    Performs vertex clustering on a mesh.

    :param mesh_filename: input mesh filename
    :type mesh_filename: string
    :param epsilon: epsilon used in the algorithm (see docs)
    :type epsilon: float
    :param function: representative method used in the algorithm (see docs);
        must be one of: ["center", "mean", "median", "quadric"]
    :type function: string
    :param output_filename: output mesh filename
    :type output_filename: string
    """
    if function not in _functions:
        raise ValueError("Function must be in: %s." % str(_functions))
    epsilon = float(epsilon)
    if not mesh_filename.endswith('.obj') and not mesh_filename.endswith('.off'):
        raise ValueError("Supporting only .obj and .off files!")
    method = _functions_map[function]
    if mesh_filename.endswith('.obj'):
        mesh_loader = ObjLoader(mesh_filename)
    else:
        mesh_loader = OffLoader(mesh_filename)
    mesh = mesh_loader.to_polyhedron()
    cluster(mesh, epsilon, method, output_filename)

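# Usage sketch (hedged): the filenames below are hypothetical; any .obj or .off
# mesh together with one of the documented representative methods would do:
#
#     cluster_mesh("input.obj", epsilon=0.1, function="mean",
#                  output_filename="output.obj")
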
def test_cluster_count_correct(self):
    clusters = cluster([[1, 12], [2, 10], [1, 8], [3, 14], [2, 9],
                        [12, 1], [10, 2], [8, 1], [14, 3], [9, 2]], 1)
    self.assertEqual(len(clusters), 1)
    clusters = cluster([[1, 12], [2, 10], [1, 8], [3, 14], [2, 9],
                        [12, 1], [10, 2], [8, 1], [14, 3], [9, 2]], 2)
    self.assertEqual(len(clusters), 2)
    clusters = cluster([[1, 12], [2, 10], [1, 8], [3, 14], [2, 9],
                        [12, 1], [10, 2], [8, 1], [14, 3], [9, 2]], 3)
    self.assertEqual(len(clusters), 3)
    clusters = cluster([[1, 12], [2, 10], [1, 8], [3, 14], [2, 9],
                        [12, 1], [10, 2], [8, 1], [14, 3], [9, 2]], 4)
    self.assertEqual(len(clusters), 4)

def main():
    """
    Main function. Clusters the data and compares the clustering methods.

    Please note that, in order to avoid too many figures on the screen, the
    next figure won't appear until the current figure is closed.

    :return: None
    """
    # read and prepare the data
    data = data_set_preparations.prepare_data_set(3)
    # scale the data
    data = data_set_preparations.scale_the_data(data)
    # normalize the data
    data = normalize(data)
    # reduce dimension to 2d
    points = perform_pca(data)

    # number of real labels
    print('data 1 real labels', len(np.unique(fit_to_external_classification.get_real_labels(1))))
    print('data 2 real labels', len(np.unique(fit_to_external_classification.get_real_labels(2))))
    print('data 3 real labels', len(np.unique(fit_to_external_classification.get_real_labels(3))))

    predict_nuber_of_clusters.perform_elbow_method(points, 'K means')
    predict_nuber_of_clusters.perform_elbow_method(points, 'Hierarchical')
    predict_nuber_of_clusters.perform_silhouette_method(points, 'GMM')
    predict_nuber_of_clusters.perform_silhouette_method(points, 'Fuzzy C Means')
    predict_nuber_of_clusters.perform_silhouette_method(points, 'Spectral')

    clustering.plot_clustering(points, clustering.cluster(points, 4, 'K means'), 'K means')
    clustering.plot_clustering(points, clustering.cluster(points, 4, 'GMM'), 'GMM')
    clustering.plot_clustering(points, clustering.cluster(points, 4, 'Fuzzy C Means'), 'Fuzzy C Means')
    clustering.plot_clustering(points, clustering.cluster(points, 4, 'Hierarchical'), 'Hierarchical')
    clustering.plot_clustering(points, clustering.cluster(points, 4, 'Spectral'), 'Spectral')

    # statistical tests
    # create a dictionary mapping each method to its list of NMI scores
    algorithms_and_n_clusters = [['K means', 4], ['GMM', 4], ['Fuzzy C Means', 4], ['Spectral', 4]]
    algorithm_nmi_dictionary = {}
    for algorithm, n_clusters in algorithms_and_n_clusters:
        algorithm_nmi_dictionary[algorithm] = fit_to_external_classification.nmi_score(
            fit_to_external_classification.get_real_labels(3), points,
            n_clusters=n_clusters, method=algorithm)
    linkages = ['ward', 'average', 'complete', 'single']
    for linkage in linkages:
        algorithm_nmi_dictionary['Hierarchical' + linkage] = fit_to_external_classification.nmi_score(
            fit_to_external_classification.get_real_labels(3), points,
            n_clusters=4, method='Hierarchical', linkage=linkage)

    print('u test')
    for key1 in algorithm_nmi_dictionary:
        for key2 in algorithm_nmi_dictionary:
            if key1 != key2:
                print('for', key1, 'and', key2, 'p value is',
                      fit_to_external_classification.u_test(algorithm_nmi_dictionary[key1],
                                                            algorithm_nmi_dictionary[key2]))

def cluster(number_clusters, google_or=False):
    raw_inputs = {}
    raw_inputs["electricity"] = np.loadtxt("raw_inputs/electricity.csv")
    raw_inputs["dhw"] = np.loadtxt("raw_inputs/dhw.csv")
    raw_inputs["sh"] = np.loadtxt("raw_inputs/space_heating.csv")
    raw_inputs["heat"] = raw_inputs["dhw"] + raw_inputs["sh"]
    raw_inputs["solar_irrad"] = np.loadtxt("raw_inputs/solar_rad_35deg.csv") / 1000
    raw_inputs["solar_irrad"] = np.maximum(raw_inputs["solar_irrad"], 0)
    raw_inputs["temperature"] = np.loadtxt("raw_inputs/temperature.csv")

    ###########################################################################
    # Clustering
    inputs_clustering = np.array([raw_inputs["electricity"],
                                  raw_inputs["heat"],
                                  raw_inputs["solar_irrad"]])
    inputs_additional = np.array([raw_inputs["temperature"]])

    clus_res = clustering.cluster(inputs_clustering,
                                  inputs_additional=inputs_additional,
                                  method="medoid",
                                  number_clusters=number_clusters,
                                  time_limit=600,
                                  mip_gap=0.0,
                                  google_or=google_or)
    (inputs, typ_inputs_add, nc, scaling_factors, z, times, obj, gap) = clus_res

    filename = ("results/res_googleOR_" + str(google_or) + "_"
                + str(number_clusters) + "days.pkl")
    with open(filename, "wb") as f_in:
        pickle.dump(times, f_in, pickle.HIGHEST_PROTOCOL)
        pickle.dump(obj, f_in, pickle.HIGHEST_PROTOCOL)
        pickle.dump(gap, f_in, pickle.HIGHEST_PROTOCOL)

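# Usage sketch (hedged): assuming the raw_inputs/*.csv files exist relative to
# the working directory, a hypothetical call for 8 typical days might be:
#
#     cluster(8)                    # medoid clustering without Google OR-tools
#     cluster(8, google_or=True)    # writes results/res_googleOR_True_8days.pkl
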
def approximate_rank_order_clustering(vectors):
    """
    Cluster the input vectors.
    """
    clusters = cluster(vectors,
                       n_neighbors=200,
                       thresh=[1.8, 1.9, 2, 2.1, 2.2])
    return clusters

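# Usage sketch (hedged): assuming `vectors` is a 2-D array of feature
# embeddings (one row per sample) with more rows than n_neighbors, e.g.:
#
#     import numpy as np
#     embeddings = np.random.rand(1000, 128)   # hypothetical data
#     clusters = approximate_rank_order_clustering(embeddings)
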
def cluster(self):
    print('Finding distances and paths....')
    distances = floyd_warshall.find_distances(self.graph)
    print('Finding clusters of edges....')
    clusters = clustering.cluster(self.graph, self.k, distances)
    return clusters

def main():
    def validFile(filename):
        return splitext(filename)[1] == '.txt'

    if not os.path.isdir(raw_data_path):
        raise ValueError("%s is not a folder" % raw_data_path)

    examples = [example_from_file(raw_data_path + '/' + filename)
                for filename in os.listdir(raw_data_path)
                if validFile(filename)]

    # classifier = tree_classifier(examples)
    # filename = '/tmp/digits_classifier.joblib.pkl'
    # _ = joblib.dump(classifier, filename, compress=9)
    #
    # print classifier

    for e in examples:
        print(cluster(e.feature_vectors))

def demessify():
    file_names, vectors = get_file_embeddings()
    yhat = cluster(DBSCAN, np.stack(vectors))
    print("Sorted files.")
    folders = create_folder(cluster_indices, yhat, file_names)
    named_folders = get_folder_name(folders)
    write_folders(named_folders)
    print("Done. Created",
          len([1 for f in named_folders.values() if len(f)]),
          "folders.")

def sync():
    for key, date in storage.unprocessed():
        try:
            logging.info(f"Processing data for {date}")
            df = storage.get_dataset(key)
            logging.info("Running clustering...")
            cluster = clustering.cluster(df)
            logging.info("Writing results...")
            storage.write(cluster, date)
        except Exception:
            logging.exception(f"Failed to process data for {date}")

def main():
    identifiers = []
    dreams = []
    ldreams = []
    with open('dreams.txt', encoding='ascii', errors='ignore') as dreamstxt:
        for i, line in enumerate(dreamstxt):
            if i % 3 == 0:
                identifiers.append(line[:-1])
            elif (i - 1) % 3 == 0:
                dreams.append(line)
    lemmatize(identifiers, dreams)
    with open('lemmatized.txt', encoding='ascii') as lemmatized:
        for i, line in enumerate(lemmatized):
            if (i - 1) % 3 == 0:
                ldreams.append(line)
    data = cluster(identifiers, ldreams)

    result_path = os.path.join(os.getcwd(), 'clusters')
    if not os.path.exists(result_path):
        os.mkdir(result_path)
    else:
        # clear any previously stored files
        for root, dirs, files in os.walk(result_path, topdown=False):
            for name in files:
                os.remove(os.path.join(root, name))
    os.chdir(result_path)

    for label in range(data.cluster_count):
        defining_features = []
        for i, f_count in enumerate(data.feature_counts[label]):
            if f_count >= data.min_samples - 1:
                defining_features.append(i)
        if len(defining_features) > 20:
            defining_features = argsort(data.feature_counts[label])[-20:][::-1]
        common_words = list(map(lambda x: data.vocab[x], defining_features))
        samples = list(map(lambda x: identifiers[x], data.sample_indeces[label]))
        print(label)
        print("Common features:\n", common_words)
        print("Dreams:\n", samples)
        with open(str(label) + '.txt', 'w') as results:
            results.write("Common features:\n")
            for word in common_words:
                results.write(word + "\n")
            results.write("\nDreams:\n")
            sample_dreams = list(map(lambda x: dreams[x], data.sample_indeces[label]))
            for i in range(len(samples)):
                results.write(samples[i] + "\n" + sample_dreams[i] + "\n\n")

def test_clusters_correct(self):
    clusters = cluster([[1, 12], [2, 10], [1, 8], [3, 14], [2, 9],
                        [12, 1], [10, 2], [8, 1], [14, 3], [9, 2]], 2)
    self.assertTrue('1-12' in clusters[0])
    self.assertTrue('2-10' in clusters[0])
    self.assertTrue('1-8' in clusters[0])
    self.assertTrue('3-14' in clusters[0])
    self.assertTrue('2-9' in clusters[0])
    self.assertTrue('12-1' in clusters[1])
    self.assertTrue('10-2' in clusters[1])
    self.assertTrue('8-1' in clusters[1])
    self.assertTrue('14-3' in clusters[1])
    self.assertTrue('9-2' in clusters[1])

def get_train_data():
    from os import listdir, chdir, remove, rename
    from filtering import filter_xyz
    from clustering import cluster

    table = shape_dict()
    in_put = []
    target = []
    chdir('./Shapes')
    for i in listdir('.'):
        try:
            data = getdata_file(i)
            data = filter_xyz(data)
            data14 = cluster(data, 14)
            in_put.append(data14)
            target.append(table[i[0]])
        except Exception:
            print('\t\t\tERROR in file', i, '- removing', i)
            remove(i)
    chdir('..')
    return in_put, target

Event = namedtuple("Event", next(csvin))
for line in csvin:
    event = Event._make(line)
    events.append(event)
inp.seek(0)
next(inp)

# LIFO - important for learning the correct sequences!
events = reversed(events)

# Cluster the events:
if CLUSTERING:
    from clustering import cluster, scipy_cluster
    events, minLatitudes, maxLatitudes, minLongitudes, maxLongitudes = cluster(events)

if PREDICT:
    import PREDICTmodel_params as model_params
else:
    import TESTmodel_params as model_params

if LOAD:
    model = ModelFactory.loadFromCheckpoint(MODELSTATE)
else:
    model = ModelFactory.create(model_params.MODEL_PARAMS)
if VISUALIZE:
    Patcher().patchCLAModel(model)

model.enableInference({"predictedField": "event"})
# Predict not only event but also scalar! TODO
# model.enableInference({"predictedField": "scalar"})
# model.enableInference({"predictedField": "timestamp"})
print("Model created!\n")

            precision, recall, f1 = calculate_quality(_OUTPUT_PATTERN % metric,
                                                      _REFERENCE_FILENAME)
            print("Metric %s:" % metric)
            print("\tPrecision: %f, recall: %f, f1: %f" % (precision, recall, f1))
        except:
            pass
    time2 = time()
    print("Run for %f s." % (time2 - time1))
else:
    metric_txt = action
    metric = (dice_metric if action == 'dice'
              else cosine_metric if action == 'cosine'
              else lcs_metric)
    print("Preprocessing data...")
    print("Input: %s" % _INPUT_FILENAME)
    counter = 0
    preprocessed = {}
    result = {}
    with open(_INPUT_FILENAME) as input:
        for line in input:
            preprocessed_line = process(line)
            preprocessed[line] = preprocessed_line
            if _DEBUG and counter % 50 == 0:
                print("%s => %s" % (line, preprocessed_line))
            counter += 1
    print("Clustering...")
    clusters = cluster(preprocessed, metric, _THRESHOLDS[metric_txt], _DEBUG)
    for line, preprocessed_line in preprocessed.items():
        result[line] = clusters[preprocessed_line]
    print("Writing result...")
    write_result(result, _OUTPUT_PATTERN % metric_txt)
    time2 = time()
    print("Run for %f s." % (time2 - time1))

def test():
    with tf.Graph().as_default():
        with tf.device('/gpu:' + str(GPU_INDEX)):
            pointclouds_pl, labels_pl, sem_labels_pl = placeholder_inputs(
                BATCH_SIZE, NUM_POINT)
            is_training_pl = tf.placeholder(tf.bool, shape=())

            # Get model
            pred_sem, pred_ins = get_model(pointclouds_pl, is_training_pl,
                                           NUM_CLASSES)
            pred_sem_softmax = tf.nn.softmax(pred_sem)
            pred_sem_label = tf.argmax(pred_sem_softmax, axis=2)

            loader = tf.train.Saver()

        # Create a session
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        config.allow_soft_placement = True
        config.log_device_placement = False
        sess = tf.Session(config=config)

        is_training = False

        # Restore variables from disk.
        loader.restore(sess, MODEL_PATH)
        logger.info("Model restored from {}".format(MODEL_PATH))

        ops = {
            'pointclouds_pl': pointclouds_pl,
            'labels_pl': labels_pl,
            'sem_labels_pl': sem_labels_pl,
            'is_training_pl': is_training_pl,
            'pred_ins': pred_ins,
            'pred_sem_label': pred_sem_label,
            'pred_sem_softmax': pred_sem_softmax
        }

        total_acc = 0.0
        total_seen = 0

        output_filelist_f = os.path.join(LOG_DIR, 'output_filelist.txt')
        fout_out_filelist = []
        for shape_idx in range(len_pts_files):
            room_path = ROOM_PATH_LIST[shape_idx]

            out_data_label_filename = os.path.basename(room_path)[:-EXT_LEN] + '_pred.txt'
            out_data_label_filename = os.path.join(OUTPUT_DIR, out_data_label_filename)
            out_gt_label_filename = os.path.basename(room_path)[:-EXT_LEN] + '_gt.txt'
            out_gt_label_filename = os.path.join(OUTPUT_DIR, out_gt_label_filename)
            fout_data_label = []
            fout_gt_label = []
            fout_out_filelist.append(out_data_label_filename + '\n')

            logger.info('%d / %d ...' % (shape_idx, len_pts_files))
            logger.info('Loading file ' + room_path)

            size_path = room_path
            if FILE_TYPE == 'hdf5':
                size_path = size_path.replace('indoor3d_ins_seg_hdf5',
                                              'stanford_indoor3d_ins.sem')
                size_path = "{}.npy".format(size_path[:-3])
                cur_data, cur_group, _, cur_sem = \
                    provider.loadDataFile_with_groupseglabel_stanfordindoor(room_path)
            elif FILE_TYPE == 'numpy':
                cur_data, cur_sem, cur_group = \
                    indoor3d_util.room2blocks_wrapper_normalized(
                        room_path, NUM_POINT, block_size=1.0, stride=0.5,
                        random_sample=False, sample_num=None)
                cur_data = cur_data[:, 0:NUM_POINT, :]
            cur_sem = np.squeeze(cur_sem)
            cur_group = np.squeeze(cur_group)

            # Get room dimension..
            data_label = np.load(size_path)
            data = data_label[:, 0:6]
            max_room_x = max(data[:, 0])
            max_room_y = max(data[:, 1])
            max_room_z = max(data[:, 2])

            cur_pred_sem = np.zeros_like(cur_sem)
            cur_pred_sem_softmax = np.zeros(
                [cur_sem.shape[0], cur_sem.shape[1], NUM_CLASSES])
            group_output = np.zeros_like(cur_group)

            gap = 5e-3
            volume_num = int(1. / gap) + 1
            volume = -1 * np.ones([volume_num, volume_num, volume_num]).astype(np.int32)
            volume_seg = -1 * np.ones([volume_num, volume_num, volume_num]).astype(np.int32)

            num_data = cur_data.shape[0]
            for j in range(num_data):
                logger.info("Processing: Shape [%d] Block[%d]" % (shape_idx, j))

                pts = cur_data[j, ...]
                group = cur_group[j]
                sem = cur_sem[j]

                feed_dict = {
                    ops['pointclouds_pl']: np.expand_dims(pts, 0),
                    ops['labels_pl']: np.expand_dims(group, 0),
                    ops['sem_labels_pl']: np.expand_dims(sem, 0),
                    ops['is_training_pl']: is_training
                }

                pred_ins_val, pred_sem_label_val, pred_sem_softmax_val = sess.run(
                    [ops['pred_ins'], ops['pred_sem_label'], ops['pred_sem_softmax']],
                    feed_dict=feed_dict)

                pred_val = np.squeeze(pred_ins_val, axis=0)
                pred_sem = np.squeeze(pred_sem_label_val, axis=0)
                pred_sem_softmax = np.squeeze(pred_sem_softmax_val, axis=0)
                cur_pred_sem[j, :] = pred_sem
                cur_pred_sem_softmax[j, ...] = pred_sem_softmax

                # cluster
                group_seg = {}
                bandwidth = BANDWIDTH
                num_clusters, labels, cluster_centers = cluster(pred_val, bandwidth)
                for idx_cluster in range(num_clusters):
                    tmp = (labels == idx_cluster)
                    estimated_seg = int(stats.mode(pred_sem[tmp])[0])
                    group_seg[idx_cluster] = estimated_seg

                groupids_block = labels
                groupids = BlockMerging(volume, volume_seg, pts[:, 6:],
                                        groupids_block.astype(np.int32),
                                        group_seg, gap)

                group_output[j, :] = groupids
                total_acc += float(np.sum(pred_sem == sem)) / pred_sem.shape[0]
                total_seen += 1

            group_pred = group_output.reshape(-1)
            seg_pred = cur_pred_sem.reshape(-1)
            seg_pred_softmax = cur_pred_sem_softmax.reshape([-1, NUM_CLASSES])
            pts = cur_data.reshape([-1, 9])

            # filtering
            x = (pts[:, 6] / gap).astype(np.int32)
            y = (pts[:, 7] / gap).astype(np.int32)
            z = (pts[:, 8] / gap).astype(np.int32)
            for i in range(group_pred.shape[0]):
                if volume[x[i], y[i], z[i]] != -1:
                    group_pred[i] = volume[x[i], y[i], z[i]]

            seg_gt = cur_sem.reshape(-1)
            un = np.unique(group_pred)
            pts_in_pred = [[] for itmp in range(NUM_CLASSES)]
            group_pred_final = -1 * np.ones_like(group_pred)
            grouppred_cnt = 0
            for ig, g in enumerate(un):  # each object in prediction
                if g == -1:
                    continue
                tmp = (group_pred == g)
                sem_seg_g = int(stats.mode(seg_pred[tmp])[0])
                # if np.sum(tmp) > 500:
                if np.sum(tmp) > 0.25 * mean_num_pts_in_group[sem_seg_g]:
                    group_pred_final[tmp] = grouppred_cnt
                    pts_in_pred[sem_seg_g] += [tmp]
                    grouppred_cnt += 1

            pts[:, 6] *= max_room_x
            pts[:, 7] *= max_room_y
            pts[:, 8] *= max_room_z
            pts[:, 3:6] *= 255.0

            ins = group_pred_final.astype(np.int32)
            sem = seg_pred.astype(np.int32)
            sem_softmax = seg_pred_softmax
            sem_gt = seg_gt
            ins_gt = cur_group.reshape(-1)

            for i in range(pts.shape[0]):
                fout_data_label.append(
                    '%f %f %f %d %d %d %f %d %d\n' %
                    (pts[i, 6], pts[i, 7], pts[i, 8],
                     pts[i, 3], pts[i, 4], pts[i, 5],
                     sem_softmax[i, sem[i]], sem[i], ins[i]))
                fout_gt_label.append('%d %d\n' % (sem_gt[i], ins_gt[i]))

            with open(out_data_label_filename, 'w') as fd:
                fd.writelines(fout_data_label)
            with open(out_gt_label_filename, 'w') as fd:
                fd.writelines(fout_gt_label)

            if output_verbose:
                # file name
                outfile_name = ROOM_PATH_LIST[shape_idx].split('/')[-1][:-EXT_LEN]
                # Raw Point Cloud
                output_point_cloud_rgb(
                    pts[:, 6:], pts[:, 3:6].astype(np.int32),
                    os.path.join(VIS_DIR, '{}_raw.obj'.format(outfile_name)))
                logger.info('Saving file {}_raw.obj'.format(outfile_name))
                # Instance Prediction
                output_color_point_cloud(
                    pts[:, 6:], group_pred_final.astype(np.int32),
                    os.path.join(VIS_DIR, '{}_pred_ins.obj'.format(outfile_name)))
                logger.info('Saving file {}_pred_ins.obj'.format(outfile_name))
                # Semantic Prediction
                output_color_point_cloud(
                    pts[:, 6:], seg_pred.astype(np.int32),
                    os.path.join(VIS_DIR, '{}_pred_sem.obj'.format(outfile_name)))
                logger.info('Saving file {}_pred_sem.obj'.format(outfile_name))
                # Instance Ground Truth
                output_color_point_cloud(
                    pts[:, 6:], ins_gt,
                    os.path.join(VIS_DIR, '{}_gt_ins.obj'.format(outfile_name)))
                logger.info('Saving file {}_gt_ins.obj'.format(outfile_name))
                # Semantic Ground Truth
                output_color_point_cloud(
                    pts[:, 6:], sem_gt,
                    os.path.join(VIS_DIR, '{}_gt_sem.obj'.format(outfile_name)))
                logger.info('Saving file {}_gt_sem.obj'.format(outfile_name))

        with open(output_filelist_f, 'w') as fd:
            fd.writelines(fout_out_filelist)