def write_distance_matrix_to_csv():
    """Build the Euclidean distance matrix for a stored network and write it out.

    Pipeline: connect to Postgres, load the network (depth/limit argument 5 —
    semantics of that constant live in load_network_from_the_db), pick landmark
    nodes, form the node-vs-landmark similarity matrix, convert similarities to
    Euclidean distances, and write the result via the CSV writer.

    NOTE(review): despite this function's name, the final call is
    write_similarity_matrix_to_csv — presumably it handles any matrix; confirm.
    """
    db_handle = PostgresHandle(smarttypes.connection_string)
    net = load_network_from_the_db(db_handle, 5)
    landmark_nodes = get_landmarks(net)
    sim_mtx = mk_similarity_matrix(net, landmark_nodes)
    dist_mtx = dist_euclidean(sim_mtx)
    write_similarity_matrix_to_csv(dist_mtx)
def test_metaNMDS(self):
    """l19 data should give stress below .13"""
    # Classic L19 ecology test matrix (19 samples x 9 species).
    ptmtx = array(
        [
            [7, 1, 0, 0, 0, 0, 0, 0, 0],
            [4, 2, 0, 0, 0, 1, 0, 0, 0],
            [2, 4, 0, 0, 0, 1, 0, 0, 0],
            [1, 7, 0, 0, 0, 0, 0, 0, 0],
            [0, 8, 0, 0, 0, 0, 0, 0, 0],
            [0, 7, 1, 0, 0, 0, 0, 0, 0],  #idx 5
            [0, 4, 2, 0, 0, 0, 2, 0, 0],
            [0, 2, 4, 0, 0, 0, 1, 0, 0],
            [0, 1, 7, 0, 0, 0, 0, 0, 0],
            [0, 0, 8, 0, 0, 0, 0, 0, 0],
            [0, 0, 7, 1, 0, 0, 0, 0, 0],  #idx 10
            [0, 0, 4, 2, 0, 0, 0, 3, 0],
            [0, 0, 2, 4, 0, 0, 0, 1, 0],
            [0, 0, 1, 7, 0, 0, 0, 0, 0],
            [0, 0, 0, 8, 0, 0, 0, 0, 0],
            [0, 0, 0, 7, 1, 0, 0, 0, 0],  #idx 15
            [0, 0, 0, 4, 2, 0, 0, 0, 4],
            [0, 0, 0, 2, 4, 0, 0, 0, 1],
            [0, 0, 0, 1, 7, 0, 0, 0, 0]
        ], 'float')
    distmtx = dist_euclidean(ptmtx)
    nm = metaNMDS(1, distmtx, verbosity=0)
    # BUG FIX: unittest has no assertLessThan; the comparison assertion
    # is assertLess, so the original raised AttributeError when run.
    self.assertLess(nm.getStress(), .13)
def test_metaNMDS(self):
    """l19 data should give stress below .13"""
    # Classic L19 ecology test matrix (19 samples x 9 species).
    ptmtx = array(
        [[7,1,0,0,0,0,0,0,0],
         [4,2,0,0,0,1,0,0,0],
         [2,4,0,0,0,1,0,0,0],
         [1,7,0,0,0,0,0,0,0],
         [0,8,0,0,0,0,0,0,0],
         [0,7,1,0,0,0,0,0,0],  #idx 5
         [0,4,2,0,0,0,2,0,0],
         [0,2,4,0,0,0,1,0,0],
         [0,1,7,0,0,0,0,0,0],
         [0,0,8,0,0,0,0,0,0],
         [0,0,7,1,0,0,0,0,0],  #idx 10
         [0,0,4,2,0,0,0,3,0],
         [0,0,2,4,0,0,0,1,0],
         [0,0,1,7,0,0,0,0,0],
         [0,0,0,8,0,0,0,0,0],
         [0,0,0,7,1,0,0,0,0],  #idx 15
         [0,0,0,4,2,0,0,0,4],
         [0,0,0,2,4,0,0,0,1],
         [0,0,0,1,7,0,0,0,0]], 'float')
    distmtx = dist_euclidean(ptmtx)
    nm = metaNMDS(1, distmtx, verbosity=0)
    # BUG FIX: unittest has no assertLessThan; the comparison assertion
    # is assertLess, so the original raised AttributeError when run.
    self.assertLess(nm.getStress(), .13)
def test_3(self):
    """l19 data should give stress below .13 in multi-D"""
    # Classic L19 ecology test matrix (19 samples x 9 species).
    ptmtx = array(
        [[7,1,0,0,0,0,0,0,0],
         [4,2,0,0,0,1,0,0,0],
         [2,4,0,0,0,1,0,0,0],
         [1,7,0,0,0,0,0,0,0],
         [0,8,0,0,0,0,0,0,0],
         [0,7,1,0,0,0,0,0,0],  #idx 5
         [0,4,2,0,0,0,2,0,0],
         [0,2,4,0,0,0,1,0,0],
         [0,1,7,0,0,0,0,0,0],
         [0,0,8,0,0,0,0,0,0],
         [0,0,7,1,0,0,0,0,0],  #idx 10
         [0,0,4,2,0,0,0,3,0],
         [0,0,2,4,0,0,0,1,0],
         [0,0,1,7,0,0,0,0,0],
         [0,0,0,8,0,0,0,0,0],
         [0,0,0,7,1,0,0,0,0],  #idx 15
         [0,0,0,4,2,0,0,0,4],
         [0,0,0,2,4,0,0,0,1],
         [0,0,0,1,7,0,0,0,0]], 'float')
    distmtx = dist_euclidean(ptmtx)
    # Stress should stay below the threshold at every embedding dimension.
    for dim in range(3, 18):
        nm = NMDS(distmtx, verbosity=0, dimension=dim)
        # BUG FIX: unittest has no assertLessThan; the comparison assertion
        # is assertLess, so the original raised AttributeError when run.
        self.assertLess(nm.getStress(), .13)
def test_3(self):
    """l19 data should give stress below .13 in multi-D"""
    # Classic L19 ecology test matrix (19 samples x 9 species).
    ptmtx = array(
        [
            [7, 1, 0, 0, 0, 0, 0, 0, 0],
            [4, 2, 0, 0, 0, 1, 0, 0, 0],
            [2, 4, 0, 0, 0, 1, 0, 0, 0],
            [1, 7, 0, 0, 0, 0, 0, 0, 0],
            [0, 8, 0, 0, 0, 0, 0, 0, 0],
            [0, 7, 1, 0, 0, 0, 0, 0, 0],  #idx 5
            [0, 4, 2, 0, 0, 0, 2, 0, 0],
            [0, 2, 4, 0, 0, 0, 1, 0, 0],
            [0, 1, 7, 0, 0, 0, 0, 0, 0],
            [0, 0, 8, 0, 0, 0, 0, 0, 0],
            [0, 0, 7, 1, 0, 0, 0, 0, 0],  #idx 10
            [0, 0, 4, 2, 0, 0, 0, 3, 0],
            [0, 0, 2, 4, 0, 0, 0, 1, 0],
            [0, 0, 1, 7, 0, 0, 0, 0, 0],
            [0, 0, 0, 8, 0, 0, 0, 0, 0],
            [0, 0, 0, 7, 1, 0, 0, 0, 0],  #idx 15
            [0, 0, 0, 4, 2, 0, 0, 0, 4],
            [0, 0, 0, 2, 4, 0, 0, 0, 1],
            [0, 0, 0, 1, 7, 0, 0, 0, 0]
        ], 'float')
    distmtx = dist_euclidean(ptmtx)
    # Stress should stay below the threshold at every embedding dimension.
    for dim in range(3, 18):
        nm = NMDS(distmtx, verbosity=0, dimension=dim)
        # BUG FIX: unittest has no assertLessThan; the comparison assertion
        # is assertLess, so the original raised AttributeError when run.
        self.assertLess(nm.getStress(), .13)
def distance(args):
    """Compute pairwise distances between dataset columns of a JSON matrix file.

    Reads args.matrix_file (JSON with 'columns' [{'id': ...}, ...] and 'data'
    rows), transposes the data so each dataset is a row, applies the distance
    metric named by args.metric, and returns a dict mapping
    (dataset_id_i, dataset_id_j) -> distance for each unordered pair (i < j).
    If args.to_output == 'distance', also prints the full distance matrix.
    Unrecognized metric names fall back to bray_curtis (original behavior).
    """
    # FIX: use a context manager so the file is closed even if json.load raises.
    with open(args.matrix_file) as json_data:
        data = json.load(json_data)

    datasets = [col['id'] for col in data['columns']]

    z = np.array(data['data'])
    dm = np.transpose(z)  # one row per dataset

    # Dispatch table replaces the original 16-branch elif chain.
    metric_fns = {
        'bray_curtis': dt.dist_bray_curtis,
        'morisita_horn': dt.dist_morisita_horn,
        'canberra': dt.dist_canberra,
        'chisq': dt.dist_chisq,
        'chord': dt.dist_chord,
        'euclidean': dt.dist_euclidean,
        'gower': dt.dist_gower,
        'hellinger': dt.dist_hellinger,
        'kulczynski': dt.dist_kulczynski,
        'manhattan': dt.dist_manhattan,
        'abund_jaccard': dt.dist_abund_jaccard,
        'binary_jaccard': dt.binary_dist_jaccard,
        'pearson': dt.dist_pearson,
        'soergel': dt.dist_soergel,
        'spearman': dt.dist_spearman_approx,
    }
    distance_matrix = metric_fns.get(args.metric, dt.dist_bray_curtis)(dm)

    dist = {}
    for i, row in enumerate(distance_matrix):
        for n, d in enumerate(row):
            if i < n:  # matrix is symmetric; only needs one copy
                dist[(datasets[i], datasets[n])] = d

    if args.to_output == 'distance':
        print(distance_matrix)
    return dist
def distance(args):
    """Compute pairwise distances between dataset columns of a JSON matrix file.

    Reads args.matrix_file (JSON with 'columns' [{'id': ...}, ...] and 'data'
    rows), transposes the data so each dataset is a row, applies the distance
    metric named by args.metric, and returns a dict mapping
    (dataset_id_i, dataset_id_j) -> distance for each unordered pair (i < j).
    If args.to_output == 'distance', also prints the full distance matrix.
    Unrecognized metric names fall back to bray_curtis (original behavior).
    """
    # FIX: use a context manager so the file is closed even if json.load raises.
    with open(args.matrix_file) as json_data:
        data = json.load(json_data)

    datasets = [col['id'] for col in data['columns']]

    z = np.array(data['data'])
    dm = np.transpose(z)  # one row per dataset

    # Dispatch table replaces the original 16-branch elif chain.
    metric_fns = {
        'bray_curtis': dt.dist_bray_curtis,
        'morisita_horn': dt.dist_morisita_horn,
        'canberra': dt.dist_canberra,
        'chisq': dt.dist_chisq,
        'chord': dt.dist_chord,
        'euclidean': dt.dist_euclidean,
        'gower': dt.dist_gower,
        'hellinger': dt.dist_hellinger,
        'kulczynski': dt.dist_kulczynski,
        'manhattan': dt.dist_manhattan,
        'abund_jaccard': dt.dist_abund_jaccard,
        'binary_jaccard': dt.binary_dist_jaccard,
        'pearson': dt.dist_pearson,
        'soergel': dt.dist_soergel,
        'spearman': dt.dist_spearman_approx,
    }
    distance_matrix = metric_fns.get(args.metric, dt.dist_bray_curtis)(dm)

    dist = {}
    for i, row in enumerate(distance_matrix):
        for n, d in enumerate(row):
            if i < n:  # matrix is symmetric; only needs one copy
                dist[(datasets[i], datasets[n])] = d

    if args.to_output == 'distance':
        print(distance_matrix)
    return dist
def calculate_distance(args):
    """Compute a distance matrix from an input abundance file, write it as CSV,
    and save a single-linkage dendrogram image.

    Reads JSON input (args.in_file, searched first under ./tmp/), applies the
    distance metric named by args.metric (defaulting to bray_curtis), writes a
    labeled CSV matrix to args.out_file, prints the matrix as JSON, and saves
    a dendrogram PNG to public/tmp_images/<args.prefix>.png.
    """
    if args.file_format == 'json':
        try:
            json_data = open('./tmp/' + args.in_file)
        except IOError:
            json_data = open(args.in_file)
        except Exception:
            # FIX: was a bare `except:`; narrowed while keeping the
            # exit-on-unexpected-failure behavior for the first open().
            print("NO FILE FOUND ERROR")
            sys.exit()
        data = json.load(json_data)
        json_data.close()
    else:  # csv file
        # NOTE(review): this branch is a stub ("this doesn't work now" in the
        # original) and leaves `data` unset, so the code below would raise
        # NameError for csv input — needs a real implementation.
        with open('./tmp/' + args.in_file, 'rb') as csvfile:
            csv_data = csv.reader(csvfile, delimiter=',', quotechar='"')
            for row in csv_data:
                pass

    z = np.array(data['data'])
    dm = np.transpose(z)  # one row per dataset

    # Dispatch table replaces the original 16-branch elif chain.
    metric_fns = {
        'bray_curtis': dt.dist_bray_curtis,
        'morisita_horn': dt.dist_morisita_horn,
        'canberra': dt.dist_canberra,
        'chisq': dt.dist_chisq,
        'chord': dt.dist_chord,
        'euclidean': dt.dist_euclidean,
        'gower': dt.dist_gower,
        'hellinger': dt.dist_hellinger,
        'kulczynski': dt.dist_kulczynski,
        'manhattan': dt.dist_manhattan,
        'abund_jaccard': dt.dist_abund_jaccard,
        'binary_jaccard': dt.binary_dist_jaccard,
        'pearson': dt.dist_pearson,
        'soergel': dt.dist_soergel,
        'spearman': dt.dist_spearman_approx,
    }
    dist = metric_fns.get(args.metric, dt.dist_bray_curtis)(dm)

    # (Removed unused locals from the original: `datasets`, `mat`, and
    # `distance_matrix2` were built but never read.)
    names = [col['name'] for col in data['columns']]
    distance_matrix1 = {}
    # FIX: write via a context manager so the output file is closed on error.
    with open(args.out_file, 'w') as out_fp:
        out_fp.write(','.join(names) + '\n')
        for row, name in enumerate(names):
            distance_matrix1[name] = {}
            for col, d in enumerate(dist[row]):
                distance_matrix1[name][names[col]] = d
            out_fp.write(name + ',' + ','.join(str(d) for d in dist[row]) + '\n')

    print(json.dumps(distance_matrix1))

    arr = []
    for ds1 in distance_matrix1:
        print(ds1)
        arr.append([distance_matrix1[ds1][ds2] for ds2 in distance_matrix1[ds1]])

    linkage_matrix = linkage(arr, "single")
    dendrogram(linkage_matrix, color_threshold=1, show_leaf_counts=True)
    image_file = 'public/tmp_images/' + args.prefix + '.png'
    plt.savefig(image_file)