Example #1
def write_distance_matrix_to_csv():
  postgres_handle = PostgresHandle(smarttypes.connection_string)
  network = load_network_from_the_db(postgres_handle, 5)
  landmarks = get_landmarks(network)
  # similarity of each node to the landmark set, then pairwise Euclidean
  # distances between those similarity rows
  similarity_matrix = mk_similarity_matrix(network, landmarks)
  distance_matrix = dist_euclidean(similarity_matrix)
  # the distance matrix is written out with the same CSV helper used for
  # similarity matrices
  write_similarity_matrix_to_csv(distance_matrix)
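The examples on this page all pass a 2-D array of row vectors to dist_euclidean and get back a square, symmetric matrix of pairwise distances. A minimal sketch of that behaviour, assuming rows are the samples being compared; pairwise_euclidean here is a hypothetical stand-in written for illustration, not the library function the examples import:

import numpy as np

def pairwise_euclidean(points):
    # Euclidean distance between every pair of rows in a 2-D array.
    points = np.asarray(points, dtype=float)
    diffs = points[:, np.newaxis, :] - points[np.newaxis, :, :]
    return np.sqrt((diffs ** 2).sum(axis=-1))

rows = np.array([[7, 1, 0], [4, 2, 0], [0, 8, 1]], dtype=float)
print(pairwise_euclidean(rows).shape)  # (3, 3): zero diagonal, symmetric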
Example #2
 def test_metaNMDS(self):
     """l19 data should give stress below .13"""
     ptmtx = array(
         [
             [7, 1, 0, 0, 0, 0, 0, 0, 0],
             [4, 2, 0, 0, 0, 1, 0, 0, 0],
             [2, 4, 0, 0, 0, 1, 0, 0, 0],
             [1, 7, 0, 0, 0, 0, 0, 0, 0],
             [0, 8, 0, 0, 0, 0, 0, 0, 0],
             [0, 7, 1, 0, 0, 0, 0, 0, 0],  #idx 5
             [0, 4, 2, 0, 0, 0, 2, 0, 0],
             [0, 2, 4, 0, 0, 0, 1, 0, 0],
             [0, 1, 7, 0, 0, 0, 0, 0, 0],
             [0, 0, 8, 0, 0, 0, 0, 0, 0],
             [0, 0, 7, 1, 0, 0, 0, 0, 0],  #idx 10
             [0, 0, 4, 2, 0, 0, 0, 3, 0],
             [0, 0, 2, 4, 0, 0, 0, 1, 0],
             [0, 0, 1, 7, 0, 0, 0, 0, 0],
             [0, 0, 0, 8, 0, 0, 0, 0, 0],
             [0, 0, 0, 7, 1, 0, 0, 0, 0],  #idx 15
             [0, 0, 0, 4, 2, 0, 0, 0, 4],
             [0, 0, 0, 2, 4, 0, 0, 0, 1],
             [0, 0, 0, 1, 7, 0, 0, 0, 0]
         ],
         'float')
     distmtx = dist_euclidean(ptmtx)
     nm = metaNMDS(1, distmtx, verbosity=0)
     self.assertLessThan(nm.getStress(), .13)
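The same three-step pattern the test uses (raw data -> dist_euclidean -> metaNMDS, then check stress) works as a standalone ordination snippet. A sketch, with the import paths assumed from PyCogent's layout and only the calls that appear in the test itself:

import numpy as np
# Assumed import paths; adjust to wherever dist_euclidean and metaNMDS live.
from cogent.maths.distance_transform import dist_euclidean
from cogent.cluster.nmds import metaNMDS

ptmtx = np.array([[7, 1, 0],
                  [4, 2, 0],
                  [1, 7, 0],
                  [0, 8, 1]], 'float')
distmtx = dist_euclidean(ptmtx)          # square matrix of pairwise distances
nm = metaNMDS(1, distmtx, verbosity=0)   # one restart, same call as the test
print(nm.getStress())                    # lower stress = better fit; the test
                                         # above requires < .13 for the l19 data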
Example #3
def write_distance_matrix_to_csv():
    postgres_handle = PostgresHandle(smarttypes.connection_string)
    network = load_network_from_the_db(postgres_handle, 5)
    landmarks = get_landmarks(network)
    similarity_matrix = mk_similarity_matrix(network, landmarks)
    distance_matrix = dist_euclidean(similarity_matrix)
    write_similarity_matrix_to_csv(distance_matrix)
Example #4
 def test_metaNMDS(self):
     """l19 data should give stress below .13"""
     ptmtx = array(
         [[7,1,0,0,0,0,0,0,0],
         [4,2,0,0,0,1,0,0,0],
         [2,4,0,0,0,1,0,0,0],
         [1,7,0,0,0,0,0,0,0],
         [0,8,0,0,0,0,0,0,0],
         [0,7,1,0,0,0,0,0,0],#idx 5
         [0,4,2,0,0,0,2,0,0],
         [0,2,4,0,0,0,1,0,0],
         [0,1,7,0,0,0,0,0,0],
         [0,0,8,0,0,0,0,0,0],
         [0,0,7,1,0,0,0,0,0],#idx 10
         [0,0,4,2,0,0,0,3,0],
         [0,0,2,4,0,0,0,1,0],
         [0,0,1,7,0,0,0,0,0],
         [0,0,0,8,0,0,0,0,0],
         [0,0,0,7,1,0,0,0,0],#idx 15
         [0,0,0,4,2,0,0,0,4],
         [0,0,0,2,4,0,0,0,1],
         [0,0,0,1,7,0,0,0,0]], 'float')
     distmtx = dist_euclidean(ptmtx)
     nm = metaNMDS(1, distmtx, verbosity=0)
     self.assertLessThan(nm.getStress(), .13)
Example #5
 def test_3(self):
     """l19 data should give stress below .13 in multi-D"""
     ptmtx = array(
         [[7,1,0,0,0,0,0,0,0],
         [4,2,0,0,0,1,0,0,0],
         [2,4,0,0,0,1,0,0,0],
         [1,7,0,0,0,0,0,0,0],
         [0,8,0,0,0,0,0,0,0],
         [0,7,1,0,0,0,0,0,0],#idx 5
         [0,4,2,0,0,0,2,0,0],
         [0,2,4,0,0,0,1,0,0],
         [0,1,7,0,0,0,0,0,0],
         [0,0,8,0,0,0,0,0,0],
         [0,0,7,1,0,0,0,0,0],#idx 10
         [0,0,4,2,0,0,0,3,0],
         [0,0,2,4,0,0,0,1,0],
         [0,0,1,7,0,0,0,0,0],
         [0,0,0,8,0,0,0,0,0],
         [0,0,0,7,1,0,0,0,0],#idx 15
         [0,0,0,4,2,0,0,0,4],
         [0,0,0,2,4,0,0,0,1],
         [0,0,0,1,7,0,0,0,0]], 'float')
     distmtx = dist_euclidean(ptmtx)
     for dim in range(3,18):
         nm = NMDS(distmtx, verbosity=0, dimension=dim)
         self.assertLessThan(nm.getStress(), .13)
Example #6
 def test_3(self):
     """l19 data should give stress below .13 in multi-D"""
     ptmtx = array(
         [
             [7, 1, 0, 0, 0, 0, 0, 0, 0],
             [4, 2, 0, 0, 0, 1, 0, 0, 0],
             [2, 4, 0, 0, 0, 1, 0, 0, 0],
             [1, 7, 0, 0, 0, 0, 0, 0, 0],
             [0, 8, 0, 0, 0, 0, 0, 0, 0],
             [0, 7, 1, 0, 0, 0, 0, 0, 0],  #idx 5
             [0, 4, 2, 0, 0, 0, 2, 0, 0],
             [0, 2, 4, 0, 0, 0, 1, 0, 0],
             [0, 1, 7, 0, 0, 0, 0, 0, 0],
             [0, 0, 8, 0, 0, 0, 0, 0, 0],
             [0, 0, 7, 1, 0, 0, 0, 0, 0],  #idx 10
             [0, 0, 4, 2, 0, 0, 0, 3, 0],
             [0, 0, 2, 4, 0, 0, 0, 1, 0],
             [0, 0, 1, 7, 0, 0, 0, 0, 0],
             [0, 0, 0, 8, 0, 0, 0, 0, 0],
             [0, 0, 0, 7, 1, 0, 0, 0, 0],  #idx 15
             [0, 0, 0, 4, 2, 0, 0, 0, 4],
             [0, 0, 0, 2, 4, 0, 0, 0, 1],
             [0, 0, 0, 1, 7, 0, 0, 0, 0]
         ],
         'float')
     distmtx = dist_euclidean(ptmtx)
     for dim in range(3, 18):
         nm = NMDS(distmtx, verbosity=0, dimension=dim)
         self.assertLessThan(nm.getStress(), .13)
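Examples #5 and #6 sweep the embedding dimension from 3 to 17 and expect stress to stay under .13 at every step. A short sketch that makes the intent of that loop explicit by collecting stress per dimension (NMDS and its keyword arguments are used exactly as in the tests; the import path is an assumption):

from cogent.cluster.nmds import NMDS  # assumed path

def stress_by_dimension(distmtx, dims=range(3, 18)):
    # One NMDS fit per dimension; stress generally drops as dimension grows.
    return {dim: NMDS(distmtx, verbosity=0, dimension=dim).getStress()
            for dim in dims}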
Example #7
def distance(args):

	json_data = open(args.matrix_file)
	data = json.load(json_data)
	json_data.close()

	datasets = []
	for i in data['columns']:
		#print i['id']
		datasets.append(i['id'])

	z = np.array(data['data'])
	dm = np.transpose(z)

	if args.metric == 'bray_curtis':
		distance_matrix = dt.dist_bray_curtis(dm)
	elif args.metric == 'morisita_horn':
		distance_matrix = dt.dist_morisita_horn(dm)
	elif args.metric == 'canberra':
		distance_matrix = dt.dist_canberra(dm)
	elif args.metric == 'chisq':
		distance_matrix = dt.dist_chisq(dm)
	elif args.metric == 'chord':
		distance_matrix = dt.dist_chord(dm)
	elif args.metric == 'euclidean':
		distance_matrix = dt.dist_euclidean(dm)
	elif args.metric == 'gower':
		distance_matrix = dt.dist_gower(dm)
	elif args.metric == 'hellinger':
		distance_matrix = dt.dist_hellinger(dm)
	elif args.metric == 'kulczynski':
		distance_matrix = dt.dist_kulczynski(dm)
	elif args.metric == 'manhattan':
		distance_matrix = dt.dist_manhattan(dm)
	elif args.metric == 'abund_jaccard':
		distance_matrix = dt.dist_abund_jaccard(dm)
	elif args.metric == 'binary_jaccard':
		distance_matrix = dt.binary_dist_jaccard(dm)
	elif args.metric == 'pearson':
		distance_matrix = dt.dist_pearson(dm)
	elif args.metric == 'soergel':
		distance_matrix = dt.dist_soergel(dm)
	elif args.metric == 'spearman':
		distance_matrix = dt.dist_spearman_approx(dm)
	else:  # default
		distance_matrix = dt.dist_bray_curtis(dm)


	dist = {}
	for i,x in enumerate(distance_matrix):
		for n,d in enumerate(distance_matrix[i]):
			if i < n: # only needs one copy
					dist[ (datasets[i],datasets[n]) ] = d

	#np.savetxt(os.path.join(args.output_dir, args.file_prefix+'_distance.mtx'), distance_matrix)
	if args.to_output == 'distance':
		print(distance_matrix)
	return dist
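The if/elif ladder above is a name-to-function lookup into the distance-transform module aliased as dt. A sketch of the same selection as a dictionary dispatch, using only the metric names and dt functions already listed, with Bray-Curtis kept as the fallback default:

def select_metric(dt, metric):
    # Map a metric name to its dt.dist_* function; unknown names fall back
    # to Bray-Curtis, matching the else branch above.
    metrics = {
        'bray_curtis':    dt.dist_bray_curtis,
        'morisita_horn':  dt.dist_morisita_horn,
        'canberra':       dt.dist_canberra,
        'chisq':          dt.dist_chisq,
        'chord':          dt.dist_chord,
        'euclidean':      dt.dist_euclidean,
        'gower':          dt.dist_gower,
        'hellinger':      dt.dist_hellinger,
        'kulczynski':     dt.dist_kulczynski,
        'manhattan':      dt.dist_manhattan,
        'abund_jaccard':  dt.dist_abund_jaccard,
        'binary_jaccard': dt.binary_dist_jaccard,
        'pearson':        dt.dist_pearson,
        'soergel':        dt.dist_soergel,
        'spearman':       dt.dist_spearman_approx,
    }
    return metrics.get(metric, dt.dist_bray_curtis)

# Inside distance(): distance_matrix = select_metric(dt, args.metric)(dm)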
Example #8
def distance(args):

    json_data = open(args.matrix_file)
    data = json.load(json_data)
    json_data.close()

    datasets = []
    for i in data['columns']:
        #print i['id']
        datasets.append(i['id'])

    z = np.array(data['data'])
    dm = np.transpose(z)

    if args.metric == 'bray_curtis':
        distance_matrix = dt.dist_bray_curtis(dm)
    elif args.metric == 'morisita_horn':
        distance_matrix = dt.dist_morisita_horn(dm)
    elif args.metric == 'canberra':
        distance_matrix = dt.dist_canberra(dm)
    elif args.metric == 'chisq':
        distance_matrix = dt.dist_chisq(dm)
    elif args.metric == 'chord':
        distance_matrix = dt.dist_chord(dm)
    elif args.metric == 'euclidean':
        distance_matrix = dt.dist_euclidean(dm)
    elif args.metric == 'gower':
        distance_matrix = dt.dist_gower(dm)
    elif args.metric == 'hellinger':
        distance_matrix = dt.dist_hellinger(dm)
    elif args.metric == 'kulczynski':
        distance_matrix = dt.dist_kulczynski(dm)
    elif args.metric == 'manhattan':
        distance_matrix = dt.dist_manhattan(dm)
    elif args.metric == 'abund_jaccard':
        distance_matrix = dt.dist_abund_jaccard(dm)
    elif args.metric == 'binary_jaccard':
        distance_matrix = dt.binary_dist_jaccard(dm)
    elif args.metric == 'pearson':
        distance_matrix = dt.dist_pearson(dm)
    elif args.metric == 'soergel':
        distance_matrix = dt.dist_soergel(dm)
    elif args.metric == 'spearman':
        distance_matrix = dt.dist_spearman_approx(dm)
    else:  # default
        distance_matrix = dt.dist_bray_curtis(dm)

    dist = {}
    for i, x in enumerate(distance_matrix):
        for n, d in enumerate(distance_matrix[i]):
            if i < n:  # only needs one copy
                dist[(datasets[i], datasets[n])] = d

    #np.savetxt(os.path.join(args.output_dir, args.file_prefix+'_distance.mtx'), distance_matrix)
    if args.to_output == 'distance':
        print(distance_matrix)
    return dist
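A minimal way to exercise distance() from a script or test. The Namespace attributes and the JSON field names are exactly the ones the function reads; the file name and sample ids are made up for the sketch:

import json
from argparse import Namespace

doc = {
    'columns': [{'id': 'sample_A'}, {'id': 'sample_B'}, {'id': 'sample_C'}],
    'data': [[7, 4, 0],   # one column per dataset; distance() transposes this
             [1, 2, 8],   # so that each dataset becomes a row vector
             [0, 1, 0]],
}
with open('matrix.json', 'w') as fh:
    json.dump(doc, fh)

args = Namespace(matrix_file='matrix.json', metric='euclidean',
                 to_output='distance')  # 'distance' also prints the full matrix
pairs = distance(args)
# {('sample_A', 'sample_B'): d_ab, ('sample_A', 'sample_C'): d_ac, ...}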
Example #9
def calculate_distance(args):

	if args.file_format == 'json':
		try:
			json_data = open('./tmp/'+args.in_file)
		except IOError:
			json_data = open(args.in_file)
		except:
			print("NO FILE FOUND ERROR")
			sys.exit()

		data = json.load(json_data)
		json_data.close()
	else: # csv file
		# this doesn't work now
		with open('./tmp/'+args.in_file, 'rb') as csvfile:
			csv_data = csv.reader(csvfile, delimiter=',', quotechar='"')
			for row in csv_data:
				pass

	datasets = []
	for i in data['columns']:

		datasets.append(i['name'])

	z = np.array(data['data'])
	dm = np.transpose(z)

	if args.metric == 'bray_curtis':
		dist = dt.dist_bray_curtis(dm)
	elif args.metric == 'morisita_horn':
		dist = dt.dist_morisita_horn(dm)
	elif args.metric == 'canberra':
		dist = dt.dist_canberra(dm)
	elif args.metric == 'chisq':
		dist = dt.dist_chisq(dm)
	elif args.metric == 'chord':
		dist = dt.dist_chord(dm)
	elif args.metric == 'euclidean':
		dist = dt.dist_euclidean(dm)
	elif args.metric == 'gower':
		dist = dt.dist_gower(dm)
	elif args.metric == 'hellinger':
		dist = dt.dist_hellinger(dm)
	elif args.metric == 'kulczynski':
		dist = dt.dist_kulczynski(dm)
	elif args.metric == 'manhattan':
		dist = dt.dist_manhattan(dm)
	elif args.metric == 'abund_jaccard':
		dist = dt.dist_abund_jaccard(dm)
	elif args.metric == 'binary_jaccard':
		dist = dt.binary_dist_jaccard(dm)
	elif args.metric == 'pearson':
		dist = dt.dist_pearson(dm)
	elif args.metric == 'soergel':
		dist = dt.dist_soergel(dm)
	elif args.metric == 'spearman':
		dist = dt.dist_spearman_approx(dm)
	else:  # default
		dist = dt.dist_bray_curtis(dm)

	distance_matrix1 = {}
	distance_matrix2 = {}
	mat = []
	out_fp = open(args.out_file,'w')

	file_header_line = ','.join([x['name'] for x in data['columns']]) + '\n'

	out_fp.write(file_header_line)


	for row,line in enumerate(data['columns']):
		name = line['name']
		distance_matrix1[name] = {}
		file_data_line = name+','
		for col,d in enumerate(dist[row]):
			file_data_line += str(dist[row][col])+','
			distance_matrix1[name][data['columns'][col]['name']]  = dist[row][col]
			distance_matrix2[(name, data['columns'][col]['name'])]  = dist[row][col]
		file_data_line = file_data_line[:-1]+'\n'
		out_fp.write(file_data_line)

	out_fp.close()
	#if args.function == 'distance' or args.function == 'heatmap':
	print(json.dumps(distance_matrix1))

	arr = []
	for ds1 in distance_matrix1:
		print(ds1)
		tmp = []
		for ds2 in distance_matrix1[ds1]:
			val = distance_matrix1[ds1][ds2]
			tmp.append(val)
		arr.append(tmp)
	#np.array(arr)

	linkage_matrix = linkage(arr, "single")
	dendrogram(linkage_matrix, color_threshold=1, show_leaf_counts=True)
	#image_file = '/Users/avoorhis/node_projects/vamps-node.js/public/tmp_images/'+args.prefix+'.png'
	image_file = 'public/tmp_images/'+args.prefix+'.png'
	plt.savefig(image_file)
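Module-level imports that distance() and calculate_distance() appear to rely on. The module behind the dt alias is an assumption based on the dist_* names used above (PyCogent's distance_transform module exposes that set); linkage and dendrogram match scipy.cluster.hierarchy's signatures:

import csv
import json
import sys

import numpy as np
import matplotlib
matplotlib.use('Agg')  # render PNGs without a display, since only savefig is used
import matplotlib.pyplot as plt
from scipy.cluster.hierarchy import linkage, dendrogram

import cogent.maths.distance_transform as dt  # assumed home of the dist_* metrics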