import os, time
import numpy as np
import cluster_methods as clust



# Names of the per-cancer sub-directories processed below.
cancers = ["brca","coad","gbm","kirc","luad","lusc","ov","ucec"]

# Default location: an "Output" folder under the current working directory.
source_path = os.getcwd()+"/Output/"
# NOTE(review): machine-specific override of the default above; remove this
# line to run the script from a different checkout.
source_path = "/Users/rathikannan/Documents/hm_27k_profile/Output/"

# Results are written next to the inputs.
# NOTE(review): `out_path` was never assigned in this script; confirm that
# the input directory is the intended destination.
out_path = source_path

for cancer in cancers:

	# Read probe dict from txt
	probe_dict = clust.read_probe_dict(source_path+cancer+"/probe_dict_dis.txt")

	# Convert dictionary to matrix
	# Rows are samples
	# Columns are probe IDs
	# (pass the dictionary that was just read, not the builtin `dict` type)
	matrix_1 = clust.dict_to_matrix_1(probe_dict)

	# Calculate hamming distance between samples and write them to a
	# per-cancer file
	hamming_1 = clust.hamming_full_csv(matrix_1,out_path+cancer+"/probe_dis_hamming_samples.txt")

 

		

	
	
# Example #2 (listing separator, commented out so it is not evaluated as code)
# 0
# Build one fixed-length row of resolved values per sub-cancer file.
# NOTE(review): `sub_cancers`, `f_path` and `all_sub` are not assigned in the
# visible part of this script; they must be defined before this loop runs.
for c, cancer in enumerate(sub_cancers):

	# Directory name paired with this sub-cancer by position
	c_file = cancers[c]

	# Parenthesised single-argument form prints identically on Python 2 and 3
	print("Started: "+str(cancer))

	probe_dict = clust.read_probe_dict(f_path+c_file+"/"+cancer+".txt")

	# Fixed-length row: slots past len(cancer_m) stay None.
	# NOTE(review): 24981 is presumably the number of probes kept; confirm.
	final_ls = [None]*24981

	# rows index probes
	# columns index samples
	cancer_m = clust.dict_to_matrix_2(probe_dict)
	for i, probe_row in enumerate(cancer_m):
		final_ls[i] = clust.resolve(probe_row)

	all_sub[c] = final_ls



# Generate hamming distances and write to file
all_sub_hamm = clust.hamming_full_csv(all_sub,f_path+"all_sub_hamming.csv")