import os
import time

import numpy as np

import cluster_methods as clust

cancers = ["brca", "coad", "gbm", "kirc", "luad", "lusc", "ov", "ucec"]

# Root directory that holds one sub-directory per cancer type.
source_path = os.getcwd() + "/Output/"
# NOTE: this hard-coded absolute path overrides the cwd-based value above.
source_path = "/Users/rathikannan/Documents/hm_27k_profile/Output/"

# NOTE(review): ``out_path`` was referenced below but never defined, so the
# loop raised NameError on its first iteration. Writing the results next to
# the inputs is an assumption -- confirm the intended output directory.
out_path = source_path

for cancer in cancers:
    # Read probe dict from txt
    probe_dict = clust.read_probe_dict(
        source_path + cancer + "/probe_dict_dis.txt"
    )

    # Convert dictionary to matrix
    # Rows are samples
    # Columns are probe IDs
    # (Pass the dictionary that was just read; the builtin ``dict`` type was
    # being passed here by mistake.)
    matrix_1 = clust.dict_to_matrix_1(probe_dict)

    # Calculate hamming distance between samples
    hamming_1 = clust.hamming_full_csv(
        matrix_1, out_path + cancer + "/probe_dis_hamming_samples.txt"
    )
# Build one resolved probe list per cancer subtype, then compute Hamming
# distances across all of them.
# NOTE(review): sub_cancers, f_path and all_sub are not defined in this part
# of the script; they are presumably set up earlier -- confirm.
for c, cancer in enumerate(sub_cancers):
    # Directory name for this subtype, taken from the same position in cancers.
    c_file = cancers[c]
    # Parenthesised single-argument form prints the same text under Python 2.
    print("Started: " + str(cancer))
    probe_dict = clust.read_probe_dict(f_path + c_file + "/" + cancer + ".txt")

    # Fixed-length result list (24981 slots, presumably the probe count --
    # confirm); slots past the matrix's row count stay None.
    final_ls = [None] * 24981

    # rows index probes
    # columns index samples
    cancer_m = clust.dict_to_matrix_2(probe_dict)
    for i, probe_row in enumerate(cancer_m):
        final_ls[i] = clust.resolve(probe_row)
    all_sub[c] = final_ls

# Generate hamming distances and write to file
all_sub_hamm = clust.hamming_full_csv(all_sub, f_path + "all_sub_hamming.csv")