def get_dist(mtx, biom_data):
    """Compute distances for ``mtx``, or write split-metric matrices to disk.

    Behaviour is driven by the module-level ``args`` object:

    * ``args.create_splits`` falsy: the metric named by ``args.metric`` is
      applied to ``mtx`` (unrecognised names fall back to Bray-Curtis) and
      the result is passed through ``distance.squareform`` and returned.
    * ``args.create_splits`` truthy: five metrics are computed, ten pairwise
      "split" matrices are assembled (lower triangle from one metric, upper
      triangle from another) and each is written as a TSV file named
      ``<args.prefix>_distance_<lower>_<upper>.tsv`` inside ``args.basedir``.
      An empty dict is returned in this mode.

    Args:
        mtx: Input matrix handed unchanged to the ``dt`` distance functions.
        biom_data: Mapping with a ``'columns'`` list whose items each carry
            an ``'id'``; the ids label the rows and columns of the TSV output.

    Returns:
        The ``distance.squareform`` result, or ``{}`` in split mode.
    """
    sample_ids = [column['id'] for column in biom_data['columns']]

    if not args.create_splits:
        # (metric label, name of the matching function on ``dt``)
        metric_funcs = (
            ('bray_curtis', 'dist_bray_curtis'),
            ('morisita_horn', 'dist_morisita_horn'),
            ('canberra', 'dist_canberra'),
            ('jaccard', 'binary_dist_jaccard'),
            ('kulczynski', 'dist_kulczynski'),
        )
        # Unknown metric names silently use Bray-Curtis.
        func_name = next(
            (name for label, name in metric_funcs if args.metric == label),
            'dist_bray_curtis',
        )
        return distance.squareform(getattr(dt, func_name)(mtx, strict=False))

    # Full matrices for every metric, keyed by the short code used in the
    # output file names.
    full = {
        'jc': dt.binary_dist_jaccard(mtx, strict=False),
        'bc': dt.dist_bray_curtis(mtx, strict=False),
        'mh': dt.dist_morisita_horn(mtx, strict=False),
        'cb': dt.dist_canberra(mtx, strict=False),
        'kz': dt.dist_kulczynski(mtx, strict=False),
    }

    # The 10 metric combinations as (lower, upper).
    # Convention: the first (left) metric is the lower left on the heatmap.
    split_pairs = (
        ('jc', 'kz'), ('jc', 'cb'), ('jc', 'mh'), ('jc', 'bc'),
        ('kz', 'cb'), ('kz', 'mh'), ('kz', 'bc'),
        ('cb', 'mh'), ('cb', 'bc'),
        ('mh', 'bc'),
    )

    # Assemble every split matrix before touching the filesystem.
    combined = [
        (lower + '_' + upper, np.tril(full[lower]) + np.triu(full[upper]))
        for lower, upper in split_pairs
    ]

    for split_name, matrix in combined:
        frame = pd.DataFrame(data=matrix, index=sample_ids, columns=sample_ids)
        target = os.path.join(
            args.basedir, args.prefix + '_distance_' + split_name + '.tsv')
        frame.to_csv(target, sep='\t', encoding='utf-8')
    return {}
def test_dist_kulczynski(self):
    """Check dist_kulczynski against known results.

    The ``self.zeromtx`` fixture must yield a 4x4 all-zero result, and
    ``self.mtx1`` must match a 2x2 matrix whose off-diagonal entry was
    calculated by hand.
    """
    assert_allclose(dist_kulczynski(self.zeromtx), zeros((4, 4), "d"))

    # Expected off-diagonal entry for mtx1, computed by hand.
    off_diagonal = 1.0 - 1.0 / 2.0 * (3.0 / 4.0 + 3.0 / 23.1)
    mtx1expected = array(
        [
            [0, off_diagonal],
            [off_diagonal, 0],
        ],
        "d",
    )
    assert_allclose(dist_kulczynski(self.mtx1), mtx1expected)
def get_dist(metric, mtx):
    """Apply the named distance metric to ``mtx``.

    Args:
        metric: One of 'bray_curtis', 'morisita_horn', 'canberra',
            'jaccard' or 'kulczynski'. Any other value silently falls
            back to Bray-Curtis.
        mtx: Input matrix handed unchanged to the ``dt`` distance function.

    Returns:
        The output of ``distance.squareform`` applied to the computed
        distance matrix.
    """
    # (metric label, name of the matching function on ``dt``)
    known_metrics = (
        ('bray_curtis', 'dist_bray_curtis'),
        ('morisita_horn', 'dist_morisita_horn'),
        ('canberra', 'dist_canberra'),
        ('jaccard', 'binary_dist_jaccard'),
        ('kulczynski', 'dist_kulczynski'),
    )

    func_name = 'dist_bray_curtis'  # default for unrecognised metrics
    for label, candidate in known_metrics:
        if metric == label:
            func_name = candidate
            break

    compute = getattr(dt, func_name)
    return distance.squareform(compute(mtx, strict=False))
def get_dist(metric, mtx):
    """Return ``distance.squareform`` of the chosen distance matrix.

    Args:
        metric: Metric label: 'bray_curtis', 'morisita_horn', 'canberra',
            'jaccard' or 'kulczynski'. Unrecognised labels use Bray-Curtis.
        mtx: Input matrix passed straight to the selected ``dt`` function
            (always called with ``strict=False``).

    Returns:
        Whatever ``distance.squareform`` produces for the distance matrix.
    """
    # Ordered label -> ``dt`` attribute name pairs; the first match wins.
    choices = (
        ('bray_curtis', 'dist_bray_curtis'),
        ('morisita_horn', 'dist_morisita_horn'),
        ('canberra', 'dist_canberra'),
        ('jaccard', 'binary_dist_jaccard'),
        ('kulczynski', 'dist_kulczynski'),
    )
    selected = next(
        (attr for label, attr in choices if metric == label),
        'dist_bray_curtis',  # default
    )
    dist_matrix = getattr(dt, selected)(mtx, strict=False)
    return distance.squareform(dist_matrix)
def get_dist(mtx, biom_data):
    """Compute distances for ``mtx``, or dump split-metric matrices as TSV.

    The module-level ``args`` object selects the mode:

    * ``args.create_splits`` truthy: Jaccard, Bray-Curtis, Morisita-Horn,
      Canberra and Kulczynski matrices are computed, and ten combined
      matrices (lower triangle of one metric plus upper triangle of another)
      are written to ``<args.basedir>/tmp/<args.prefix>_distance_<pair>.tsv``.
      Returns an empty dict.
    * otherwise: the metric named by ``args.metric`` is computed (unknown
      names default to Bray-Curtis) and the result is returned after being
      passed through ``distance.squareform``.

    Args:
        mtx: Input matrix handed unchanged to the ``dt`` distance functions.
        biom_data: Mapping with a ``'columns'`` list whose items each carry
            an ``'id'``; the ids become the row/column labels of the output.

    Returns:
        ``{}`` in split mode, otherwise the ``distance.squareform`` result.
    """
    labels = [entry['id'] for entry in biom_data['columns']]

    if args.create_splits:
        jaccard = dt.binary_dist_jaccard(mtx, strict=False)
        bray_curtis = dt.dist_bray_curtis(mtx, strict=False)
        morisita_horn = dt.dist_morisita_horn(mtx, strict=False)
        canberra = dt.dist_canberra(mtx, strict=False)
        kulczynski = dt.dist_kulczynski(mtx, strict=False)

        def lower_upper(lower_src, upper_src):
            # Lower triangle from one metric, upper triangle from the other.
            return np.tril(lower_src) + np.triu(upper_src)

        # There are 10 combos.
        # Convention: the first (left) metric is the lower left on the heatmap.
        matrices = {
            'jc_kz': lower_upper(jaccard, kulczynski),
            'jc_cb': lower_upper(jaccard, canberra),
            'jc_mh': lower_upper(jaccard, morisita_horn),
            'jc_bc': lower_upper(jaccard, bray_curtis),
            'kz_cb': lower_upper(kulczynski, canberra),
            'kz_mh': lower_upper(kulczynski, morisita_horn),
            'kz_bc': lower_upper(kulczynski, bray_curtis),
            'cb_mh': lower_upper(canberra, morisita_horn),
            'cb_bc': lower_upper(canberra, bray_curtis),
            'mh_bc': lower_upper(morisita_horn, bray_curtis),
        }

        for spl, merged in matrices.items():
            frame = pd.DataFrame(data=merged, index=labels, columns=labels)
            out_file = os.path.join(
                args.basedir, 'tmp', args.prefix + '_distance_' + spl + '.tsv')
            frame.to_csv(out_file, sep='\t', encoding='utf-8')
        return {}

    # Single-metric mode: (metric label, name of the function on ``dt``).
    metric_table = (
        ('bray_curtis', 'dist_bray_curtis'),
        ('morisita_horn', 'dist_morisita_horn'),
        ('canberra', 'dist_canberra'),
        ('jaccard', 'binary_dist_jaccard'),
        ('kulczynski', 'dist_kulczynski'),
    )
    chosen = 'dist_bray_curtis'  # default
    for label, attr in metric_table:
        if args.metric == label:
            chosen = attr
            break
    return distance.squareform(getattr(dt, chosen)(mtx, strict=False))