def main():
    """Write contest submission files for experimental and in-silico data.

    For every (cell, stim) pair the combined class ("cls") and distance
    correlation ("dcor") CSV matrices are loaded from ``path`` and handed to
    ``save_files`` together with the constant 0.5 (presumably a dependency
    cutoff -- confirm against ``save_files``).  The in-silico matrices are
    then processed the same way.  Each output directory also receives a
    writeup file that contains a single space.
    """
    for cell in cells:
        for stim in stims:
            cls_d = mio.load(
                os.path.join(path, "%s.%s.%s.combined.csv" % (cell, stim, "cls")),
                delimit_c=",")
            dcor_d = mio.load(
                os.path.join(path, "%s.%s.%s.combined.csv" % (cell, stim, "dcor")),
                delimit_c=",")
            stem = "%s-%s-%s-Network" % (team, cell, stim)
            save_files(cls_d, dcor_d, 0.5, os.path.join(out_path_exp, stem))

    # Context manager: the handle is closed even if the write fails.
    writeup = os.path.join(out_path_exp, "%s-Network-Writeup.txt" % team)
    with open(writeup, "w") as fp:
        fp.write(" ")

    # The in-silico files are loaded with mio.load's default delimiter.
    cls_d = mio.load(os.path.join(path, "insilico.all.cls.combined.csv"))
    dcor_d = mio.load(os.path.join(path, "insilico.all.dcor.combined.csv"))
    #TeamName-Network-Insilico.sif
    #TeamName-Network-Insilico.eda
    stem = "%s-Network-Insilico" % (team)
    save_files(cls_d, dcor_d, 0.5, os.path.join(out_path_insil, stem))

    writeup = os.path.join(out_path_insil,
                           "%s-Network-Insilico-Writeup.txt" % team)
    with open(writeup, "w") as fp:
        fp.write(" ")
def main():
    """Write contest files and .dot plots for every (cell, stim) pair.

    For each pair the class, dcor, db and adjacency tab-delimited matrices
    are loaded from ``path`` and checked for matching shapes and labels.
    The adjacency matrix goes to ``adj_to_contest`` (output under
    ``out_path_exp``) and all four matrices go to ``pretty_dot``, which is
    given a .dot path under ``path``.  Finally a writeup file holding a
    single space is written to ``out_path_exp``.

    Raises:
      AssertionError: if the loaded matrices are not mutually aligned.
    """
    make_dir(out_path_exp)
    for cell in CELLS:
        for stim in STIMS:
            prefix = (cell, stim)
            cls_d = mio.load(os.path.join(path, "%s.%s.cls.tab" % prefix),
                             delimit_c="\t")
            dcor_d = mio.load(os.path.join(path, "%s.%s.dcor.tab" % prefix),
                              delimit_c="\t")
            db_d = mio.load(os.path.join(path, "%s.%s.db.tab" % prefix),
                            delimit_c="\t")
            adj_d = mio.load(os.path.join(path, "%s.%s.adj.tab" % prefix),
                             delimit_c="\t")

            # All matrices must be square and share one row/column labelling.
            assert cls_d['M'].shape == dcor_d['M'].shape
            assert cls_d['M'].shape[0] == cls_d['M'].shape[1]
            assert cls_d['row_ids'] == dcor_d['row_ids']
            assert cls_d['row_ids'] == cls_d['col_ids']
            assert dcor_d['row_ids'] == dcor_d['col_ids']
            assert adj_d['row_ids'] == dcor_d['col_ids']
            assert db_d['row_ids'] == dcor_d['col_ids']

            stem = "%s-%s-%s-Network" % (team, cell, stim)
            adj_to_contest(D=adj_d, path_stem=os.path.join(out_path_exp, stem))
            dot_file = "pretty_sep16_%s_%s.dot" % (cell, stim)
            pretty_dot(adj_d, db_d, dcor_d, cls_d, os.path.join(path, dot_file))

    # required as part of submission
    writeup = os.path.join(out_path_exp, "%s-Network-Writeup.txt" % team)
    # Context manager: the handle is closed even if the write fails.
    with open(writeup, "w") as fp:
        fp.write(" ")
def main():
    """Write contest submission files for experimental and in-silico data.

    Experimental data: for each (cell, stim) pair the class, dcor, xnet
    adjacency and inh-combined adjacency matrices are loaded from ``path``;
    the inh-combined adjacency goes to ``adj_to_contest`` and all four
    matrices go to ``pretty_dot``.  In-silico data: a single adjacency matrix
    is loaded and passed to ``adj_to_contest``.  Each output directory also
    receives a writeup file that contains a single space.
    """
    # Experimental Data
    # ----------------------------------------
    make_dir(out_path_exp)
    for cell in cells:
        for stim in stims:
            prefix = (cell, stim)
            cls_d = mio.load(os.path.join(path, "%s.%s.cls.tab" % prefix),
                             delimit_c="\t")
            dcor_d = mio.load(os.path.join(path, "%s.%s.dcor.tab" % prefix),
                              delimit_c="\t")
            # File names look like BT20.EGF.xnet.adj.tab
            xnet_d = mio.load(os.path.join(path, "%s.%s.xnet.adj.tab" % prefix),
                              delimit_c="\t")
            adj_d = mio.load(
                os.path.join(path, "%s.%s.inh-combined.adj.tab" % prefix),
                delimit_c="\t")

            stem = "%s-%s-%s-Network" % (team, cell, stim)
            adj_to_contest(D=adj_d, path_stem=os.path.join(out_path_exp, stem))
            dot_file = "pretty_sep9_%s_%s.dot" % (cell, stim)
            pretty_dot(adj_d, xnet_d, dcor_d, cls_d,
                       os.path.join(path, dot_file))

    # required as part of submission
    writeup = os.path.join(out_path_exp, "%s-Network-Writeup.txt" % team)
    # Context manager: the handle is closed even if the write fails.
    with open(writeup, "w") as fp:
        fp.write(" ")

    # In Silico Data
    # ----------------------------------------
    make_dir(out_path_insil)
    adj_d = mio.load(os.path.join(path, "insilico.all.sep9.adj.tab"),
                     delimit_c="\t")
    #TeamName-Network-Insilico.sif
    #TeamName-Network-Insilico.eda
    stem = "%s-Network-Insilico" % team
    adj_to_contest(adj_d, os.path.join(out_path_insil, stem))

    writeup = os.path.join(out_path_insil,
                           "%s-Network-Insilico-Writeup.txt" % team)
    with open(writeup, "w") as fp:
        fp.write(" ")
def main(): script.main(fname="D.expr.gold.CLS.apr.19.tab") D = mio.load("D.expr.gold.CLS.apr.19.tab") CLS = all_pairs_bool_dist(D['M']) CHECK = mio.load("gold.R.dists.tab")['M'] print CLS print CHECK assert np.all(CLS == CHECK)
def load_extend_training_datas():
    """Load the extended training set.

    Returns:
      (features_mat, labels_mat): the feature matrix from
      'extend_train_features.pkl' divided by 255.0 (as ``np.mat``), and a
      (number of labels x 10) array with a 1 in the column given by each
      label from 'extend_train_labels.pkl' and 0 elsewhere.
    """
    raw_features = matrix_io.load('extend_train_features.pkl', float)['M']
    raw_labels = matrix_io.load('extend_train_labels.pkl', np.int32)['M']

    features_mat = np.mat(raw_features) / 255.0

    # One-hot encode: row i gets a 1 in the column named by label i.
    labels_mat = np.zeros([len(raw_labels), 10])
    for row, label in enumerate(raw_labels):
        labels_mat[row, label] = 1
    return features_mat, labels_mat
def main(): GOLD_D = mio.load("data/gold_standard_network.csv", dtype=str) MIM_D = mio.load("data/mim_msa_cov.csv", dtype=str) # HACK for biovis: load no weaks YATES_D = mio.load("data/biovis_gold/gold_0.32_dot_noweak_noclust.adj.csv", dtype=str) Y2_D = mio.load("data/biovis_gold/gold_0.32_dot_noweak_noclust.adj.csv", dtype=str) #YATES_D = mio.load("data/gold_0.32_dot_nw.adj.csv", dtype=str) #Y2_D = mio.load("data/gold.paths.dcor0.32.k2.tab", dtype=str) assert GOLD_D["row_ids"] == GOLD_D["col_ids"] assert MIM_D["row_ids"] == MIM_D["col_ids"] assert YATES_D["row_ids"] == YATES_D["col_ids"] assert GOLD_D["row_ids"] == MIM_D["row_ids"] assert Y2_D["row_ids"] == Y2_D["col_ids"] assert Y2_D["row_ids"] == YATES_D["row_ids"] # align YATES and GOLD GOLDi = [ YATES_D['row_ids'].index(x) if x in YATES_D['row_ids'] else None for x in GOLD_D['row_ids'] ] GOLDiy = filter(lambda s: s is not None, GOLDi) GOLDin = [i for i,s in enumerate(GOLDi) if s is not None] #print np.array(YATES_D['row_ids'])[GOLDiy] #print np.array(GOLD_D['row_ids'])[GOLDin] #print MIM_D['M'] #print GOLD_D['M'] #r = test(GOLD_D['M'], MIM_D['M'], GOLD_D['row_ids']) #print r # fp = open("/Users/z/Desktop/mim-cov.csv", "w") # fp.write(",".join([""]+GOLD_D['col_ids'])+"\n") # for i,row in enumerate(r['R']): # fp.write(GOLD_D['row_ids'][i]+",") # fp.write(",".join(row)+"\n") # fp.close() # COMPARE GOLD WITH MIM print "GOLD VS MIM" R_G_MIM = test_mods(GOLD_D['M'], MIM_D['M'], GOLD_D['row_ids']) print # COMPARE GOLD WITH YATES assert list(np.array(YATES_D['row_ids'])[GOLDiy]) == list(np.array(GOLD_D['row_ids'])[GOLDin]) row_ids=list(np.array(GOLD_D['row_ids'])[GOLDin]) print G = GOLD_D['M'][GOLDin,:][:,GOLDin] Y = YATES_D['M'][GOLDiy,:][:,GOLDiy] Y2 = Y2_D['M'][GOLDiy,:][:,GOLDiy] print "GOLD VS YATES EDGES" R_G_Y = test_mods(G,Y, row_ids) print print "GOLD VS YATES WITH PATHS" R_G_Y2 = test_mods(G,Y2,row_ids) print
def main(fname=None, pkl=True, **kwds): assert fname if isinstance(pkl, basestring) and pkl.lower() in ('f','false','none'): pkl = False if 'b' in kwds: kwds['b'] = float(kwds['b']) if 'z_th' in kwds: kwds['z_th'] = float(kwds['z_th']) if 'err_th' in kwds: kwds['err_th'] = float(kwds['err_th']) if 'd_th' in kwds: kwds['d_th'] = float(kwds['d_th']) if 'r_th' in kwds: kwds['r_th'] = float(kwds['r_th']) print "Loading data..." D = mio.load(fname) print "Computing all pairs boolean class for a (%d x %d) data matrix (%d x %d result matrix)..." % \ (D['M'].shape[0], D['M'].shape[1], D['M'].shape[0], D['M'].shape[0]) CLS, steps, b = compute_all_bool(D['M'], **kwds) z = kwds.get('z_th', 3.0) r = kwds.get('r_th', 2/3) if r < 0.5: print "WARNING: r<0.5, (r=%f)... are you sure?" % r err = kwds.get('err_th', 0.1) fname_out = '%s.b%.4f.z%.2f.r%.2f.err%.2f.bool.tab' % (fname, b, z, r, err) print "Saving %s..." % (fname_out) mio.save(CLS, fname_out, fmt="%d", row_ids=D['row_ids'], col_ids=D['row_ids']) steps_fname = fname+".steps.txt" print "Saving high/low thresholds to %s in original row order..." % steps_fname open(steps_fname,"w").write("\n".join(("%f"%x for x in steps))) if pkl: fname_pkl_out = fname_out.rpartition('.')[0]+'.pkl' print "Saving %s..." % (fname_pkl_out) pickle.dump(CLS, open(fname_pkl_out,"w"), protocol=-1)
def main(fname=None, pkl=True, **kwds): assert fname if isinstance(pkl, basestring) and pkl.lower() in ('f', 'false', 'none'): pkl = False if 'err' in kwds: kwds['err'] = int(kwds['err']) if 'th' in kwds: kwds['th'] = float(kwds['th']) D = mio.load(fname) print "Computing all pairs weak boolean class..." print "Computing all pairs weak class for a (%d x %d) data matrix (%d x %d result matrix)..." % \ (D['M'].shape[0], D['M'].shape[1], D['M'].shape[0], D['M'].shape[0]) WEAK, err, th = compute_all_weak(D['M'], **kwds) print "Used parameters err=%d, cutoff th=%f" % (err, th) fname_out = "%s.err%d.th%.4f.weak.tab" % (fname, err, th) print "Saving %s..." % (fname_out) mio.save(WEAK, fname_out, fmt="%d", row_ids=D['row_ids'], col_ids=D['row_ids']) if pkl: fname_pkl_out = fname_out.rpartition('.')[0] + '.pkl' print "Saving %s..." % (fname_pkl_out) pickle.dump(WEAK, open(fname_pkl_out, "w"), protocol=-1) return WEAK
def main(fname=None, pkl=True, algorithm="3", outtag="", **kwds): assert fname if isinstance(pkl, basestring) and pkl.lower() in ('f','false','none'): pkl = False print "Loading data from %s..." % fname D = mio.load(fname) print "Computing all pairs (euclidean) distance correlation from a (%d x %d) data matrix to a (%d x %d) result matrix..." % (D['M'].shape[0], D['M'].shape[1], D['M'].shape[0], D['M'].shape[0]) print "Computing all pairs (euclidean) distance correlation..." if algorithm == "1": print "Using Algorithm 1: single dot product, n^2*m memory" DCOR = compute_all_dcor(D['M'], **kwds) elif algorithm == "2": print "Using Algorithm 2: multiple dot products, n*m memory" DCOR = compute_all_dcor_2(D['M'], **kwds) elif algorithm == "3": print "Using Algorithm 3: multiple dot products, n*m memory, n choose 2 savings" DCOR = compute_all_dcor_3(D['M'], **kwds) else: raise Exception, "Unknown algorithm %s" % algorithm if outtag and outtag[-1] != ".": outtag += "." fname_out = '%s.%sdcor.tab' % (fname, outtag) print "Saving %s..." % (fname_out) mio.save(DCOR, fname_out, fmt="%.4f", row_ids=D['row_ids'], col_ids=D['row_ids']) if pkl: fname_pkl_out = fname_out.rpartition('.')[0]+'.pkl' print "Saving %s..." % (fname_pkl_out) pickle.dump(DCOR, open(fname_pkl_out,"w"), protocol=-1) return DCOR
def main(fname=None, pkl=True, **kwds): assert fname if isinstance(pkl, basestring) and pkl.lower() in ('f', 'false', 'none'): pkl = False if 'b' in kwds: kwds['b'] = float(kwds['b']) if 'z_th' in kwds: kwds['z_th'] = float(kwds['z_th']) if 'err_th' in kwds: kwds['err_th'] = float(kwds['err_th']) if 'd_th' in kwds: kwds['d_th'] = float(kwds['d_th']) if 'r_th' in kwds: kwds['r_th'] = float(kwds['r_th']) print "Loading data..." D = mio.load(fname) print "Computing all pairs boolean class for a (%d x %d) data matrix (%d x %d result matrix)..." % \ (D['M'].shape[0], D['M'].shape[1], D['M'].shape[0], D['M'].shape[0]) CLS, steps, b = compute_all_bool(D['M'], **kwds) z = kwds.get('z_th', 3.0) r = kwds.get('r_th', 2 / 3) if r < 0.5: print "WARNING: r<0.5, (r=%f)... are you sure?" % r err = kwds.get('err_th', 0.1) fname_out = '%s.b%.4f.z%.2f.r%.2f.err%.2f.bool.tab' % (fname, b, z, r, err) print "Saving %s..." % (fname_out) mio.save(CLS, fname_out, fmt="%d", row_ids=D['row_ids'], col_ids=D['row_ids']) steps_fname = fname + ".steps.txt" print "Saving high/low thresholds to %s in original row order..." % steps_fname open(steps_fname, "w").write("\n".join(("%f" % x for x in steps))) if pkl: fname_pkl_out = fname_out.rpartition('.')[0] + '.pkl' print "Saving %s..." % (fname_pkl_out) pickle.dump(CLS, open(fname_pkl_out, "w"), protocol=-1)
def main():
    """Write contest submission files for experimental and in-silico data.

    Experimental data: for each (cell, stim) pair the class, dcor, xnet
    adjacency and inh-combined adjacency matrices are loaded from ``path``;
    the inh-combined adjacency goes to ``adj_to_contest`` and all four
    matrices go to ``pretty_dot``.  In-silico data: a single adjacency matrix
    is loaded and passed to ``adj_to_contest``.  Each output directory also
    receives a writeup file that contains a single space.
    """
    # Experimental Data
    # ----------------------------------------
    make_dir(out_path_exp)
    for cell in cells:
        for stim in stims:
            prefix = (cell, stim)
            cls_d = mio.load(os.path.join(path, "%s.%s.cls.tab" % prefix),
                             delimit_c="\t")
            dcor_d = mio.load(os.path.join(path, "%s.%s.dcor.tab" % prefix),
                              delimit_c="\t")
            # File names look like BT20.EGF.xnet.adj.tab
            xnet_d = mio.load(os.path.join(path, "%s.%s.xnet.adj.tab" % prefix),
                              delimit_c="\t")
            adj_d = mio.load(
                os.path.join(path, "%s.%s.inh-combined.adj.tab" % prefix),
                delimit_c="\t")

            stem = "%s-%s-%s-Network" % (team, cell, stim)
            adj_to_contest(D=adj_d, path_stem=os.path.join(out_path_exp, stem))
            dot_file = "pretty_sep9_%s_%s.dot" % (cell, stim)
            pretty_dot(adj_d, xnet_d, dcor_d, cls_d,
                       os.path.join(path, dot_file))

    # required as part of submission
    writeup = os.path.join(out_path_exp, "%s-Network-Writeup.txt" % team)
    # Context manager: the handle is closed even if the write fails.
    with open(writeup, "w") as fp:
        fp.write(" ")

    # In Silico Data
    # ----------------------------------------
    make_dir(out_path_insil)
    adj_d = mio.load(os.path.join(path, "insilico.all.sep9.adj.tab"),
                     delimit_c="\t")
    #TeamName-Network-Insilico.sif
    #TeamName-Network-Insilico.eda
    stem = "%s-Network-Insilico" % team
    adj_to_contest(adj_d, os.path.join(out_path_insil, stem))

    writeup = os.path.join(out_path_insil,
                           "%s-Network-Insilico-Writeup.txt" % team)
    with open(writeup, "w") as fp:
        fp.write(" ")
def main(weighted=True, min_d=0.4, colored=False): if isinstance(weighted, basestring) and weighted.lower() in ("f", "false", "0", "na","null"): weighted = False if isinstance(colored, basestring) and colored.lower() in ("f", "false", "0", "na","null"): colored = False min_d = float(min_d) CLS = mio.load("data/gold.celegans.gse2180.cls.csv") DCOR = mio.load("data/gold.celegans.gse2180.dcor.csv") if colored: colors = load_colors("data/gold.celegans.gse2180.phase.colors.txt") else: colors = None print "digraph {" print FONT_STRING if colors: for k,v in colors.items(): print '"%s"[color="%s",style=filled,fontcolor=white]' % (k, v) for edge in csv_to_graphvis(CLS, DCOR, min_d, weighted=weighted): print edge print "}"
def main():
    """Write contest submission files for experimental and in-silico data.

    For every (cell, stim) pair the combined class ("cls") and distance
    correlation ("dcor") CSV matrices are loaded from ``path`` and handed to
    ``save_files`` together with the constant 0.5 (presumably a dependency
    cutoff -- confirm against ``save_files``).  The in-silico matrices are
    then processed the same way.  Each output directory also receives a
    writeup file that contains a single space.
    """
    for cell in cells:
        for stim in stims:
            cls_d = mio.load(
                os.path.join(path, "%s.%s.%s.combined.csv" % (cell, stim, "cls")),
                delimit_c=",")
            dcor_d = mio.load(
                os.path.join(path, "%s.%s.%s.combined.csv" % (cell, stim, "dcor")),
                delimit_c=",")
            stem = "%s-%s-%s-Network" % (team, cell, stim)
            save_files(cls_d, dcor_d, 0.5, os.path.join(out_path_exp, stem))

    # Context manager: the handle is closed even if the write fails.
    writeup = os.path.join(out_path_exp, "%s-Network-Writeup.txt" % team)
    with open(writeup, "w") as fp:
        fp.write(" ")

    # The in-silico files are loaded with mio.load's default delimiter.
    cls_d = mio.load(os.path.join(path, "insilico.all.cls.combined.csv"))
    dcor_d = mio.load(os.path.join(path, "insilico.all.dcor.combined.csv"))
    #TeamName-Network-Insilico.sif
    #TeamName-Network-Insilico.eda
    stem = "%s-Network-Insilico" % (team)
    save_files(cls_d, dcor_d, 0.5, os.path.join(out_path_insil, stem))

    writeup = os.path.join(out_path_insil,
                           "%s-Network-Insilico-Writeup.txt" % team)
    with open(writeup, "w") as fp:
        fp.write(" ")
def main(): ADJ_D = mio.load("data/gold_0.32_dot_nw.adj.csv") M = np.array(ADJ_D['M'],dtype=int) print M P = np.zeros(M.shape, dtype=np.int) for i in xrange(M.shape[1]): js = list(get_connected(M, i, k=2)) P[js,i] = 1 print P print M==P assert ADJ_D['row_ids'] == ADJ_D['col_ids'] mio.save(P, open("data/gold.paths.dcor0.32.k2.tab","w"), ftype="txt", row_ids=ADJ_D['row_ids'], col_ids=ADJ_D['col_ids'], fmt="%d")
def main():
    """Write contest files and .dot plots for every (cell, stim) pair.

    For each pair the class, dcor, db and adjacency tab-delimited matrices
    are loaded from ``path`` and checked for matching shapes and labels.
    The adjacency matrix goes to ``adj_to_contest`` (output under
    ``out_path_exp``) and all four matrices go to ``pretty_dot``, which is
    given a .dot path under ``path``.  Finally a writeup file holding a
    single space is written to ``out_path_exp``.

    Raises:
      AssertionError: if the loaded matrices are not mutually aligned.
    """
    make_dir(out_path_exp)
    for cell in CELLS:
        for stim in STIMS:
            prefix = (cell, stim)
            cls_d = mio.load(os.path.join(path, "%s.%s.cls.tab" % prefix),
                             delimit_c="\t")
            dcor_d = mio.load(os.path.join(path, "%s.%s.dcor.tab" % prefix),
                              delimit_c="\t")
            db_d = mio.load(os.path.join(path, "%s.%s.db.tab" % prefix),
                            delimit_c="\t")
            adj_d = mio.load(os.path.join(path, "%s.%s.adj.tab" % prefix),
                             delimit_c="\t")

            # All matrices must be square and share one row/column labelling.
            assert cls_d['M'].shape == dcor_d['M'].shape
            assert cls_d['M'].shape[0] == cls_d['M'].shape[1]
            assert cls_d['row_ids'] == dcor_d['row_ids']
            assert cls_d['row_ids'] == cls_d['col_ids']
            assert dcor_d['row_ids'] == dcor_d['col_ids']
            assert adj_d['row_ids'] == dcor_d['col_ids']
            assert db_d['row_ids'] == dcor_d['col_ids']

            stem = "%s-%s-%s-Network" % (team, cell, stim)
            adj_to_contest(D=adj_d, path_stem=os.path.join(out_path_exp, stem))
            dot_file = "pretty_sep16_%s_%s.dot" % (cell, stim)
            pretty_dot(adj_d, db_d, dcor_d, cls_d, os.path.join(path, dot_file))

    # required as part of submission
    writeup = os.path.join(out_path_exp, "%s-Network-Writeup.txt" % team)
    # Context manager: the handle is closed even if the write fails.
    with open(writeup, "w") as fp:
        fp.write(" ")
def main(fname=None, as_rows=True, use_weak=False): assert fname if isinstance(as_rows, basestring) and as_rows.lower() in ('f', 'false', 'none'): as_rows = False if not use_weak: print "Loading boolean class enumeration matrix", fname else: print "Loading weak class enumeration matrix", fname D = mio.load(fname, dtype=np.int) M = D['M'] # verify that enumeration matrices look credible if not use_weak: Z = np.in1d(M, np.array([0, 1, 2, 3, 4, 5, 6, 7])) if not np.all(Z): print "%d invalid values in M." % (np.sum(~Z)) print "up to 20 unrecognized values include..." zz = M[Z] print np.unique(zz)[1:np.min(20, len(zz))] else: assert np.all(np.in1d(M, np.array([0, 1, 2, 3, 4, 5]))) if not as_rows: print "Computing distance between all pairs of columns..." M = np.transpose(M) else: print "Computing distance between all pairs of rows..." if not use_weak: print "Computing Boolean Class distance" DIST = all_pairs_bool_dist(M) fname_out = fname + '.booldist.tab' else: print "Computing Weak Class distance" DIST = all_pairs_weak_dist(M) fname_out = fname + '.weakdist.tab' print "Saving boolean class distance matrix as", fname_out if as_rows: ids = D.get('row_ids', None) mio.save(DIST, fp=fname_out, row_ids=ids, col_ids=ids, fmt="%d") else: ids = D.get('col_ids', None) mio.save(DIST, fp=fname_out, row_ids=ids, col_ids=ids, fmt="%d") return fname_out
def main(): # 1: load adj matrix ADJ_D = mio.load(ADJ_FNAME, dtype=np.int, force_row_ids=True, force_col_ids=True) assert len(ADJ_D['row_ids']) == len(ADJ_D['col_ids']) assert len(ADJ_D['row_ids']) == ADJ_D['M'].shape[0] assert ADJ_D['M'].shape[0] == ADJ_D['M'].shape[1] # 2.1: find paths k=3 P3 = paths.fill_paths(ADJ_D["M"], k=3) mio.save(Pinf, open("data/all_k61_0.5_dot_nw.adj.paths.k3.csv","w"), ftype="txt", row_ids=ADJ_D['row_ids'], col_ids=ADJ_D['col_ids'], fmt="%d", delimit_c=",") # 2.2: find paths k=2 P2 = paths.fill_paths(ADJ_D["M"], k=2) mio.save(Pinf, open("data/all_k61_0.5_dot_nw.adj.paths.k2.csv","w"), ftype="txt", row_ids=ADJ_D['row_ids'], col_ids=ADJ_D['col_ids'], fmt="%d", delimit_c=",") # 3: load ranks ranks = load_ranks(open(RANKS_FNAME)) print ranks
def main(fname=None, as_rows=True, use_weak=False): assert fname if isinstance(as_rows,basestring) and as_rows.lower() in ('f','false','none'): as_rows = False if not use_weak: print "Loading boolean class enumeration matrix", fname else: print "Loading weak class enumeration matrix", fname D = mio.load(fname, dtype=np.int) M = D['M'] # verify that enumeration matrices look credible if not use_weak: Z = np.in1d(M,np.array([0,1,2,3,4,5,6,7])) if not np.all(Z): print "%d invalid values in M." % (np.sum(~Z)) print "up to 20 unrecognized values include..." zz = M[Z] print np.unique(zz)[1:np.min(20, len(zz))] else: assert np.all(np.in1d(M,np.array([0,1,2,3,4,5]))) if not as_rows: print "Computing distance between all pairs of columns..." M = np.transpose(M) else: print "Computing distance between all pairs of rows..." if not use_weak: print "Computing Boolean Class distance" DIST = all_pairs_bool_dist(M) fname_out = fname+'.booldist.tab' else: print "Computing Weak Class distance" DIST = all_pairs_weak_dist(M) fname_out = fname+'.weakdist.tab' print "Saving boolean class distance matrix as", fname_out if as_rows: ids = D.get('row_ids',None) mio.save(DIST, fp=fname_out, row_ids=ids, col_ids=ids, fmt="%d") else: ids = D.get('col_ids',None) mio.save(DIST, fp=fname_out, row_ids=ids, col_ids=ids, fmt="%d") return fname_out
def main(fname=None, pkl=True, **kwds): assert fname if isinstance(pkl, basestring) and pkl.lower() in ('f','false','none'): pkl = False if 'err' in kwds: kwds['err'] = int(kwds['err']) if 'th' in kwds: kwds['th'] = float(kwds['th']) D = mio.load(fname) print "Computing all pairs weak boolean class..." print "Computing all pairs weak class for a (%d x %d) data matrix (%d x %d result matrix)..." % \ (D['M'].shape[0], D['M'].shape[1], D['M'].shape[0], D['M'].shape[0]) WEAK, err, th = compute_all_weak(D['M'], **kwds) print "Used parameters err=%d, cutoff th=%f" % (err, th) fname_out = "%s.err%d.th%.4f.weak.tab" % (fname, err, th) print "Saving %s..." % (fname_out) mio.save(WEAK, fname_out, fmt="%d", row_ids=D['row_ids'], col_ids=D['row_ids']) if pkl: fname_pkl_out = fname_out.rpartition('.')[0]+'.pkl' print "Saving %s..." % (fname_pkl_out) pickle.dump(WEAK, open(fname_pkl_out,"w"), protocol=-1) return WEAK
def main(): DCOR = script.main(fname=FNAME) COR = compute_all_ppc_numpy(M) SCI = compute_all_pcc_scipy(M) print np.all(np.abs(COR - SCI) < 0.0000000000001) DCOR = compute_all_dcor(M) DCORL = loop_dcor(M) print np.all(np.abs(DCOR - DCORL) < 0.0000000000001) DCOR2 = compute_all_dcor_2(M) print np.all(np.abs(DCOR - DCOR2) < 0.0000000000001) DCOR3 = compute_all_dcor_3(M) print np.all(np.abs(DCOR - DCOR3) < 0.0000000000001) print np.all(np.abs(DCOR - DCOR3) < 0.1) KUN = mio.load("kungold.tab")['M'] print KUN print "kun", np.all(np.abs(DCOR - KUN) < 0.001) print "kun", np.all(np.abs(DCORL - KUN) < 0.001) print DCOR[5, 3], DCOR[3, 5] print DCOR3[5, 3], DCOR3[3, 5]
def main(): v = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]) H = np.matrix([[0, 0, 0, 1], [1, 1, 1, 0], [0, 0, 0, 1]]) L = np.matrix([[1, 1, 0, 0], [0, 0, 0, 1], [1, 1, 1, 0]]) print stepfit(v) D = mio.load("nice.may3.Eg.expr.gold.celegans.csv") M = D['M'] Steps = [] for row in M: Steps.append(stepfit(row)[0]) b = 0.3 #CLS = all_pairs_bool(M, Steps, b, z_th=2.7) CLS = all_pairs_bool(M, Steps, b) print CLS print D.keys() mio.save(CLS, "nice.test.tab", fmt="%d", row_ids=D['row_ids'], col_ids=D['row_ids']) print "OK: saved result to nice.test.tab"
def main(): DCOR = script.main(fname=FNAME) COR = compute_all_ppc_numpy(M) SCI = compute_all_pcc_scipy(M) print np.all(np.abs(COR-SCI) < 0.0000000000001) DCOR = compute_all_dcor(M) DCORL = loop_dcor(M) print np.all(np.abs(DCOR-DCORL) < 0.0000000000001) DCOR2 = compute_all_dcor_2(M) print np.all(np.abs(DCOR-DCOR2) < 0.0000000000001) DCOR3 = compute_all_dcor_3(M) print np.all(np.abs(DCOR-DCOR3) < 0.0000000000001) print np.all(np.abs(DCOR-DCOR3) < 0.1) KUN = mio.load("kungold.tab")['M'] print KUN print "kun", np.all(np.abs(DCOR-KUN) < 0.001) print "kun", np.all(np.abs(DCORL-KUN) < 0.001) print DCOR[5,3], DCOR[3,5] print DCOR3[5,3], DCOR3[3,5]
def main(fname=None, n=100000, dep="dcor", do_abs=False): assert fname n = int(n) if isinstance(do_abs, basestring): do_abs = not do_abs.lower() in ('f','false','none') D = mio.load(fname) M = D['M'] #POOL = multiprocessing.Pool() if dep=="dcor": import dcor d = dcor.dcor f = lambda x: f_dep(M,d) hist_bins = 20 hist_range = (0,1) elif dep=="pcc": import scipy.stats d = lambda a,b: scipy.stats.pearsonr(a,b)[0] f = lambda x: f_dep(M,d) if do_abs: hist_bins = 20 hist_range = (0,1) else: hist_bins = 40 hist_range = (-1,1) else: raise Exception, "Unrecognized dependency measure '%s'" % dep #Z = np.array(POOL.map(f, xrange(n))) Z = np.array(map(f, xrange(n))) if do_abs: Z = np.abs(Z) Z.sort() i = 1 print "n=%d" % n print while i<=n: print i, i/n, Z[-i] i *= 10 print print np.histogram(Z, range=hist_range, bins=hist_bins)
def main(): # In Silico Data Only # ---------------------------------------- INFILE = os.path.join(path,"insilico.manual.sep9.adj.tab") make_dir(out_path_insil) ADJ_D = mio.load(INFILE, delimit_c="\t") stem = "%s-Network-Insilico"%team #submission files into submission directory #RM = directed_unweighted_trifilt(ADJ_D) #print np.sum(RM) #ADJ_D['M'][RM==1] = 0 adj_to_contest(ADJ_D, os.path.join(out_path_insil,stem)) # nice signed directed network visualization directed_dot(ADJ_D, outfile=INFILE+".dot") print "Wrote .dot visualization to %s" % INFILE+".dot" # token writeup into submission directory fp = open(os.path.join(out_path_insil,"%s-Network-Insilico-Writeup.txt"%team),"w") fp.write(" ") fp.close() print "Wrote submission files to %s" % out_path_insil
def main(): # In Silico Data Only # ---------------------------------------- INFILE = os.path.join(path, "insilico.manual.sep9.adj.tab") make_dir(out_path_insil) ADJ_D = mio.load(INFILE, delimit_c="\t") stem = "%s-Network-Insilico" % team #submission files into submission directory #RM = directed_unweighted_trifilt(ADJ_D) #print np.sum(RM) #ADJ_D['M'][RM==1] = 0 adj_to_contest(ADJ_D, os.path.join(out_path_insil, stem)) # nice signed directed network visualization directed_dot(ADJ_D, outfile=INFILE + ".dot") print "Wrote .dot visualization to %s" % INFILE + ".dot" # token writeup into submission directory fp = open( os.path.join(out_path_insil, "%s-Network-Insilico-Writeup.txt" % team), "w") fp.write(" ") fp.close() print "Wrote submission files to %s" % out_path_insil
def main(fname=None, pkl=True, algorithm="3", outtag="", **kwds): assert fname if isinstance(pkl, basestring) and pkl.lower() in ('f', 'false', 'none'): pkl = False print "Loading data from %s..." % fname D = mio.load(fname) print "Computing all pairs (euclidean) distance correlation from a (%d x %d) data matrix to a (%d x %d) result matrix..." % ( D['M'].shape[0], D['M'].shape[1], D['M'].shape[0], D['M'].shape[0]) print "Computing all pairs (euclidean) distance correlation..." if algorithm == "1": print "Using Algorithm 1: single dot product, n^2*m memory" DCOR = compute_all_dcor(D['M'], **kwds) elif algorithm == "2": print "Using Algorithm 2: multiple dot products, n*m memory" DCOR = compute_all_dcor_2(D['M'], **kwds) elif algorithm == "3": print "Using Algorithm 3: multiple dot products, n*m memory, n choose 2 savings" DCOR = compute_all_dcor_3(D['M'], **kwds) else: raise Exception, "Unknown algorithm %s" % algorithm if outtag and outtag[-1] != ".": outtag += "." fname_out = '%s.%sdcor.tab' % (fname, outtag) print "Saving %s..." % (fname_out) mio.save(DCOR, fname_out, fmt="%.4f", row_ids=D['row_ids'], col_ids=D['row_ids']) if pkl: fname_pkl_out = fname_out.rpartition('.')[0] + '.pkl' print "Saving %s..." % (fname_pkl_out) pickle.dump(DCOR, open(fname_pkl_out, "w"), protocol=-1) return DCOR
def load_extend_test_datas():
    """Load the extended test features.

    Returns:
      The matrix stored in 'extend_test_features.pkl' as ``np.mat``, with
      every value divided by 255.0.
    """
    raw_features = matrix_io.load('extend_test_features.pkl', float)['M']
    return np.mat(raw_features) / 255.0
def matrix_files_to_flat_graphviz_file(cls_fname=None, dcor_fname=None, out_fname=None, ignore_fname=None, color_fname=None, weak_fname=None, min_d=0, weighted=True, plot_na=False, rank_cluster_fname=None, ignore_nodes=None, do_rank_clust=True, rank_clust_names=False, **kwds): """From matrix file names and parameters, write resulting graphviz output to file.""" assert cls_fname and dcor_fname and out_fname weighted = str_true_false(weighted) plot_na = str_true_false(plot_na) min_d = float(min_d) assert min_d >= 0 if ignore_nodes is not None: ignore_nodes = ignore_nodes.split(',') if do_rank_clust in ("F",'f','false','FALSE','None', "False", False, None): do_rank_clust = False else: do_rank_clust = True CLS_D = mio.load(cls_fname) DCOR_D = mio.load(dcor_fname) if color_fname: node_styles = load_colors_as_node_style_dict(open(color_fname)) else: node_styles = None if rank_clust_names in ("F",'f','false','FALSE','None', False, None): rank_clust_names = None else: rank_clust_names = CLS_D['row_ids'] if rank_cluster_fname is not None: rank_clusters = load_rank_clusters(open(rank_cluster_fname), rank_clust_names) else: rank_clusters = None print rank_clusters if not do_rank_clust: print "kill ranks..." 
rank_clusters = None assert CLS_D['row_ids'] == CLS_D['col_ids'] assert DCOR_D['row_ids'] == DCOR_D['col_ids'] assert CLS_D['row_ids'] == DCOR_D['row_ids'] names = CLS_D['row_ids'] CLS, DCOR = CLS_D['M'], DCOR_D['M'] assert np.size(CLS,0) == np.size(CLS,1) assert np.shape(CLS) == np.shape(DCOR) if weak_fname: WEAK_D = mio.load(weak_fname) assert WEAK_D['row_ids'] == WEAK_D['col_ids'] assert WEAK_D['row_ids'] == names WEAK = WEAK_D["M"] assert np.shape(WEAK) == np.shape(CLS) else: WEAK = None if ignore_fname: IGNORE_D = mio.load(ignore_fname, force_row_ids=True, force_col_ids=True) assert IGNORE_D['row_ids'] == IGNORE_D['col_ids'] try: assert IGNORE_D['row_ids'] == names except AssertionError: print IGNORE_D['row_ids'] print names raise IGNORE = IGNORE_D["M"] assert np.shape(IGNORE) == np.shape(CLS) else: IGNORE = None out = open(out_fname, "w") G = print_graphviz(names=names, out=out, CLS=CLS, DCOR=DCOR, WEAK=WEAK, IGNORE=IGNORE, node_styles=node_styles, min_d=min_d, weighted=weighted, plot_na=plot_na, rank_clusters=rank_clusters, ignore_nodes=ignore_nodes, **kwds) out.close() return G
from __init__ import *
import matrix_io as mio
from clusters_prototype_lib import *
import subprocess

# Script configuration.
# NOTE(review): min_d, graphviz_cmd, outpath_prefix and out_path are not
# used in the visible part of this script.
min_d=0.13
TAB_PREFIX="/Users/z/Dropbox/biostat/brca/GSE7307.e2fnets.jun19/tab/"
cls_fname=TAB_PREFIX+"jun20.R.GSE7307.TF.BOOL.syms.tab"
dcor_fname=TAB_PREFIX+"jun20.R.GSE7307.TF.DCOR.syms.tab"
weak_fname=TAB_PREFIX+"jun20.R.GSE7307.TF.WEAK.syms.tab"
graphviz_cmd="dot"
outpath_prefix="/Users/z/Desktop/brca_e2f_custom"
clusts_fname="/Users/z/Dropbox/biostat/brca/GSE7307.e2fnets.jun19/tab/jun19.GSE7307.k299.gsplom.clust.names.e2fcustom.txt"
out_path="/Users/z/Desktop/"

# Load the three matrices; node names are taken from the DCOR row ids.
CLS_D = mio.load(cls_fname)
DCOR_D = mio.load(dcor_fname)
WEAK_D = mio.load(weak_fname)
CLS = CLS_D['M']
DCOR = DCOR_D['M']
WEAK = WEAK_D['M']
node_names = DCOR_D['row_ids']
CLUSTS = load_clusters(open(clusts_fname))
print CLS.shape, DCOR.shape, WEAK.shape

# Convert cluster names into row ID indices (indexed from zero)
# NOTE: C is a list, not a dict. It is ordered in same order as clust_names
C, clust_names = clust_names_to_row_num_list(CLUSTS, node_names)
print C
print clust_names
def main(pkl_fname=None, row_fname=None, col_fname=None, outdir=None, sig=None, doabs=False, diag=1): """ pkl_fname: path to pickled numpy dependency matrix row_fname: path to labeled text matrix with row ids, maybe col ids col_fname: optional path to labeled text matrix with col ids sig: float of minimum significance doabs: flag of whether to use absolute value for significance testing diag: if matrix is symmetric, the value of the diagonal """ assert pkl_fname and row_fname and outdir make_dir(outdir) if doabs: abstxt = "T" else: abstxt = "F" out_fname = os.path.join(outdir, os.path.basename(pkl_fname.rpartition('.')[0])) if sig: out_fname += ".sig%f" % sig if doabs: out_fname += ".absT" out_fname += ".tab" print "Text matrix will be saved to: %s" % out_fname M = pickle.load(open(pkl_fname)) # Get row and column labels. try: D_row = mio.load(row_fname) row_names = np.array(D_row['row_ids']) except AssertionError: row_names = np.array([s.strip('\n\r') for s in open(row_fname)]) if col_fname is None: col_names = np.array(D_row['col_ids']) else: if row_fname == col_fname: col_names = row_names else: try: D_col = mio.load(col_fname) col_names = np.array(D_col['row_ids']) # Use row IDs as column IDs in Dependency Matrix except AssertionError: col_names = np.array([s.strip('\n\r') for s in open(col_fname)]) if len(row_names) == np.size(M,0) and len(col_names) == np.size(M,1): print "Number of rows(%d) and column(%d) names fit matrix size (%d,%d)." % \ (len(row_names), len(col_names), np.size(M,0), np.size(M,1)) else: n = len(row_names) if np.size(M,0) == n*(n-1)//2: print "Matrix seems to be n choose 2 upper triangle matrix. Converting to full matrix..." 
M = distance.squareform(M) if diag is not None: print "Forcing diagonal to be:", diag for i in xrange(n): M[i,i] = diag else: raise Exception, "Unknown matrix size %s given #row_ids(%d), #col_ids(%d)" % \ (np.shape(M), len(row_names), len(col_names)) # Remove insignificant rows and columns; align row/col names original_dim = M.shape if sig is not None: sig = float(sig) if not doabs: col_max = np.amax(M,0) row_max = np.amax(M,1) else: col_max = np.amax(np.abs(M),0) row_max = np.amax(np.abs(M),1) M = M[row_max>=sig,:][:,col_max>=sig] row_names = row_names[row_max>=sig] col_names = col_names[col_max>=sig] new_dim = M.shape # Dump to text now_timestamp = datetime.datetime.now().isoformat('_') header = ["Generated on %s from pickled matrix file %s" % (now_timestamp, pkl_fname), "Original dimensions: %s, New dimensions: %s" % (original_dim, new_dim), "sig: %s, abs: %s" % (str(sig), str(abstxt))] print "\n".join(header) fp = open(out_fname, "w") mio.save(M, fp, ftype="txt", delimit_c="\t", row_ids=list(row_names), col_ids=list(col_names), headers=header) fp.close() print "Tab matrix saved to %s." % out_fname return out_fname
#!/usr/bin/python """Hack script and notes to generate Jun19 BRCA network-of-networks. python script.py min_d=0.30 cls_fname=data/D.expr.gold.CLS.apr.19.tab dcor_fname=data/D.expr.gold.DCOR.apr.19.tab color_fname=data/gold.celegans.phase.colors.genes.txt graphviz_cmd=dot weak_fname=data/gold.weak.tab weighted=False outpath_prefix=~/Desktop/gold_0.30_dot_nw """ from clusters_prototype_lib import * import matrix_io as mio import subprocess DIR = "/Users/z/Dropbox/biostat/brca/GSE7307.e2fnets/" PDF_PTN = DIR + "jun19_e2f_clust_pdfs/c%s.fdp.dot.pdf" SUFFIXES = ['117', '150', '83', '82', '80', '51'] INTER_DCOR_D = mio.load(DIR+"GSE7307.TF.R.299.interest.inter.DCOR.jun19.tab") INTER_BOOL_D = mio.load(DIR+"GSE7307.TF.R.299.interest.inter.BOOL.jun19.tab") INTER_WEAK_D = mio.load(DIR+"GSE7307.TF.R.299.interest.inter.WEAK.jun19.tab") print INTER_DCOR_D['row_ids'] print SUFFIXES assert INTER_DCOR_D['row_ids'] == SUFFIXES assert INTER_DCOR_D['row_ids'] == INTER_BOOL_D['row_ids'] assert INTER_DCOR_D['row_ids'] == INTER_WEAK_D['row_ids'] assert INTER_DCOR_D['row_ids'] == INTER_DCOR_D['col_ids'] C_plots = [] for c in SUFFIXES: print PDF_PTN%c w,h = get_pdf_file_size(PDF_PTN%c) C_plots.append((w,h)) clust_out = DIR+"e2f.jun20.clusts.dot" fp = open(clust_out,"w")
from script_weak import *
import matrix_io as mio

# The raw matrix load and script_weak's main() both read the same file.
fname = "nice.may3.Eg.expr.gold.celegans.csv"
D = mio.load(fname)
WEAK = main(fname)
print(WEAK)
# Spot-check the (3, 6) pair in both orientations.
print("%s %s" % (WEAK[3, 6], WEAK[6, 3]))

# Binarize the matrix once at the 0.2 threshold, then pull rows 3 and 6.
over_th = np.array(D['M'] > 0.2, dtype=np.int)
r3 = over_th[3, ]
r6 = over_th[6, ]
for idx, row in ((3, r3), (6, r6)):
    print("%s %s" % (D['row_ids'][idx], row))
# Element-wise difference of the two binarized rows.
print(r3 - r6)
In [12]: %timeit DCOR = compute_all_dcor(M) 100 loops, best of 3: 5.01 ms per loop In [19]: %timeit DCORL = test.loop_dcor(M) 10 loops, best of 3: 56.4 ms per loop """ from __init__ import * import matrix_io as mio import script import numpy as np from scipy.spatial.distance import pdist from scipy.spatial.distance import squareform import dcor FNAME = "nice.may3.Eg.expr.gold.celegans.csv" M = mio.load(FNAME)['M'] def loop_dcor(M): m = M.shape[0] D = np.zeros((m, m)) for i, rowi in enumerate(M): for j, rowj in enumerate(M): D[i, j] = dcor.dcor(rowi, rowj) return D def main(): DCOR = script.main(fname=FNAME) COR = compute_all_ppc_numpy(M) SCI = compute_all_pcc_scipy(M)
In [12]: %timeit DCOR = compute_all_dcor(M) 100 loops, best of 3: 5.01 ms per loop In [19]: %timeit DCORL = test.loop_dcor(M) 10 loops, best of 3: 56.4 ms per loop """ from __init__ import * import matrix_io as mio import script import numpy as np from scipy.spatial.distance import pdist from scipy.spatial.distance import squareform import dcor FNAME="nice.may3.Eg.expr.gold.celegans.csv" M = mio.load(FNAME)['M'] def loop_dcor(M): m = M.shape[0] D = np.zeros((m,m)) for i,rowi in enumerate(M): for j,rowj in enumerate(M): D[i,j] = dcor.dcor(rowi,rowj) return D def main(): DCOR = script.main(fname=FNAME) COR = compute_all_ppc_numpy(M) SCI = compute_all_pcc_scipy(M) print np.all(np.abs(COR-SCI) < 0.0000000000001) DCOR = compute_all_dcor(M)
def main():
    """Compute path matrices for several path-length bounds and save each.

    Loads the adjacency matrix from ``ADJ_FNAME``, then calls
    ``paths.fill_paths`` with ``k=None`` (unbounded, saved as ``kinf``) and
    with ``k = 2 .. 16``.  Every result is written to
    ``data/all_k61_0.5_dot_nw.adj.paths.<label>.csv`` and, after each save,
    the number of entries that differ from the unbounded result is printed
    (for the unbounded result itself, the number of entries that differ from
    the input adjacency matrix).

    Relies on the module-level names ``mio``, ``paths``, ``np`` and
    ``ADJ_FNAME``.  Returns nothing.
    """
    # 1: load adj matrix (np.int was only an alias of the builtin int)
    ADJ_D = mio.load(ADJ_FNAME, dtype=int, force_row_ids=True, force_col_ids=True)
    adj = ADJ_D['M']
    row_ids = ADJ_D['row_ids']
    col_ids = ADJ_D['col_ids']
    assert len(row_ids) == len(col_ids)
    assert len(row_ids) == adj.shape[0]
    assert adj.shape[0] == adj.shape[1]

    def save_paths(P, label):
        # Write one path matrix; the handle is closed as soon as the save ends.
        out_fname = "data/all_k61_0.5_dot_nw.adj.paths.%s.csv" % label
        with open(out_fname, "w") as fp:
            mio.save(P, fp, ftype="txt", row_ids=row_ids, col_ids=col_ids,
                     fmt="%d", delimit_c=",")

    # 2: find all paths (no bound on path length)
    Pinf = paths.fill_paths(adj, k=None)
    save_paths(Pinf, "kinf")
    print(np.sum(adj != Pinf))

    # 3: bounded path lengths.  A loop replaces the copy-pasted stanzas, so
    # each file is saved from the matrix computed for its own k (the k16 file
    # used to be written from the k=15 matrix).
    for k in range(2, 17):
        Pk = paths.fill_paths(adj, k=k)
        save_paths(Pk, "k%d" % k)
        print(np.sum(Pk != Pinf))
from script_weak import *
import matrix_io as mio

# One file name feeds both the direct load and script_weak's main().
fname = "nice.may3.Eg.expr.gold.celegans.csv"
D = mio.load(fname)
WEAK = main(fname)
print(WEAK)
# Show the (3, 6) entry next to its transpose entry.
print("%s %s" % (WEAK[3, 6], WEAK[6, 3]))

# Threshold the matrix at 0.2 a single time and slice out rows 3 and 6.
binarized = np.array(D['M'] > 0.2, dtype=np.int)
r3 = binarized[3, ]
r6 = binarized[6, ]
print("%s %s" % (D['row_ids'][3], r3))
print("%s %s" % (D['row_ids'][6], r6))
# Positions where the two binarized rows disagree are nonzero here.
print(r3 - r6)
"""Print graphviz edge statements for the KO adjacency matrix.

Example of the surrounding dot syntax the edge lines are meant for:

digraph {
graph [fontname = "helvetica", nodesep=0.300000, splines=ortho, ranksep=0.400000, rank=same];
node [fontname = "helvetica", color="#000000", style=filled, fillcolor="#ffffff"];
edge [fontname = "helvetica", penwidth=1];
"154" -> "107"[color="#222222", penwidth=4.512063];
"107" -> "343"[color="#4197c7", constraint=false, dir=none, penwidth=6.054689,style=dashed];
}
"""
import matrix_io as mio
import numpy as np

# Edge attributes per adjacency code; codes not listed produce no edge.
EDGE_STYLES = {
    1: {'color': '#339900'},                      # weak activator
    2: {'color': '#00ff00'},                      # strong activator
    -1: {'color': 'orange', 'arrowhead': 'tee'},  # weak repressor
    -2: {'color': 'red', 'arrowhead': 'tee'},     # strong repressor
}

D = mio.load("KO.adj.matrix.tab", dtype=np.int)
M = D['M']
assert M.shape[0] == M.shape[1], M.shape
n = M.shape[0]

for i in xrange(n):
    for j in xrange(n):
        style = EDGE_STYLES.get(M[i, j])
        if style is None:
            continue
        options = {}
        options.update(style)
        opts = ", ".join(['%s="%s"' % (k, v) for k, v in options.items()])
        # Entry [i, j] is drawn as an edge from column label j to row label i.
        print('"%s" -> "%s"[%s]' % (D['col_ids'][j], D['row_ids'][i], opts))
def main():
    """Build and save the boolean matrix of adjacency edges to ignore.

    Three masks with the shape of the adjacency matrix are computed and OR-ed:

    * ``Ignore_Clust`` -- entries between nodes that share a rank.
    * ``Ignore_Low``   -- edges whose dCor is below ``DCOR_TH`` and whose
      removal leaves the source with an outgoing edge and the target with an
      incoming edge.
    * ``Ignore_Far``   -- edges between ranks at least two levels apart for
      which ``paths.is_path`` still reaches the target after the edge is
      dropped.

    The adjacency convention is column -> row: a nonzero ``M[row, col]`` is an
    edge from node ``col`` to node ``row``.

    Side effects: writes the k=2 and k=3 path matrices to ``P2_FNAME`` and
    ``P3_FNAME``, writes the combined mask to ``IGNORE_FNAME``, and prints
    the clusters and edge counts.  Relies on the module-level names ``mio``,
    ``paths``, ``np``, ``load_ranks_named``, ``group_in_same_rank``,
    ``count_edges``, ``ADJ_FNAME``, ``DCOR_FNAME``, ``RANKS_FNAME``,
    ``DCOR_TH``, ``P2_FNAME``, ``P3_FNAME`` and ``IGNORE_FNAME``.
    """
    # 1: load adj matrix (np.int / np.bool were aliases of the builtins)
    ADJ_D = mio.load(ADJ_FNAME, dtype=int, force_row_ids=True, force_col_ids=True)
    DCOR_D = mio.load(DCOR_FNAME, force_row_ids=True, force_col_ids=True)
    assert len(ADJ_D['row_ids']) == len(ADJ_D['col_ids'])
    assert len(ADJ_D['row_ids']) == ADJ_D['M'].shape[0]
    assert DCOR_D["row_ids"] == ADJ_D['row_ids']
    assert DCOR_D["row_ids"] == DCOR_D["col_ids"]
    assert ADJ_D['M'].shape[0] == ADJ_D['M'].shape[1]
    n = ADJ_D['M'].shape[0]

    def save_matrix(X, fname):
        # Write one matrix with the adjacency labels; closes the handle.
        with open(fname, "w") as fp:
            mio.save(X, fp, ftype="txt", row_ids=ADJ_D['row_ids'],
                     col_ids=ADJ_D['col_ids'], fmt="%d", delimit_c=",")

    # 2.1: find paths k=2
    P2 = paths.fill_paths(ADJ_D["M"], k=2)
    save_matrix(P2, P2_FNAME)
    # 2.2: find paths k=3
    P3 = paths.fill_paths(ADJ_D["M"], k=3)
    save_matrix(P3, P3_FNAME)

    name_order = ADJ_D['row_ids']
    # 3.1: load ranks
    # (Compiled manually.)  Rank elements are 0-indexed into the ADJ matrix.
    with open(RANKS_FNAME) as ranks_fp:
        ranks = load_ranks_named(ranks_fp, name_order)
    node_clusters = []

    # 4: find clusters in same rank at k=2
    Ignore_Clust = np.zeros(ADJ_D['M'].shape, dtype=bool)
    for lvl, r in enumerate(ranks):
        CC = group_in_same_rank(r, P2)
        rnp = np.array(r)
        # 1-based node labels, as strings, for each cluster in this rank
        c = [[str(x) for x in rnp[cc] + 1] for cc in CC]
        node_clusters.append(c)
        # Ignore edges in equal rank clusters
        members = set(r)
        for node in r:
            adj = set(np.nonzero(ADJ_D['M'][:, node])[0])
            for e in (adj & members):
                # NOTE(review): `adj` holds rows e with M[e, node] != 0, yet
                # the mask entry set is [node, e]; kept as in the original --
                # confirm the intended orientation.
                Ignore_Clust[node, e] = True
                # Also remove corresponding opposite direction
                if ADJ_D['M'][node, e] == ADJ_D['M'][e, node] == 1:
                    Ignore_Clust[e, node] = True
    print("Clusters")
    for c in node_clusters:
        print(c)
    print("")

    # 4.5 attempt to hide lower strength edges without disconnecting nodes
    DCOR = DCOR_D['M']
    AD = ADJ_D['M'].copy()
    Ignore_Low = np.zeros(ADJ_D['M'].shape, dtype=bool)

    def edge_is_needed(src, dst):
        # True when, with src->dst already zeroed in AD, src has no outgoing
        # edge left (empty column) or dst has no incoming edge left (empty
        # row).  The row test used to be written AD[dst:,], which sums every
        # row from dst downward instead of row dst alone.
        return np.sum(AD[:, src]) == 0 or np.sum(AD[dst, :]) == 0

    for i in range(n):
        for j in range(n):
            if i == j:
                continue
            if not (DCOR[i, j] < DCOR_TH and AD[j, i]):
                continue
            # edge exists from i to j and it is under dCor thresh.
            # can we remove it?
            AD[j, i] = 0
            if AD[i, j]:
                # undirected edge: drop both directions, then test both ends
                AD[i, j] = 0
                if edge_is_needed(i, j) or edge_is_needed(j, i):
                    # no, it disconnects something
                    AD[i, j] = 1
                    AD[j, i] = 1
                else:
                    Ignore_Low[i, j] = True
                    Ignore_Low[j, i] = True
            else:
                # directed edge
                if edge_is_needed(i, j):
                    # no, it disconnects something
                    AD[j, i] = 1
                else:
                    Ignore_Low[j, i] = True
    NL = count_edges(Ignore_Low)
    # every ignored low-strength entry must be a real adjacency entry
    assert np.sum(Ignore_Low & ADJ_D['M']) == np.sum(Ignore_Low)
    print("Too low: %s" % (NL,))

    # 5: look for redundant directed edges between levels at least 2 levels
    # apart; remove edge if path of equal length already exists
    Ignore_Far = np.zeros(ADJ_D['M'].shape, dtype=bool)
    A = ADJ_D['M'].copy()
    A = A & (~Ignore_Low)
    n_far_edges = 0
    for lvl in range(len(ranks) - 2):
        this_rank = ranks[lvl]
        for dlvl in range(lvl + 2, len(ranks)):
            delta = dlvl - lvl
            that_rank = ranks[dlvl]
            for top in this_rank:
                for low in that_rank:
                    if not A[low, top]:  # adj is col->row
                        continue
                    n_far_edges += 1
                    A[low, top] = 0  # try removing this link
                    # is there an alternate path of equal length to this node?
                    conn = paths.is_path(A, top, delta + 1)
                    if low not in conn:
                        A[low, top] = 1  # the edge is needed after all
                    else:
                        Ignore_Far[low, top] = True
                        # also remove an associated undirected edge
                        if A[top, low]:
                            Ignore_Far[top, low] = True
                            A[top, low] = 0
    print("# Far edges %s" % (n_far_edges,))

    # 6: Print Stats
    assert np.sum(Ignore_Clust & Ignore_Far) == 0
    NT = count_edges(ADJ_D['M'])
    NS = count_edges(Ignore_Clust)
    NF = count_edges(Ignore_Far)
    print("Total: %s" % (NT,))
    print("Same Level: %s" % (NS,))
    print("Redundant Far: %s" % (NF,))

    # 7: Save Edge Ignore Matrix
    Ignore = Ignore_Clust | Ignore_Far | Ignore_Low
    NI = count_edges(Ignore)
    print("Ignored %s" % (NI,))
    print(np.sum(Ignore))
    print(np.sum(Ignore_Clust | Ignore_Far))
    print("Save Ignore Matrix at: %s" % (IGNORE_FNAME,))
    save_matrix(Ignore, IGNORE_FNAME)