def main():
    """Export network files for the experimental and in-silico data.

    For every cell / stimulus combination the combined "cls" and "dcor"
    CSV matrices are read from ``path`` and passed to ``save_files`` with an
    output stem under ``out_path_exp``.  The same is then done once for the
    in-silico matrices under ``out_path_insil``.  Each output directory also
    receives a writeup file containing a single space.
    """
    for cell in cells:
        for stim in stims:
            cls_name = "%s.%s.%s.combined.csv" % (cell, stim, "cls")
            dcor_name = "%s.%s.%s.combined.csv" % (cell, stim, "dcor")
            cls_data = mio.load(os.path.join(path, cls_name), delimit_c=",")
            dcor_data = mio.load(os.path.join(path, dcor_name), delimit_c=",")
            network_stem = "%s-%s-%s-Network" % (team, cell, stim)
            save_files(cls_data, dcor_data, 0.5,
                       os.path.join(out_path_exp, network_stem))

    # Writeup file for the experimental networks (a single space).
    exp_writeup = os.path.join(out_path_exp, "%s-Network-Writeup.txt" % team)
    handle = open(exp_writeup, "w")
    handle.write(" ")
    handle.close()

    # In-silico outputs are named
    #   TeamName-Network-Insilico.sif
    #   TeamName-Network-Insilico.eda
    insil_cls = mio.load(os.path.join(path, "insilico.all.cls.combined.csv"))
    insil_dcor = mio.load(os.path.join(path, "insilico.all.dcor.combined.csv"))
    insil_stem = "%s-Network-Insilico" % (team)
    save_files(insil_cls, insil_dcor, 0.5,
               os.path.join(out_path_insil, insil_stem))

    insil_writeup = os.path.join(out_path_insil,
                                 "%s-Network-Insilico-Writeup.txt" % team)
    handle = open(insil_writeup, "w")
    handle.write(" ")
    handle.close()
def main():
  """Convert each cell/stimulus adjacency matrix to contest files and a .dot file.

  Loads the cls, dcor, db and adj tab-delimited matrices for every pair in
  CELLS x STIMS, checks that their shapes and row/column ids agree, then
  calls adj_to_contest and pretty_dot.  Finally writes a single-space
  writeup file into out_path_exp.
  """
  make_dir(out_path_exp)
  for cell in CELLS:
    for stim in STIMS:
      pair = (cell, stim)
      cls = mio.load(os.path.join(path, "%s.%s.cls.tab" % pair), delimit_c="\t")
      dcor = mio.load(os.path.join(path, "%s.%s.dcor.tab" % pair), delimit_c="\t")
      db = mio.load(os.path.join(path, "%s.%s.db.tab" % pair), delimit_c="\t")
      adj = mio.load(os.path.join(path, "%s.%s.adj.tab" % pair), delimit_c="\t")

      # Sanity checks: matching square matrices sharing one id ordering.
      cls_m, dcor_m = cls['M'], dcor['M']
      assert cls_m.shape == dcor_m.shape
      assert cls_m.shape[0] == cls_m.shape[1]
      assert cls['row_ids'] == dcor['row_ids']
      assert cls['row_ids'] == cls['col_ids']
      assert dcor['row_ids'] == dcor['col_ids']
      assert adj['row_ids'] == dcor['col_ids']
      assert db['row_ids'] == dcor['col_ids']

      contest_stem = os.path.join(out_path_exp,
                                  "%s-%s-%s-Network" % (team, cell, stim))
      adj_to_contest(D=adj, path_stem=contest_stem)
      dot_path = os.path.join(path, "pretty_sep16_%s_%s.dot" % (cell, stim))
      pretty_dot(adj, db, dcor, cls, dot_path)

  # required as part of submission
  writeup = open(os.path.join(out_path_exp, "%s-Network-Writeup.txt" % team), "w")
  writeup.write(" ")
  writeup.close()
def main():
  """Write contest files for the experimental and in-silico adjacency matrices.

  Experimental part: for every cell/stimulus pair, load the cls, dcor, xnet
  and inh-combined adjacency tables, pass the adjacency table to
  adj_to_contest and all four to pretty_dot.  In-silico part: load one
  adjacency table and pass it to adj_to_contest.  Each output directory is
  created with make_dir and receives a single-space writeup file.
  """
  # Experimental Data
  # ----------------------------------------
  make_dir(out_path_exp)
  for cell in cells:
    for stim in stims:
      pair = (cell, stim)
      cls = mio.load(os.path.join(path, "%s.%s.cls.tab" % pair), delimit_c="\t")
      dcor = mio.load(os.path.join(path, "%s.%s.dcor.tab" % pair), delimit_c="\t")
      # e.g. BT20.EGF.xnet.adj.tab
      xnet = mio.load(os.path.join(path, "%s.%s.xnet.adj.tab" % pair), delimit_c="\t")
      adj = mio.load(os.path.join(path, "%s.%s.inh-combined.adj.tab" % pair), delimit_c="\t")

      contest_stem = os.path.join(out_path_exp,
                                  "%s-%s-%s-Network" % (team, cell, stim))
      adj_to_contest(D=adj, path_stem=contest_stem)
      dot_path = os.path.join(path, "pretty_sep9_%s_%s.dot" % (cell, stim))
      pretty_dot(adj, xnet, dcor, cls, dot_path)
  # required as part of submission
  writeup = open(os.path.join(out_path_exp, "%s-Network-Writeup.txt" % team), "w")
  writeup.write(" ")
  writeup.close()

  # In Silico Data
  # ----------------------------------------
  make_dir(out_path_insil)
  insil_adj = mio.load(os.path.join(path, "insilico.all.sep9.adj.tab"), delimit_c="\t")
  # Outputs: TeamName-Network-Insilico.sif, TeamName-Network-Insilico.eda
  insil_stem = "%s-Network-Insilico" % team
  adj_to_contest(insil_adj, os.path.join(out_path_insil, insil_stem))
  writeup = open(os.path.join(out_path_insil, "%s-Network-Insilico-Writeup.txt" % team), "w")
  writeup.write(" ")
  writeup.close()
Exemple #4
0
def main():
  """Run the script on the gold file and check distances against a reference.

  Calls script.main on the data file, computes all_pairs_bool_dist over the
  matrix loaded from that same file, prints both the computed and the
  reference matrix, and asserts that they are element-wise equal.
  """
  data_fname = "D.expr.gold.CLS.apr.19.tab"
  script.main(fname=data_fname)
  loaded = mio.load(data_fname)
  computed = all_pairs_bool_dist(loaded['M'])
  reference = mio.load("gold.R.dists.tab")['M']
  print(computed)
  print(reference)
  assert np.all(computed == reference)
Exemple #5
0
def load_extend_training_datas():
    """Load the extended training features and labels.

    Returns:
        A ``(features_mat, labels_mat)`` pair.  ``features_mat`` is the loaded
        feature matrix divided by 255.0.  ``labels_mat`` is a zero-initialised
        ``len(labels) x 10`` array with a 1 written at ``[i, labels[i]]`` for
        every row ``i``.
    """
    features_dict = matrix_io.load('extend_train_features.pkl', float)
    labels_dict = matrix_io.load('extend_train_labels.pkl', np.int32)

    raw_features = features_dict['M']
    raw_labels = labels_dict['M']

    scaled_features = np.mat(raw_features) / 255.0
    one_hot = np.zeros([len(raw_labels), 10])
    for row, label in enumerate(raw_labels):
        one_hot[row, label] = 1
    return scaled_features, one_hot
def main():
  """Compare the gold standard network against the MIM and YATES networks.

  Loads the adjacency tables as strings, checks their row/column ids,
  aligns the YATES ids with the gold ids, and runs test_mods on three
  pairings, printing a heading before each.
  """
  gold = mio.load("data/gold_standard_network.csv", dtype=str)
  mim = mio.load("data/mim_msa_cov.csv", dtype=str)
  # HACK for biovis: load no weaks.  Both YATES tables are read from the same
  # file here; the earlier inputs were data/gold_0.32_dot_nw.adj.csv and
  # data/gold.paths.dcor0.32.k2.tab.
  yates = mio.load("data/biovis_gold/gold_0.32_dot_noweak_noclust.adj.csv", dtype=str)
  yates2 = mio.load("data/biovis_gold/gold_0.32_dot_noweak_noclust.adj.csv", dtype=str)

  assert gold["row_ids"] == gold["col_ids"]
  assert mim["row_ids"] == mim["col_ids"]
  assert yates["row_ids"] == yates["col_ids"]
  assert gold["row_ids"] == mim["row_ids"]
  assert yates2["row_ids"] == yates2["col_ids"]
  assert yates2["row_ids"] == yates["row_ids"]

  # align YATES and GOLD: for each gold id, its position in YATES (or None)
  yates_ids = yates['row_ids']
  positions = [yates_ids.index(name) if name in yates_ids else None
               for name in gold['row_ids']]
  in_yates = [pos for pos in positions if pos is not None]
  in_gold = [idx for idx, pos in enumerate(positions) if pos is not None]

  # COMPARE GOLD WITH MIM
  print("GOLD VS MIM")
  test_mods(gold['M'], mim['M'], gold['row_ids'])
  print("")

  # COMPARE GOLD WITH YATES
  shared_ids = list(np.array(gold['row_ids'])[in_gold])
  assert list(np.array(yates['row_ids'])[in_yates]) == shared_ids
  print("")
  gold_sub = gold['M'][in_gold, :][:, in_gold]
  yates_sub = yates['M'][in_yates, :][:, in_yates]
  yates2_sub = yates2['M'][in_yates, :][:, in_yates]
  print("GOLD VS YATES EDGES")
  test_mods(gold_sub, yates_sub, shared_ids)
  print("")

  print("GOLD VS YATES WITH PATHS")
  test_mods(gold_sub, yates2_sub, shared_ids)
  print("")
def main(fname=None, pkl=True, **kwds):
  """Compute the all-pairs boolean class matrix for ``fname`` and save it.

  Writes the class matrix as a text table, the step thresholds returned by
  ``compute_all_bool`` to ``<fname>.steps.txt`` and, unless disabled, a
  pickle of the class matrix.

  Args:
    fname: path of the data matrix to load with ``mio.load`` (required).
    pkl: if true, also pickle the class matrix.  The strings 'f', 'false'
      and 'none' (any case) are treated as False.
    **kwds: forwarded to ``compute_all_bool``.  'b', 'z_th', 'err_th',
      'd_th' and 'r_th' are converted to float first, so string values
      are accepted.

  Raises:
    AssertionError: if ``fname`` is empty or missing.
  """
  assert fname
  if isinstance(pkl, basestring) and pkl.lower() in ('f','false','none'):
    pkl = False
  for key in ('b', 'z_th', 'err_th', 'd_th', 'r_th'):
    if key in kwds:
      kwds[key] = float(kwds[key])

  print("Loading data...")
  D = mio.load(fname)
  n_rows, n_cols = D['M'].shape[0], D['M'].shape[1]
  print("Computing all pairs boolean class for a (%d x %d) data matrix (%d x %d result matrix)..." %
        (n_rows, n_cols, n_rows, n_rows))
  CLS, steps, b = compute_all_bool(D['M'], **kwds)

  # NOTE(review): these fallbacks only label the output file name; they are
  # assumed to mirror compute_all_bool's own defaults -- confirm.
  z = kwds.get('z_th', 3.0)
  # 2.0/3 rather than 2/3: under Python 2 integer division the latter is 0,
  # which mislabelled the file and always triggered the warning below.
  r = kwds.get('r_th', 2.0/3)
  if r < 0.5:
    print("WARNING: r<0.5, (r=%f)... are you sure?" % r)
  err = kwds.get('err_th', 0.1)

  fname_out = '%s.b%.4f.z%.2f.r%.2f.err%.2f.bool.tab' % (fname, b, z, r, err)
  print("Saving %s..." % (fname_out))
  mio.save(CLS, fname_out, fmt="%d", row_ids=D['row_ids'], col_ids=D['row_ids'])

  steps_fname = fname+".steps.txt"
  print("Saving high/low thresholds to %s in original row order..." % steps_fname)
  with open(steps_fname, "w") as fp:
    fp.write("\n".join("%f" % x for x in steps))

  if pkl:
    fname_pkl_out = fname_out.rpartition('.')[0]+'.pkl'
    print("Saving %s..." % (fname_pkl_out))
    # protocol=-1 selects a binary pickle format, so the file must be binary.
    with open(fname_pkl_out, "wb") as fp:
      pickle.dump(CLS, fp, protocol=-1)
def main(fname=None, pkl=True, **kwds):
    """Compute the all-pairs weak class matrix for ``fname`` and save it.

    Args:
        fname: path of the data matrix to load with ``mio.load`` (required).
        pkl: if true, also pickle the weak class matrix.  The strings 'f',
            'false' and 'none' (any case) are treated as False.
        **kwds: forwarded to ``compute_all_weak``.  'err' is converted to
            int and 'th' to float first, so string values are accepted.

    Returns:
        The weak class matrix returned by ``compute_all_weak``.

    Raises:
        AssertionError: if ``fname`` is empty or missing.
    """
    assert fname
    if isinstance(pkl, basestring) and pkl.lower() in ('f', 'false', 'none'):
        pkl = False
    if 'err' in kwds:
        kwds['err'] = int(kwds['err'])
    if 'th' in kwds:
        kwds['th'] = float(kwds['th'])

    D = mio.load(fname)
    n_rows, n_cols = D['M'].shape[0], D['M'].shape[1]
    print("Computing all pairs weak boolean class...")
    print("Computing all pairs weak class for a (%d x %d) data matrix (%d x %d result matrix)..." %
          (n_rows, n_cols, n_rows, n_rows))
    WEAK, err, th = compute_all_weak(D['M'], **kwds)
    print("Used parameters err=%d, cutoff th=%f" % (err, th))

    fname_out = "%s.err%d.th%.4f.weak.tab" % (fname, err, th)
    print("Saving %s..." % (fname_out))
    mio.save(WEAK,
             fname_out,
             fmt="%d",
             row_ids=D['row_ids'],
             col_ids=D['row_ids'])
    if pkl:
        fname_pkl_out = fname_out.rpartition('.')[0] + '.pkl'
        print("Saving %s..." % (fname_pkl_out))
        # protocol=-1 selects a binary pickle format, so open in binary mode
        # and close the handle deterministically.
        with open(fname_pkl_out, "wb") as fp:
            pickle.dump(WEAK, fp, protocol=-1)
    return WEAK
def main(fname=None, pkl=True, algorithm="3", outtag="", **kwds):
  """Compute the all-pairs distance correlation matrix for ``fname`` and save it.

  Args:
    fname: path of the data matrix to load with ``mio.load`` (required).
    pkl: if true, also pickle the result.  The strings 'f', 'false' and
      'none' (any case) are treated as False.
    algorithm: "1", "2" or "3"; selects compute_all_dcor,
      compute_all_dcor_2 or compute_all_dcor_3 respectively.
    outtag: optional tag inserted before "dcor.tab" in the output name; a
      trailing "." is appended when missing.
    **kwds: forwarded to the selected compute function.

  Returns:
    The matrix returned by the selected compute function.

  Raises:
    AssertionError: if ``fname`` is empty or missing.
    Exception: if ``algorithm`` is not one of "1", "2", "3".
  """
  assert fname
  if isinstance(pkl, basestring) and pkl.lower() in ('f','false','none'):
    pkl = False
  print("Loading data from %s..." % fname)
  D = mio.load(fname)
  n_rows, n_cols = D['M'].shape[0], D['M'].shape[1]
  print("Computing all pairs (euclidean) distance correlation from a (%d x %d) data matrix to a (%d x %d) result matrix..." % (n_rows, n_cols, n_rows, n_rows))
  print("Computing all pairs (euclidean) distance correlation...")

  if algorithm == "1":
    print("Using Algorithm 1: single dot product, n^2*m memory")
    DCOR = compute_all_dcor(D['M'], **kwds)
  elif algorithm == "2":
    print("Using Algorithm 2: multiple dot products, n*m memory")
    DCOR = compute_all_dcor_2(D['M'], **kwds)
  elif algorithm == "3":
    print("Using Algorithm 3: multiple dot products, n*m memory, n choose 2 savings")
    DCOR = compute_all_dcor_3(D['M'], **kwds)
  else:
    raise Exception("Unknown algorithm %s" % algorithm)

  if outtag and outtag[-1] != ".":
    outtag += "."
  fname_out = '%s.%sdcor.tab' % (fname, outtag)
  print("Saving %s..." % (fname_out))
  mio.save(DCOR, fname_out, fmt="%.4f", row_ids=D['row_ids'], col_ids=D['row_ids'])
  if pkl:
    fname_pkl_out = fname_out.rpartition('.')[0]+'.pkl'
    print("Saving %s..." % (fname_pkl_out))
    # protocol=-1 selects a binary pickle format, so open in binary mode
    # and close the handle deterministically.
    with open(fname_pkl_out, "wb") as fp:
      pickle.dump(DCOR, fp, protocol=-1)
  return DCOR
def main(fname=None, pkl=True, **kwds):
    """Compute the all-pairs boolean class matrix for ``fname`` and save it.

    Writes the class matrix as a text table, the step thresholds returned by
    ``compute_all_bool`` to ``<fname>.steps.txt`` and, unless disabled, a
    pickle of the class matrix.

    Args:
        fname: path of the data matrix to load with ``mio.load`` (required).
        pkl: if true, also pickle the class matrix.  The strings 'f',
            'false' and 'none' (any case) are treated as False.
        **kwds: forwarded to ``compute_all_bool``.  'b', 'z_th', 'err_th',
            'd_th' and 'r_th' are converted to float first, so string
            values are accepted.

    Raises:
        AssertionError: if ``fname`` is empty or missing.
    """
    assert fname
    if isinstance(pkl, basestring) and pkl.lower() in ('f', 'false', 'none'):
        pkl = False
    for key in ('b', 'z_th', 'err_th', 'd_th', 'r_th'):
        if key in kwds:
            kwds[key] = float(kwds[key])

    print("Loading data...")
    D = mio.load(fname)
    n_rows, n_cols = D['M'].shape[0], D['M'].shape[1]
    print("Computing all pairs boolean class for a (%d x %d) data matrix (%d x %d result matrix)..." %
          (n_rows, n_cols, n_rows, n_rows))
    CLS, steps, b = compute_all_bool(D['M'], **kwds)

    # NOTE(review): these fallbacks only label the output file name; they are
    # assumed to mirror compute_all_bool's own defaults -- confirm.
    z = kwds.get('z_th', 3.0)
    # 2.0 / 3 rather than 2 / 3: under Python 2 integer division the latter
    # is 0, which mislabelled the file and always triggered the warning.
    r = kwds.get('r_th', 2.0 / 3)
    if r < 0.5:
        print("WARNING: r<0.5, (r=%f)... are you sure?" % r)
    err = kwds.get('err_th', 0.1)

    fname_out = '%s.b%.4f.z%.2f.r%.2f.err%.2f.bool.tab' % (fname, b, z, r, err)
    print("Saving %s..." % (fname_out))
    mio.save(CLS,
             fname_out,
             fmt="%d",
             row_ids=D['row_ids'],
             col_ids=D['row_ids'])

    steps_fname = fname + ".steps.txt"
    print("Saving high/low thresholds to %s in original row order..." % steps_fname)
    with open(steps_fname, "w") as fp:
        fp.write("\n".join("%f" % x for x in steps))

    if pkl:
        fname_pkl_out = fname_out.rpartition('.')[0] + '.pkl'
        print("Saving %s..." % (fname_pkl_out))
        # protocol=-1 selects a binary pickle format, so the file must be
        # opened in binary mode.
        with open(fname_pkl_out, "wb") as fp:
            pickle.dump(CLS, fp, protocol=-1)
def main():
    """Write contest files for the experimental and in-silico adjacency matrices.

    Experimental part: for every cell/stimulus pair, load the cls, dcor,
    xnet and inh-combined adjacency tables, pass the adjacency table to
    adj_to_contest and all four to pretty_dot.  In-silico part: load one
    adjacency table and pass it to adj_to_contest.  Each output directory
    is created with make_dir and receives a single-space writeup file.
    """
    # Experimental Data
    # ----------------------------------------
    make_dir(out_path_exp)
    for cell in cells:
        for stim in stims:
            pair = (cell, stim)
            cls_path = os.path.join(path, "%s.%s.cls.tab" % pair)
            cls = mio.load(cls_path, delimit_c="\t")
            dcor_path = os.path.join(path, "%s.%s.dcor.tab" % pair)
            dcor = mio.load(dcor_path, delimit_c="\t")
            # e.g. BT20.EGF.xnet.adj.tab
            xnet_path = os.path.join(path, "%s.%s.xnet.adj.tab" % pair)
            xnet = mio.load(xnet_path, delimit_c="\t")
            adj_path = os.path.join(path, "%s.%s.inh-combined.adj.tab" % pair)
            adj = mio.load(adj_path, delimit_c="\t")

            contest_stem = os.path.join(
                out_path_exp, "%s-%s-%s-Network" % (team, cell, stim))
            adj_to_contest(D=adj, path_stem=contest_stem)
            dot_path = os.path.join(path,
                                    "pretty_sep9_%s_%s.dot" % (cell, stim))
            pretty_dot(adj, xnet, dcor, cls, dot_path)
    # required as part of submission
    writeup = open(os.path.join(out_path_exp, "%s-Network-Writeup.txt" % team), "w")
    writeup.write(" ")
    writeup.close()

    # In Silico Data
    # ----------------------------------------
    make_dir(out_path_insil)
    insil_adj = mio.load(os.path.join(path, "insilico.all.sep9.adj.tab"),
                         delimit_c="\t")
    # Outputs: TeamName-Network-Insilico.sif, TeamName-Network-Insilico.eda
    insil_stem = "%s-Network-Insilico" % team
    adj_to_contest(insil_adj, os.path.join(out_path_insil, insil_stem))
    insil_writeup = os.path.join(out_path_insil,
                                 "%s-Network-Insilico-Writeup.txt" % team)
    writeup = open(insil_writeup, "w")
    writeup.write(" ")
    writeup.close()
def main(weighted=True, min_d=0.4, colored=False):
  """Print a graphviz digraph built from the gold C. elegans matrices.

  Args:
    weighted: forwarded to csv_to_graphvis; the strings "f", "false", "0",
      "na" and "null" (any case) are treated as False.
    min_d: converted to float and forwarded to csv_to_graphvis.
    colored: when true, node colors are loaded and one styled node line is
      printed per entry; accepts the same false strings as ``weighted``.
  """
  false_words = ("f", "false", "0", "na","null")
  if isinstance(weighted, basestring) and weighted.lower() in false_words:
    weighted = False
  if isinstance(colored, basestring) and colored.lower() in false_words:
    colored = False
  min_d = float(min_d)

  cls_data = mio.load("data/gold.celegans.gse2180.cls.csv")
  dcor_data = mio.load("data/gold.celegans.gse2180.dcor.csv")
  colors = load_colors("data/gold.celegans.gse2180.phase.colors.txt") if colored else None

  print("digraph {")
  print(FONT_STRING)
  if colors:
    for node, color in colors.items():
      print('"%s"[color="%s",style=filled,fontcolor=white]' % (node, color))
  for edge_line in csv_to_graphvis(cls_data, dcor_data, min_d, weighted=weighted):
    print(edge_line)
  print("}")
def main():
  """Export network files for the experimental and in-silico data.

  For every cell / stimulus combination the combined "cls" and "dcor" CSV
  matrices are read from ``path`` and passed to ``save_files`` with an
  output stem under ``out_path_exp``.  The same is then done once for the
  in-silico matrices under ``out_path_insil``.  Each output directory also
  receives a writeup file containing a single space.
  """
  for cell in cells:
    for stim in stims:
      cls_name = "%s.%s.%s.combined.csv" % (cell, stim, "cls")
      dcor_name = "%s.%s.%s.combined.csv" % (cell, stim, "dcor")
      cls_data = mio.load(os.path.join(path, cls_name), delimit_c=",")
      dcor_data = mio.load(os.path.join(path, dcor_name), delimit_c=",")
      network_stem = "%s-%s-%s-Network" % (team,cell,stim)
      save_files(cls_data, dcor_data, 0.5, os.path.join(out_path_exp, network_stem))

  # Writeup file for the experimental networks (a single space).
  handle = open(os.path.join(out_path_exp, "%s-Network-Writeup.txt" % team), "w")
  handle.write(" ")
  handle.close()

  # In-silico outputs are named
  #   TeamName-Network-Insilico.sif
  #   TeamName-Network-Insilico.eda
  insil_cls = mio.load(os.path.join(path, "insilico.all.cls.combined.csv"))
  insil_dcor = mio.load(os.path.join(path, "insilico.all.dcor.combined.csv"))
  insil_stem = "%s-Network-Insilico" % (team)
  save_files(insil_cls, insil_dcor, 0.5, os.path.join(out_path_insil, insil_stem))

  handle = open(os.path.join(out_path_insil, "%s-Network-Insilico-Writeup.txt" % team), "w")
  handle.write(" ")
  handle.close()
def main():
  """Build a k=2 connectivity matrix from the gold adjacency matrix and save it.

  For every column index ``i`` of the integer adjacency matrix, the indices
  yielded by ``get_connected(M, i, k=2)`` are marked with 1 in column ``i``
  of the result.  The input matrix, the result and their element-wise
  comparison are printed, and the result is saved as text.

  Raises:
    AssertionError: if the loaded row ids and column ids differ.
  """
  ADJ_D = mio.load("data/gold_0.32_dot_nw.adj.csv")
  M = np.array(ADJ_D['M'], dtype=int)
  print(M)
  # Builtin int replaces the np.int alias, which newer NumPy no longer has.
  P = np.zeros(M.shape, dtype=int)
  for i in xrange(M.shape[1]):
    js = list(get_connected(M, i, k=2))
    P[js, i] = 1
  print(P)
  print(M == P)
  assert ADJ_D['row_ids'] == ADJ_D['col_ids']
  # Close the output handle explicitly so the table is flushed to disk.
  with open("data/gold.paths.dcor0.32.k2.tab", "w") as fp:
    mio.save(P, fp, ftype="txt", row_ids=ADJ_D['row_ids'], col_ids=ADJ_D['col_ids'], fmt="%d")
Exemple #15
0
def main():
    """Convert each cell/stimulus adjacency matrix to contest files and a .dot file.

    Loads the cls, dcor, db and adj tab-delimited matrices for every pair in
    CELLS x STIMS, checks that their shapes and row/column ids agree, then
    calls adj_to_contest and pretty_dot.  Finally writes a single-space
    writeup file into out_path_exp.
    """
    make_dir(out_path_exp)
    for cell in CELLS:
        for stim in STIMS:
            pair = (cell, stim)
            cls_path = os.path.join(path, "%s.%s.cls.tab" % pair)
            cls = mio.load(cls_path, delimit_c="\t")
            dcor_path = os.path.join(path, "%s.%s.dcor.tab" % pair)
            dcor = mio.load(dcor_path, delimit_c="\t")
            db_path = os.path.join(path, "%s.%s.db.tab" % pair)
            db = mio.load(db_path, delimit_c="\t")
            adj_path = os.path.join(path, "%s.%s.adj.tab" % pair)
            adj = mio.load(adj_path, delimit_c="\t")

            # Sanity checks: matching square matrices sharing one id ordering.
            cls_m, dcor_m = cls['M'], dcor['M']
            assert cls_m.shape == dcor_m.shape
            assert cls_m.shape[0] == cls_m.shape[1]
            assert cls['row_ids'] == dcor['row_ids']
            assert cls['row_ids'] == cls['col_ids']
            assert dcor['row_ids'] == dcor['col_ids']
            assert adj['row_ids'] == dcor['col_ids']
            assert db['row_ids'] == dcor['col_ids']

            contest_stem = os.path.join(
                out_path_exp, "%s-%s-%s-Network" % (team, cell, stim))
            adj_to_contest(D=adj, path_stem=contest_stem)
            dot_path = os.path.join(path,
                                    "pretty_sep16_%s_%s.dot" % (cell, stim))
            pretty_dot(adj, db, dcor, cls, dot_path)

    # required as part of submission
    writeup = open(os.path.join(out_path_exp, "%s-Network-Writeup.txt" % team), "w")
    writeup.write(" ")
    writeup.close()
Exemple #16
0
def main(fname=None, as_rows=True, use_weak=False):
    """Compute an all-pairs class distance matrix from a class enumeration file.

    Args:
        fname: path of the integer class enumeration matrix (required).
        as_rows: if true, compare rows; otherwise the matrix is transposed
            and columns are compared.  The strings 'f', 'false' and 'none'
            (any case) are treated as False.
        use_weak: if true, treat the matrix as weak classes (values 0..5
            are asserted) and use all_pairs_weak_dist; otherwise values
            outside 0..7 are reported and all_pairs_bool_dist is used.

    Returns:
        The name of the file the distance matrix was saved to.

    Raises:
        AssertionError: if ``fname`` is missing, or if ``use_weak`` is set
            and the matrix holds values outside 0..5.
    """
    assert fname
    if isinstance(as_rows, basestring) and as_rows.lower() in ('f', 'false', 'none'):
        as_rows = False
    if not use_weak:
        print("Loading boolean class enumeration matrix %s" % fname)
    else:
        print("Loading weak class enumeration matrix %s" % fname)
    D = mio.load(fname, dtype=int)
    M = D['M']

    # verify that enumeration matrices look credible
    if not use_weak:
        valid = np.in1d(M, np.arange(8))
        if not np.all(valid):
            print("%d invalid values in M." % (np.sum(~valid)))
            print("up to 20 unrecognized values include...")
            # np.in1d returns a flat mask, so index the flattened matrix,
            # and select the entries that are NOT valid.
            unrecognized = np.unique(np.asarray(M).ravel()[~valid])
            print(unrecognized[:20])
    else:
        assert np.all(np.in1d(M, np.array([0, 1, 2, 3, 4, 5])))

    if not as_rows:
        print("Computing distance between all pairs of columns...")
        M = np.transpose(M)
    else:
        print("Computing distance between all pairs of rows...")

    if not use_weak:
        print("Computing Boolean Class distance")
        DIST = all_pairs_bool_dist(M)
        fname_out = fname + '.booldist.tab'
    else:
        print("Computing Weak Class distance")
        DIST = all_pairs_weak_dist(M)
        fname_out = fname + '.weakdist.tab'

    print("Saving boolean class distance matrix as %s" % fname_out)
    # The result is labelled on both axes by the ids of the compared axis.
    ids = D.get('row_ids', None) if as_rows else D.get('col_ids', None)
    mio.save(DIST, fp=fname_out, row_ids=ids, col_ids=ids, fmt="%d")
    return fname_out
def main():
  """Fill k=3 and k=2 paths for the adjacency matrix, save them, and print ranks.

  Raises:
    AssertionError: if the adjacency matrix is not square or its id lists
      do not match its dimensions.
  """
  # 1: load adj matrix
  ADJ_D = mio.load(ADJ_FNAME, dtype=int, force_row_ids=True, force_col_ids=True)
  assert len(ADJ_D['row_ids']) == len(ADJ_D['col_ids'])
  assert len(ADJ_D['row_ids']) == ADJ_D['M'].shape[0]
  assert ADJ_D['M'].shape[0] == ADJ_D['M'].shape[1]

  def save_paths(P, out_fname):
    # Write one path matrix as comma-delimited text, closing the file after.
    with open(out_fname, "w") as fp:
      mio.save(P, fp, ftype="txt", row_ids=ADJ_D['row_ids'], col_ids=ADJ_D['col_ids'], fmt="%d", delimit_c=",")

  # 2.1: find paths k=3 (the original saved the undefined name Pinf here)
  P3 = paths.fill_paths(ADJ_D["M"], k=3)
  save_paths(P3, "data/all_k61_0.5_dot_nw.adj.paths.k3.csv")

  # 2.2: find paths k=2
  P2 = paths.fill_paths(ADJ_D["M"], k=2)
  save_paths(P2, "data/all_k61_0.5_dot_nw.adj.paths.k2.csv")

  # 3: load ranks
  with open(RANKS_FNAME) as fp:
    ranks = load_ranks(fp)
  print(ranks)
Exemple #18
0
def main(fname=None, as_rows=True, use_weak=False):
  """Compute an all-pairs class distance matrix from a class enumeration file.

  Args:
    fname: path of the integer class enumeration matrix (required).
    as_rows: if true, compare rows; otherwise the matrix is transposed and
      columns are compared.  The strings 'f', 'false' and 'none' (any
      case) are treated as False.
    use_weak: if true, treat the matrix as weak classes (values 0..5 are
      asserted) and use all_pairs_weak_dist; otherwise values outside 0..7
      are reported and all_pairs_bool_dist is used.

  Returns:
    The name of the file the distance matrix was saved to.

  Raises:
    AssertionError: if ``fname`` is missing, or if ``use_weak`` is set and
      the matrix holds values outside 0..5.
  """
  assert fname
  if isinstance(as_rows, basestring) and as_rows.lower() in ('f','false','none'):
    as_rows = False
  if not use_weak:
    print("Loading boolean class enumeration matrix %s" % fname)
  else:
    print("Loading weak class enumeration matrix %s" % fname)
  D = mio.load(fname, dtype=int)
  M = D['M']

  # verify that enumeration matrices look credible
  if not use_weak:
    valid = np.in1d(M, np.arange(8))
    if not np.all(valid):
      print("%d invalid values in M." % (np.sum(~valid)))
      print("up to 20 unrecognized values include...")
      # np.in1d returns a flat mask, so index the flattened matrix, and
      # select the entries that are NOT valid.
      unrecognized = np.unique(np.asarray(M).ravel()[~valid])
      print(unrecognized[:20])
  else:
    assert np.all(np.in1d(M, np.array([0,1,2,3,4,5])))

  if not as_rows:
    print("Computing distance between all pairs of columns...")
    M = np.transpose(M)
  else:
    print("Computing distance between all pairs of rows...")

  if not use_weak:
    print("Computing Boolean Class distance")
    DIST = all_pairs_bool_dist(M)
    fname_out = fname+'.booldist.tab'
  else:
    print("Computing Weak Class distance")
    DIST = all_pairs_weak_dist(M)
    fname_out = fname+'.weakdist.tab'

  print("Saving boolean class distance matrix as %s" % fname_out)
  # The result is labelled on both axes by the ids of the compared axis.
  ids = D.get('row_ids', None) if as_rows else D.get('col_ids', None)
  mio.save(DIST, fp=fname_out, row_ids=ids, col_ids=ids, fmt="%d")
  return fname_out
def main(fname=None, pkl=True, **kwds):
  """Compute the all-pairs weak class matrix for ``fname`` and save it.

  Args:
    fname: path of the data matrix to load with ``mio.load`` (required).
    pkl: if true, also pickle the weak class matrix.  The strings 'f',
      'false' and 'none' (any case) are treated as False.
    **kwds: forwarded to ``compute_all_weak``.  'err' is converted to int
      and 'th' to float first, so string values are accepted.

  Returns:
    The weak class matrix returned by ``compute_all_weak``.

  Raises:
    AssertionError: if ``fname`` is empty or missing.
  """
  assert fname
  if isinstance(pkl, basestring) and pkl.lower() in ('f','false','none'):
    pkl = False
  if 'err' in kwds:
    kwds['err'] = int(kwds['err'])
  if 'th' in kwds:
    kwds['th'] = float(kwds['th'])

  D = mio.load(fname)
  n_rows, n_cols = D['M'].shape[0], D['M'].shape[1]
  print("Computing all pairs weak boolean class...")
  print("Computing all pairs weak class for a (%d x %d) data matrix (%d x %d result matrix)..." %
        (n_rows, n_cols, n_rows, n_rows))
  WEAK, err, th = compute_all_weak(D['M'], **kwds)
  print("Used parameters err=%d, cutoff th=%f" % (err, th))

  fname_out = "%s.err%d.th%.4f.weak.tab" % (fname, err, th)
  print("Saving %s..." % (fname_out))
  mio.save(WEAK, fname_out, fmt="%d", row_ids=D['row_ids'], col_ids=D['row_ids'])
  if pkl:
    fname_pkl_out = fname_out.rpartition('.')[0]+'.pkl'
    print("Saving %s..." % (fname_pkl_out))
    # protocol=-1 selects a binary pickle format, so open in binary mode
    # and close the handle deterministically.
    with open(fname_pkl_out, "wb") as fp:
      pickle.dump(WEAK, fp, protocol=-1)
  return WEAK
Exemple #20
0
def main():
    """Cross-check the correlation and distance-correlation implementations.

    Prints, for each pair of implementations, whether all entries agree
    within the given tolerance, then compares two of the results against
    the matrix stored in kungold.tab and prints a pair of mirrored entries.
    """
    def agree(a, b, tol):
        # True when every element of |a - b| is below tol.
        return np.all(np.abs(a - b) < tol)

    tight = 0.0000000000001

    DCOR = script.main(fname=FNAME)
    pcc_numpy = compute_all_ppc_numpy(M)
    pcc_scipy = compute_all_pcc_scipy(M)
    print(agree(pcc_numpy, pcc_scipy, tight))

    DCOR = compute_all_dcor(M)
    dcor_loop = loop_dcor(M)
    print(agree(DCOR, dcor_loop, tight))
    dcor_2 = compute_all_dcor_2(M)
    print(agree(DCOR, dcor_2, tight))
    dcor_3 = compute_all_dcor_3(M)
    print(agree(DCOR, dcor_3, tight))
    print(agree(DCOR, dcor_3, 0.1))

    kun = mio.load("kungold.tab")['M']
    print(kun)
    print("kun %s" % agree(DCOR, kun, 0.001))
    print("kun %s" % agree(dcor_loop, kun, 0.001))

    print("%s %s" % (DCOR[5, 3], DCOR[3, 5]))
    print("%s %s" % (dcor_3[5, 3], dcor_3[3, 5]))
Exemple #21
0
def main():
    """Exercise stepfit and all_pairs_bool on the gold C. elegans matrix.

    Prints stepfit of a 1..10 vector, computes the first stepfit result for
    every row of the loaded matrix, runs all_pairs_bool with b=0.3, prints
    the result and the loaded dict's keys, and saves the class matrix.
    """
    ramp = np.arange(1, 11)
    # H and L are not used below; kept as in the original experiment.
    H = np.matrix([[0, 0, 0, 1], [1, 1, 1, 0], [0, 0, 0, 1]])
    L = np.matrix([[1, 1, 0, 0], [0, 0, 0, 1], [1, 1, 1, 0]])
    print(stepfit(ramp))

    data = mio.load("nice.may3.Eg.expr.gold.celegans.csv")
    matrix = data['M']
    row_steps = [stepfit(row)[0] for row in matrix]
    b = 0.3
    # Alternative call: all_pairs_bool(matrix, row_steps, b, z_th=2.7)
    classes = all_pairs_bool(matrix, row_steps, b)
    print(classes)
    print(data.keys())

    ids = data['row_ids']
    mio.save(classes, "nice.test.tab", fmt="%d", row_ids=ids, col_ids=ids)
    print("OK: saved result to nice.test.tab")
Exemple #22
0
def main():
  """Cross-check the correlation and distance-correlation implementations.

  Prints, for each pair of implementations, whether all entries agree
  within the given tolerance, then compares two of the results against the
  matrix stored in kungold.tab and prints a pair of mirrored entries.
  """
  def agree(a, b, tol):
    # True when every element of |a - b| is below tol.
    return np.all(np.abs(a-b) < tol)

  tight = 0.0000000000001

  DCOR = script.main(fname=FNAME)
  pcc_numpy = compute_all_ppc_numpy(M)
  pcc_scipy = compute_all_pcc_scipy(M)
  print(agree(pcc_numpy, pcc_scipy, tight))

  DCOR = compute_all_dcor(M)
  dcor_loop = loop_dcor(M)
  print(agree(DCOR, dcor_loop, tight))
  dcor_2 = compute_all_dcor_2(M)
  print(agree(DCOR, dcor_2, tight))
  dcor_3 = compute_all_dcor_3(M)
  print(agree(DCOR, dcor_3, tight))
  print(agree(DCOR, dcor_3, 0.1))

  kun = mio.load("kungold.tab")['M']
  print(kun)
  print("kun %s" % agree(DCOR, kun, 0.001))
  print("kun %s" % agree(dcor_loop, kun, 0.001))

  print("%s %s" % (DCOR[5,3], DCOR[3,5]))
  print("%s %s" % (dcor_3[5,3], dcor_3[3,5]))
def main(fname=None, n=100000, dep="dcor", do_abs=False):
  """Sample a dependency measure ``n`` times and print the upper tail and a histogram.

  Args:
    fname: path of the data matrix to load with ``mio.load`` (required).
    n: number of ``f_dep`` evaluations; converted to int.
    dep: "dcor" (uses ``dcor.dcor``) or "pcc" (uses the first element of
      ``scipy.stats.pearsonr``).
    do_abs: if true, absolute values are taken before sorting.  A string
      is true unless it is 'f', 'false' or 'none' (any case).

  Raises:
    AssertionError: if ``fname`` is empty or missing.
    Exception: if ``dep`` is not a recognised measure.
  """
  assert fname
  n = int(n)
  if isinstance(do_abs, basestring):
    do_abs = do_abs.lower() not in ('f','false','none')
  D = mio.load(fname)
  M = D['M']

  if dep == "dcor":
    import dcor
    d = dcor.dcor
    hist_bins, hist_range = 20, (0,1)
  elif dep == "pcc":
    import scipy.stats
    d = lambda a,b: scipy.stats.pearsonr(a,b)[0]
    if do_abs:
      hist_bins, hist_range = 20, (0,1)
    else:
      hist_bins, hist_range = 40, (-1,1)
  else:
    raise Exception("Unrecognized dependency measure '%s'" % dep)

  # Each sample is an independent f_dep call; the loop index is not used.
  Z = np.array([f_dep(M, d) for _ in xrange(n)])
  if do_abs:
    Z = np.abs(Z)
  Z.sort()

  print("n=%d" % n)
  print("")
  i = 1
  while i <= n:
    # float(i)/n: plain i/n is integer division under Python 2 and printed 0
    # for every row except i == n.
    print("%s %s %s" % (i, float(i)/n, Z[-i]))
    i *= 10
  print("")
  print(np.histogram(Z, range=hist_range, bins=hist_bins))
def main():
  """Write the in-silico contest files, a .dot visualization and the writeup.

  Loads the manual in-silico adjacency table, passes it to adj_to_contest
  and directed_dot, and writes a single-space writeup file into
  out_path_insil.
  """
  # In Silico Data Only
  # ----------------------------------------
  INFILE = os.path.join(path, "insilico.manual.sep9.adj.tab")
  make_dir(out_path_insil)
  adj = mio.load(INFILE, delimit_c="\t")

  # submission files into submission directory
  #RM = directed_unweighted_trifilt(ADJ_D)
  #print np.sum(RM)
  #ADJ_D['M'][RM==1] = 0
  submission_stem = os.path.join(out_path_insil, "%s-Network-Insilico" % team)
  adj_to_contest(adj, submission_stem)

  # nice signed directed network visualization
  dot_fname = INFILE + ".dot"
  directed_dot(adj, outfile=dot_fname)
  print("Wrote .dot visualization to %s" % dot_fname)

  # token writeup into submission directory
  writeup = open(os.path.join(out_path_insil, "%s-Network-Insilico-Writeup.txt" % team), "w")
  writeup.write(" ")
  writeup.close()
  print("Wrote submission files to %s" % out_path_insil)
def main():
    """Write the in-silico contest files, a .dot visualization and the writeup.

    Loads the manual in-silico adjacency table, passes it to adj_to_contest
    and directed_dot, and writes a single-space writeup file into
    out_path_insil.
    """
    # In Silico Data Only
    # ----------------------------------------
    INFILE = os.path.join(path, "insilico.manual.sep9.adj.tab")
    make_dir(out_path_insil)
    adj = mio.load(INFILE, delimit_c="\t")

    # submission files into submission directory
    #RM = directed_unweighted_trifilt(ADJ_D)
    #print np.sum(RM)
    #ADJ_D['M'][RM==1] = 0
    submission_stem = os.path.join(out_path_insil,
                                   "%s-Network-Insilico" % team)
    adj_to_contest(adj, submission_stem)

    # nice signed directed network visualization
    dot_fname = INFILE + ".dot"
    directed_dot(adj, outfile=dot_fname)
    print("Wrote .dot visualization to %s" % dot_fname)

    # token writeup into submission directory
    writeup_path = os.path.join(out_path_insil,
                                "%s-Network-Insilico-Writeup.txt" % team)
    writeup = open(writeup_path, "w")
    writeup.write(" ")
    writeup.close()
    print("Wrote submission files to %s" % out_path_insil)
Exemple #26
0
def main(fname=None, pkl=True, algorithm="3", outtag="", **kwds):
    """Compute the all-pairs distance correlation matrix for ``fname`` and save it.

    Args:
        fname: path of the data matrix to load with ``mio.load`` (required).
        pkl: if true, also pickle the result.  The strings 'f', 'false' and
            'none' (any case) are treated as False.
        algorithm: "1", "2" or "3"; selects compute_all_dcor,
            compute_all_dcor_2 or compute_all_dcor_3 respectively.
        outtag: optional tag inserted before "dcor.tab" in the output name;
            a trailing "." is appended when missing.
        **kwds: forwarded to the selected compute function.

    Returns:
        The matrix returned by the selected compute function.

    Raises:
        AssertionError: if ``fname`` is empty or missing.
        Exception: if ``algorithm`` is not one of "1", "2", "3".
    """
    assert fname
    if isinstance(pkl, basestring) and pkl.lower() in ('f', 'false', 'none'):
        pkl = False
    print("Loading data from %s..." % fname)
    D = mio.load(fname)
    n_rows, n_cols = D['M'].shape[0], D['M'].shape[1]
    print("Computing all pairs (euclidean) distance correlation from a (%d x %d) data matrix to a (%d x %d) result matrix..." % (
        n_rows, n_cols, n_rows, n_rows))
    print("Computing all pairs (euclidean) distance correlation...")

    if algorithm == "1":
        print("Using Algorithm 1: single dot product, n^2*m memory")
        DCOR = compute_all_dcor(D['M'], **kwds)
    elif algorithm == "2":
        print("Using Algorithm 2: multiple dot products, n*m memory")
        DCOR = compute_all_dcor_2(D['M'], **kwds)
    elif algorithm == "3":
        print("Using Algorithm 3: multiple dot products, n*m memory, n choose 2 savings")
        DCOR = compute_all_dcor_3(D['M'], **kwds)
    else:
        raise Exception("Unknown algorithm %s" % algorithm)

    if outtag and outtag[-1] != ".":
        outtag += "."
    fname_out = '%s.%sdcor.tab' % (fname, outtag)
    print("Saving %s..." % (fname_out))
    mio.save(DCOR,
             fname_out,
             fmt="%.4f",
             row_ids=D['row_ids'],
             col_ids=D['row_ids'])
    if pkl:
        fname_pkl_out = fname_out.rpartition('.')[0] + '.pkl'
        print("Saving %s..." % (fname_pkl_out))
        # protocol=-1 selects a binary pickle format, so open in binary mode
        # and close the handle deterministically.
        with open(fname_pkl_out, "wb") as fp:
            pickle.dump(DCOR, fp, protocol=-1)
    return DCOR
Exemple #27
0
def load_extend_test_datas():
    """Load the extended test features.

    Returns:
        The matrix stored under 'M' in extend_test_features.pkl, wrapped
        with ``np.mat`` and divided by 255.0.
    """
    loaded = matrix_io.load('extend_test_features.pkl', float)
    raw_features = loaded['M']
    return np.mat(raw_features) / 255.0
Exemple #28
0
def matrix_files_to_flat_graphviz_file(cls_fname=None, dcor_fname=None, out_fname=None, ignore_fname=None, color_fname=None, weak_fname=None, min_d=0, weighted=True, plot_na=False, rank_cluster_fname=None, ignore_nodes=None, do_rank_clust=True, rank_clust_names=False, **kwds):
  """From matrix file names and parameters, write resulting graphviz output to file.

  Args:
    cls_fname, dcor_fname: class and dcor matrix files (required); both
      must be square and share identical row/column ids.
    out_fname: path of the graphviz file to write (required).
    ignore_fname, weak_fname: optional matrices with the same ids and shape.
    color_fname: optional file passed to load_colors_as_node_style_dict.
    min_d: converted to float; must be >= 0.
    weighted, plot_na: normalised with str_true_false.
    rank_cluster_fname: optional file passed to load_rank_clusters.
    ignore_nodes: optional comma-separated string, split into a list.
    do_rank_clust: false-like values ("F", "f", "false", "FALSE", "None",
      "False", False, None) discard any loaded rank clusters.
    rank_clust_names: unless false-like (same tokens), the class matrix
      row ids are passed to load_rank_clusters.
    **kwds: forwarded to print_graphviz.

  Returns:
    Whatever print_graphviz returns.

  Raises:
    AssertionError: on missing required file names, negative ``min_d``, or
      mismatching ids / shapes between the matrices.
  """
  assert cls_fname and dcor_fname and out_fname
  weighted = str_true_false(weighted)
  plot_na = str_true_false(plot_na)
  min_d = float(min_d)
  assert min_d >= 0
  if ignore_nodes is not None:
    ignore_nodes = ignore_nodes.split(',')

  # One shared list of false-like tokens.  The original rank_clust_names
  # check lacked "False", so that string was treated as true there.
  false_tokens = ("F", 'f', 'false', 'FALSE', 'None', "False", False, None)
  do_rank_clust = do_rank_clust not in false_tokens

  CLS_D = mio.load(cls_fname)
  DCOR_D = mio.load(dcor_fname)
  if color_fname:
    with open(color_fname) as fp:
      node_styles = load_colors_as_node_style_dict(fp)
  else:
    node_styles = None

  if rank_clust_names in false_tokens:
    rank_clust_names = None
  else:
    rank_clust_names = CLS_D['row_ids']

  if rank_cluster_fname is not None:
    with open(rank_cluster_fname) as fp:
      rank_clusters = load_rank_clusters(fp, rank_clust_names)
  else:
    rank_clusters = None
  print(rank_clusters)
  if not do_rank_clust:
    print("kill ranks...")
    rank_clusters = None

  assert CLS_D['row_ids'] == CLS_D['col_ids']
  assert DCOR_D['row_ids'] == DCOR_D['col_ids']
  assert CLS_D['row_ids'] == DCOR_D['row_ids']
  names = CLS_D['row_ids']
  CLS, DCOR = CLS_D['M'], DCOR_D['M']
  assert np.size(CLS,0) == np.size(CLS,1)
  assert np.shape(CLS) == np.shape(DCOR)

  if weak_fname:
    WEAK_D = mio.load(weak_fname)
    assert WEAK_D['row_ids'] == WEAK_D['col_ids']
    assert WEAK_D['row_ids'] == names
    WEAK = WEAK_D["M"]
    assert np.shape(WEAK) == np.shape(CLS)
  else:
    WEAK = None

  if ignore_fname:
    IGNORE_D = mio.load(ignore_fname, force_row_ids=True, force_col_ids=True)
    assert IGNORE_D['row_ids'] == IGNORE_D['col_ids']
    try:
      assert IGNORE_D['row_ids'] == names
    except AssertionError:
      # Show both id lists before re-raising to ease debugging.
      print(IGNORE_D['row_ids'])
      print(names)
      raise
    IGNORE = IGNORE_D["M"]
    assert np.shape(IGNORE) == np.shape(CLS)
  else:
    IGNORE = None

  # The context manager closes the output file even if print_graphviz raises.
  with open(out_fname, "w") as out:
    G = print_graphviz(names=names, out=out, CLS=CLS, DCOR=DCOR, WEAK=WEAK, IGNORE=IGNORE, node_styles=node_styles, min_d=min_d, weighted=weighted, plot_na=plot_na, rank_clusters=rank_clusters, ignore_nodes=ignore_nodes, **kwds)
  return G
from __init__ import *
import matrix_io as mio
from clusters_prototype_lib import *
import subprocess

# Hard-coded run configuration for this one-off script.
# NOTE(review): min_d, graphviz_cmd, outpath_prefix and out_path are only
# defined here, not used in the visible part of the script -- presumably
# consumed further down; confirm.
min_d=0.13
TAB_PREFIX="/Users/z/Dropbox/biostat/brca/GSE7307.e2fnets.jun19/tab/"
# The three input matrices (BOOL/"cls", DCOR, WEAK) share the same prefix.
cls_fname=TAB_PREFIX+"jun20.R.GSE7307.TF.BOOL.syms.tab"
dcor_fname=TAB_PREFIX+"jun20.R.GSE7307.TF.DCOR.syms.tab"
weak_fname=TAB_PREFIX+"jun20.R.GSE7307.TF.WEAK.syms.tab"
graphviz_cmd="dot"
outpath_prefix="/Users/z/Desktop/brca_e2f_custom"
clusts_fname="/Users/z/Dropbox/biostat/brca/GSE7307.e2fnets.jun19/tab/jun19.GSE7307.k299.gsplom.clust.names.e2fcustom.txt"
out_path="/Users/z/Desktop/"

# Load each matrix file and keep both the loaded dict and its 'M' entry.
CLS_D = mio.load(cls_fname)
DCOR_D = mio.load(dcor_fname)
WEAK_D = mio.load(weak_fname)
CLS = CLS_D['M']
DCOR = DCOR_D['M']
WEAK = WEAK_D['M']
# Node names are taken from the DCOR matrix row ids only.
# NOTE(review): nothing here checks that CLS/WEAK use the same row ids.
node_names = DCOR_D['row_ids']
# NOTE(review): the file object passed to load_clusters is never closed.
CLUSTS = load_clusters(open(clusts_fname))
# Print the three shapes side by side so they can be compared by eye.
print CLS.shape, DCOR.shape, WEAK.shape

# Convert cluster names into row ID indices (indexed from zero)
# NOTE: C is a list, not a dict. It is ordered in same order as clust_names
C, clust_names = clust_names_to_row_num_list(CLUSTS, node_names)
print C
print clust_names
def main(pkl_fname=None, row_fname=None, col_fname=None, outdir=None, sig=None, doabs=False, diag=1):
  """
  pkl_fname: path to pickled numpy dependency matrix
  row_fname: path to labeled text matrix with row ids, maybe col ids
  col_fname: optional path to labeled text matrix with col ids
  sig: float of minimum significance
  doabs: flag of whether to use absolute value for significance testing
  diag: if matrix is symmetric, the value of the diagonal
  """
  assert pkl_fname and row_fname and outdir
  make_dir(outdir)
  if doabs:
    abstxt = "T"
  else:
    abstxt = "F"
  out_fname = os.path.join(outdir, os.path.basename(pkl_fname.rpartition('.')[0]))
  if sig:
    out_fname += ".sig%f" % sig
  if doabs:
    out_fname += ".absT"
  out_fname += ".tab"

  print "Text matrix will be saved to: %s" % out_fname
  M = pickle.load(open(pkl_fname))

  # Get row and column labels.
  try:
    D_row = mio.load(row_fname)
    row_names = np.array(D_row['row_ids'])
  except AssertionError:
    row_names = np.array([s.strip('\n\r') for s in open(row_fname)])
  if col_fname is None:
    col_names = np.array(D_row['col_ids'])
  else:
    if row_fname == col_fname:
      col_names = row_names
    else:
      try:
        D_col = mio.load(col_fname)
        col_names = np.array(D_col['row_ids']) # Use row IDs as column IDs in Dependency Matrix
      except AssertionError:
        col_names = np.array([s.strip('\n\r') for s in open(col_fname)])

  if len(row_names) == np.size(M,0) and len(col_names) == np.size(M,1):
    print "Number of rows(%d) and column(%d) names fit matrix size (%d,%d)." % \
        (len(row_names), len(col_names), np.size(M,0), np.size(M,1))
  else:
    n = len(row_names)
    if np.size(M,0) == n*(n-1)//2:
      print "Matrix seems to be n choose 2 upper triangle matrix. Converting to full matrix..."
      M = distance.squareform(M)
      if diag is not None:
        print "Forcing diagonal to be:", diag
        for i in xrange(n):
          M[i,i] = diag
    else:
      raise Exception, "Unknown matrix size %s given #row_ids(%d), #col_ids(%d)" % \
          (np.shape(M), len(row_names), len(col_names))
  

  # Remove insignificant rows and columns; align row/col names
  original_dim = M.shape
  if sig is not None:
    sig = float(sig)
    if not doabs:
      col_max = np.amax(M,0)
      row_max = np.amax(M,1)
    else:
      col_max = np.amax(np.abs(M),0)
      row_max = np.amax(np.abs(M),1)
    M = M[row_max>=sig,:][:,col_max>=sig]
    row_names = row_names[row_max>=sig]
    col_names = col_names[col_max>=sig]
  new_dim = M.shape

  # Dump to text
  now_timestamp = datetime.datetime.now().isoformat('_')
  header = ["Generated on %s from pickled matrix file %s" % (now_timestamp, pkl_fname),
            "Original dimensions: %s, New dimensions: %s" % (original_dim, new_dim),
            "sig: %s, abs: %s" % (str(sig), str(abstxt))]
  print "\n".join(header)
  fp = open(out_fname, "w")
  mio.save(M, fp, ftype="txt", delimit_c="\t", row_ids=list(row_names), col_ids=list(col_names), headers=header)
  fp.close()
  print "Tab matrix saved to %s." % out_fname
  
  return out_fname
#!/usr/bin/python
"""Hack script and notes to generate Jun19 BRCA network-of-networks.

python script.py min_d=0.30 cls_fname=data/D.expr.gold.CLS.apr.19.tab dcor_fname=data/D.expr.gold.DCOR.apr.19.tab color_fname=data/gold.celegans.phase.colors.genes.txt graphviz_cmd=dot weak_fname=data/gold.weak.tab weighted=False outpath_prefix=~/Desktop/gold_0.30_dot_nw

"""
from clusters_prototype_lib import *
import matrix_io as mio
import subprocess

# Hard-coded location of the input matrices and per-cluster PDF renderings.
DIR = "/Users/z/Dropbox/biostat/brca/GSE7307.e2fnets/"
# Path pattern of one cluster's PDF; %s is filled with an entry of SUFFIXES.
PDF_PTN = DIR + "jun19_e2f_clust_pdfs/c%s.fdp.dot.pdf"
# Cluster identifiers; the asserts below require the matrix row ids to equal
# this list exactly (same strings, same order).
SUFFIXES = ['117', '150', '83', '82', '80', '51']
# Inter-cluster matrices in their DCOR, BOOL and WEAK variants.
INTER_DCOR_D = mio.load(DIR+"GSE7307.TF.R.299.interest.inter.DCOR.jun19.tab")
INTER_BOOL_D = mio.load(DIR+"GSE7307.TF.R.299.interest.inter.BOOL.jun19.tab")
INTER_WEAK_D = mio.load(DIR+"GSE7307.TF.R.299.interest.inter.WEAK.jun19.tab")
# Printed before the asserts so a label mismatch can be diagnosed by eye.
print INTER_DCOR_D['row_ids']
print SUFFIXES
# All three matrices must be labeled identically, and DCOR must have the
# same ids on rows and columns.
assert INTER_DCOR_D['row_ids'] == SUFFIXES
assert INTER_DCOR_D['row_ids'] == INTER_BOOL_D['row_ids']
assert INTER_DCOR_D['row_ids'] == INTER_WEAK_D['row_ids']
assert INTER_DCOR_D['row_ids'] == INTER_DCOR_D['col_ids']

# Collect the two values returned by get_pdf_file_size for each cluster PDF,
# in SUFFIXES order (presumably width and height -- confirm against helper).
C_plots = []
for c in SUFFIXES:
  print PDF_PTN%c
  w,h = get_pdf_file_size(PDF_PTN%c)
  C_plots.append((w,h))

# Output .dot file, opened for writing.
# NOTE(review): no matching close is visible in this part of the script.
clust_out = DIR+"e2f.jun20.clusts.dot"
fp = open(clust_out,"w")
from script_weak import *
import matrix_io as mio

# Load the matrix file directly and also run main() on the same file.
# NOTE(review): main and np are presumably provided by the star import from
# script_weak above -- confirm.
D = mio.load("nice.may3.Eg.expr.gold.celegans.csv")
WEAK = main("nice.may3.Eg.expr.gold.celegans.csv")
print WEAK
# Show the WEAK entries for the pair (3, 6) in both orientations.
print WEAK[3,6], WEAK[6,3]
# Rows 3 and 6 of the loaded matrix, thresholded at 0.2 into 0/1 integers.
r3 = np.array(D['M']>0.2,dtype=np.int)[3,]
r6 = np.array(D['M']>0.2,dtype=np.int)[6,]
print D['row_ids'][3], r3
print D['row_ids'][6], r6
# Element-wise difference: nonzero where exactly one of the rows passes 0.2.
print r3-r6
Exemple #33
0
"""
In [12]: %timeit DCOR =  compute_all_dcor(M)
100 loops, best of 3: 5.01 ms per loop

In [19]: %timeit DCORL = test.loop_dcor(M)
10 loops, best of 3: 56.4 ms per loop
"""
from __init__ import *
import matrix_io as mio
import script
import numpy as np
from scipy.spatial.distance import pdist
from scipy.spatial.distance import squareform
import dcor

# Input matrix file; loaded once at import time and shared by the functions below.
FNAME = "nice.may3.Eg.expr.gold.celegans.csv"
M = mio.load(FNAME)['M']


def loop_dcor(M):
    """Fill a square matrix with dcor.dcor of every ordered pair of rows of M.

    One dcor.dcor call is made per (i, j) pair, including i == j and both
    orientations. Returns an array of shape (M.shape[0], M.shape[0]).
    """
    n_rows = M.shape[0]
    result = np.zeros((n_rows, n_rows))
    for a in range(len(M)):
        row_a = M[a]
        for b in range(len(M)):
            result[a, b] = dcor.dcor(row_a, M[b])
    return result


def main():
    """Run the three correlation computations on the module-level matrix.

    Calls script.main on FNAME, then compute_all_ppc_numpy and
    compute_all_pcc_scipy on M. The results are only bound to locals.
    """
    dcor_from_script = script.main(fname=FNAME)
    pcc_numpy = compute_all_ppc_numpy(M)
    pcc_scipy = compute_all_pcc_scipy(M)
Exemple #34
0
"""
In [12]: %timeit DCOR =  compute_all_dcor(M)
100 loops, best of 3: 5.01 ms per loop

In [19]: %timeit DCORL = test.loop_dcor(M)
10 loops, best of 3: 56.4 ms per loop
"""
from __init__ import *
import matrix_io as mio
import script
import numpy as np
from scipy.spatial.distance import pdist
from scipy.spatial.distance import squareform
import dcor

# Input matrix file; loaded once at import time and shared by the functions below.
FNAME="nice.may3.Eg.expr.gold.celegans.csv"
M = mio.load(FNAME)['M']

def loop_dcor(M):
  """Compute dcor.dcor for each ordered pair of rows of M, one call per pair.

  Returns a square array with M.shape[0] rows; entry [i, j] holds the value
  for row i against row j.
  """
  size = M.shape[0]
  rows = list(M)
  out = np.zeros((size, size))
  # np.ndindex walks (i, j) in row-major order, same as two nested loops.
  for i, j in np.ndindex(len(rows), len(rows)):
    out[i, j] = dcor.dcor(rows[i], rows[j])
  return out

def main():
  DCOR = script.main(fname=FNAME)
  COR = compute_all_ppc_numpy(M)
  SCI = compute_all_pcc_scipy(M)
  print np.all(np.abs(COR-SCI) < 0.0000000000001)
  DCOR = compute_all_dcor(M)
def main():
  # 1: load adj matrix
  ADJ_D = mio.load(ADJ_FNAME, dtype=np.int, force_row_ids=True, force_col_ids=True)
  assert len(ADJ_D['row_ids']) == len(ADJ_D['col_ids'])
  assert len(ADJ_D['row_ids']) == ADJ_D['M'].shape[0]
  assert ADJ_D['M'].shape[0] == ADJ_D['M'].shape[1]

  # 2: find all paths
  Pinf = paths.fill_paths(ADJ_D["M"], k=None)
  mio.save(Pinf, open("data/all_k61_0.5_dot_nw.adj.paths.kinf.csv","w"), ftype="txt", row_ids=ADJ_D['row_ids'], col_ids=ADJ_D['col_ids'], fmt="%d", delimit_c=",")
  print np.sum(ADJ_D["M"]!=Pinf)
  
  P2 = paths.fill_paths(ADJ_D["M"], k=2)
  mio.save(P2, open("data/all_k61_0.5_dot_nw.adj.paths.k2.csv","w"), ftype="txt", row_ids=ADJ_D['row_ids'], col_ids=ADJ_D['col_ids'], fmt="%d", delimit_c=",")
  print np.sum(P2!=Pinf)
  
  P3 = paths.fill_paths(ADJ_D["M"], k=3)
  mio.save(P3, open("data/all_k61_0.5_dot_nw.adj.paths.k3.csv","w"), ftype="txt", row_ids=ADJ_D['row_ids'], col_ids=ADJ_D['col_ids'], fmt="%d", delimit_c=",")
  print np.sum(P3!=Pinf)
  
  P4 = paths.fill_paths(ADJ_D["M"], k=4)
  mio.save(P4, open("data/all_k61_0.5_dot_nw.adj.paths.k4.csv","w"), ftype="txt", row_ids=ADJ_D['row_ids'], col_ids=ADJ_D['col_ids'], fmt="%d", delimit_c=",")
  print np.sum(P4!=Pinf)
  
  P5 = paths.fill_paths(ADJ_D["M"], k=5)
  mio.save(P5, open("data/all_k61_0.5_dot_nw.adj.paths.k5.csv","w"), ftype="txt", row_ids=ADJ_D['row_ids'], col_ids=ADJ_D['col_ids'], fmt="%d", delimit_c=",")
  print np.sum(P5!=Pinf)

  P6 = paths.fill_paths(ADJ_D["M"], k=6)
  mio.save(P6, open("data/all_k61_0.5_dot_nw.adj.paths.k6.csv","w"), ftype="txt", row_ids=ADJ_D['row_ids'], col_ids=ADJ_D['col_ids'], fmt="%d", delimit_c=",")
  print np.sum(P6!=Pinf)

  P7 = paths.fill_paths(ADJ_D["M"], k=7)
  mio.save(P7, open("data/all_k61_0.5_dot_nw.adj.paths.k7.csv","w"), ftype="txt", row_ids=ADJ_D['row_ids'], col_ids=ADJ_D['col_ids'], fmt="%d", delimit_c=",")
  print np.sum(P7!=Pinf)

  P8 = paths.fill_paths(ADJ_D["M"], k=8)
  mio.save(P8, open("data/all_k61_0.5_dot_nw.adj.paths.k8.csv","w"), ftype="txt", row_ids=ADJ_D['row_ids'], col_ids=ADJ_D['col_ids'], fmt="%d", delimit_c=",")
  print np.sum(P8!=Pinf)

  P9 = paths.fill_paths(ADJ_D["M"], k=9)
  mio.save(P9, open("data/all_k61_0.5_dot_nw.adj.paths.k9.csv","w"), ftype="txt", row_ids=ADJ_D['row_ids'], col_ids=ADJ_D['col_ids'], fmt="%d", delimit_c=",")
  print np.sum(P9!=Pinf)

  P10 = paths.fill_paths(ADJ_D["M"], k=10)
  mio.save(P10, open("data/all_k61_0.5_dot_nw.adj.paths.k10.csv","w"), ftype="txt", row_ids=ADJ_D['row_ids'], col_ids=ADJ_D['col_ids'], fmt="%d", delimit_c=",")
  print np.sum(P10!=Pinf)

  P11 = paths.fill_paths(ADJ_D["M"], k=11)
  mio.save(P11, open("data/all_k61_0.5_dot_nw.adj.paths.k11.csv","w"), ftype="txt", row_ids=ADJ_D['row_ids'], col_ids=ADJ_D['col_ids'], fmt="%d", delimit_c=",")
  print np.sum(P11!=Pinf)

  P12 = paths.fill_paths(ADJ_D["M"], k=12)
  mio.save(P12, open("data/all_k61_0.5_dot_nw.adj.paths.k12.csv","w"), ftype="txt", row_ids=ADJ_D['row_ids'], col_ids=ADJ_D['col_ids'], fmt="%d", delimit_c=",")
  print np.sum(P12!=Pinf)

  P13 = paths.fill_paths(ADJ_D["M"], k=13)
  mio.save(P13, open("data/all_k61_0.5_dot_nw.adj.paths.k13.csv","w"), ftype="txt", row_ids=ADJ_D['row_ids'], col_ids=ADJ_D['col_ids'], fmt="%d", delimit_c=",")
  print np.sum(P13!=Pinf)

  P14 = paths.fill_paths(ADJ_D["M"], k=14)
  mio.save(P14, open("data/all_k61_0.5_dot_nw.adj.paths.k14.csv","w"), ftype="txt", row_ids=ADJ_D['row_ids'], col_ids=ADJ_D['col_ids'], fmt="%d", delimit_c=",")
  print np.sum(P14!=Pinf)

  P15 = paths.fill_paths(ADJ_D["M"], k=15)
  mio.save(P15, open("data/all_k61_0.5_dot_nw.adj.paths.k15.csv","w"), ftype="txt", row_ids=ADJ_D['row_ids'], col_ids=ADJ_D['col_ids'], fmt="%d", delimit_c=",")
  print np.sum(P15!=Pinf)

  P16 = paths.fill_paths(ADJ_D["M"], k=16)
  mio.save(P15, open("data/all_k61_0.5_dot_nw.adj.paths.k16.csv","w"), ftype="txt", row_ids=ADJ_D['row_ids'], col_ids=ADJ_D['col_ids'], fmt="%d", delimit_c=",")
  print np.sum(P16!=Pinf)
Exemple #36
0
from script_weak import *
import matrix_io as mio

# Load the matrix file directly and also run main() on the same file.
# NOTE(review): main and np are presumably provided by the star import from
# script_weak above -- confirm.
D = mio.load("nice.may3.Eg.expr.gold.celegans.csv")
WEAK = main("nice.may3.Eg.expr.gold.celegans.csv")
print WEAK
# Show the WEAK entries for the pair (3, 6) in both orientations.
print WEAK[3, 6], WEAK[6, 3]
# Rows 3 and 6 of the loaded matrix, thresholded at 0.2 into 0/1 integers.
r3 = np.array(D['M'] > 0.2, dtype=np.int)[3, ]
r6 = np.array(D['M'] > 0.2, dtype=np.int)[6, ]
print D['row_ids'][3], r3
print D['row_ids'][6], r6
# Element-wise difference: nonzero where exactly one of the rows passes 0.2.
print r3 - r6
"""
digraph {
  graph [fontname = "helvetica", nodesep=0.300000, splines=ortho, ranksep=0.400000, rank=same];
  node [fontname = "helvetica", color="#000000", style=filled, fillcolor="#ffffff"];
  edge [fontname = "helvetica", penwidth=1];
  "154" -> "107"[color="#222222", penwidth=4.512063];
  "107" -> "343"[color="#4197c7", constraint=false, dir=none, penwidth=6.054689,style=dashed];
}
"""
import matrix_io as mio
import numpy as np

# Print one Graphviz edge statement for every coded entry of the KO
# adjacency matrix. Only the edge lines are printed here; no enclosing
# "digraph { ... }" wrapper is emitted by this part of the script.
D = mio.load("KO.adj.matrix.tab", dtype=np.int)
M = D['M']
# The matrix must be square; the shape is shown if it is not.
assert M.shape[0]==M.shape[1], M.shape
n = M.shape[0]
for i in xrange(n):
  for j in xrange(n):
    # Edge attributes depend on the integer code stored at M[i,j].
    options = {}
    if M[i,j] == 1: # weak activator
      options.update({'color':'#339900'})
    elif M[i,j] == 2: # strong activator
      options.update({'color':'#00ff00'})
    elif M[i,j] == -1: # weak repressor
      options.update({'color':'orange', 'arrowhead':'tee'})
    elif M[i,j] == -2: # strong repressor
      options.update({'color':'red', 'arrowhead':'tee'})
    else:
      # Any other value (including 0) produces no edge.
      continue
    opts = ", ".join(['%s="%s"'%(k,v) for k,v in options.items()])
    # Edge direction is column id -> row id.
    print '"%s" -> "%s"[%s]' % (D['col_ids'][j], D['row_ids'][i], opts)
def main():
  # 1: load adj matrix
  ADJ_D = mio.load(ADJ_FNAME, dtype=np.int, force_row_ids=True, force_col_ids=True)
  DCOR_D = mio.load(DCOR_FNAME, force_row_ids=True, force_col_ids=True)
  assert len(ADJ_D['row_ids']) == len(ADJ_D['col_ids'])
  assert len(ADJ_D['row_ids']) == ADJ_D['M'].shape[0]
  assert DCOR_D["row_ids"] == ADJ_D['row_ids']
  assert DCOR_D["row_ids"] == DCOR_D["col_ids"]
  assert ADJ_D['M'].shape[0] == ADJ_D['M'].shape[1]
  n = ADJ_D['M'].shape[0]
  
  # 2.1: find paths k=2
  P2 = paths.fill_paths(ADJ_D["M"], k=2)
  mio.save(P2, open(P2_FNAME,"w"), ftype="txt", row_ids=ADJ_D['row_ids'], col_ids=ADJ_D['col_ids'], fmt="%d", delimit_c=",")
  # 2.2: find paths k=3
  P3 = paths.fill_paths(ADJ_D["M"], k=3)
  mio.save(P3, open(P3_FNAME,"w"), ftype="txt", row_ids=ADJ_D['row_ids'], col_ids=ADJ_D['col_ids'], fmt="%d", delimit_c=",")
  name_order = ADJ_D['row_ids']

  # 3.1: load ranks
  # (I compiled this list manually) Note that rank elements are 0 indexed in ADJ matrix
  ranks = load_ranks_named(open(RANKS_FNAME), name_order)
  #print ranks
  #print name_order
  #sys.exit(1)
  
  node_clusters = []
  # 4: find clusters in same rank at k=2
  Ignore_Clust = np.zeros(ADJ_D['M'].shape, dtype=np.bool)
  for lvl, r in enumerate(ranks):
    CC = group_in_same_rank(r, P2)
    rnp = np.array(r)
    c = [map(str,rnp[cc]+1) for cc in CC]
    node_clusters.append(c)
    # Ignore edges in equal rank clusters
    for node in r:
      adj = set(np.nonzero(ADJ_D['M'][:,node])[0])
      for e in (adj & set(r)):
        Ignore_Clust[node,e] = True
        # Also remove corresponding opposite direction
        if ADJ_D['M'][node,e] == ADJ_D['M'][e,node] == 1:
          Ignore_Clust[e,node] = True

  print "Clusters"
  for c in node_clusters:
    print c
  print

  # 4.5 attempt to hide lower strength edges without disconnecting nodes
  DCOR = DCOR_D['M']
  AD = ADJ_D['M'].copy()
  Ignore_Low = np.zeros(ADJ_D['M'].shape, dtype=np.bool)
  for i in range(n):
    for j in range(n):
      if i == j: continue
      if DCOR[i,j] < DCOR_TH and AD[j,i]:
        # edge exists from i to j and it is under dCor thresh. can we remove it?
        AD[j,i] = 0
        # undirected edge
        if AD[i,j]:
          AD[i,j] = 0
          if np.sum(AD[:,i]) == 0 or np.sum(AD[j:,]) == 0 or \
             np.sum(AD[:,j]) == 0 or np.sum(AD[i:,]) == 0:
            # no, it disconnects something
            AD[i,j] = 1 
            AD[j,i] = 1
          else:
            Ignore_Low[i,j] = True
            Ignore_Low[j,i] = True
        # directed edge
        else:
          if np.sum(AD[:,i]) == 0 or np.sum(AD[j:,]) == 0:
            # no, it disconnects something
            AD[j,i] = 1
          else:
            Ignore_Low[j,i] = True
  NL = count_edges(Ignore_Low)
  assert np.sum(Ignore_Low & ADJ_D['M']) == np.sum(Ignore_Low)
  print "Too low:", NL
  

  # 5: look for redudant directed edges between levels at least 2 levels apart
  # remove edge if path of equal length already exists
  Ignore_Far = np.zeros(ADJ_D['M'].shape, dtype=np.bool)
  A = ADJ_D['M'].copy()
  A = A & (~Ignore_Low)
  n_far_edges = 0
  for lvl in xrange(len(ranks)-2):
    this_rank = ranks[lvl]
    for dlvl in xrange(lvl+2,len(ranks)):
      delta = dlvl-lvl
      that_rank = ranks[dlvl]
      for top in this_rank:
        for low in that_rank:
          if A[low,top]:     # adj is col->row
            n_far_edges += 1
            A[low,top] = 0 # try removing this link
            # is there an alternate path of equal length to this node?
            conn = paths.is_path(A,top,delta+1)
            if not low in conn:
              A[low,top] = 1 # I guess we need this edge...
            else:
              Ignore_Far[low,top] = True
              # also remove an associated undirected edge
              if A[top,low]:
                Ignore_Far[top,low] = True
                A[top,low] = 0
  print "# Far edges", n_far_edges

  # 6: Print Stats
  assert np.sum(Ignore_Clust & Ignore_Far)==0
  NT = count_edges(ADJ_D['M'])
  NS = count_edges(Ignore_Clust)
  NF = count_edges(Ignore_Far)
  print "Total:", NT
  print "Same Level:", NS
  print "Redundant Far:", NF
  n_rm = NS['total']+NF['total']+NL['total'] # this is wrong
  #$print "removed:", n_rm
  #print "reduction:", n_rm/NT['total']
  
  # 7: Save Edge Ignore Matrix
  Ignore = Ignore_Clust | Ignore_Far | Ignore_Low
  NI = count_edges(Ignore)
  print "Ignored", NI
  print np.sum(Ignore)
  print np.sum(Ignore_Clust | Ignore_Far)
    
  print "Save Ignore Matrix at:", IGNORE_FNAME
  mio.save(Ignore, open(IGNORE_FNAME, "w"), ftype="txt", row_ids=ADJ_D['row_ids'], col_ids=ADJ_D['col_ids'], fmt="%d", delimit_c=",")