def load_tfgroups(args):
    """Group this chunk's TF ids by the fold they are trained and tested on.

    Reads one id per line from ``../data/dream5/pbm/tfids.txt``, sorts the
    ids, and passes them through ``util.getchunktargets(args, ...)`` to keep
    only the ids belonging to the chunk we have been asked to compute.

    Returns:
        A two-element list of dicts with keys ``"ids"``, ``"train_fold"`` and
        ``"test_fold"``:

        * ids among ``C_1``..``C_20`` and ``TF_1``..``TF_33``, trained on
          fold A and tested on fold B;
        * ids among ``TF_34``..``TF_66``, trained on fold B and tested on
          fold A.

        Each ``"ids"`` list is sorted so the result is identical from run to
        run.
    """
    with open("../data/dream5/pbm/tfids.txt") as f:
        tfids = sorted(line.rstrip("\r\n") for line in f)

    # Narrow down the list of ids based on which chunk we've been asked to compute
    tfids = util.getchunktargets(args, tfids)

    # Group the TF ids into those that are trained on A versus those that are trained on B
    trained_on_a = ["C_%d" % (i + 1) for i in range(20)] + \
                   ["TF_%d" % (i + 1) for i in range(0, 33)]
    trained_on_b = ["TF_%d" % (i + 1) for i in range(33, 66)]

    # Set iteration order is arbitrary (and varies with string hash
    # randomization), so sort each intersection to get a stable ordering.
    chunk_ids = set(tfids)
    tfgroups = [
        {
            "ids": sorted(chunk_ids.intersection(trained_on_a)),
            "train_fold": "A",
            "test_fold": "B",
        },
        {
            "ids": sorted(chunk_ids.intersection(trained_on_b)),
            "train_fold": "B",
            "test_fold": "A",
        },
    ]
    return tfgroups
def load_tfids(args):
    """Return the target names that fall in the chunk selected by ``args``.

    A target name is the name of any entry in ``datadir`` that ends with
    ``"_AC" + seq_suffix``, with that trailing suffix removed. The sorted
    names are handed to ``util.getchunktargets``, whose result is returned
    unchanged.
    """
    suffix = "_AC" + seq_suffix
    # Slice off only the trailing suffix; str.replace would also delete any
    # earlier occurrence of the same text inside the file name.
    targetnames = sorted(
        filename[:-len(suffix)]
        for filename in os.listdir(datadir)
        if filename.endswith(suffix)
    )
    chunktargets = util.getchunktargets(args, targetnames)
    return chunktargets
def get_chunktargets(args):
    """Determine which target names, and which columns, this chunk handles.

    The target names are the tab-separated fields on the first line of
    ``../data/rnac/targets.tsv.gz``. ``util.getchunktargets`` picks the
    subset belonging to the chunk selected by ``args``.

    Returns:
        A tuple ``(chunktargets, chunkcols)`` where ``chunktargets`` is the
        value returned by ``util.getchunktargets`` and ``chunkcols`` lists
        the 0-based positions in the header of the names found in
        ``chunktargets``.
    """
    # Only the header line is needed; the context manager makes sure the
    # gzip handle is closed instead of being left for the garbage collector.
    with gzip.open("../data/rnac/targets.tsv.gz") as f:
        header = f.readline()

    # gzip.open defaults to binary mode, which yields bytes on Python 3;
    # decode so that splitting on "\t" produces text names.
    if not isinstance(header, str):
        header = header.decode("utf-8")

    # Determine which targetnames we're responsible for
    targetnames = header.rstrip().split("\t")
    chunktargets = util.getchunktargets(args, targetnames)
    chunkcols = [
        col for col, name in enumerate(targetnames) if name in chunktargets
    ]
    return chunktargets, chunkcols
def load_tfids(args):
    """Collect the target names in ``datadir`` and keep this chunk's share.

    Every entry of ``datadir`` whose name ends with ``"_AC" + seq_suffix``
    contributes one target name: the entry name with that ending cut off.
    The names are sorted and passed to ``util.getchunktargets(args, ...)``;
    its result is returned as-is.
    """
    marker = "_AC" + seq_suffix
    marker_len = len(marker)

    targetnames = []
    for filename in os.listdir(datadir):
        if filename.endswith(marker):
            # Drop just the trailing marker. str.replace would strip every
            # occurrence, mangling names that contain the marker elsewhere.
            targetnames.append(filename[:-marker_len])
    targetnames.sort()

    return util.getchunktargets(args, targetnames)
def load_tfids(args):
    """Return the target names in ``datadir`` that belong to this chunk.

    For every entry of ``datadir`` that is not a directory, the target name
    is the text before the first ``"."`` with everything from its last
    ``"_"`` onwards removed. Duplicate names are collapsed, the remainder is
    sorted, and ``util.getchunktargets`` selects the chunk given by ``args``.
    """
    unique_names = set()
    for entry in os.listdir(datadir):
        if os.path.isdir(datadir + "/" + entry):
            continue
        stem = entry.split(".")[0]
        unique_names.add(stem.rsplit("_", 1)[0])

    targetnames = sorted(unique_names)
    return util.getchunktargets(args, targetnames)
def load_tfgroups(args):
    """Return the chunk's TF ids split into the two train/test fold groups.

    The ids are read, one per line, from ``../data/dream5/pbm/tfids.txt``,
    sorted, and then reduced by ``util.getchunktargets(args, ...)`` to the
    chunk we have been asked to compute.

    Returns:
        A list of two dicts, each with keys ``"ids"``, ``"train_fold"`` and
        ``"test_fold"``. The first holds the chunk's ids among
        ``C_1``..``C_20`` and ``TF_1``..``TF_33`` (train on A, test on B);
        the second holds the chunk's ids among ``TF_34``..``TF_66`` (train
        on B, test on A). The ``"ids"`` lists are sorted.
    """
    with open("../data/dream5/pbm/tfids.txt") as f:
        tfids = sorted(line.rstrip("\r\n") for line in f)

    # Narrow down the list of ids based on which chunk we've been asked to compute
    tfids = util.getchunktargets(args, tfids)
    selected = set(tfids)

    # Group the TF ids into those that are trained on A versus those that are trained on B
    fold_members = [
        ("A", "B", ["C_%d" % (i + 1) for i in range(20)]
                   + ["TF_%d" % (i + 1) for i in range(0, 33)]),
        ("B", "A", ["TF_%d" % (i + 1) for i in range(33, 66)]),
    ]

    tfgroups = []
    for train_fold, test_fold, members in fold_members:
        tfgroups.append({
            # Sorting pins the order; iterating the raw set intersection
            # gives an arbitrary order that can change between runs.
            "ids": sorted(selected.intersection(members)),
            "train_fold": train_fold,
            "test_fold": test_fold,
        })
    return tfgroups
def load_tfids(args):
    """List the distinct target names under ``datadir`` for this chunk.

    Directories inside ``datadir`` are skipped. Each remaining entry name is
    cut at its first ``"."`` and then stripped of its last ``"_"``-separated
    piece to give the target name. The sorted, de-duplicated names are
    passed to ``util.getchunktargets`` together with ``args``.
    """
    non_dirs = [
        name
        for name in os.listdir(datadir)
        if not os.path.isdir(datadir + "/" + name)
    ]
    distinct = {name.partition(".")[0].rsplit("_", 1)[0] for name in non_dirs}
    return util.getchunktargets(args, sorted(distinct))