Example #1
0
def load_tfgroups(args):
    """Load the TF ids for the requested chunk, grouped by training fold.

    Ids are read one per line from ``../data/dream5/pbm/tfids.txt``, sorted,
    and narrowed with ``util.getchunktargets(args, ...)``. The remaining ids
    are split into two groups:

    * ``C_1``..``C_20`` and ``TF_1``..``TF_33``: trained on fold A, tested on B.
    * ``TF_34``..``TF_66``: trained on fold B, tested on A.

    Args:
        args: Passed through unchanged to ``util.getchunktargets``.

    Returns:
        A two-element list of dicts, each with the keys ``"ids"`` (a sorted
        list of ids), ``"train_fold"`` and ``"test_fold"``.
    """
    with open("../data/dream5/pbm/tfids.txt") as f:
        tfids = sorted(line.rstrip("\r\n") for line in f)

    # Narrow down the list of ids based on which chunk we've been asked to compute
    selected = set(util.getchunktargets(args, tfids))

    # Group the TF ids into those that are trained on A versus those that are trained on B
    fold_a_ids = {"C_%d" % n for n in range(1, 21)}
    fold_a_ids.update("TF_%d" % n for n in range(1, 34))
    fold_b_ids = {"TF_%d" % n for n in range(34, 67)}

    # sorted() rather than list(): set iteration order for strings is not
    # stable across interpreter runs, which made the id order irreproducible.
    tfgroups = [
        {
            "ids": sorted(selected & fold_a_ids),
            "train_fold": "A",
            "test_fold": "B",
        },
        {
            "ids": sorted(selected & fold_b_ids),
            "train_fold": "B",
            "test_fold": "A",
        },
    ]

    return tfgroups
Example #2
0
def load_tfids(args):
    """Return the target names assigned to the requested chunk.

    Candidate names come from the entries of ``datadir`` whose file name ends
    with ``"_AC" + seq_suffix``; that marker text is removed from each name.
    The sorted candidates are then narrowed by ``util.getchunktargets``.

    Args:
        args: Passed through unchanged to ``util.getchunktargets``.

    Returns:
        Whatever ``util.getchunktargets`` returns for the sorted names.
    """
    marker = "_AC" + seq_suffix
    names = []
    for entry in os.listdir(datadir):
        if entry.endswith(marker):
            names.append(entry.replace(marker, ""))
    names.sort()
    return util.getchunktargets(args, names)
Example #3
0
def get_chunktargets(args):
    """Return this chunk's target names and their column positions.

    The full list of target names is the tab-separated first line of
    ``../data/rnac/targets.tsv.gz``. ``util.getchunktargets`` selects the
    names this chunk is responsible for.

    Args:
        args: Passed through unchanged to ``util.getchunktargets``.

    Returns:
        A tuple ``(chunktargets, chunkcols)``: the value returned by
        ``util.getchunktargets``, and the 0-based positions in the header
        line of every name contained in it.
    """
    # Determine which targetnames we're responsible for.
    # Text mode ("rt") yields str so the "\t" split works on Python 3, and
    # the context manager closes the gzip handle instead of leaking it.
    with gzip.open("../data/rnac/targets.tsv.gz", "rt") as f:
        targetnames = f.readline().rstrip().split("\t")
    chunktargets = util.getchunktargets(args, targetnames)

    # One set build instead of a list scan per header column.
    # NOTE(review): assumes chunktargets is an iterable of hashable names.
    wanted = set(chunktargets)
    chunkcols = [i for i, name in enumerate(targetnames) if name in wanted]
    return chunktargets, chunkcols
Example #4
0
def load_tfids(args):
    """Return the subset of target names belonging to the requested chunk.

    Every entry of ``datadir`` whose name ends with ``"_AC" + seq_suffix``
    contributes one target name, obtained by removing that marker text from
    the file name. The names are sorted before being handed to
    ``util.getchunktargets``.

    Args:
        args: Passed through unchanged to ``util.getchunktargets``.

    Returns:
        Whatever ``util.getchunktargets`` returns for the sorted names.
    """
    tail = "_AC" + seq_suffix
    matching = filter(lambda name: name.endswith(tail), os.listdir(datadir))
    stems = sorted(name.replace(tail, "") for name in matching)
    return util.getchunktargets(args, stems)
Example #5
0
def load_tfids(args):
    """Return the target names assigned to the requested chunk.

    Each non-directory entry of ``datadir`` contributes one name: the part of
    the file name before the first ``"."``, with everything from the last
    ``"_"`` onwards removed (``"abc_def.txt"`` becomes ``"abc"``). Duplicate
    names are collapsed and the result is sorted before being narrowed by
    ``util.getchunktargets``.

    Args:
        args: Passed through unchanged to ``util.getchunktargets``.

    Returns:
        Whatever ``util.getchunktargets`` returns for the sorted unique names.
    """
    # A set comprehension de-duplicates directly; sorted() accepts any
    # iterable, so the intermediate list()/set([...]) wrappers are not needed.
    targetnames = sorted({
        filename.split(".")[0].rsplit("_", 1)[0]
        for filename in os.listdir(datadir)
        if not os.path.isdir(os.path.join(datadir, filename))
    })
    chunktargets = util.getchunktargets(args, targetnames)
    return chunktargets
Example #6
0
def load_tfgroups(args):
    """Load the TF ids for the requested chunk, grouped by training fold.

    Ids are read one per line from ``../data/dream5/pbm/tfids.txt``, sorted,
    and narrowed with ``util.getchunktargets(args, ...)``. The first group
    holds ``C_1``..``C_20`` and ``TF_1``..``TF_33`` (train on A, test on B);
    the second holds ``TF_34``..``TF_66`` (train on B, test on A).

    Args:
        args: Passed through unchanged to ``util.getchunktargets``.

    Returns:
        A two-element list of dicts, each with the keys ``"ids"`` (a sorted
        list of ids), ``"train_fold"`` and ``"test_fold"``.
    """
    with open("../data/dream5/pbm/tfids.txt") as f:
        tfids = sorted(line.rstrip("\r\n") for line in f)

    # Narrow down the list of ids based on which chunk we've been asked to compute
    tfids = util.getchunktargets(args, tfids)
    chunk_ids = set(tfids)

    # Group the TF ids into those that are trained on A versus those that are trained on B
    trained_on_a = ["C_%d" % (i + 1) for i in range(20)]
    trained_on_a += ["TF_%d" % (i + 1) for i in range(0, 33)]
    trained_on_b = ["TF_%d" % (i + 1) for i in range(33, 66)]

    # The intersections are sorted because bare set iteration order for
    # strings changes between interpreter runs, making the id order unstable.
    group_a = {
        "ids": sorted(chunk_ids.intersection(trained_on_a)),
        "train_fold": "A",
        "test_fold": "B",
    }
    group_b = {
        "ids": sorted(chunk_ids.intersection(trained_on_b)),
        "train_fold": "B",
        "test_fold": "A",
    }

    return [group_a, group_b]
Example #7
0
def load_tfids(args):
    """Return the subset of target names belonging to the requested chunk.

    Directories inside ``datadir`` are skipped. For every other entry the
    target name is the text before the first ``"."`` in the file name, cut at
    its last ``"_"`` (``"abc_def.txt"`` becomes ``"abc"``). The unique names
    are sorted and then narrowed by ``util.getchunktargets``.

    Args:
        args: Passed through unchanged to ``util.getchunktargets``.

    Returns:
        Whatever ``util.getchunktargets`` returns for the sorted unique names.
    """
    unique_names = set()
    for filename in os.listdir(datadir):
        # os.path.join instead of hand-built "/" concatenation.
        if os.path.isdir(os.path.join(datadir, filename)):
            continue
        unique_names.add(filename.split(".")[0].rsplit("_", 1)[0])

    # sorted() takes the set directly; no intermediate list is needed.
    targetnames = sorted(unique_names)
    chunktargets = util.getchunktargets(args, targetnames)
    return chunktargets