Esempio n. 1
0
def load_traindata(tfid, args):
    """Load the "A" training sequences for a single target.

    Reads ``<datadir>/<tfid>_A.seq.gz`` through ``util.load_seq`` and tags the
    result with ``tfid`` as its only target name.

    Args:
        tfid: Target identifier; used in the file name and as the target name.
        args: Options object; only ``args.quick`` is read here.

    Returns:
        Whatever ``util.load_seq`` returned, with ``targetnames`` set to
        ``[tfid]``.
    """
    print("load_traindata: %s" % tfid)

    # Quick runs cap the load at 10000 rows; otherwise no cap is passed on.
    row_cap = None
    if args.quick:
        row_cap = 10000

    seq_path = "%s/%s_A.seq.gz" % (datadir, tfid)
    dataset = util.load_seq(seq_path, minrows=100000, maxrows=row_cap)
    dataset.targetnames = [tfid]
    return dataset
def load_traindata(tfid, args):
    """Load the training sequences for one target, optionally keeping only the top peaks.

    Reads ``<args.indir>/<tfid>_AC<seq_suffix>`` through ``util.load_seq`` and
    tags the result with ``tfid`` as its only target name.

    Args:
        tfid: Target identifier; used in the file name and as the target name.
        args: Options object providing ``quick`` (cap the load at 10000 rows),
            ``mode`` ("top" or "all") and ``indir`` (input directory).

    Returns:
        The loaded data; in "top" mode only its first 500 entries
        (``trdata[:500]``), in "all" mode everything that was loaded.

    Raises:
        SystemExit: If ``args.mode`` is neither "top" nor "all". The message
            names the offending mode.
    """
    print("load_traindata: %s" % tfid)

    # Reject a bad mode before the load so a typo does not cost a full read,
    # and actually interpolate the mode into the message.
    if args.mode not in ("top", "all"):
        raise SystemExit("Unrecognized mode \"%s\". Expected \"top\" or \"all\"." % args.mode)

    maxrows = 10000 if args.quick else None
    minrows = 100000
    trdata = util.load_seq("%s/%s_AC%s" % (args.indir, tfid, seq_suffix),
                           minrows=minrows,
                           maxrows=maxrows)
    trdata.targetnames = [tfid]
    if args.mode == "top":
        # Only top 500 even peaks (A); top 500 odd (B) are stored in the
        # corresponding _B.seq.gz file.
        trdata = trdata[:500]
    # "all" mode keeps the top 500 even (A) plus everything else (C).
    return trdata
Esempio n. 3
0
def load_traindata(tfid, args):
    """Load the training sequences for one target, optionally keeping only the top peaks.

    Reads ``<datadir>/<tfid>_AC<seq_suffix>`` through ``util.load_seq`` and
    tags the result with ``tfid`` as its only target name.

    Args:
        tfid: Target identifier; used in the file name and as the target name.
        args: Options object providing ``quick`` (cap the load at 10000 rows)
            and ``mode`` ("top" or "all").

    Returns:
        The loaded data; in "top" mode only its first 500 entries
        (``trdata[:500]``), in "all" mode everything that was loaded.

    Raises:
        SystemExit: If ``args.mode`` is neither "top" nor "all". The message
            names the offending mode.
    """
    print("load_traindata: %s" % tfid)

    # Validate the mode up front: failing after the load wastes the read, and
    # the message must carry the actual mode rather than a literal "%s".
    if args.mode not in ("top", "all"):
        raise SystemExit("Unrecognized mode \"%s\". Expected \"top\" or \"all\"." % args.mode)

    maxrows = 10000 if args.quick else None
    minrows = 100000
    trdata = util.load_seq("%s/%s_AC%s" % (datadir, tfid, seq_suffix),
                           minrows=minrows,
                           maxrows=maxrows)
    trdata.targetnames = [tfid]
    if args.mode == "top":
        # Only top 500 even peaks (A); top 500 odd (B) are stored in the
        # corresponding _B.seq.gz file.
        trdata = trdata[:500]
    # "all" mode keeps the top 500 even (A) plus everything else (C).
    return trdata
Esempio n. 4
0
def load_testdata(tedata, tfids, args):
    if tedata is not None:
        return tedata
    if "encode" in datadir:
        maxrows = 10000
    elif "chip" in datadir:
        maxrows = 10000
    else:
        maxrows = None
    all_tedata = {}
    for tfid in tfids:
        print "load_testdata: %s ..." % tfid,
        tedata = util.load_seq("%s/%s_B%s" % (datadir, tfid, seq_suffix), minrows=10000, maxrows=maxrows)
        tedata.targetnames = [tfid]
        all_tedata[tfid] = tedata
        print "done"
    return all_tedata
Esempio n. 5
0
def load_testdata(tedata, tfids, args):
    if tedata is not None:
        return tedata
    if "encode" in datadir:
        maxrows = 10000
    elif "chip" in datadir:
        maxrows = 10000
    else:
        maxrows = None
    all_tedata = {}
    for tfid in tfids:
        print "load_testdata: %s ..." % tfid,
        tedata = util.load_seq("%s/%s_B%s" % (datadir, tfid, seq_suffix),
                               minrows=10000,
                               maxrows=maxrows)
        tedata.targetnames = [tfid]
        all_tedata[tfid] = tedata
        print "done"
    return all_tedata
Esempio n. 6
0
def load_traindata(tfid, args):
    """Read the training set for ``tfid`` from its ``_A.seq.gz`` file.

    The file ``<datadir>/<tfid>_A.seq.gz`` is loaded with ``util.load_seq``;
    ``args.quick`` limits the load to 10000 rows, otherwise no limit is given.
    The returned object has ``targetnames`` set to ``[tfid]``.
    """
    print("load_traindata: %s" % tfid)
    load_options = {
        "minrows": 100000,
        "maxrows": 10000 if args.quick else None,
    }
    result = util.load_seq("%s/%s_A.seq.gz" % (datadir, tfid), **load_options)
    result.targetnames = [tfid]
    return result