Ejemplo n.º 1
0
def maxenr(args):
    """
    Report the maximum enrichment of each motif in a sample versus a background.

    For every selected motif, the scores obtained on the sample (foreground)
    file and on the background file are compared at each candidate score
    threshold. The threshold with the highest enrichment is printed as one
    tab-separated row on standard output.

    Parameters
    ----------
    args : argparse object
        Command line arguments. The attributes read here are ``sample``,
        ``background``, ``pwmfile`` and ``ids`` (comma-separated motif ids;
        when empty, all motifs found in ``pwmfile`` are used).

    Notes
    -----
    Exits with status 1 when the sample or background file does not exist,
    or when a requested motif id is not present in ``pwmfile``.
    """
    for fname in (args.sample, args.background):
        if not os.path.exists(fname):
            print("File %s does not exist!" % fname)
            sys.exit(1)

    fg_file = args.sample
    bg_file = args.background

    motifs = dict((m.id, m) for m in pwmfile_to_motifs(args.pwmfile))

    if args.ids:
        ids = args.ids.split(",")
    else:
        ids = list(motifs.keys())

    # Validate every requested id before any work is submitted to the pool,
    # and signal the failure with a non-zero exit status.
    for motif_id in ids:
        if motif_id not in motifs:
            print("Wrong id: %s" % motif_id)
            sys.exit(1)

    fg_jobs = {}
    bg_jobs = {}
    for motif_id in ids:
        motif = motifs[motif_id]
        bg_jobs[motif_id] = pool.apply_async(get_scores, (motif, bg_file))
        fg_jobs[motif_id] = pool.apply_async(get_scores, (motif, fg_file))

    print("Motif\t# matches\tMax. enrichment\tScore\tCutoff")

    for motif_id in ids:
        pos = array(fg_jobs[motif_id].get())
        neg = array(bg_jobs[motif_id].get())

        if len(pos) == 0:
            # Without foreground scores the normalisation factor is undefined.
            sys.stderr.write("No sample scores for %s, skipping\n" % motif_id)
            continue

        # Corrects for the different number of foreground and background scores.
        factor = len(neg) / float(len(pos))

        # Candidate thresholds: only scores reached by more than one
        # background score are kept, so the denominator below is never zero.
        scores = array([s for s in hstack((pos, neg)) if sum(neg >= s) > 1])
        if len(scores) == 0:
            sys.stderr.write(
                "No usable score threshold for %s, skipping\n" % motif_id)
            continue

        enr = array([(sum(pos >= x) / float(sum(neg >= x))) * factor
                     for x in scores])

        best = enr.argmax()
        max_score = scores[best]

        # Express the best score as a fraction of the motif's score range.
        motif = motifs[motif_id]
        min_score = motif.pwm_min_score()
        cutoff = (max_score - min_score) / (motif.pwm_max_score() - min_score)

        print("%s\t%s\t%0.2f\t%0.2f\t%0.3f" % (
            motif_id,
            sum(pos >= max_score),
            enr[best],
            max_score,
            cutoff,
        ))
Ejemplo n.º 2
0
def location(args):
    """
    Run motif_localization for every selected motif and wait for completion.

    Parameters
    ----------
    args : argparse object
        Command line arguments. The attributes read here are ``fastafile``,
        ``pwmfile``, ``width``, ``ids`` (comma-separated motif ids) and
        ``cutoff``.
    """
    fastafile, pwmfile = args.fastafile, args.pwmfile

    seq_width = args.width
    if not seq_width:
        # No width supplied: fall back to the length of the second element of
        # the first FASTA item (presumably the sequence -- confirm with Fasta).
        fasta = Fasta(fastafile)
        seq_width = len(fasta.items()[0][1])
        # Drop the reference to the parsed FASTA; it is not needed further.
        del fasta

    motifs = pwmfile_to_motifs(pwmfile)

    # Default to every motif in the file; an explicit id list overrides it.
    selected = [m.id for m in motifs]
    if args.ids:
        selected = args.ids.split(",")

    # One asynchronous job per selected motif, each with its own output name.
    pending = [
        pool.apply_async(
            motif_localization,
            (
                fastafile,
                m,
                seq_width,
                os.path.join("%s_histogram" % m.id),
                args.cutoff,
            ),
        )
        for m in motifs
        if m.id in selected
    ]

    # Block until every job has finished (re-raises worker exceptions).
    for task in pending:
        task.get()
Ejemplo n.º 3
0
def location(args):
    """
    Create a histogram of motif locations for each selected motif.

    Parameters
    ----------
    args : argparse object
        Command line arguments; ``fastafile``, ``pwmfile``, ``width``,
        ``ids`` and ``cutoff`` are used.
    """
    fasta_path = args.fastafile
    pwm_path = args.pwmfile

    width = args.width
    if not width:
        # Width not given on the command line: derive it from the first
        # entry of the FASTA file.
        first_entry = Fasta(fasta_path).items()[0]
        width = len(first_entry[1])
        first_entry = None

    all_motifs = pwmfile_to_motifs(pwm_path)
    wanted_ids = [m.id for m in all_motifs]
    if args.ids:
        # An explicit comma-separated list restricts the motifs to process.
        wanted_ids = args.ids.split(",")

    submitted = []
    for m in all_motifs:
        if m.id not in wanted_ids:
            continue
        histogram_name = os.path.join("%s_histogram" % m.id)
        job_args = (fasta_path, m, width, histogram_name, args.cutoff)
        submitted.append(pool.apply_async(motif_localization, job_args))

    # Wait for all submitted jobs to complete.
    for job in submitted:
        job.get()
Ejemplo n.º 4
0
def scan(infile, motifs, cutoff, nreport=1, it=False):
    """
    Scan a FASTA file with a set of motifs and collect all results.

    The FASTA content is split into slices of ``CHUNK`` entries and every
    (motif, slice) combination is submitted to the worker pool. The results
    of all slices are merged into a single dictionary per motif.

    Parameters
    ----------
    infile : str
        FASTA file, passed to ``Fasta``.
    motifs : list
        Motif objects with an ``id`` attribute. The collection is iterated
        more than once, so it must not be a one-shot iterator.
    cutoff
        Cutoff specification, passed to ``parse_cutoff`` together with the
        configured default ``scan_cutoff``.
    nreport : int, optional
        Passed unchanged to ``scan_fa_with_motif``.
    it : bool, optional
        Not used by this function; kept so existing callers keep working.

    Returns
    -------
    dict
        Maps every motif in ``motifs`` to a dict with the merged results of
        all slices. A motif always has an entry, even when there was nothing
        to scan.
    """
    # Default cutoff from the configuration, only used if none is supplied.
    default_cutoff = MotifConfig().get_default_params()['scan_cutoff']
    cutoffs = parse_cutoff(motifs, cutoff, default_cutoff)

    fa = Fasta(infile)

    # Create the per-motif result dict once, up front, instead of once per
    # chunk inside the submission loop.
    total_result = dict((motif, {}) for motif in motifs)

    jobs = []
    for motif in motifs:
        for i in range(0, len(fa), CHUNK):
            jobs.append(pool.apply_async(
                scan_fa_with_motif,
                (fa[i:i + CHUNK], motif, cutoffs[motif.id], nreport),
            ))

    # Results are keyed by the caller's motif objects, looked up through the
    # id of the motif returned by the job (presumably because the returned
    # motif can be a copy rather than the submitted object -- confirm).
    motifkey = dict((m.id, m) for m in motifs)
    for job in jobs:
        motif, result = job.get()
        total_result[motifkey[motif.id]].update(result)

    return total_result
Ejemplo n.º 5
0
def scan_it(infile, motifs, cutoff, nreport=1, rc=True):
    """
    Scan a FASTA file with a set of motifs, yielding results as they finish.

    The FASTA content is split into slices of ``CHUNK`` entries and every
    (motif, slice) combination is submitted to the worker pool. Results are
    yielded in submission order.

    Parameters
    ----------
    infile : str
        FASTA file, passed to ``Fasta``.
    motifs : list
        Motif objects with an ``id`` attribute. The collection is iterated
        more than once, so it must not be a one-shot iterator.
    cutoff
        Cutoff specification, passed to ``parse_cutoff`` together with the
        configured default ``scan_cutoff``.
    nreport : int, optional
        Passed unchanged to ``scan_fa_with_motif``.
    rc : bool, optional
        Passed unchanged to ``scan_fa_with_motif``.

    Yields
    ------
    tuple
        ``(motif, result)`` where ``motif`` is the object from ``motifs``
        whose id matches the motif returned by the job.
    """
    from collections import deque

    # Default cutoff from the configuration, only used if none is supplied.
    default_cutoff = MotifConfig().get_default_params()['scan_cutoff']
    cutoffs = parse_cutoff(motifs, cutoff, default_cutoff)

    fa = Fasta(infile)
    motifkey = dict((m.id, m) for m in motifs)

    # After each motif's jobs are submitted, results are consumed until at
    # most this many jobs remain pending.
    max_pending = 10

    # deque gives O(1) removal from the front (list.pop(0) is O(n)).
    jobs = deque()
    for motif in motifs:
        for i in range(0, len(fa), CHUNK):
            jobs.append(pool.apply_async(
                scan_fa_with_motif,
                (fa[i:i + CHUNK], motif, cutoffs[motif.id], nreport, rc),
            ))

        while len(jobs) > max_pending:
            scanned, result = jobs.popleft().get()
            yield motifkey[scanned.id], result

    # Drain whatever is still pending once everything has been submitted.
    while jobs:
        scanned, result = jobs.popleft().get()
        yield motifkey[scanned.id], result
Ejemplo n.º 6
0
def _max_enrichment(pos, neg):
    """
    Find the score threshold with the highest sample/background enrichment.

    Parameters
    ----------
    pos : array
        Scores obtained on the sample (foreground).
    neg : array
        Scores obtained on the background.

    Returns
    -------
    tuple or None
        ``(n_matches, enrichment, score)`` for the best threshold, or None
        when ``pos`` is empty or when no candidate threshold is reached by
        more than one background score.
    """
    if len(pos) == 0:
        return None

    # Corrects for the different number of sample and background scores.
    factor = len(neg) / float(len(pos))

    # Only thresholds reached by more than one background score are
    # considered, which also keeps the denominator below non-zero.
    scores = array([s for s in hstack((pos, neg)) if sum(neg >= s) > 1])
    if len(scores) == 0:
        return None

    enr = array([(sum(pos >= x) / float(sum(neg >= x))) * factor
                 for x in scores])
    best = enr.argmax()
    return sum(pos >= scores[best]), enr[best], scores[best]


def maxenr(args):
    """
    Print the maximum enrichment of each motif in a sample versus a background.

    One tab-separated row per motif is written to standard output, holding
    the motif id, the number of sample matches at the best threshold, the
    enrichment, the score and the score as a fraction of the motif's score
    range.

    Parameters
    ----------
    args : argparse object
        Command line arguments. The attributes read here are ``sample``,
        ``background``, ``pwmfile`` and ``ids`` (comma-separated motif ids;
        when empty, all motifs found in ``pwmfile`` are used).

    Notes
    -----
    Exits with status 1 when the sample or background file does not exist,
    or when a requested motif id is not present in ``pwmfile``. Motifs for
    which no enrichment can be computed are reported on standard error and
    skipped.
    """
    fg_file = args.sample
    bg_file = args.background

    for fname in (fg_file, bg_file):
        if not os.path.exists(fname):
            print("File %s does not exist!" % fname)
            sys.exit(1)

    motifs = dict((m.id, m) for m in pwmfile_to_motifs(args.pwmfile))
    ids = args.ids.split(",") if args.ids else list(motifs.keys())

    # Reject unknown ids before submitting any job, with a failure status.
    for motif_id in ids:
        if motif_id not in motifs:
            print("Wrong id: %s" % motif_id)
            sys.exit(1)

    fg_jobs = {}
    bg_jobs = {}
    for motif_id in ids:
        bg_jobs[motif_id] = pool.apply_async(get_scores, (
            motifs[motif_id],
            bg_file,
        ))
        fg_jobs[motif_id] = pool.apply_async(get_scores, (
            motifs[motif_id],
            fg_file,
        ))

    print("Motif\t# matches\tMax. enrichment\tScore\tCutoff")

    for motif_id in ids:
        pos = array(fg_jobs[motif_id].get())
        neg = array(bg_jobs[motif_id].get())

        best = _max_enrichment(pos, neg)
        if best is None:
            sys.stderr.write(
                "Could not compute enrichment for %s, skipping\n" % motif_id)
            continue
        n_matches, enrichment, max_score = best

        # Express the best score as a fraction of the motif's score range.
        min_score = motifs[motif_id].pwm_min_score()
        cutoff = (max_score - min_score) / (
            motifs[motif_id].pwm_max_score() - min_score)

        print("%s\t%s\t%0.2f\t%0.2f\t%0.3f" % (
            motif_id, n_matches, enrichment, max_score, cutoff))