def maxenr(args):
    """Print the maximum enrichment of motifs in a sample versus a background.

    For every selected motif the scores of the sample and the background
    file are computed by ``get_scores`` in the worker pool. For each
    candidate score threshold the enrichment is the number of sample scores
    at or above the threshold divided by the number of background scores at
    or above it, multiplied by ``len(background) / len(sample)``. One
    tab-separated row is printed per motif, describing the threshold with
    the highest enrichment.

    Parameters
    ----------
    args : object
        Parsed command line arguments (presumably an argparse namespace).
        The attributes ``sample``, ``background``, ``pwmfile`` and ``ids``
        (optional, comma-separated motif ids) are read.

    Raises
    ------
    SystemExit
        With status 1 if the sample or background file does not exist, or
        if a requested motif id is not present in the pwm file.
    """
    fg_file = args.sample
    bg_file = args.background

    for fname in (fg_file, bg_file):
        if not os.path.exists(fname):
            print("File %s does not exist!" % fname)
            # sys.exit instead of the interactive ``exit`` helper, which is
            # only available when the site module has been loaded.
            sys.exit(1)

    motifs = dict((m.id, m) for m in pwmfile_to_motifs(args.pwmfile))

    if args.ids:
        ids = args.ids.split(",")
    else:
        ids = list(motifs.keys())

    # Validate all ids before any job is submitted, so that a typo does not
    # leave already-started jobs behind, and report failure with a non-zero
    # exit status.
    for motif_id in ids:
        if motif_id not in motifs:
            print("Wrong id: %s" % motif_id)
            sys.exit(1)

    fg_jobs = {}
    bg_jobs = {}
    for motif_id in ids:
        bg_jobs[motif_id] = pool.apply_async(
            get_scores, (motifs[motif_id], bg_file))
        fg_jobs[motif_id] = pool.apply_async(
            get_scores, (motifs[motif_id], fg_file))

    print("Motif\t# matches\tMax. enrichment\tScore\tCutoff")

    for motif_id in ids:
        motif = motifs[motif_id]
        pos = array(fg_jobs[motif_id].get())
        neg = array(bg_jobs[motif_id].get())

        # Corrects for a different number of sample and background scores
        factor = len(neg) / float(len(pos))

        # Candidate thresholds: every observed score that is reached by more
        # than one background score.
        scores = array(
            [s for s in hstack((pos, neg)) if sum(neg >= s) > 1])
        enr = array(
            [(sum(pos >= x) / float(sum(neg >= x))) * factor for x in scores])

        best = enr.argmax()
        max_score = scores[best]

        # Express the best threshold as a fraction of the pwm score range
        min_score = motif.pwm_min_score()
        cutoff = (max_score - min_score) / (motif.pwm_max_score() - min_score)

        print("%s\t%s\t%0.2f\t%0.2f\t%0.3f" % (
            motif_id,
            sum(pos >= max_score),
            enr[best],
            max_score,
            cutoff,
        ))
def location(args):
    """Create a histogram of motif locations for every selected motif.

    Each motif from the pwm file whose id is selected is passed to
    ``motif_localization`` in the worker pool, together with an output name
    of the form ``<motif id>_histogram``. The function returns once all
    submitted jobs have finished.

    Parameters
    ----------
    args : argparse object
        Command line arguments. The attributes ``fastafile``, ``pwmfile``,
        ``width``, ``ids`` (optional, comma-separated motif ids) and
        ``cutoff`` are read.
    """
    fasta_path = args.fastafile
    pwm_path = args.pwmfile

    width = args.width
    if not width:
        # No width supplied: use the length of the second field of the first
        # FASTA item (presumably the sequence -- confirm against Fasta).
        width = len(Fasta(fasta_path).items()[0][1])

    all_motifs = pwmfile_to_motifs(pwm_path)

    # Default to every motif in the file unless specific ids were requested
    default_ids = [m.id for m in all_motifs]
    selected = args.ids.split(",") if args.ids else default_ids

    pending = [
        pool.apply_async(
            motif_localization,
            (
                fasta_path,
                m,
                width,
                os.path.join("%s_histogram" % m.id),
                args.cutoff,
            ),
        )
        for m in all_motifs
        if m.id in selected
    ]

    # Wait for all jobs; get() also re-raises any exception from a worker
    for task in pending:
        task.get()
def location(args):
    """Create a histogram of motif locations for every selected motif.

    Each motif from the pwm file whose id is selected is passed to
    ``motif_localization`` in the worker pool, together with an output name
    of the form ``<motif id>_histogram``. The function returns once all
    submitted jobs have finished.

    Parameters
    ----------
    args : argparse object
        Command line arguments. The attributes ``fastafile``, ``pwmfile``,
        ``width``, ``ids`` (optional, comma-separated motif ids) and
        ``cutoff`` are read.
    """
    fasta_path = args.fastafile
    pwm_path = args.pwmfile

    width = args.width
    if not width:
        # No width supplied: use the length of the second field of the first
        # FASTA item (presumably the sequence -- confirm against Fasta).
        width = len(Fasta(fasta_path).items()[0][1])

    all_motifs = pwmfile_to_motifs(pwm_path)

    # Default to every motif in the file unless specific ids were requested
    default_ids = [m.id for m in all_motifs]
    selected = args.ids.split(",") if args.ids else default_ids

    pending = [
        pool.apply_async(
            motif_localization,
            (
                fasta_path,
                m,
                width,
                os.path.join("%s_histogram" % m.id),
                args.cutoff,
            ),
        )
        for m in all_motifs
        if m.id in selected
    ]

    # Wait for all jobs; get() also re-raises any exception from a worker
    for task in pending:
        task.get()
def scan(infile, motifs, cutoff, nreport=1, it=False):
    """Scan a FASTA file with a set of motifs and collect all results.

    The FASTA file is processed in slices of ``CHUNK`` entries. Every
    (motif, slice) combination is scanned by ``scan_fa_with_motif`` in the
    worker pool and the per-slice results are merged per motif.

    Parameters
    ----------
    infile : str
        Name of the FASTA file, passed to ``Fasta``.
    motifs : sequence of motif objects
        Motifs to scan with. It is iterated more than once, and each motif
        must have an ``id`` attribute and be usable as a dictionary key.
    cutoff
        Cutoff specification, passed to ``parse_cutoff`` together with the
        configured default ``scan_cutoff``.
    nreport : int, optional
        Passed on to ``scan_fa_with_motif``.
    it : bool, optional
        Not used by this function; accepted so that existing callers keep
        working.

    Returns
    -------
    dict
        Maps every motif in ``motifs`` to a dictionary with the merged
        results of all slices. A motif without any result (for instance
        when the FASTA file has no entries) maps to an empty dictionary.
    """
    # Cutoff for motif scanning, only used if a cutoff is not supplied
    default_cutoff = MotifConfig().get_default_params()['scan_cutoff']
    cutoffs = parse_cutoff(motifs, cutoff, default_cutoff)

    fa = Fasta(infile)

    # Workers return their own motif object; map it back to the caller's
    # object by id.
    motifkey = dict((m.id, m) for m in motifs)

    total_result = {}
    jobs = []
    for motif in motifs:
        # Initialise once per motif, not once per chunk, so that the motif
        # is present in the result even if there is nothing to scan.
        total_result[motif] = {}
        for i in range(0, len(fa), CHUNK):
            jobs.append(pool.apply_async(
                scan_fa_with_motif,
                (fa[i:i + CHUNK], motif, cutoffs[motif.id], nreport)))

    for job in jobs:
        scanned_motif, result = job.get()
        total_result[motifkey[scanned_motif.id]].update(result)

    return total_result
def scan_it(infile, motifs, cutoff, nreport=1, rc=True):
    """Scan a FASTA file with a set of motifs, yielding results lazily.

    The FASTA file is processed in slices of ``CHUNK`` entries. Every
    (motif, slice) combination is scanned by ``scan_fa_with_motif`` in the
    worker pool, and results are yielded in the order the jobs were
    submitted.

    Parameters
    ----------
    infile : str
        Name of the FASTA file, passed to ``Fasta``.
    motifs : sequence of motif objects
        Motifs to scan with. It is iterated more than once and each motif
        must have an ``id`` attribute.
    cutoff
        Cutoff specification, passed to ``parse_cutoff`` together with the
        configured default ``scan_cutoff``.
    nreport : int, optional
        Passed on to ``scan_fa_with_motif``.
    rc : bool, optional
        Passed on to ``scan_fa_with_motif``.

    Yields
    ------
    tuple
        ``(motif, result)`` for every scanned slice, where ``motif`` is the
        object from ``motifs`` with the same id as the motif returned by
        the worker.
    """
    settings = MotifConfig()
    # Fallback cutoff for motifs without an explicitly supplied cutoff
    default_cutoff = settings.get_default_params()['scan_cutoff']
    # NOTE: read from the configuration but not used further down
    ncpus = settings.get_default_params()['ncpus']

    cutoffs = parse_cutoff(motifs, cutoff, default_cutoff)

    pending = []
    fa = Fasta(infile)
    by_id = dict((m.id, m) for m in motifs)

    for motif in motifs:
        for start in range(0, len(fa), CHUNK):
            job_args = (
                fa[start:start + CHUNK],
                motif,
                cutoffs[motif.id],
                nreport,
                rc,
            )
            pending.append(pool.apply_async(scan_fa_with_motif, job_args))

        # Hand out the oldest results until at most ten jobs are waiting
        while len(pending) > 10:
            done_motif, result = pending.pop(0).get()
            yield by_id[done_motif.id], result

    # Everything has been submitted; yield whatever is left
    for task in pending:
        done_motif, result = task.get()
        yield by_id[done_motif.id], result
def maxenr(args):
    """Print the maximum enrichment of motifs in a sample versus a background.

    For every selected motif the scores of the sample and the background
    file are computed by ``get_scores`` in the worker pool. For each
    candidate score threshold the enrichment is the number of sample scores
    at or above the threshold divided by the number of background scores at
    or above it, multiplied by ``len(background) / len(sample)``. One
    tab-separated row is printed per motif, describing the threshold with
    the highest enrichment.

    Parameters
    ----------
    args : object
        Parsed command line arguments (presumably an argparse namespace).
        The attributes ``sample``, ``background``, ``pwmfile`` and ``ids``
        (optional, comma-separated motif ids) are read.

    Raises
    ------
    SystemExit
        With status 1 if the sample or background file does not exist, or
        if a requested motif id is not present in the pwm file.
    """
    fg_file = args.sample
    bg_file = args.background

    for fname in (fg_file, bg_file):
        if not os.path.exists(fname):
            print("File %s does not exist!" % fname)
            # sys.exit instead of the interactive ``exit`` helper, which is
            # only available when the site module has been loaded.
            sys.exit(1)

    motifs = dict((m.id, m) for m in pwmfile_to_motifs(args.pwmfile))

    if args.ids:
        ids = args.ids.split(",")
    else:
        ids = list(motifs.keys())

    # Validate all ids before any job is submitted, so that a typo does not
    # leave already-started jobs behind, and report failure with a non-zero
    # exit status.
    for motif_id in ids:
        if motif_id not in motifs:
            print("Wrong id: %s" % motif_id)
            sys.exit(1)

    fg_jobs = {}
    bg_jobs = {}
    for motif_id in ids:
        bg_jobs[motif_id] = pool.apply_async(
            get_scores, (motifs[motif_id], bg_file))
        fg_jobs[motif_id] = pool.apply_async(
            get_scores, (motifs[motif_id], fg_file))

    print("Motif\t# matches\tMax. enrichment\tScore\tCutoff")

    for motif_id in ids:
        motif = motifs[motif_id]
        pos = array(fg_jobs[motif_id].get())
        neg = array(bg_jobs[motif_id].get())

        # Corrects for a different number of sample and background scores
        factor = len(neg) / float(len(pos))

        # Candidate thresholds: every observed score that is reached by more
        # than one background score.
        scores = array(
            [s for s in hstack((pos, neg)) if sum(neg >= s) > 1])
        enr = array(
            [(sum(pos >= x) / float(sum(neg >= x))) * factor for x in scores])

        best = enr.argmax()
        max_score = scores[best]

        # Express the best threshold as a fraction of the pwm score range
        min_score = motif.pwm_min_score()
        cutoff = (max_score - min_score) / (motif.pwm_max_score() - min_score)

        print("%s\t%s\t%0.2f\t%0.2f\t%0.3f" % (
            motif_id,
            sum(pos >= max_score),
            enr[best],
            max_score,
            cutoff,
        ))