def _main(args): if len(args) < 1: print >> sys.stderr, "usage: <mtx_1> [<mtx2>...] < fasta" sys.exit(1) seqname = "" seq = "" for x in sys.stdin: head_re = re.search(">(\S+)", x) if head_re and not seq: seqname = head_re.group(1) continue elif head_re: for x in args: annot = patser_tools.makePatserAnnotation(sequence=seq, matrix=x) for x in annot.getAllFeatures(): if x.tags['strand'] == '-': print "%s\t%s\t%d\t%s\t%f" % ( seqname, x.tags['motif_name'], x.end, x.tags['strand'], x.tags['score']) elif x.tags['strand'] == '+': print "%s\t%s\t%d\t%s\t%f" % ( seqname, x.tags['motif_name'], x.start, x.tags['strand'], x.tags['score']) seqname = head_re.group(1) seq = "" else: seq += x.rstrip()
def patSearch(self):
    """Run patser on ``self.seq`` with ``self.matrix`` and keep the result.

    The object returned by ``patser_tools.makePatserAnnotation`` is stored
    on ``self.annotation``.  If that call raises any ``Exception``, a
    warning containing ``self.seq`` is printed to stdout and
    ``self.annotation`` is set to ``None`` instead.
    """
    try:
        result = patser_tools.makePatserAnnotation(sequence=self.seq,
                                                   matrix=self.matrix)
    except Exception:
        # Best effort: report the failure and carry on without an annotation.
        print("warning: Exception for seq %s" % (self.seq))
        result = None
    self.annotation = result
def patSearch(self):
    """Annotate ``self.seq`` with ``self.matrix`` and store the result.

    The object returned by ``patser_tools.makePatserAnnotation`` is assigned
    to ``self.annotation``.  Nothing is caught here: if the call raises, the
    exception propagates to the caller and ``self.annotation`` is not
    assigned.
    """
    self.annotation = patser_tools.makePatserAnnotation(sequence=self.seq,
                                                        matrix=self.matrix)
def scan_seqs(seqs, matrices):
    """Find the best patser hit per matrix for every FASTA record in *seqs*.

    Args:
        seqs: iterable of FASTA lines (e.g. an open file).
        matrices: iterable of matrix arguments, each forwarded as ``matrix=``
            to ``patser_tools.makePatserAnnotation`` (called with
            ``scorecut=-100``).

    Returns:
        A ``(header, result)`` tuple of strings.  ``header`` is the
        tab-separated column header ``name, sequence`` followed by
        ``<motif>_score`` / ``<motif>_pval`` for each motif that produced a
        hit.  ``result`` holds one newline-terminated, tab-separated row per
        record, in input order, with columns in header order; ``-`` marks a
        missing hit or a hit without a ``pval`` tag.

    A message is written to stderr for every record/matrix pair without any
    feature.
    """
    # --- read the FASTA records, remembering their input order -------------
    sequences = {}
    order = []

    def remember(rec_name, rec_chunks):
        if rec_name not in sequences:
            order.append(rec_name)
        sequences[rec_name] = "".join(rec_chunks)

    name = ""
    chunks = []
    for line in seqs:
        nameres = re.search(r">(\S+)", line)
        if nameres:
            if name != "":
                remember(name, chunks)
            # group(1) is the identifier without the leading ">".
            name = nameres.group(1)
            chunks = []
        else:
            # Strip only the line terminator so that a final line without a
            # trailing newline does not lose its last base.
            chunks.append(line.rstrip("\r\n"))
    # The last record is not followed by a header; store it explicitly.
    if name != "":
        remember(name, chunks)

    # --- best-scoring feature per record and motif -------------------------
    best_hits = {}
    mtx_names = []
    for name in order:
        seq = sequences[name]
        best_hits[name] = {}
        for mtx in matrices:
            hit_annot = patser_tools.makePatserAnnotation(
                sequence=seq, matrix=mtx, seqname=name, scorecut=-100)
            features = hit_annot.getAllFeatures()
            if not len(features) > 0:
                sys.stderr.write(
                    "No hit for matrix %s in %s\n" % (mtx, seq))
                continue
            # max() keeps the first of equally scored features.
            hit = max(features, key=lambda feat: feat.tags['score'])
            motif = hit.tags['motif_name']
            best_hits[name][motif] = hit
            if motif not in mtx_names:
                mtx_names.append(motif)

    # --- format -------------------------------------------------------------
    header = "name\tsequence\t"
    for motif in mtx_names:
        header += "%s_score\t%s_pval\t" % (motif, motif)

    rows = []
    for name in order:
        fields = [name, sequences[name]]
        for motif in mtx_names:
            hit = best_hits[name].get(motif)
            if hit is None:
                fields.extend(["-", "-"])
                continue
            fields.append(str(hit.tags['score']))
            if 'pval' in hit.tags:
                fields.append(str(hit.tags['pval']))
            else:
                fields.append("-")
        # Trailing tab mirrors the header's trailing tab.
        rows.append("\t".join(fields) + "\t\n")
    result = "".join(rows)
    return (header, result)
def _main(args): if len(args) < 4: print >> sys.stderr, "usage: xls_motif_window.py <xls> <fasta> <matrix_file> <window>" sys.exit(1) fasta = fasta_subseq_2.FastaDB() fasta.openFastaFile(args[1]) xls_regions = [] for x in open(args[0]): spl = x[:-1].split() region = { 'chr': spl[0], 'start': int(spl[1]), 'end': int(spl[2]), 'enrich': spl[7] } region['seq'] = fasta[ region['chr']]['sequence'][region['start']:region['end']] xls_regions.append(region) for r in xls_regions: try: annot = patser_tools.makePatserAnnotation(sequence=r['seq'], matrix=args[2]) except IOError: print >> sys.stderr, "Error in seq %s:%d..%d:" % ( r['chr'], r['start'], r['end']) continue if len(annot.getAllFeatures()) < 1: continue maxhit = annot.getMaxFeature("score") winstart = None winend = None winseq = None if maxhit.tags["strand"] == '+': winstart = r['start'] + (maxhit.start - int(args[3]) / 2) winend = r['start'] + (maxhit.start + int(args[3]) / 2) win_seq = fasta[r['chr']]['sequence'][winstart:winend] else: winstart = r['start'] + ((maxhit.end - 3) - int(args[3]) / 2) winend = r['start'] + ((maxhit.end - 3) + int(args[3]) / 2) win_seq = fasta_subseq_2.revcomp( fasta[r['chr']]['sequence'][winstart:winend]) print ">%s:%d..%d:%s enr=%s mtx=%s" % ( r['chr'], winstart, winend, maxhit.tags['strand'], r['enrich'], maxhit.tags['score']) print win_seq
def _main(args): if len(args) != 3: print "usage: <bed_file> <seq_file> <matrix>" sys.exit(0) fasta = fasta_subseq_2.FastaDB() fasta.openFastaFile(args[1]) bed_annots = [] bed_in = open(args[0]) for line in bed_in: spl = line[:-1].split() fseq = fasta[spl[0]]["sequence"][int(spl[1]):int(spl[2])] if spl[5] == "-": fseq = fasta_subseq_2.revcomp(fseq) #print spl try: patannot = patser_tools.makePatserAnnotation(sequence=fseq, matrix=args[2]) except: continue #print "-" * 30 #print spl #print pp(patannot.getAllFeatures()) bed_annots.append({ "seq": spl[0] + "_" + spl[1] + "_" + spl[2], "annotation": patannot }) for ann in bed_annots: for feat in ann["annotation"].getAllFeatures(): print "%s\t%i\t%i\t%f\t%f\t%s" % ( ann["seq"], feat.st, feat.en, feat.tagset["score"], feat.tagset["pval"], feat.tagset["strand"])
def _main(args): if len(args) < 1: print "usage: patser_list.py [mtx1][mtx2] ... < seqs.fa" sys.exit(1) matrices = args seqs = sys.stdin hits = {} name = "" seq = "" pssms = convertFreqMtx(matrices) for s in seqs: nameres = re.search(">(\S+)", s) if nameres and not (name == ""): hits[name] = {'seq': seq} name = nameres.group(1) seq = "" elif nameres and (name == ""): #print "1" name = nameres.group(1) seq = "" else: seq += s[:-1] if not (name == ""): hits[name] = {'seq': seq} #print hits mtx_names = [] for (name, d) in hits.iteritems(): for mtx in matrices: hit_annot = patser_tools.makePatserAnnotation(sequence=d['seq'], matrix=mtx, seqname=name, scorecut=-100) features = hit_annot.getAllFeatures() hit = None if len(features) > 0: max = features[0] for x in features: if x.tags['score'] > max.tags['score']: max = x hit = max else: print >> sys.stderr, "Sequence %s: No hit for matrix %s in %s" % ( name, mtx, d['seq']) continue #print hit d[hit.tags['motif_name']] = hit if hit.tags['motif_name'] not in mtx_names: mtx_names.append(hit.tags['motif_name']) #print hits print "name\tsequence\t", for x in mtx_names: print "%s_score\t%s_pval\t%s_PSSM_score" % (x, x, x), print "" for (name, h) in hits.iteritems(): matrices = [x for x in h.keys() if not x == 'seq'] print "%s\t%s\t" % (name, h['seq']), for x in matrices: print str(h[x].tags['score']) + "\t", if 'pval' in h[x].tags.keys(): print str(h[x].tags['pval']) + "\t", else: print "-", pssm_scores = [ scoreSeq(pssms[x], h['seq']), scoreSeq(pssms[x], fasta_subseq_2.revcomp(h['seq'])) ] score = None if pssm_scores[1] > pssm_scores[0]: score = pssm_scores[1] else: score = pssm_scores[0] print str(score) + "\t", print ""