Esempio n. 1
0
def _main(args):

    if len(args) < 1:
        print >> sys.stderr, "usage: <mtx_1> [<mtx2>...] < fasta"
        sys.exit(1)

    seqname = ""
    seq = ""
    for x in sys.stdin:
        head_re = re.search(">(\S+)", x)
        if head_re and not seq:
            seqname = head_re.group(1)
            continue
        elif head_re:
            for x in args:
                annot = patser_tools.makePatserAnnotation(sequence=seq,
                                                          matrix=x)
                for x in annot.getAllFeatures():
                    if x.tags['strand'] == '-':
                        print "%s\t%s\t%d\t%s\t%f" % (
                            seqname, x.tags['motif_name'], x.end,
                            x.tags['strand'], x.tags['score'])
                    elif x.tags['strand'] == '+':
                        print "%s\t%s\t%d\t%s\t%f" % (
                            seqname, x.tags['motif_name'], x.start,
                            x.tags['strand'], x.tags['score'])

            seqname = head_re.group(1)
            seq = ""
        else:
            seq += x.rstrip()
Esempio n. 2
0
 def patSearch(self):
     """Run Patser on ``self.seq`` with ``self.matrix`` and keep the result.

     The value returned by ``patser_tools.makePatserAnnotation`` is stored
     in ``self.annotation``.  If that call raises any ``Exception``, a
     warning naming the sequence is printed and ``self.annotation`` is set
     to ``None`` instead.
     """
     try:
         found = patser_tools.makePatserAnnotation(sequence=self.seq,
                                                   matrix=self.matrix)
     except Exception:
         # Best effort: report the failing sequence and carry on without
         # an annotation.
         print("warning: Exception for seq %s" % (self.seq))
         found = None
     self.annotation = found
 def patSearch(self):
     """Annotate ``self.seq`` with ``self.matrix`` and store the result.

     The annotation returned by ``patser_tools.makePatserAnnotation`` is
     assigned to ``self.annotation``.  Exceptions raised by that call are
     not caught here: they propagate to the caller and ``self.annotation``
     is left unassigned by this method.
     """
     self.annotation = patser_tools.makePatserAnnotation(
         sequence=self.seq, matrix=self.matrix)
def scan_seqs(seqs, matrices):
    """Find the best-scoring Patser hit of every matrix in every record.

    Args:
        seqs: iterable of FASTA lines (for example an open file).  The
            record name is the first whitespace-free token after ``>``.
            Lines seen before the first header are ignored.
        matrices: iterable of matrix arguments; each is passed as
            ``matrix`` to ``patser_tools.makePatserAnnotation``.

    Returns:
        A ``(header, result)`` tuple of strings.  ``header`` is the
        tab-separated column header (``name``, ``sequence`` and then a
        ``<motif>_score`` / ``<motif>_pval`` pair per motif that produced
        a hit).  ``result`` holds one tab-separated row per record, in
        input order, joined with newlines; columns follow the header
        order and ``-`` marks a missing hit or p-value.  Neither string
        ends with a newline.

    A message is written to stderr for each matrix that yields no feature
    in a sequence.
    """
    matrices = list(matrices)

    records = {}   # name -> {'seq': str, 'best': {motif_name: feature}}
    order = []     # record names in first-seen order

    def remember(rec_name, rec_parts):
        # Store a finished record; a repeated name replaces the earlier one.
        if rec_name == "":
            return
        if rec_name not in records:
            order.append(rec_name)
        records[rec_name] = {'seq': "".join(rec_parts), 'best': {}}

    name = ""
    parts = []
    for line in seqs:
        nameres = re.search(r">(\S+)", line)
        if nameres:
            remember(name, parts)
            name = nameres.group(1)
            parts = []
        else:
            # Strip only the line terminator so an unterminated last line
            # does not lose its final base.
            parts.append(line.rstrip("\r\n"))
    # The last record is not followed by a header, so store it here.
    remember(name, parts)

    mtx_names = []
    for name in order:
        record = records[name]
        for mtx in matrices:
            hit_annot = patser_tools.makePatserAnnotation(
                sequence=record['seq'], matrix=mtx, seqname=name,
                scorecut=-100)
            features = hit_annot.getAllFeatures()
            if len(features) < 1:
                sys.stderr.write("No hit for matrix %s in %s\n"
                                 % (mtx, record['seq']))
                continue
            # max() keeps the first of several equally scored features.
            best = max(features, key=lambda feat: feat.tags['score'])
            motif = best.tags['motif_name']
            record['best'][motif] = best
            if motif not in mtx_names:
                mtx_names.append(motif)

    columns = ["name", "sequence"]
    for motif in mtx_names:
        columns.append("%s_score" % motif)
        columns.append("%s_pval" % motif)
    header = "\t".join(columns)

    rows = []
    for name in order:
        record = records[name]
        fields = [name, record['seq']]
        for motif in mtx_names:
            hit = record['best'].get(motif)
            if hit is None:
                fields.extend(["-", "-"])
                continue
            fields.append(str(hit.tags['score']))
            if 'pval' in hit.tags.keys():
                fields.append(str(hit.tags['pval']))
            else:
                fields.append("-")
        rows.append("\t".join(fields))
    result = "\n".join(rows)

    return (header, result)
def _main(args):

    if len(args) < 4:
        print >> sys.stderr, "usage: xls_motif_window.py <xls> <fasta> <matrix_file> <window>"
        sys.exit(1)

    fasta = fasta_subseq_2.FastaDB()
    fasta.openFastaFile(args[1])
    xls_regions = []
    for x in open(args[0]):
        spl = x[:-1].split()
        region = {
            'chr': spl[0],
            'start': int(spl[1]),
            'end': int(spl[2]),
            'enrich': spl[7]
        }
        region['seq'] = fasta[
            region['chr']]['sequence'][region['start']:region['end']]
        xls_regions.append(region)

    for r in xls_regions:
        try:
            annot = patser_tools.makePatserAnnotation(sequence=r['seq'],
                                                      matrix=args[2])
        except IOError:
            print >> sys.stderr, "Error in seq %s:%d..%d:" % (
                r['chr'], r['start'], r['end'])
            continue
        if len(annot.getAllFeatures()) < 1:
            continue
        maxhit = annot.getMaxFeature("score")
        winstart = None
        winend = None
        winseq = None
        if maxhit.tags["strand"] == '+':
            winstart = r['start'] + (maxhit.start - int(args[3]) / 2)
            winend = r['start'] + (maxhit.start + int(args[3]) / 2)
            win_seq = fasta[r['chr']]['sequence'][winstart:winend]
        else:
            winstart = r['start'] + ((maxhit.end - 3) - int(args[3]) / 2)
            winend = r['start'] + ((maxhit.end - 3) + int(args[3]) / 2)
            win_seq = fasta_subseq_2.revcomp(
                fasta[r['chr']]['sequence'][winstart:winend])
        print ">%s:%d..%d:%s enr=%s mtx=%s" % (
            r['chr'], winstart, winend, maxhit.tags['strand'], r['enrich'],
            maxhit.tags['score'])
        print win_seq
Esempio n. 6
0
def _main(args):

    if len(args) != 3:
        print "usage: <bed_file> <seq_file> <matrix>"
        sys.exit(0)

    fasta = fasta_subseq_2.FastaDB()
    fasta.openFastaFile(args[1])

    bed_annots = []
    bed_in = open(args[0])

    for line in bed_in:

        spl = line[:-1].split()
        fseq = fasta[spl[0]]["sequence"][int(spl[1]):int(spl[2])]
        if spl[5] == "-":
            fseq = fasta_subseq_2.revcomp(fseq)
        #print spl
        try:
            patannot = patser_tools.makePatserAnnotation(sequence=fseq,
                                                         matrix=args[2])
        except:
            continue
        #print "-" * 30
        #print spl
        #print pp(patannot.getAllFeatures())
        bed_annots.append({
            "seq": spl[0] + "_" + spl[1] + "_" + spl[2],
            "annotation": patannot
        })

    for ann in bed_annots:
        for feat in ann["annotation"].getAllFeatures():
            print "%s\t%i\t%i\t%f\t%f\t%s" % (
                ann["seq"], feat.st, feat.en, feat.tagset["score"],
                feat.tagset["pval"], feat.tagset["strand"])
Esempio n. 7
0
def _main(args):

    if len(args) < 1:
        print "usage: patser_list.py [mtx1][mtx2] ... < seqs.fa"
        sys.exit(1)

    matrices = args
    seqs = sys.stdin
    hits = {}
    name = ""
    seq = ""
    pssms = convertFreqMtx(matrices)
    for s in seqs:
        nameres = re.search(">(\S+)", s)

        if nameres and not (name == ""):
            hits[name] = {'seq': seq}
            name = nameres.group(1)
            seq = ""
        elif nameres and (name == ""):
            #print "1"
            name = nameres.group(1)
            seq = ""
        else:
            seq += s[:-1]
    if not (name == ""):
        hits[name] = {'seq': seq}

    #print hits

    mtx_names = []
    for (name, d) in hits.iteritems():
        for mtx in matrices:
            hit_annot = patser_tools.makePatserAnnotation(sequence=d['seq'],
                                                          matrix=mtx,
                                                          seqname=name,
                                                          scorecut=-100)
            features = hit_annot.getAllFeatures()
            hit = None
            if len(features) > 0:
                max = features[0]
                for x in features:
                    if x.tags['score'] > max.tags['score']:
                        max = x
                hit = max
            else:
                print >> sys.stderr, "Sequence %s: No hit for matrix %s in %s" % (
                    name, mtx, d['seq'])
                continue
            #print hit
            d[hit.tags['motif_name']] = hit
            if hit.tags['motif_name'] not in mtx_names:
                mtx_names.append(hit.tags['motif_name'])
    #print hits
    print "name\tsequence\t",
    for x in mtx_names:
        print "%s_score\t%s_pval\t%s_PSSM_score" % (x, x, x),
    print ""
    for (name, h) in hits.iteritems():
        matrices = [x for x in h.keys() if not x == 'seq']
        print "%s\t%s\t" % (name, h['seq']),
        for x in matrices:
            print str(h[x].tags['score']) + "\t",
            if 'pval' in h[x].tags.keys():
                print str(h[x].tags['pval']) + "\t",
            else:
                print "-",
            pssm_scores = [
                scoreSeq(pssms[x], h['seq']),
                scoreSeq(pssms[x], fasta_subseq_2.revcomp(h['seq']))
            ]
            score = None
            if pssm_scores[1] > pssm_scores[0]:
                score = pssm_scores[1]
            else:
                score = pssm_scores[0]
            print str(score) + "\t",
        print ""