예제 #1
0
    def testReader(self):
        """Read one matrix per supported format and verify its scores.

        The matrix read in transfac format scores three inputs: ``dSeq``
        itself, ``dSeq`` re-encoded as one ``{nucleotide: 1.0}`` dict per
        position, and ``qSeq``.  The formatted scores are compared with the
        expected strings.
        """
        reader_options = dict(background=background, score_correction=False)

        def first_two_columns(rows):
            # Render columns 0 and 1 of every row with four decimals.
            return " ".join("%.4f" % row[col] for row in rows for col in (0, 1))

        # basic format: i.e. the one used for jaspar
        basic_matrices = list(
            pwm.Reader(StringIO(basicPwm), format="basic", **reader_options))
        assert len(basic_matrices) == 1

        # transfac format
        transfac_matrices = list(
            pwm.Reader(StringIO(transfacPwm), format="transfac", **reader_options))
        assert len(transfac_matrices) == 1
        matrix = transfac_matrices[0]

        # plain sequence
        plain_scores = matrix.score_seq(dSeq)
        assert len(plain_scores) == 2
        assert first_two_columns(plain_scores) == dScoresExpected

        # the same sequence, each position encoded as a one-entry dict;
        # it must give the same scores as the plain sequence
        encoded_seq = [{nt: 1.0} for nt in dSeq]
        encoded_scores = matrix.score_seq(encoded_seq)
        assert len(encoded_scores) == 2
        assert first_two_columns(encoded_scores) == dScoresExpected

        # qSeq is defined outside this method and yields a single score row
        q_scores = matrix.score_seq(qSeq)
        assert len(q_scores) == 1
        assert first_two_columns(q_scores) == qScoresExpected
예제 #2
0
def main():
    """Score a MAF alignment with every matrix and write one file per key.

    Command line: ``pwm_file spec1,spec2,... [inmaf]``.  The alignment is
    read from ``inmaf`` when exactly three arguments are given, otherwise
    from standard input.  For every key ``k`` in a score dict yielded by
    ``MafScorer``, the matrix rows are written to ``k.mx``: one row per line,
    each value formatted as ``%.2f`` and followed by a single space.

    Exits with status 2 and a usage message when fewer than two arguments
    are supplied.
    """
    if len(sys.argv) < 3:
        print("usage: %s pwm_file spec1,spec2,... [inmaf]" % sys.argv[0],
              file=sys.stderr)
        sys.exit(2)

    pwm_file = sys.argv[1]
    # alignment species, in the order given on the command line
    species = sys.argv[2].split(',')

    # Only a file opened here is closed here; stdin is left alone.
    owns_inmaf = len(sys.argv) == 4
    inmaf = open(sys.argv[3]) if owns_inmaf else sys.stdin

    outputs = {}
    try:
        # read weight matrices, keyed by matrix id
        with open(pwm_file) as pwm_handle:
            pwm = {}
            for wm in pwmx.Reader(pwm_handle, format='basic'):
                pwm[wm.id] = wm

        for scoremax, index, headers in MafScorer(pwm, species, inmaf):
            for k, matrix in scoremax.items():
                fname = k + '.mx'
                if fname not in outputs:
                    # opened once; later blocks keep writing to the same handle
                    outputs[fname] = open(fname, 'w')
                    print("Writing", fname, file=sys.stderr)

                out = outputs[fname]
                for row in matrix:
                    # trailing space after every value matches the old output
                    out.write("".join("%.2f " % score for score in row) + "\n")
    finally:
        # close everything even if scoring fails part-way through
        for out in outputs.values():
            out.close()
        if owns_inmaf:
            inmaf.close()
예제 #3
0
def main():
    """Report alignment positions where any species scores above a threshold.

    Command line: ``transfac|basic pwmfile inmaf threshold spec1,spec2,...``.

    Every matrix in ``pwmfile`` is loaded and every block of ``inmaf`` is
    scored with ``MafBlockScorer``.  For each matrix and each offset at which
    at least one species scores strictly above ``threshold``, one line is
    printed: chromosome, start, end, the matrix id with spaces replaced by
    underscores, and the ``%.2f`` scores of all species at that offset.  The
    start is the block start plus the offset minus the number of ``-``
    characters in the first component's text before that offset.

    Exits with status 2 and a usage message when arguments are missing.
    """
    if len(sys.argv) < 6:
        print("%s transfac|basic pwmfile inmaf threshold spec1,spec2,... " %
              sys.argv[0],
              file=sys.stderr)
        # non-zero status: a usage error is a failure, not a success
        sys.exit(2)

    matrix_format = sys.argv[1]
    with open(sys.argv[2]) as pwm_handle:
        pwm = {}
        for wm in pwmx.Reader(pwm_handle, format=matrix_format):
            pwm[wm.id] = wm

    threshold = float(sys.argv[4])
    species = sys.argv[5].split(',')
    species_rows = range(len(species))

    with open(sys.argv[3]) as inmaf:
        for maf in align_maf.Reader(inmaf):
            # text of the first component, used to discount gap columns
            reftext = maf.components[0].text

            # maf block scores for each matrix
            for scoremax, width, headers in MafBlockScorer(pwm, species, maf):
                mafsrc, mafstart, mafend = headers[0]
                mafchrom = mafsrc.split('.')[1]

                # lists of scores for each position in scoremax
                for motif_id, mx in scoremax.items():
                    for offset in range(width):
                        # one output line per offset, however many species hit
                        if not any(mx[i][offset] > threshold
                                   for i in species_rows):
                            continue

                        refstart = mafstart + offset - reftext.count(
                            '-', 0, offset)
                        refend = refstart + len(pwm[motif_id])
                        data = " ".join(
                            "%.2f" % mx[i][offset] for i in species_rows)
                        # underscore spaces in the name
                        print(mafchrom, refstart, refend,
                              motif_id.replace(' ', '_'), data)
def main():
    """Print motif hits selected from a MAF alignment with a single matrix.

    Command line: ``transfac|basic pwmfile inmaf threshold [motif]``.

    Only the first matrix in ``pwmfile`` is used.  Every block of ``inmaf``
    is passed to ``MafMotifSelect`` together with the matrix, the optional
    ``motif`` argument (``None`` when absent) and the threshold; each yielded
    ``(mafmotif, pwm_score, motif_score)`` triple is printed, followed by a
    separator line.

    Exits with status 2 and a usage message when arguments are missing.
    """
    if len(sys.argv) < 5:
        print("%s transfac|basic pwmfile inmaf threshold [motif]" % sys.argv[0],
              file=sys.stderr)
        sys.exit(2)

    with open(sys.argv[2]) as pwm_handle:
        # next(iter(...)) works on Python 2 and 3, unlike iter(...).next()
        pwm = next(iter(pwmx.Reader(pwm_handle, format=sys.argv[1])))

    threshold = float(sys.argv[4])
    motif = sys.argv[5] if len(sys.argv) > 5 else None

    with open(sys.argv[3]) as inmaf:
        for maf in align_maf.Reader(inmaf):
            for mafmotif, pwm_score, motif_score in MafMotifSelect(
                    maf, pwm, motif, threshold):
                print(mafmotif, pwm_score, motif_score)
                # separator line emitted after every hit
                print('zzzzzzzzzzzzzzzzzzzzzzzzzzzzz')
예제 #5
0


# FOX 6-mer motif, written as a ">name" header line followed by one row of
# four counts per motif position (six rows, so six positions).
# NOTE(review): presumably the columns are A, C, G, T, which would make this
# motif TGCATG -- confirm against the "basic" format handled by pwm.Reader.

fm = """>FOX6
0   0   0   100
0   0   100   0
0   100   0  0
100   0   0  0
0   0   0   100
0   0   100   0
"""
# Background nucleotide frequencies handed to pwm.Reader below; also the
# default ``background`` argument of alnToPWM.
background = { 'A':.28,'C':.21, 'G':.24, 'T':.27 } #genome background. not transcriptome :(

# Parse the motif text above and keep the first matrix the reader yields.
FOXwm = [w for w in pwm.Reader(StringIO.StringIO(fm),format="basic", background=background,score_correction=True)][0]

# Both objects below are created at import time.
hg19Phylo = hg19Phylogeny()
# NOTE(review): hard-coded absolute path to the genome FASTA; importing this
# module presumably fails on machines without that file -- confirm.
fasta = pyfasta.Fasta("/home/lovci/data/Genome/hg19/all.fa", flatten_inplace=True)


def alnToPWM(aln, id = "id", background=background, nSpecies = 46.0):
    #import StringIO
    #s = StringIO.StringIO()
    #s.write(">" + id + "\n")


    sz = aln.components[0].size
    ar = np.ndarray( shape=(sz,4))

    for rowN, i in enumerate(aln.column_iter()):