def postProcessOutput(self):
        """Parse the tab-separated BLAST hit table in ``self.input_files[0]``.

        Runs the parent class's ``postProcessOutput`` first, then reads the
        file line by line and builds one ``Feature`` per row.  The result is
        stored in ``self.data`` as a dict mapping each query id to the list
        of its features, in file order.

        Columns read (0-based), named as in this code:
            0 qid, 1 hid, 2 pid, 3 alignment length (not stored), 4 mm,
            5 gaps, 6 qstart, 7 qend, 8 hstart, 9 hend, 10 exval, 11 score.
        When a row has at least 16 columns, columns 12-15 are additionally
        stored as qlen, hlen, qseq and hseq.

        Raises:
            IndexError: if a non-blank row has fewer than 12 columns.
            ValueError: if a numeric column cannot be converted.
        """
        super(BlastOutput6ParserAnalysis,self).postProcessOutput()

        data = {}

        with open(self.input_files[0]) as fp:

            for line in fp:

                line = line.rstrip('\n')

                # A blank (e.g. trailing) line carries no hit; skip it
                # instead of failing on the missing columns.
                if not line.strip():
                    continue

                ff = line.split('\t')

                feat = Feature()

                feat.qid    = ff[0]
                feat.hid    = ff[1]
                feat.pid    = float(ff[2])
                feat.mm     = int(ff[4])
                feat.gaps   = int(ff[5])
                feat.qstart = int(ff[6])
                feat.qend   = int(ff[7])
                feat.hstart = int(ff[8])
                feat.hend   = int(ff[9])
                feat.exval  = float(ff[10])
                feat.score  = float(ff[11])

                # The four extra columns are only usable when all of them
                # are present, i.e. the row has indices 12 through 15.
                if len(ff) > 15:
                    feat.qlen = int(ff[12])
                    feat.hlen = int(ff[13])
                    feat.qseq = ff[14]
                    feat.hseq = ff[15]

                data.setdefault(feat.qid, []).append(feat)

        self.data = data
Beispiel #2
0
    def parseLine(self,line):
        """Parse one tab-separated PSL (psLayout version 3) line into a Feature.

        Column layout, taken from the PSL header (0-based index):
            0 match, 1 mismatch, 2 rep. match, 3 N's count,
            4 Q gap count, 5 Q gap bases, 6 T gap count, 7 T gap bases,
            8 strand, 9 Q name, 10 Q size, 11 Q start, 12 Q end,
            13 T name, 14 T size, 15 T start, 16 T end,
            17 block count, 18 blockSizes, 19 qStarts, 20 tStarts

        Example rows:
            236 0 0 0 0 0 0 0 + TRINITY_DN4669_c0_g1_i1 237 0 236 Gm16144_ENSMUST00000131093 1843 1272 1508 1 236, 0, 1272,
            179 0 0 0 0 0 0 0 - TRINITY_DN4615_c0_g1_i1 317 0 179 Hdhd3_ENSMUST00000037820 2977 0 179 1 179, 138, 0,

        Args:
            line: a single data line (header lines are not handled here).

        Returns:
            A Feature with query/hit ids, coordinates and lengths filled in,
            ``type1``/``type2`` set to 'blat', ``score`` set to the integer
            percentage of the query length covered by matching bases, and
            ``hitattr['match']`` / ``hitattr['mismatch']`` populated.
            ``strand`` is set to 1 for '+' and -1 for '-'; any other strand
            value leaves the Feature's strand untouched.

        Raises:
            IndexError: if the line has fewer than 17 columns.
            ValueError: if a numeric column cannot be converted.
            ZeroDivisionError: if the Q size column is 0.
        """
        ff = line.rstrip('\n').split('\t')

        match    = int(ff[0])
        mismatch = int(ff[1])
        strand   = ff[8]
        qlen     = int(ff[10])
        hlen     = int(ff[14])

        f = Feature()

        f.qid    = ff[9]
        f.type1  = 'blat'
        f.type2  = 'blat'
        f.qstart = int(ff[11])
        f.qend   = int(ff[12])

        f.hid    = ff[13]
        f.hstart = int(ff[15])
        f.hend   = int(ff[16])

        # Percent of the query length that is matching bases, truncated.
        f.score = int(100*match/qlen)

        f.qlen  = qlen
        f.hlen  = hlen

        # NOTE(review): assumes Feature provides a dict-like ``hitattr``.
        f.hitattr['match'] = match
        f.hitattr['mismatch'] = mismatch

        # ``strand`` is always a string here, so only the textual forms
        # need handling; anything else (e.g. '.') leaves strand unset.
        if strand == "+":
            f.strand = 1
        elif strand == "-":
            f.strand = -1

        return f
    def parseBlastOutput6(file):
        """Parse a tab-separated BLAST hit table into Features grouped by query.

        Columns read (0-based), named as in this code:
            0 qid, 1 hid, 2 pid, 3 alignment length (not stored), 4 mm,
            5 gaps, 6 qstart, 7 qend, 8 hstart, 9 hend, 10 exval, 11 score.
        When a row has at least 16 columns, columns 12-15 are additionally
        stored as qlen, hlen, qseq and hseq.

        Args:
            file: path of the file to read.

        Returns:
            A dict mapping each query id to the list of Feature objects
            built from its rows, in file order.  Empty if the file holds
            no hit rows.

        Raises:
            IndexError: if a non-blank row has fewer than 12 columns.
            ValueError: if a numeric column cannot be converted.
        """
        hits = {}

        with open(file) as fp:

            for line in fp:

                line = line.rstrip('\n')

                # A blank (e.g. trailing) line carries no hit; skip it
                # instead of failing on the missing columns.
                if not line.strip():
                    continue

                ff = line.split('\t')

                feat = Feature()

                feat.qid    = ff[0]
                feat.hid    = ff[1]
                feat.pid    = float(ff[2])
                feat.mm     = int(ff[4])
                feat.gaps   = int(ff[5])
                feat.qstart = int(ff[6])
                feat.qend   = int(ff[7])
                feat.hstart = int(ff[8])
                feat.hend   = int(ff[9])
                feat.exval  = float(ff[10])
                feat.score  = float(ff[11])

                # Extra columns are only read when all four are present.
                if len(ff) > 15:
                    feat.qlen = int(ff[12])
                    feat.hlen = int(ff[13])
                    feat.qseq = ff[14]
                    feat.hseq = ff[15]

                hits.setdefault(feat.qid, []).append(feat)

        return hits
Beispiel #4
0
    def parseLine(self, line):
        """Parse one tab-separated LASTZ output line into a Feature.

        Column layout, taken from the output header (0-based index):
            0 score, 1 name1, 2 strand1, 3 size1, 4 zstart1, 5 end1,
            6 name2, 7 strand2, 8 size2, 9 zstart2, 10 end2,
            11 identity, 12 idPct, 13 coverage, 14 covPct

        Example rows:
            12413   98004798   +   1579   278   1520   F27C8.1   -   1482   200   1455   709/1185   59.8%   1255/1482   84.7%
            15213   98029119   +   1752   526   1572   F27C8.1   -   1482   365   1415   615/1014   60.7%   1050/1482   70.9%

        Args:
            line: a single data line (the '#'-prefixed header is not
                handled here).

        Returns:
            A Feature with the name1/name2 columns stored as query/hit ids
            together with their coordinates and sizes, ``type1``/``type2``
            set to 'lastz', ``score`` taken from column 0, and
            ``hitattr['pid']`` / ``hitattr['cov']`` holding idPct / covPct
            as floats.  ``strand`` is set to 1 when both strand columns
            agree and -1 when they differ; if either column is not '+' or
            '-', the Feature's strand is left untouched.

        Raises:
            IndexError: if the line has fewer than 15 columns.
            ValueError: if a numeric column cannot be converted.
        """
        ff = line.rstrip('\n').split('\t')

        qstrand = ff[2]
        hstrand = ff[7]

        f = Feature()

        f.qid = ff[1]
        f.type1 = 'lastz'
        f.type2 = 'lastz'
        f.qstart = int(ff[4])
        f.qend = int(ff[5])

        f.hid = ff[6]
        f.hstart = int(ff[9])
        f.hend = int(ff[10])

        f.score = int(ff[0])

        f.qlen = int(ff[3])
        f.hlen = int(ff[8])

        # Percent columns look like '59.8%'; drop the sign before converting.
        # NOTE(review): assumes Feature provides a dict-like ``hitattr``.
        f.hitattr['pid'] = float(ff[12].replace('%', ''))
        f.hitattr['cov'] = float(ff[14].replace('%', ''))

        # Combine the two per-sequence strands into one relative strand
        # and store it on the feature (previously computed but dropped).
        if qstrand in ("+", "-") and hstrand in ("+", "-"):
            f.strand = 1 if qstrand == hstrand else -1

        return f