def postProcessOutput(self):
    """Parse the BLAST tabular (format 6) output and group hits by query id.

    Runs the parent class's ``postProcessOutput`` first, then reads
    ``self.input_files[0]`` as tab-separated text.  Each line becomes one
    ``Feature``; the result is stored in ``self.data`` as a dict mapping a
    query id to the list of its features, in file order.

    Column usage (0-based): 0 -> ``qid``, 1 -> ``hid``, 2 -> ``pid``,
    3 -> not used, 4 -> ``mm``, 5 -> ``gaps``, 6/7 -> ``qstart``/``qend``,
    8/9 -> ``hstart``/``hend``, 10 -> ``exval``, 11 -> ``score``.
    When a row has at least 16 columns, columns 12-15 are also stored as
    ``qlen``, ``hlen``, ``qseq`` and ``hseq``.

    Raises:
        IndexError: if a line has fewer than 12 columns.
        ValueError: if a numeric column cannot be converted.
    """
    super(BlastOutput6ParserAnalysis, self).postProcessOutput()

    data = {}
    path = self.input_files[0]

    with open(path) as fp:
        for line in fp:
            ff = line.rstrip('\n').split('\t')

            qid = ff[0]
            hid = ff[1]
            pid = float(ff[2])
            # ff[3] is present in the row but is not stored on the feature.
            mm = int(ff[4])
            gaps = int(ff[5])
            qstart = int(ff[6])
            qend = int(ff[7])
            hstart = int(ff[8])
            hend = int(ff[9])
            exval = float(ff[10])
            score = float(ff[11])

            feat = Feature()
            feat.qid = qid
            feat.qstart = qstart
            feat.qend = qend
            feat.hid = hid
            feat.hstart = hstart
            feat.hend = hend
            feat.pid = pid
            feat.score = score
            feat.mm = mm
            feat.gaps = gaps
            feat.exval = exval

            # The optional block reads ff[12]..ff[15], so all sixteen columns
            # must exist.  Testing only for a 13th column raised IndexError on
            # rows carrying 13-15 fields.
            if len(ff) > 15:
                feat.qlen = int(ff[12])
                feat.hlen = int(ff[13])
                feat.qseq = ff[14]
                feat.hseq = ff[15]

            data.setdefault(qid, []).append(feat)

    self.data = data
def parseLine(self, line):
    """Convert one tab-separated BLAT PSL alignment line into a ``Feature``.

    Column usage (0-based): 0 -> match count, 1 -> mismatch count,
    8 -> strand, 9 -> query name, 10 -> query size, 11/12 -> query
    start/end, 13 -> hit name, 14 -> hit size, 15/16 -> hit start/end.

    The feature's ``score`` is the match count as an integer percentage of
    the query size.  ``strand`` is set to 1 for "+" and -1 for "-"; for any
    other value (including ".") it is left untouched.

    Args:
        line: one data line of PSL output, optionally newline-terminated.

    Returns:
        The populated ``Feature``.

    Raises:
        IndexError: if the line has fewer than 17 columns.
        ValueError: if a numeric column cannot be converted.
        ZeroDivisionError: if the query size column is 0.
    """
    ff = line.rstrip('\n').split('\t')

    # psLayout version 3 column headers, followed by sample rows:
    #
    # match  mis-match  rep.match  N's  Q gap count  Q gap bases
    # T gap count  T gap bases  strand  Q name  Q size  Q start  Q end
    # T name  T size  T start  T end  block count  blockSizes  qStart  tStarts
    #
    # 236 0 0 0 0 0 0 0 + TRINITY_DN4669_c0_g1_i1 237 0   236 Gm16144_ENSMUST00000131093 1843  1272 1508 1 236, 0,   1272,
    # 179 0 0 0 0 0 0 0 - TRINITY_DN4615_c0_g1_i1 317 0   179 Hdhd3_ENSMUST00000037820   2977  0    179  1 179, 138, 0,
    # 183 0 0 0 0 0 0 0 + TRINITY_DN4601_c0_g1_i1 219 36  219 Atp6v1a_ENSMUST00000130036 40052 2211 2394 1

    f = Feature()

    match = int(ff[0])
    mismatch = int(ff[1])
    strand = ff[8]
    qid = ff[9]
    qlen = int(ff[10])
    qstart = int(ff[11])
    qend = int(ff[12])
    hid = ff[13]
    hlen = int(ff[14])
    hstart = int(ff[15])
    hend = int(ff[16])

    f.qid = qid
    f.type1 = 'blat'
    f.type2 = 'blat'
    f.qstart = qstart
    f.qend = qend
    f.hid = hid
    f.hstart = hstart
    f.hend = hend
    f.score = int(100*match/qlen)
    f.qlen = qlen
    f.hlen = hlen

    # NOTE(review): assumes Feature() creates ``hitattr`` as a dict - confirm.
    f.hitattr['match'] = match
    f.hitattr['mismatch'] = mismatch

    # ``strand`` always comes out of str.split(), so only the textual forms
    # can occur; the former comparisons against the integers 1 / -1 could
    # never be true and have been removed.
    if strand == "+":
        f.strand = 1
    elif strand == "-":
        f.strand = -1

    return f
def parseBlastOutput6(file):
    """Read a BLAST tabular (format 6) file and return its hits per query.

    Every line of ``file`` is split on tabs and turned into a ``Feature``
    carrying ``qid``, ``hid``, ``pid``, ``mm``, ``gaps``, ``qstart``,
    ``qend``, ``hstart``, ``hend``, ``exval`` and ``score`` (columns 0-11;
    column 3 is not stored).  Rows with at least 16 columns additionally
    get ``qlen``, ``hlen``, ``qseq`` and ``hseq`` from columns 12-15.

    Args:
        file: path of the file to read.

    Returns:
        A dict mapping each query id to the list of its ``Feature`` objects
        in file order; empty when the file has no lines.
    """
    by_query = {}

    with open(file) as handle:
        for raw in handle:
            cols = raw.rstrip('\n').split('\t')

            # Convert every mandatory column before building the feature.
            query_id, hit_id = cols[0], cols[1]
            identity = float(cols[2])
            mismatches, gap_count = int(cols[4]), int(cols[5])
            q_from, q_to = int(cols[6]), int(cols[7])
            h_from, h_to = int(cols[8]), int(cols[9])
            evalue = float(cols[10])
            bitscore = float(cols[11])

            hit = Feature()
            hit.qid = query_id
            hit.qstart = q_from
            hit.qend = q_to
            hit.hid = hit_id
            hit.hstart = h_from
            hit.hend = h_to
            hit.pid = identity
            hit.score = bitscore
            hit.mm = mismatches
            hit.gaps = gap_count
            hit.exval = evalue

            # Extended rows carry lengths and aligned sequences as well.
            has_extended_columns = len(cols) > 15
            if has_extended_columns:
                hit.qlen = int(cols[12])
                hit.hlen = int(cols[13])
                hit.qseq = cols[14]
                hit.hseq = cols[15]

            by_query.setdefault(query_id, []).append(hit)

    return by_query
def parseLine(self, line):
    """Convert one tab-separated lastz alignment line into a ``Feature``.

    Column usage (0-based): 0 -> score, 1 -> query name, 2 -> query strand,
    3 -> query size, 4/5 -> query start/end, 6 -> hit name, 7 -> hit
    strand, 8 -> hit size, 9/10 -> hit start/end, 12 -> identity percent,
    14 -> coverage percent (the trailing "%" is stripped from both).

    ``strand`` is set to 1 when query and hit strands are equal and to -1
    when they differ; it is left untouched if either column is not "+" or
    "-".

    Args:
        line: one data line of lastz output, optionally newline-terminated.

    Returns:
        The populated ``Feature``.

    Raises:
        IndexError: if the line has fewer than 15 columns.
        ValueError: if a numeric column cannot be converted.
    """
    ff = line.rstrip('\n').split('\t')

    # Column headers followed by sample rows:
    #
    # score name1    strand1 size1 zstart1 end1 name2   strand2 size2 zstart2 end2 identity idPct coverage  covPct
    # 12413 98004798 +       1579  278     1520 F27C8.1 -       1482  200     1455 709/1185 59.8% 1255/1482 84.7%
    # 15213 98029119 +       1752  526     1572 F27C8.1 -       1482  365     1415 615/1014 60.7% 1050/1482 70.9%

    f = Feature()

    qstrand = ff[2]
    hstrand = ff[7]
    qid = ff[1]
    qlen = int(ff[3])
    qstart = int(ff[4])
    qend = int(ff[5])
    hid = ff[6]
    hlen = int(ff[8])
    hstart = int(ff[9])
    hend = int(ff[10])

    f.qid = qid
    f.type1 = 'lastz'
    f.type2 = 'lastz'
    f.qstart = qstart
    f.qend = qend
    f.hid = hid
    f.hstart = hstart
    f.hend = hend
    f.score = int(ff[0])
    f.qlen = qlen
    f.hlen = hlen

    pid = ff[12].replace('%', '')
    cov = ff[14].replace('%', '')

    # NOTE(review): assumes Feature() creates ``hitattr`` as a dict - confirm.
    f.hitattr['pid'] = float(pid)
    f.hitattr['cov'] = float(cov)

    # The combined strand used to be computed into a local and then dropped,
    # so the returned feature never carried it.  Store it on the feature.
    valid = ("+", "-")
    if qstrand in valid and hstrand in valid:
        f.strand = 1 if qstrand == hstrand else -1

    return f