def parseFasta(self, fasta_f, fasta_type):
    """Load the sequences of a FASTA file into this object.

    Every record yielded by ``BtIO.readFasta`` is wrapped in a ``BlObj``
    and registered in ``self.dict_of_blobs`` / ``self.order_of_blobs``,
    while ``self.seqs``, ``self.length`` and ``self.n_count`` are updated.

    Args:
        fasta_f: Path of the FASTA file; its absolute path is stored in
            ``self.assembly_f``.
        fasta_type: When truthy, a ``CovLibObj`` is registered under this
            key and a coverage value is parsed from each sequence name via
            ``BtIO.parseCovFromHeader``. When falsy, no coverage is read.

    Sequence names that are already registered are reported through
    ``BtLog.error("5", name)``; a file that contributes no sequences or
    no length is reported through ``BtLog.error("1")``.
    """
    print(BtLog.status_d["1"] % ("FASTA", fasta_f))
    self.assembly_f = abspath(fasta_f)

    if fasta_type:
        # Coverage is taken from the assembly headers, so the FASTA file
        # itself acts as the coverage library.
        self.covLibs[fasta_type] = CovLibObj(fasta_type, fasta_type, fasta_f)

    for header, sequence in BtIO.readFasta(fasta_f):
        blob = BlObj(header, sequence)

        # Names must be unique: report the clash and move on.
        if blob.name in self.dict_of_blobs:
            BtLog.error("5", blob.name)
            continue

        self.seqs += 1
        self.length += blob.length
        self.n_count += blob.n_count

        if fasta_type:
            coverage = BtIO.parseCovFromHeader(fasta_type, blob.name)
            self.covLibs[fasta_type].cov_sum += coverage
            blob.addCov(fasta_type, coverage)

        self.order_of_blobs.append(blob.name)
        self.dict_of_blobs[blob.name] = blob

    nothing_parsed = self.seqs == 0 or self.length == 0
    if nothing_parsed:
        BtLog.error("1")
def parseFasta(self, fasta_f, fasta_type):
    """Parse a FASTA file and register each sequence as a ``BlObj``.

    Args:
        fasta_f: Path of the FASTA file to read. ``self.assembly_f`` is
            set to its absolute path.
        fasta_type: Key of the coverage library to fill from the sequence
            names. If falsy, coverage handling is skipped entirely.

    Side effects on ``self``: ``seqs``, ``length`` and ``n_count`` are
    incremented per new sequence; ``order_of_blobs`` and ``dict_of_blobs``
    receive the new entries; ``covLibs[fasta_type].cov_sum`` accumulates
    the parsed coverage when ``fasta_type`` is truthy.
    """
    print(BtLog.status_d['1'] % ('FASTA', fasta_f))
    self.assembly_f = abspath(fasta_f)

    if fasta_type:
        # Set up CovLibObj for coverage in assembly header
        self.covLibs[fasta_type] = CovLibObj(fasta_type, fasta_type, fasta_f)

    for seq_name, seq in BtIO.readFasta(fasta_f):
        record = BlObj(seq_name, seq)
        record_name = record.name

        if record_name in self.dict_of_blobs:
            # A sequence with this name has been registered before.
            BtLog.error('5', record_name)
        else:
            self.seqs += 1
            self.length += record.length
            self.n_count += record.n_count

            if fasta_type:
                cov_value = BtIO.parseCovFromHeader(fasta_type, record_name)
                self.covLibs[fasta_type].cov_sum += cov_value
                record.addCov(fasta_type, cov_value)

            self.order_of_blobs.append(record_name)
            self.dict_of_blobs[record_name] = record

    # No sequences, or sequences without any length, is reported as error '1'.
    if self.seqs == 0 or self.length == 0:
        BtLog.error('1')
def main():
    """Filter a FASTA file by a list of sequence headers and write the result.

    Command-line options are read with ``docopt`` from the module
    docstring:

    * ``--infile``: FASTA file to filter.
    * ``--list``:   file with the headers of interest (read through
      ``BtIO.parseSet``).
    * ``--invert``: when truthy, keep the sequences whose header is NOT
      in the list instead of those that are.
    * ``--out``:    prefix handed to ``BtIO.getOutFile``.

    The selected records are written to the path returned by
    ``BtIO.getOutFile(fasta_f, prefix, "filtered.fna")``. Headers that
    were selected more than once are reported with warning ``'8'``.
    """
    # Imported locally so the block does not depend on the file's import
    # section being changed.
    from collections import Counter

    args = docopt(__doc__)
    fasta_f = args['--infile']
    list_f = args['--list']
    invert = args['--invert']
    prefix = args['--out']

    out_f = BtIO.getOutFile(fasta_f, prefix, "filtered.fna")

    print(BtLog.status_d['1'] % ("list", list_f))
    items = BtIO.parseSet(list_f)
    items_count = len(items)

    print(BtLog.status_d['22'] % fasta_f)
    output = []
    items_parsed = []
    sequences = 0
    for header, sequence in BtIO.readFasta(fasta_f):
        sequences += 1
        # Keep listed headers in normal mode, unlisted headers when
        # inverting.
        if (header in items) != bool(invert):
            items_parsed.append(header)
            output.append(">%s\n%s\n" % (header, sequence))
        BtLog.progress(len(output), 10, items_count, no_limit=True)
    BtLog.progress(items_count, 10, items_count)

    items_parsed_count = len(items_parsed)
    # Guard against an empty FASTA file (division by zero) and force true
    # division so the ratio is not truncated under Python 2.
    if sequences:
        fraction_parsed = float(items_parsed_count) / sequences
    else:
        fraction_parsed = 0.0
    print(BtLog.status_d['23'] % (
        '{:.2%}'.format(fraction_parsed),
        "{:,}".format(items_count),
        "{:,}".format(items_parsed_count),
        "{:,}".format(sequences),
    ))

    # One counting pass instead of calling list.count() once per header.
    duplicates = [
        header for header, occurrences in Counter(items_parsed).items()
        if occurrences > 1
    ]
    if duplicates:
        print(BtLog.warn_d['8'] % "\n\t\t\t".join(duplicates))

    with open(out_f, "w") as fh:
        print(BtLog.status_d['24'] % out_f)
        fh.write("".join(output))