def runGeneFasta(argNamespace):
    """Write one FASTA file per gene of the target set in ``argNamespace.infile``.

    For every gene name in the target set's ``paftolGeneDict`` the matching
    sequence records are selected and written to a file whose name is
    obtained by substituting the gene name into the ``%``-format string
    ``argNamespace.outFastaFormat``. Once all files are written, their names
    are printed to stdout as a single space-separated line.

    Args:
        argNamespace: parsed command line arguments; the attributes
            ``infile`` and ``outFastaFormat`` are read.
    """
    targetSet = paftol.PaftolTargetSet()
    targetSet.readFasta(argNamespace.infile)
    writtenFnameList = []
    for geneName in targetSet.paftolGeneDict:
        # Restrict by gene only; no organism restriction is passed.
        geneSeqRecordList = targetSet.getSeqRecordSelection(
            organismNameList=None, geneNameList=[geneName])
        outFname = argNamespace.outFastaFormat % geneName
        Bio.SeqIO.write(geneSeqRecordList, outFname, 'fasta')
        writtenFnameList.append(outFname)
    sys.stdout.write(' '.join(writtenFnameList) + '\n')
def runRetrieveTargets(argNamespace):
    """Retrieve PAFTOL target sequences and write them out as FASTA.

    The target set is read from ``argNamespace.targetsfile``, or from stdin
    when that attribute is ``None``. The list returned by
    ``PaftolTargetSeqRetriever.retrievePaftolTargetList`` is written in FASTA
    format to ``argNamespace.outfile``, or to stdout when that attribute is
    ``None``.

    Args:
        argNamespace: parsed command line arguments; ``targetsfile``,
            ``genomeName``, ``fastaFname`` and ``outfile`` are read here, and
            the whole namespace is handed to ``argToBlastnRunner``.
    """
    targetSet = paftol.PaftolTargetSet()
    fastaSource = sys.stdin if argNamespace.targetsfile is None else argNamespace.targetsfile
    targetSet.readFasta(fastaSource)
    blastnRunner = argToBlastnRunner(argNamespace)
    retriever = paftol.PaftolTargetSeqRetriever()
    retrievedTargetList = retriever.retrievePaftolTargetList(
        argNamespace.genomeName, argNamespace.fastaFname, targetSet, blastnRunner)
    if argNamespace.outfile is not None:
        with open(argNamespace.outfile, 'w') as outFile:
            Bio.SeqIO.write(retrievedTargetList, outFile, 'fasta')
        return
    Bio.SeqIO.write(retrievedTargetList, sys.stdout, 'fasta')
def runGeneSetStats(argNamespace):
    """Compute gene set statistics for a sequence file and write them as CSV.

    The target set is read from ``argNamespace.targetsfile``, which is
    mandatory. Sequences are taken from ``argNamespace.seqfile``, or from
    stdin when that attribute is ``None``. The data frame produced by
    ``paftol.makeGeneSetStatsDataFrame`` is written via its ``writeCsv``
    method to ``argNamespace.outfile``, or to stdout when that attribute is
    ``None``.

    Args:
        argNamespace: parsed command line arguments; the attributes
            ``sampleId``, ``targetsfile``, ``seqfile`` and ``outfile`` are
            read. A ``sampleId`` of ``None`` is replaced by ``'unknown'``.

    Raises:
        StandardError: if ``argNamespace.targetsfile`` is ``None``.
    """
    paftolTargetSet = paftol.PaftolTargetSet()
    sampleId = 'unknown' if argNamespace.sampleId is None else argNamespace.sampleId
    if argNamespace.targetsfile is None:
        # Call form of raise: behaves exactly like the old
        # ``raise StandardError, '...'`` statement on Python 2, but is also
        # parseable by Python 3 (where the comma form is a SyntaxError).
        # NOTE(review): StandardError itself exists only on Python 2; it is
        # kept so that existing callers catching it are unaffected.
        raise StandardError('no targets file specified')
    paftolTargetSet.readFasta(argNamespace.targetsfile)
    if argNamespace.seqfile is None:
        geneSetStatsDataFrame = paftol.makeGeneSetStatsDataFrame(
            sys.stdin, sampleId, paftolTargetSet)
    else:
        with open(argNamespace.seqfile, 'r') as seqFile:
            geneSetStatsDataFrame = paftol.makeGeneSetStatsDataFrame(
                seqFile, sampleId, paftolTargetSet)
    if argNamespace.outfile is None:
        geneSetStatsDataFrame.writeCsv(sys.stdout)
    else:
        with open(argNamespace.outfile, 'w') as csvFile:
            geneSetStatsDataFrame.writeCsv(csvFile)
def runTargetGeneScan(argNamespace):
    """Blast a target set against a reference genome and write the gene table.

    The target set is read from ``argNamespace.targetsfile``, or from stdin
    when that attribute is ``None``. A ``paftol.ReferenceGenome`` is built
    from ``refFasta`` and ``refGenbank``, its genes are scanned with
    ``scanMethod``, and ``blastTargetSet`` is run against the target set.
    The resulting table is written as CSV to ``argNamespace.outfile`` (stdout
    if ``None``). If ``argNamespace.cdsFasta`` is given, the CDS list
    returned alongside the table is written there in FASTA format.
    Progress messages go to stderr.

    Args:
        argNamespace: parsed command line arguments; the attributes
            ``targetsfile``, ``scanMethod``, ``refFasta``, ``refGenbank``,
            ``outfile`` and ``cdsFasta`` are read.
    """
    targetSet = paftol.PaftolTargetSet()
    if argNamespace.targetsfile is not None:
        targetSet.readFasta(argNamespace.targetsfile)
    else:
        targetSet.readFasta(sys.stdin)
    numGenes = len(targetSet.paftolGeneDict)
    numOrganisms = len(targetSet.organismDict)
    sys.stderr.write('read target set with %d genes and %d organisms\n' % (numGenes, numOrganisms))
    # FIXME: hack -- scanMethod is passed as the genome name as well
    genome = paftol.ReferenceGenome(
        argNamespace.scanMethod, argNamespace.refFasta, argNamespace.refGenbank)
    genome.scanGenes(argNamespace.scanMethod)
    sys.stderr.write('read reference genome and scanned %d genes\n' % len(genome.geneList))
    geneTable, cdsSeqRecordList = genome.blastTargetSet(targetSet)
    if argNamespace.outfile is not None:
        with open(argNamespace.outfile, 'w') as tableFile:
            geneTable.writeCsv(tableFile)
    else:
        geneTable.writeCsv(sys.stdout)
    if argNamespace.cdsFasta is not None:
        Bio.SeqIO.write(cdsSeqRecordList, argNamespace.cdsFasta, 'fasta')
def runSelectgenes(argNamespace):
    """Select sequences from a target set by organism and/or gene name.

    Gene names are collected from ``argNamespace.gene`` and from the lines of
    the file ``argNamespace.genefile`` (one name per line, surrounding
    whitespace stripped); both sources may be given together and are merged.
    The target set is read from ``argNamespace.targetsfile``, or from stdin
    when that attribute is ``None``. The selected records are written in
    FASTA format to ``argNamespace.outfile``, or to stdout when that
    attribute is ``None``.

    Args:
        argNamespace: parsed command line arguments; the attributes
            ``organism``, ``gene``, ``genefile``, ``targetsfile`` and
            ``outfile`` are read.
    """
    organismNameSet = None
    if argNamespace.organism is not None:
        organismNameSet = set(argNamespace.organism)
    # Use a set throughout. The original started with a list, rebound the
    # name to a set when --gene was given and then called .append() for the
    # gene file, which raised AttributeError whenever both were supplied.
    geneNameSet = set()
    if argNamespace.gene is not None:
        geneNameSet.update(argNamespace.gene)
    if argNamespace.genefile is not None:
        with open(argNamespace.genefile) as geneFile:
            for line in geneFile:
                geneNameSet.add(line.strip())
    if not geneNameSet:
        # No gene names collected: pass None instead of an empty collection
        # (presumably "no restriction" -- confirm against
        # getSeqRecordSelection).
        geneNameSet = None
    paftolTargetSet = paftol.PaftolTargetSet()
    if argNamespace.targetsfile is None:
        paftolTargetSet.readFasta(sys.stdin)
    else:
        paftolTargetSet.readFasta(argNamespace.targetsfile)
    srList = paftolTargetSet.getSeqRecordSelection(organismNameSet, geneNameSet)
    if argNamespace.outfile is None:
        Bio.SeqIO.write(srList, sys.stdout, 'fasta')
    else:
        Bio.SeqIO.write(srList, argNamespace.outfile, 'fasta')