Python FastaReader.records Examples

Programming Language: Python

Namespace/Package Name: MGT.FastaIO

Class/Type: FastaReader

Method/Function: records

Examples at hotexamples.com: 3

Python FastaReader.records - 3 examples found. These are the top-rated real-world Python examples of MGT.FastaIO.FastaReader.records, extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

close(6)

records(3)

Frequently Used Methods

close (6)

records (3)

Example #1

Show file

File: HctApp.py Project: andreyto/mgtaxa

 def loadProtIdsOrgType(self,db,idGenSeq,inserterSeq,orgType):
     """Load Fasta deflines for protein sequences generated by pullSeq, parse and insert them into SQL table.

     Reads the file '<orgType>.protein.cat.gz' located through self.store,
     parses every defline with CvTreeId.splitFastaDeflineCvTree() and calls
     inserterSeq() once per record with the tuple
     (idSeq, taxid, seqLen, acc, acc_gen, orgType[:4]).

     @param db not referenced by this method
     @param idGenSeq callable that returns a new sequence id on each call
     @param inserterSeq callable that receives the tuple of values for one record
     @param orgType organism type name; selects the input file, and its first
     four characters are stored with every record
     """
     inFasta = self.store.getFilePath('%s.protein.cat.gz' % orgType)
     inp = FastaReader(inFasta)
     # try/finally so that the reader is closed even if defline parsing or
     # the inserter raises in the middle of the file.
     try:
         for rec in inp.records():
             hdr = CvTreeId.splitFastaDeflineCvTree(rec.header())
             idSeq = idGenSeq()
             seqLen = rec.seqLen()
             values = (idSeq,hdr.taxid,seqLen,hdr.acc,hdr.acc_gen,orgType[:4])
             inserterSeq(values)
     finally:
         inp.close()

Example #2

Show file

File: HctApp.py Project: andreyto/mgtaxa

    def loadGenomicIdsOrgType(self,db,idGenSeq,inserterSeq,orgType):
        """Load Fasta deflines for genomic sequences, keep the NC_ accessions and label their genetic element type.

        The input is self.outGroupFna when orgType is "outgroup", otherwise
        the file '<orgType>.genomic.fna.gz' under options.refSeqDataDir.
        Each defline, minus its first character, is split on '|' and must
        look like 'gi|<integer>|ref|<accession>|<description>'.
        Records whose accession does not start with 'NC_' are skipped.
        Every remaining record is classified by keywords found in its
        description ("pla", "ext", "tra", or "chr" when none matches; plasmids
        are labeled, not dropped) and passed to inserterSeq() as the tuple
        (idSeq, gi, 0, seqLen, acc, genel, orgType[:4], hdr).

        @param db not referenced by this method
        @param idGenSeq callable that returns a new sequence id on each call
        @param inserterSeq callable that receives the tuple of values for one record
        @param orgType organism type name; "outgroup" selects self.outGroupFna

        @todo It might be more robust to parse the GenBank file, e.g.
        FEATURES             Location/Qualifiers
        source          1..208369
            /organism="Bacillus cereus ATCC 10987"
            /mol_type="genomic DNA"
            /strain="ATCC 10987"
            /db_xref="ATCC:10987"
            /db_xref="taxon:222523"
            /plasmid="pBc10987"
        gene            join(207497..208369,1..687)

        We would have to fix the gap(unk100) bug first, and also check how the "extrachromosomal" is labeled
        in GB file.
        """
        if orgType == "outgroup":
            inFasta = self.outGroupFna
        else:
            inFasta = pjoin(options.refSeqDataDir,"%s.genomic.fna.gz" % orgType)
        inp = FastaReader(inFasta)
        # try/finally so that the reader is closed even if a malformed defline
        # or the inserter raises in the middle of the file.
        try:
            for rec in inp.records():
                # Drop the first character of the header (presumably the '>' marker).
                line = rec.header()[1:]
                # At most 5 fields: everything after the 4th '|' stays in one piece.
                parts = line.strip().split('|',4)
                assert parts[0] == 'gi', "Defline does not start with 'gi': %r" % (line,)
                gi = int(parts[1])
                assert parts[2] == 'ref', "Defline has no 'ref' field: %r" % (line,)
                acc = parts[3].strip()
                if acc[:3] == 'NC_':
                    # parts[4:] is empty when the defline has no description,
                    # in which case hdr becomes the empty string.
                    hdr = (' '.join(parts[4:])).strip()
                    hlow = hdr.lower()
                    ## genel values must differ by the first letter - this is used
                    ## by the name generation method later
                    # we can do re.search(r'\bplasmid\b',) instead, but the plain
                    # substring test is safer (there is a record called 'megaplasmid'):
                    if 'plasmid' in hlow:
                        genel = "pla"
                    elif 'extrachromosomal' in hlow or 'extra-chromosomal' in hlow:
                        genel = "ext"
                    elif 'transposon' in hlow:
                        genel = "tra"
                    else:
                        genel = "chr"
                    idSeq = idGenSeq()
                    seqLen = rec.seqLen()
                    # NOTE(review): the meaning of the constant 0 in the third
                    # position is not visible here - confirm against the table schema.
                    values = (idSeq,gi,0,seqLen,acc,genel,orgType[:4],hdr)
                    inserterSeq(values)
        finally:
            inp.close()

Example #3

Show file

File: GbFeatApp.py Project: andreyto/mgtaxa

 def makeFeat(self):
     """Create feature vectors out of FASTA files."""
     maxSampLen = 100000
     kmerCnt = KmerSparseFeatures(sampLen=maxSampLen,
             kmerLen=2,
             rcPolicy=RC_POLICY.MERGE,
             normPolicy=NORM_POLICY.FREQ)
     for fastaFile in self.store.getFilePaths("*.fasta.gz"):
         inpFasta = FastaReader(fastaFile)
         iRec = 0
         for rec in inpFasta.records():
             kmerCnt.process(rec.sequence(format="array"))
             iRec += 1
             if iRec > 100:
                 break
         inpFasta.close()
         feat = kmerCnt.kmerFrequencies()
         id = stripSfx(os.path.basename(fastaFile),".fasta.gz")
         print id, feat