def applyMethod(self):
    """Index the graph.

    Reads the id-to-nid map from the file named by the
    ``files``/``output_nids`` configuration option (default
    ``adda.nids``) and hands the input graph, wrapped in
    ``self.mGraphIterator`` and ``cadda.PairsDBNeighboursIterator``,
    to ``cadda.indexGraph`` together with the output graph and index
    filenames.
    """
    self.info("indexing of %s started" % self.mFilenameInputGraph)

    # Look the filename up once so the log message and the open() call
    # cannot disagree.
    filename_nids = self.mConfig.get("files", "output_nids", "adda.nids")
    self.info("loading map_id2nid from %s" % filename_nids)

    # The context manager closes the handle even if reading the map fails.
    with open(filename_nids) as infile:
        map_id2nid = AddaIO.readMapId2Nid(
            infile,
            storage=self.mConfig.get("files", "storage_nids", "memory"))

    # NOTE(review): the graph stream is not closed here; presumably
    # cadda.indexGraph consumes it completely - confirm before adding
    # an explicit close.
    infile = AddaIO.openStream(self.mFilenameInputGraph)

    neighbours = cadda.PairsDBNeighboursIterator(
        self.mGraphIterator(infile, map_id2nid, self.mLogger),
        self.mLogger)

    cadda.indexGraph(neighbours,
                     len(map_id2nid),
                     self.mFilenameOutputGraph,
                     self.mFilenameOutputIndex,
                     self.mLogger)

    # Drop the reference to the map explicitly, as the original did.
    del map_id2nid
def applyMethod(self):
    """Assign nids to the sequences of the input fasta file.

    Iterates over ``self.mFilenameInputFasta`` and, for every sequence
    that is kept, writes one tab-separated row
    (``nid, pid, hid, length, sequence``) to ``self.mFilenameNids`` and
    adds the sequence to the indexed fasta file
    ``self.mFilenameOutputFasta`` under its nid.

    A sequence is skipped when it is longer than
    ``self.mMaxSequenceLength`` or when the value returned by
    ``self.getHID`` for it has been seen before. Nids are consecutive
    integers starting at 1 and are only consumed by kept sequences.

    Updates the counters ``mInput``, ``mOutput``, ``mRemoved`` and
    ``mDuplicates`` on ``self``.
    """
    self.mInput = 0
    self.mOutput = 0
    self.mRemoved = 0
    self.mDuplicates = 0

    # use existing fasta file
    # NOTE(review): the input stream opened here is never closed
    # explicitly; it is left to the iterator / garbage collection.
    iterator = FastaIterator(AddaIO.openStream(self.mFilenameInputFasta))

    fasta = IndexedFasta.IndexedFasta(self.mFilenameOutputFasta, "w")
    outfile = None
    try:
        outfile = self.openOutputStream(self.mFilenameNids)
        outfile.write("nid\tpid\thid\tlength\tsequence\n")

        nid = 1
        hids = set()

        for seq in iterator:
            self.mInput += 1

            # Too long: count as removed and skip.
            if len(seq.sequence) > self.mMaxSequenceLength:
                self.mRemoved += 1
                continue

            # Same hid as an earlier sequence: count as duplicate and skip.
            hid = self.getHID(seq.sequence)
            if hid in hids:
                self.mDuplicates += 1
                continue
            hids.add(hid)

            outfile.write("%s\t%s\t%s\t%i\t%s\n" %
                          (nid, seq.pid, hid, len(seq.sequence), seq.sequence))
            fasta.addSequence(nid, seq.sequence)

            nid += 1
            self.mOutput += 1
    finally:
        # Always release both outputs, in the original order (fasta
        # first), even when reading or writing fails part-way through.
        try:
            fasta.close()
        finally:
            if outfile is not None:
                outfile.close()
def applyMethod(self):
    """Index the graph.

    Steps:

    1. Load the id-to-nid map from the file configured as
       ``files``/``output_nids`` (default ``adda.nids``), using the
       storage mode configured as ``files``/``storage_nids``
       (default ``memory``).
    2. Open the input graph and pass it, wrapped in
       ``self.mGraphIterator`` and ``cadda.PairsDBNeighboursIterator``,
       to ``cadda.indexGraph`` along with the number of map entries and
       the output graph and index filenames.
    """
    self.info("indexing of %s started" % self.mFilenameInputGraph)

    # Resolve the nids filename a single time; it is used for both the
    # log line and the open() call.
    nids_path = self.mConfig.get("files", "output_nids", "adda.nids")
    self.info("loading map_id2nid from %s" % nids_path)

    storage = self.mConfig.get("files", "storage_nids", "memory")
    # "with" guarantees the handle is closed if readMapId2Nid raises.
    with open(nids_path) as nids_file:
        map_id2nid = AddaIO.readMapId2Nid(nids_file, storage=storage)

    # NOTE(review): this stream is never closed in this method;
    # verify whether cadda.indexGraph or the iterator takes ownership.
    graph_stream = AddaIO.openStream(self.mFilenameInputGraph)

    cadda.indexGraph(
        cadda.PairsDBNeighboursIterator(
            self.mGraphIterator(graph_stream, map_id2nid, self.mLogger),
            self.mLogger),
        len(map_id2nid),
        self.mFilenameOutputGraph,
        self.mFilenameOutputIndex,
        self.mLogger)

    # Explicitly drop the map reference, matching the original code.
    del map_id2nid
def applyMethod(self):
    """Filter the input sequences and number the ones that are kept.

    Reads sequences from ``self.mFilenameInputFasta``. Sequences longer
    than ``self.mMaxSequenceLength`` are counted in ``self.mRemoved``
    and skipped; sequences whose ``self.getHID`` value was already seen
    are counted in ``self.mDuplicates`` and skipped. Every remaining
    sequence receives the next nid (starting at 1), is written as a
    tab-separated row to ``self.mFilenameNids`` and is added to the
    indexed fasta file ``self.mFilenameOutputFasta``.

    ``self.mInput`` counts all sequences read and ``self.mOutput`` the
    sequences written.
    """
    self.mInput = 0
    self.mOutput = 0
    self.mRemoved = 0
    self.mDuplicates = 0

    # use existing fasta file
    # NOTE(review): the stream returned by AddaIO.openStream is not
    # closed in this method.
    iterator = FastaIterator(AddaIO.openStream(self.mFilenameInputFasta))

    fasta = IndexedFasta.IndexedFasta(self.mFilenameOutputFasta, "w")
    outfile = None
    try:
        outfile = self.openOutputStream(self.mFilenameNids)
        outfile.write("nid\tpid\thid\tlength\tsequence\n")

        seen_hids = set()
        nid = 1

        for seq in iterator:
            self.mInput += 1
            sequence = seq.sequence
            length = len(sequence)

            if length > self.mMaxSequenceLength:
                self.mRemoved += 1
                continue

            hid = self.getHID(sequence)
            if hid in seen_hids:
                self.mDuplicates += 1
                continue
            seen_hids.add(hid)

            outfile.write("%s\t%s\t%s\t%i\t%s\n" %
                          (nid, seq.pid, hid, length, sequence))
            fasta.addSequence(nid, sequence)

            # Only kept sequences consume a nid.
            nid += 1
            self.mOutput += 1
    finally:
        # Close both outputs on every exit path, fasta first as in the
        # original, so a failure mid-run does not leak the handles.
        try:
            fasta.close()
        finally:
            if outfile is not None:
                outfile.close()