Example #1
0
    def applyMethod(self):
        """Index the input graph.

        Reads the id-to-nid map from the path returned by
        ``self.mConfig.get("files", "output_nids", "adda.nids")`` and then
        hands a neighbours iterator over ``self.mFilenameInputGraph`` to
        ``cadda.indexGraph`` together with ``self.mFilenameOutputGraph`` and
        ``self.mFilenameOutputIndex``.
        """
        self.info("indexing of %s started" % self.mFilenameInputGraph)

        # Look the path up once so the logged name and the opened file
        # cannot disagree.
        filename_nids = self.mConfig.get("files", "output_nids", "adda.nids")
        self.info("loading map_id2nid from %s" % filename_nids)

        # ``with`` guarantees the nids file is closed even if reading the
        # map raises.
        with open(filename_nids) as infile:
            map_id2nid = AddaIO.readMapId2Nid(
                infile,
                storage=self.mConfig.get("files", "storage_nids", "memory"))

        # NOTE(review): this stream is not closed in this method; whether the
        # object returned by AddaIO.openStream needs an explicit close cannot
        # be determined from here.
        infile = AddaIO.openStream(self.mFilenameInputGraph)

        cadda.indexGraph(
            cadda.PairsDBNeighboursIterator(
                self.mGraphIterator(infile, map_id2nid, self.mLogger),
                self.mLogger),
            len(map_id2nid),
            self.mFilenameOutputGraph,
            self.mFilenameOutputIndex,
            self.mLogger)

        # Drop the local reference to the map explicitly.
        del map_id2nid
Example #2
0
    def applyMethod(self):
        """Assign nids to the input sequences and write them out.

        Iterates over the sequences in ``self.mFilenameInputFasta``. A
        sequence longer than ``self.mMaxSequenceLength`` is counted in
        ``self.mRemoved`` and skipped; a sequence whose hid (as returned by
        ``self.getHID``) was already seen is counted in ``self.mDuplicates``
        and skipped. Every remaining sequence gets the next nid (starting
        at 1), is written as a tab-separated row to ``self.mFilenameNids``
        and is added to the indexed fasta file
        ``self.mFilenameOutputFasta``.

        ``self.mInput`` and ``self.mOutput`` hold the number of sequences
        read and written, respectively.
        """
        self.mInput = 0
        self.mOutput = 0
        self.mRemoved = 0
        self.mDuplicates = 0

        # use existing fasta file
        iterator = FastaIterator(AddaIO.openStream(self.mFilenameInputFasta))
        fasta = IndexedFasta.IndexedFasta(self.mFilenameOutputFasta, "w")

        # Both outputs are closed in the ``finally`` clause so that neither
        # leaks when reading, hashing or writing a sequence raises.
        outfile = None
        try:
            outfile = self.openOutputStream(self.mFilenameNids)
            outfile.write("nid\tpid\thid\tlength\tsequence\n")

            nid = 1
            hids = set()

            for seq in iterator:

                self.mInput += 1
                if len(seq.sequence) > self.mMaxSequenceLength:
                    self.mRemoved += 1
                    continue

                hid = self.getHID(seq.sequence)
                if hid in hids:
                    self.mDuplicates += 1
                    continue

                hids.add(hid)
                outfile.write("%s\t%s\t%s\t%i\t%s\n" %
                              (nid, seq.pid, hid, len(seq.sequence),
                               seq.sequence))
                fasta.addSequence(nid, seq.sequence)
                nid += 1
                self.mOutput += 1
        finally:
            # Keep the original close order: fasta first, then the nids
            # table. The nested ``finally`` still closes the table if
            # closing the fasta file fails.
            try:
                fasta.close()
            finally:
                if outfile is not None:
                    outfile.close()
Example #3
0
    def applyMethod(self):
        """Index the input graph.

        Loads the id-to-nid map from the path returned by
        ``self.mConfig.get("files", "output_nids", "adda.nids")``, then
        passes a neighbours iterator over ``self.mFilenameInputGraph`` to
        ``cadda.indexGraph`` along with ``self.mFilenameOutputGraph`` and
        ``self.mFilenameOutputIndex``.
        """
        self.info("indexing of %s started" % self.mFilenameInputGraph)

        # Resolve the path a single time; it is used for both the log
        # message and the open() call.
        nids_path = self.mConfig.get("files", "output_nids", "adda.nids")
        self.info("loading map_id2nid from %s" % nids_path)

        storage = self.mConfig.get("files", "storage_nids", "memory")
        # The context manager closes the nids file even when reading the
        # map fails part-way through.
        with open(nids_path) as nids_file:
            map_id2nid = AddaIO.readMapId2Nid(nids_file, storage=storage)

        # NOTE(review): the graph stream is never closed in this method;
        # confirm whether AddaIO.openStream's return value needs closing.
        infile = AddaIO.openStream(self.mFilenameInputGraph)

        neighbours = cadda.PairsDBNeighboursIterator(
            self.mGraphIterator(infile, map_id2nid, self.mLogger),
            self.mLogger)
        cadda.indexGraph(neighbours,
                         len(map_id2nid),
                         self.mFilenameOutputGraph,
                         self.mFilenameOutputIndex,
                         self.mLogger)

        # Release the local reference to the map explicitly.
        del map_id2nid
Example #4
0
    def applyMethod(self):
        """Number the input sequences and write the nid table and fasta index.

        Sequences are read from ``self.mFilenameInputFasta``. Those longer
        than ``self.mMaxSequenceLength`` are skipped and counted in
        ``self.mRemoved``; those whose hid (from ``self.getHID``) has already
        been seen are skipped and counted in ``self.mDuplicates``. Each
        remaining sequence receives a consecutive nid starting at 1, is
        written as one tab-separated line to ``self.mFilenameNids`` and is
        added to the indexed fasta file ``self.mFilenameOutputFasta``.

        ``self.mInput`` counts all sequences read and ``self.mOutput`` those
        written.
        """
        self.mInput = 0
        self.mOutput = 0
        self.mRemoved = 0
        self.mDuplicates = 0

        # use existing fasta file
        iterator = FastaIterator(AddaIO.openStream(self.mFilenameInputFasta))
        fasta = IndexedFasta.IndexedFasta(self.mFilenameOutputFasta, "w")

        # ``outfile`` stays None until it has been opened, so the cleanup
        # below knows whether there is anything to close.
        outfile = None
        try:
            outfile = self.openOutputStream(self.mFilenameNids)
            outfile.write("nid\tpid\thid\tlength\tsequence\n")

            nid = 1
            hids = set()

            for seq in iterator:

                self.mInput += 1
                if len(seq.sequence) > self.mMaxSequenceLength:
                    self.mRemoved += 1
                    continue

                hid = self.getHID(seq.sequence)
                if hid in hids:
                    self.mDuplicates += 1
                    continue

                hids.add(hid)
                outfile.write(
                    "%s\t%s\t%s\t%i\t%s\n" %
                    (nid, seq.pid, hid, len(seq.sequence), seq.sequence))
                fasta.addSequence(nid, seq.sequence)
                nid += 1
                self.mOutput += 1
        finally:
            # Always release both outputs, in the same order as before
            # (fasta, then the nid table), even if the loop raised.
            try:
                fasta.close()
            finally:
                if outfile is not None:
                    outfile.close()