Beispiel #1
0
    def startUp(self):
        """Open the output stream and load the id/nid lookup tables.

        Does nothing when ``self.isComplete()`` returns true. Otherwise sets:

        * ``self.mOutfile`` - stream opened on ``self.mFilenameOutput``
        * ``self.mMapId2Nid`` - map read from ``self.mFilenamesNids``
        * ``self.mMapNid2Id`` - the same map with keys and values swapped
        """
        if self.isComplete():
            return
        self.mOutfile = self.openOutputStream(self.mFilenameOutput)

        # Use a context manager so the nids file is closed even if parsing
        # fails; previously the handle was never closed explicitly.
        with open(self.mFilenamesNids, "r") as infile:
            self.mMapId2Nid = AddaIO.readMapId2Nid(infile)

        # Invert the map. If two keys share a value, the one iterated last
        # wins (plain dict construction semantics).
        self.mMapNid2Id = dict(
            (value, key) for key, value in self.mMapId2Nid.iteritems())
Beispiel #2
0
    def startUp(self):
        """Open both output streams and load the id/nid lookup tables.

        Does nothing when ``self.isComplete()`` returns true. Otherwise sets:

        * ``self.mOutfile`` - stream opened on ``self.mFilenameDomains``
        * ``self.mOutfileFamilies`` - stream opened on
          ``self.mFilenameFamilies``
        * ``self.mMapId2Nid`` - map read from ``self.mFilenamesNids``
        * ``self.mMapNid2Id`` - the same map with keys and values swapped
        """
        if self.isComplete():
            return

        self.mOutfile = self.openOutputStream(self.mFilenameDomains)
        self.mOutfileFamilies = self.openOutputStream(self.mFilenameFamilies)

        # Use a context manager so the nids file is closed even if parsing
        # fails; previously the handle was never closed explicitly.
        with open(self.mFilenamesNids, "r") as infile:
            self.mMapId2Nid = AddaIO.readMapId2Nid(infile)

        # Invert the map. If two keys share a value, the one iterated last
        # wins (plain dict construction semantics).
        self.mMapNid2Id = dict(
            (value, key) for key, value in self.mMapId2Nid.iteritems())
Beispiel #3
0
    def merge(self):
        '''Merge the graph files of several chunked runs into one graph.

        The per-chunk files (output graph filename plus the suffix returned
        by ``self.getSlice(chunk)``) are concatenated into
        ``self.mFilenameOutputGraph`` and ``cadda.reindexGraph`` is then
        called on the combined file.

        Returns:
            True after concatenating and reindexing; False if any chunk file
            is missing (in that case nothing has been written yet).

        Raises:
            ValueError: if ``self.mNumChunks`` is 1, or if the output graph
                file already exists.
        '''
        target = self.mFilenameOutputGraph

        if self.mNumChunks == 1:
            raise ValueError("merge called with only one chunk")

        if os.path.exists(target):
            raise ValueError("file %s already exists - no merging" % target)

        self.info("merging file %s from %i chunks" % (target, self.mNumChunks))

        # check if all parts have finished and are present
        parts = []
        for chunk in range(self.mNumChunks):
            part = target + self.getSlice(chunk)
            if not os.path.exists(part):
                self.info("file %s is not present - merging aborted" % part)
                return False
            parts.append(part)

        self.info("all files present")

        # NOTE(review): the filenames are interpolated unquoted into a command
        # line that relies on ">" redirection, so paths containing spaces or
        # shell metacharacters would break it - confirm paths are controlled.
        self.execute("cat %s > %s" % (" ".join(parts), target))

        self.info("rebuilding index")

        # Look the filename up once so the log message and the file that is
        # actually opened cannot diverge.
        filename_nids = self.mConfig.get("output", "nids", "adda.nids")
        self.info("loading map_id2nid from %s" % filename_nids)

        # The context manager closes the file even if reading the map fails.
        with open(filename_nids) as infile:
            map_id2nid = AddaIO.readMapId2Nid(
                infile,
                storage=self.mConfig.get("adda", "storage_nids", "memory"))

        self.info("starting the indexing")

        # Only the number of entries in the map is passed on to the indexer.
        cadda.reindexGraph(
            len(map_id2nid),
            self.mFilenameOutputGraph,
            self.mFilenameOutputIndex,
            self.mLogger)

        return True
Beispiel #4
0
    def merge(self):
        '''Merge the graph files of several chunked runs into one graph.

        The per-chunk files (output graph filename plus the suffix returned
        by ``self.getSlice(chunk)``) are concatenated into
        ``self.mFilenameOutputGraph`` and ``cadda.reindexGraph`` is then
        called on the combined file.

        Returns:
            True after concatenating and reindexing; False if any chunk file
            is missing (in that case nothing has been written yet).

        Raises:
            ValueError: if ``self.mNumChunks`` is 1, or if the output graph
                file already exists.
        '''
        target = self.mFilenameOutputGraph

        if self.mNumChunks == 1:
            raise ValueError("merge called with only one chunk")

        if os.path.exists(target):
            raise ValueError("file %s already exists - no merging" % target)

        self.info("merging file %s from %i chunks" % (target, self.mNumChunks))

        # check if all parts have finished and are present
        parts = []
        for chunk in range(self.mNumChunks):
            part = target + self.getSlice(chunk)
            if not os.path.exists(part):
                self.info("file %s is not present - merging aborted" % part)
                return False
            parts.append(part)

        self.info("all files present")

        # NOTE(review): the filenames are interpolated unquoted into a command
        # line that relies on ">" redirection, so paths containing spaces or
        # shell metacharacters would break it - confirm paths are controlled.
        self.execute("cat %s > %s" % (" ".join(parts), target))

        self.info("rebuilding index")

        # Look the filename up once so the log message and the file that is
        # actually opened cannot diverge.
        filename_nids = self.mConfig.get("files", "output_nids", "adda.nids")
        self.info("loading map_id2nid from %s" % filename_nids)

        # The context manager closes the file even if reading the map fails.
        with open(filename_nids) as infile:
            map_id2nid = AddaIO.readMapId2Nid(
                infile,
                storage=self.mConfig.get("files", "storage_nids", "memory"))

        # Only the number of entries in the map is passed on to the indexer.
        cadda.reindexGraph(len(map_id2nid), self.mFilenameOutputGraph,
                           self.mFilenameOutputIndex, self.mLogger)

        return True
Beispiel #5
0
    def applyMethod(self):
        """Index the input graph.

        Reads the id-to-nid map from the nids file named in the configuration
        (section "files", option "output_nids"), opens the input graph with
        ``AddaIO.openStream`` and hands it, wrapped in the configured graph
        iterator, to ``cadda.indexGraph`` together with the output graph and
        index filenames.
        """
        self.info("indexing of %s started" % self.mFilenameInputGraph)

        # Look the filename up once so the log message and the file that is
        # actually opened cannot diverge.
        filename_nids = self.mConfig.get("files", "output_nids", "adda.nids")
        self.info("loading map_id2nid from %s" % filename_nids)

        # The context manager closes the file even if reading the map fails.
        with open(filename_nids) as nids_file:
            map_id2nid = AddaIO.readMapId2Nid(
                nids_file,
                storage=self.mConfig.get("files", "storage_nids", "memory"))

        # NOTE(review): this stream is not closed in this method; whether the
        # caller or AddaIO owns it cannot be seen from here - confirm.
        infile = AddaIO.openStream(self.mFilenameInputGraph)

        neighbours = cadda.PairsDBNeighboursIterator(
            self.mGraphIterator(infile, map_id2nid, self.mLogger),
            self.mLogger)

        # Besides the iterator, only the number of map entries is passed on.
        cadda.indexGraph(
            neighbours,
            len(map_id2nid),
            self.mFilenameOutputGraph,
            self.mFilenameOutputIndex,
            self.mLogger)

        # Drop the local reference to the (possibly large) map.
        del map_id2nid
Beispiel #6
0
    def applyMethod(self):
        """Index the input graph.

        Reads the id-to-nid map from the nids file named in the configuration
        (section "files", option "output_nids"), opens the input graph with
        ``AddaIO.openStream`` and hands it, wrapped in the configured graph
        iterator, to ``cadda.indexGraph`` together with the output graph and
        index filenames.
        """
        self.info("indexing of %s started" % self.mFilenameInputGraph)

        # Look the filename up once so the log message and the file that is
        # actually opened cannot diverge.
        filename_nids = self.mConfig.get("files", "output_nids", "adda.nids")
        self.info("loading map_id2nid from %s" % filename_nids)

        # The context manager closes the file even if reading the map fails.
        with open(filename_nids) as nids_file:
            map_id2nid = AddaIO.readMapId2Nid(
                nids_file,
                storage=self.mConfig.get("files", "storage_nids", "memory"))

        # NOTE(review): this stream is not closed in this method; whether the
        # caller or AddaIO owns it cannot be seen from here - confirm.
        infile = AddaIO.openStream(self.mFilenameInputGraph)

        neighbours = cadda.PairsDBNeighboursIterator(
            self.mGraphIterator(infile, map_id2nid, self.mLogger),
            self.mLogger)

        # Besides the iterator, only the number of map entries is passed on.
        cadda.indexGraph(
            neighbours,
            len(map_id2nid),
            self.mFilenameOutputGraph,
            self.mFilenameOutputIndex,
            self.mLogger)

        # Drop the local reference to the (possibly large) map.
        del map_id2nid