def startUp(self):
    """Open the output stream and load the id<->nid lookup maps.

    Does nothing if this step has already completed.  Reads the
    id -> nid map from ``self.mFilenamesNids`` and builds the inverse
    map (nid -> id) for reverse lookups.
    """
    if self.isComplete():
        return
    self.mOutfile = self.openOutputStream(self.mFilenameOutput)
    # fix: close the nids file after reading instead of leaking the handle
    infile = open(self.mFilenamesNids, "r")
    try:
        self.mMapId2Nid = AddaIO.readMapId2Nid(infile)
    finally:
        infile.close()
    # inverse lookup table: nid -> id
    self.mMapNid2Id = dict(
        (nid, ident) for (ident, nid) in self.mMapId2Nid.iteritems())
def startUp(self):
    """Open the domain and family output streams and load the id<->nid maps.

    Does nothing if this step has already completed.  Reads the
    id -> nid map from ``self.mFilenamesNids`` and builds the inverse
    map (nid -> id) for reverse lookups.
    """
    if self.isComplete():
        return
    self.mOutfile = self.openOutputStream(self.mFilenameDomains)
    self.mOutfileFamilies = self.openOutputStream(self.mFilenameFamilies)
    # fix: close the nids file after reading instead of leaking the handle
    infile = open(self.mFilenamesNids, "r")
    try:
        self.mMapId2Nid = AddaIO.readMapId2Nid(infile)
    finally:
        infile.close()
    # inverse lookup table: nid -> id
    self.mMapNid2Id = dict(
        (nid, ident) for (ident, nid) in self.mMapId2Nid.iteritems())
def merge(self):
    '''merge several runs.

    simply concatenate all files and reindex

    Returns True on success, False if any chunk's part file is missing.

    Raises ValueError if called with a single chunk or if the merged
    output file already exists.
    '''
    f = self.mFilenameOutputGraph
    if self.mNumChunks == 1:
        raise ValueError("merge called with only one chunk")
    if os.path.exists(f):
        raise ValueError("file %s already exists - no merging" % f)
    self.info("merging file %s from %i chunks" % (f, self.mNumChunks))
    # check if all parts have finished and are present
    ff = []
    for chunk in range(self.mNumChunks):
        fn = f + self.getSlice(chunk)
        if not os.path.exists(fn):
            self.info("file %s is not present - merging aborted" % fn)
            return False
        ff.append(fn)
    self.info("all files present")
    ff = " ".join(ff)
    # NOTE(review): shell command built from config-derived filenames;
    # assumes filenames contain no shell metacharacters
    self.execute("cat %s > %s" % (ff, f))
    self.info("rebuilding index")
    # fix: use the ("files", "output_nids"/"storage_nids") config keys used
    # everywhere else, instead of the inconsistent ("output", "nids") /
    # ("adda", "storage_nids") sections
    self.info("loading map_id2nid from %s" %
              self.mConfig.get("files", "output_nids", "adda.nids"))
    infile = open(self.mConfig.get("files", "output_nids", "adda.nids"))
    try:
        map_id2nid = AddaIO.readMapId2Nid(
            infile,
            storage=self.mConfig.get("files", "storage_nids", "memory"))
    finally:
        infile.close()
    self.info("starting the indexing")
    cadda.reindexGraph(len(map_id2nid),
                       self.mFilenameOutputGraph,
                       self.mFilenameOutputIndex,
                       self.mLogger)
    return True
def merge(self):
    '''merge several runs.

    simply concatenate all files and reindex
    '''
    target = self.mFilenameOutputGraph
    # guard clauses: a single chunk needs no merge, and we refuse to
    # overwrite an existing merged file
    if self.mNumChunks == 1:
        raise ValueError("merge called with only one chunk")
    if os.path.exists(target):
        raise ValueError("file %s already exists - no merging" % target)
    self.info("merging file %s from %i chunks" % (target, self.mNumChunks))
    # every per-chunk part must exist before we concatenate
    parts = []
    for chunk in range(self.mNumChunks):
        part = target + self.getSlice(chunk)
        if not os.path.exists(part):
            self.info("file %s is not present - merging aborted" % part)
            return False
        parts.append(part)
    self.info("all files present")
    self.execute("cat %s > %s" % (" ".join(parts), target))
    self.info("rebuilding index")
    self.info("loading map_id2nid from %s" %
              self.mConfig.get("files", "output_nids", "adda.nids"))
    infile = open(self.mConfig.get("files", "output_nids", "adda.nids"))
    map_id2nid = AddaIO.readMapId2Nid(
        infile,
        storage=self.mConfig.get("files", "storage_nids", "memory"))
    infile.close()
    cadda.reindexGraph(len(map_id2nid),
                       self.mFilenameOutputGraph,
                       self.mFilenameOutputIndex,
                       self.mLogger)
    return True
def applyMethod(self):
    """index the graph.

    Loads the id -> nid map, then streams the input graph through
    ``cadda.indexGraph`` to build the output graph file and its index.
    """
    self.info("indexing of %s started" % self.mFilenameInputGraph)
    self.info("loading map_id2nid from %s" %
              self.mConfig.get("files", "output_nids", "adda.nids"))
    infile = open(self.mConfig.get("files", "output_nids", "adda.nids"))
    try:
        map_id2nid = AddaIO.readMapId2Nid(
            infile,
            storage=self.mConfig.get("files", "storage_nids", "memory"))
    finally:
        infile.close()
    # fix: close the graph stream when done instead of leaking the handle.
    # NOTE(review): assumes AddaIO.openStream returns a closeable
    # file-like object - confirm
    graphfile = AddaIO.openStream(self.mFilenameInputGraph)
    try:
        cadda.indexGraph(
            cadda.PairsDBNeighboursIterator(
                self.mGraphIterator(graphfile, map_id2nid, self.mLogger),
                self.mLogger),
            len(map_id2nid),
            self.mFilenameOutputGraph,
            self.mFilenameOutputIndex,
            self.mLogger)
    finally:
        graphfile.close()
    # release the potentially large map as soon as indexing is finished
    del map_id2nid
def applyMethod(self):
    """index the graph.

    Loads the id -> nid map and feeds the input graph through
    ``cadda.indexGraph``.
    """
    self.info("indexing of %s started" % self.mFilenameInputGraph)
    self.info("loading map_id2nid from %s" %
              self.mConfig.get("files", "output_nids", "adda.nids"))
    mapfile = open(self.mConfig.get("files", "output_nids", "adda.nids"))
    map_id2nid = AddaIO.readMapId2Nid(
        mapfile,
        storage=self.mConfig.get("files", "storage_nids", "memory"))
    mapfile.close()
    graph_stream = AddaIO.openStream(self.mFilenameInputGraph)
    # wrap the raw graph iterator in the neighbours adaptor expected
    # by the C indexing routine
    neighbours = cadda.PairsDBNeighboursIterator(
        self.mGraphIterator(graph_stream, map_id2nid, self.mLogger),
        self.mLogger)
    cadda.indexGraph(neighbours,
                     len(map_id2nid),
                     self.mFilenameOutputGraph,
                     self.mFilenameOutputIndex,
                     self.mLogger)
    # drop the large map once indexing is done
    del map_id2nid