def __init__(self, *args, **kwargs):
    """set up file names, cadda reporting and the graph iterator.

    All positional and keyword arguments are passed on unchanged to
    ``AddaModuleBlock.__init__``.

    The value stored in ``self.mGraphIterator`` depends on the
    ``graph_format`` option of the ``files`` section (default
    ``pairsdb``).

    Raises ValueError if ``graph_format`` is none of ``pairsdb``,
    ``pairsdb-old``, ``simap`` or ``pairsdb-realign``.
    """
    AddaModuleBlock.__init__(self, *args, **kwargs)

    option = self.mConfig.get

    # input/output locations, each with a built-in fallback
    self.mFilenameInputGraph = option(
        "files", "input_graph", "pairsdb_40x40.links.gz")
    self.mFilenameOutputGraph = option(
        "files", "output_graph", "adda.graph")
    self.mFilenameOutputIndex = option(
        "files", "output_index", "adda.graph.index")

    # push logging settings into the cadda extension and echo them
    cadda.setLogLevel(self.mLogLevel)
    report_step = option("adda", "report_step", 1000)
    cadda.setReportStep(report_step)
    cadda.dump_parameters()

    self.mFilenames = (self.mFilenameOutputGraph,
                       self.mFilenameOutputIndex)

    self.mAlignmentFormat = option("files", "graph_format", "pairsdb")

    # Each entry is resolved lazily so that only the attribute belonging
    # to the selected format is ever looked up.
    known_formats = (
        ("pairsdb", lambda: cadda.PairsDBNeighbourIterator),
        ("pairsdb-old", lambda: cadda.PairsDBNeighbourIteratorOldFormat),
        ("simap", lambda: AddaIO.NeighbourIteratorSimap),
        ("pairsdb-realign", lambda: AddaIO.NeighbourRecordPairsdbRealign),
    )
    for format_name, resolve in known_formats:
        if self.mAlignmentFormat == format_name:
            self.mGraphIterator = resolve()
            break
    else:
        raise ValueError("unknown record type %s" % self.mAlignmentFormat)

    self.info("indexing graph in format %s" % (self.mAlignmentFormat, ))
def __init__(self, *args, **kwargs):
    """initialise the block: file names, cadda settings, graph iterator.

    Arguments are forwarded as-is to ``AddaModuleBlock.__init__``.

    Reads ``input_graph``, ``output_graph``, ``output_index`` and
    ``graph_format`` from the ``files`` section and ``report_step`` from
    the ``adda`` section of ``self.mConfig``.

    Raises ValueError for a ``graph_format`` that is not handled here.
    """
    AddaModuleBlock.__init__(self, *args, **kwargs)

    cfg_get = self.mConfig.get

    input_graph = cfg_get("files", "input_graph", "pairsdb_40x40.links.gz")
    self.mFilenameInputGraph = input_graph
    output_graph = cfg_get("files", "output_graph", "adda.graph")
    self.mFilenameOutputGraph = output_graph
    output_index = cfg_get("files", "output_index", "adda.graph.index")
    self.mFilenameOutputIndex = output_index

    # configure the cadda extension, then print its parameters
    cadda.setLogLevel(self.mLogLevel)
    cadda.setReportStep(cfg_get("adda", "report_step", 1000))
    cadda.dump_parameters()

    self.mFilenames = (output_graph, output_index)

    self.mAlignmentFormat = cfg_get("files", "graph_format", "pairsdb")

    # format name -> deferred attribute lookup; nothing is resolved
    # until the matching entry has been picked
    lookups = [
        ("pairsdb", lambda: cadda.PairsDBNeighbourIterator),
        ("pairsdb-old", lambda: cadda.PairsDBNeighbourIteratorOldFormat),
        ("simap", lambda: AddaIO.NeighbourIteratorSimap),
        ("pairsdb-realign", lambda: AddaIO.NeighbourRecordPairsdbRealign),
    ]
    selected = [fetch for key, fetch in lookups
                if self.mAlignmentFormat == key]
    if not selected:
        raise ValueError("unknown record type %s" % self.mAlignmentFormat)
    self.mGraphIterator = selected[0]()

    self.info("indexing graph in format %s" % (self.mAlignmentFormat, ))
def applyMethod(self):
    """run the iterative optimisation and save the resulting domains.

    Returns immediately if ``self.isComplete()`` is true. Otherwise:

    1. ``exponential_f`` and ``exponential_e`` are read from the
       ``optimise`` section of ``self.mFilenameFit``.
    2. The ``cadda`` extension is configured and
       ``cadda.optimise_initialise()`` is called.
    3. ``cadda.optimise_iteration()`` is called up to
       ``self.mMaxIterations`` times. After each iteration the
       partitions are saved to ``self.mFilenameDomains`` and the three
       progress plots are redrawn.
    4. ``cadda.optimise_destroy()`` is called, also when an iteration
       raised an exception.

    The loop ends early when the improvement of an iteration is below
    ``self.mMinAbsImprovement``, or when its ratio to the largest
    earlier improvement is below ``self.mMinRelImprovement``.
    """
    if self.isComplete():
        return

    self.info("setting parameters")

    config = AddaIO.ConfigParser()
    config.read(self.mFilenameFit)
    self.mExponentialF = float(config.get("optimise", "exponential_f"))
    self.mExponentialE = float(config.get("optimise", "exponential_e"))

    cadda.setFilenameGraph(self.mFilenameGraph)
    cadda.setFilenameIndex(self.mFilenameIndex)
    cadda.setFilenameTransfers(self.mFilenameTransfers)
    cadda.setFilenameDomains(self.mFilenameDomains)
    cadda.setLogLevel(self.mLogLevel)
    cadda.setReportStep(1000)
    cadda.setMaxIterations(self.mMaxIterations)
    cadda.setResolution(self.mResolution)
    cadda.setExponentialF(self.mExponentialF)
    cadda.setExponentialE(self.mExponentialE)

    self.info("optimisation started")

    cadda.dump_parameters()

    retval = cadda.optimise_initialise()

    # Everything after initialisation runs under try/finally so that
    # optimise_destroy() is reached even if an iteration, saving or
    # plotting raises.
    try:
        # NOTE(review): a failed initialisation is only logged and the
        # iterations below still run - confirm this is intended.
        if retval == 0:
            self.warn("initialisation failed")
        else:
            self.info("initialisation success")

        improvements = []
        # the domain history starts with the number of sequences
        domains = [self.mNSequences]

        for iteration in range(self.mMaxIterations):

            self.info("iteration %i: started" % iteration)

            t = time.time()

            improvement = cadda.optimise_iteration()

            # Relative to the largest improvement seen so far; the first
            # iteration has nothing to compare against and counts as 1.
            # NOTE(review): raises ZeroDivisionError if the largest
            # earlier improvement is exactly 0.
            if improvements:
                rel_improvement = improvement / max(improvements)
            else:
                rel_improvement = 1

            ndomains = cadda.optimise_get_num_partitions()

            self.info(
                "iteration %i: finished in %i seconds: improvement=%f, "
                "relative improvement=%f, ndomains=%i" %
                (iteration, time.time() - t, improvement,
                 rel_improvement, ndomains))

            # save after every iteration
            if cadda.optimise_save_partitions(self.mFilenameDomains):
                self.info("domains saved to %s" % self.mFilenameDomains)
            else:
                self.warn("saving domains to %s failed" % self.mFilenameDomains)

            improvements.append(improvement)
            domains.append(ndomains)

            self.plotProgress(improvements,
                              self.mOutputFilenameProgressImprovement,
                              "progress: improvement")
            self.plotProgress(domains,
                              self.mOutputFilenameProgressDomains,
                              "progress: domains")
            # A real list rather than map(): on Python 3 map() returns a
            # one-shot iterator without len() or indexing.
            self.plotProgress([float(n) / self.mNSequences for n in domains],
                              self.mOutputFilenameProgressDomainsPerSequence,
                              "progress: domains per sequence")

            if improvement < self.mMinAbsImprovement:
                self.info(
                    "optimisation stopped because absolute improvement less than %f" %
                    (self.mMinAbsImprovement))
                break

            if rel_improvement < self.mMinRelImprovement:
                self.info(
                    "optimisation stopped because relative improvement less than %f" %
                    (self.mMinRelImprovement))
                break
        else:
            # loop ran to exhaustion without hitting either stop criterion
            self.info(
                "optimisation stopped because maximum iteration %i reached" %
                (self.mMaxIterations))
    finally:
        retval = cadda.optimise_destroy()

        if retval == 0:
            self.warn("destruction failed")
        else:
            self.info("destruction success")