Ejemplo n.º 1
0
    def __init__(self, *args, **kwargs ):
        """Read file names from the configuration and select the graph iterator.

        All arguments are forwarded unchanged to ``AddaModuleBlock.__init__``.
        The input/output file names and the graph format are read from the
        ``files`` section of ``self.mConfig``; the log level and the report
        step are handed to ``cadda``.

        Raises:
            ValueError: if the configured ``graph_format`` is not one of
                ``pairsdb``, ``pairsdb-old``, ``simap`` or ``pairsdb-realign``.
        """
        AddaModuleBlock.__init__( self, *args, **kwargs )

        read_option = self.mConfig.get

        # input graph and the two files produced by this module
        self.mFilenameInputGraph = read_option( "files", "input_graph", "pairsdb_40x40.links.gz" )
        self.mFilenameOutputGraph = read_option( "files", "output_graph", "adda.graph" )
        self.mFilenameOutputIndex = read_option( "files", "output_index", "adda.graph.index" )

        cadda.setLogLevel( self.mLogLevel )
        cadda.setReportStep( read_option( "adda", "report_step", 1000 ) )
        cadda.dump_parameters()

        self.mFilenames = ( self.mFilenameOutputGraph, self.mFilenameOutputIndex )

        self.mAlignmentFormat = read_option( "files", "graph_format", "pairsdb" )

        # The values are thunks so that an iterator attribute is only looked
        # up for the format that was actually requested.
        iterator_lookup = {
            "pairsdb": lambda: cadda.PairsDBNeighbourIterator,
            "pairsdb-old": lambda: cadda.PairsDBNeighbourIteratorOldFormat,
            "simap": lambda: AddaIO.NeighbourIteratorSimap,
            "pairsdb-realign": lambda: AddaIO.NeighbourRecordPairsdbRealign,
        }

        graph_format = self.mAlignmentFormat
        if graph_format not in iterator_lookup:
            raise ValueError ("unknown record type %s" % graph_format)
        self.mGraphIterator = iterator_lookup[graph_format]()

        self.info("indexing graph in format %s" % (graph_format, ))
Ejemplo n.º 2
0
    def __init__(self, *args, **kwargs):
        """Initialise the block: file names, ``cadda`` settings, graph iterator.

        Positional and keyword arguments are passed straight through to
        ``AddaModuleBlock.__init__``.  File names and the graph format come
        from the ``files`` section of ``self.mConfig``; the report step comes
        from the ``adda`` section.

        Raises:
            ValueError: when ``graph_format`` names none of the supported
                formats (``pairsdb``, ``pairsdb-old``, ``simap``,
                ``pairsdb-realign``).
        """
        AddaModuleBlock.__init__(self, *args, **kwargs)

        def files_option(key, default):
            # every file-related setting lives in the "files" section
            return self.mConfig.get("files", key, default)

        self.mFilenameInputGraph = files_option("input_graph",
                                                "pairsdb_40x40.links.gz")
        self.mFilenameOutputGraph = files_option("output_graph", "adda.graph")
        self.mFilenameOutputIndex = files_option("output_index",
                                                 "adda.graph.index")

        cadda.setLogLevel(self.mLogLevel)
        report_step = self.mConfig.get("adda", "report_step", 1000)
        cadda.setReportStep(report_step)
        cadda.dump_parameters()

        self.mFilenames = (self.mFilenameOutputGraph,
                           self.mFilenameOutputIndex)

        self.mAlignmentFormat = files_option("graph_format", "pairsdb")

        # Ordered (format name, resolver) pairs.  Resolvers are called lazily
        # so only the iterator of the selected format is ever looked up.
        resolvers = (
            ("pairsdb", lambda: cadda.PairsDBNeighbourIterator),
            ("pairsdb-old", lambda: cadda.PairsDBNeighbourIteratorOldFormat),
            ("simap", lambda: AddaIO.NeighbourIteratorSimap),
            ("pairsdb-realign", lambda: AddaIO.NeighbourRecordPairsdbRealign),
        )
        for format_name, resolve in resolvers:
            if self.mAlignmentFormat == format_name:
                self.mGraphIterator = resolve()
                break
        else:
            raise ValueError("unknown record type %s" % self.mAlignmentFormat)

        self.info("indexing graph in format %s" % (self.mAlignmentFormat, ))
Ejemplo n.º 3
0
    def applyMethod(self ):
        """Run the iterative optimisation through ``cadda``.

        Returns immediately when ``self.isComplete()`` is true.  Otherwise the
        exponential parameters are read from the ``optimise`` section of
        ``self.mFilenameFit``, ``cadda`` is configured, and
        ``cadda.optimise_iteration()`` is called up to ``self.mMaxIterations``
        times.  After every iteration the partitions are saved to
        ``self.mFilenameDomains`` and the three progress plots are redrawn.

        The loop stops early when an iteration's improvement drops below
        ``self.mMinAbsImprovement``, or when its improvement relative to the
        best previous iteration drops below ``self.mMinRelImprovement``.
        ``cadda.optimise_destroy()`` is always called once initialisation has
        been attempted, even if an iteration raises.
        """

        if self.isComplete():
            return

        self.info( "setting parameters" )

        config = AddaIO.ConfigParser()
        config.read( self.mFilenameFit )
        self.mExponentialF = float( config.get( "optimise", "exponential_f" ) )
        self.mExponentialE = float( config.get( "optimise", "exponential_e" ) )

        cadda.setFilenameGraph( self.mFilenameGraph )
        cadda.setFilenameIndex( self.mFilenameIndex )
        cadda.setFilenameTransfers( self.mFilenameTransfers )
        cadda.setFilenameDomains( self.mFilenameDomains )
        cadda.setLogLevel( self.mLogLevel )
        cadda.setReportStep( 1000 )
        cadda.setMaxIterations( self.mMaxIterations )
        cadda.setResolution( self.mResolution )
        cadda.setExponentialF( self.mExponentialF )
        cadda.setExponentialE( self.mExponentialE )

        self.info( "optimisation started" )

        cadda.dump_parameters()

        retval = cadda.optimise_initialise()

        # A failed initialisation is only reported; the run continues as in
        # the original implementation.
        if retval == 0:
            self.warn( "initialisation failed" )
        else:
            self.info( "initialisation success" )

        improvements = []
        # the domain history starts with the number of sequences
        domains = [ self.mNSequences ]

        try:
            for iteration in range( self.mMaxIterations ):

                self.info( "iteration %i: started" % iteration)

                started = time.time()

                improvement = cadda.optimise_iteration()

                # Relative improvement is measured against the best previous
                # iteration.  Without a usable reference (first iteration, or
                # a best previous improvement of exactly 0, which would raise
                # ZeroDivisionError) it defaults to 1.
                best_previous = 0
                if improvements:
                    best_previous = max(improvements)
                if best_previous != 0:
                    rel_improvement = improvement / best_previous
                else:
                    rel_improvement = 1

                ndomains = cadda.optimise_get_num_partitions()

                self.info( "iteration %i: finished in %i seconds: improvement=%f, relative improvement=%f, ndomains=%i" %
                           (iteration,
                            time.time() - started,
                            improvement,
                            rel_improvement,
                            ndomains) )

                if cadda.optimise_save_partitions( self.mFilenameDomains ):
                    self.info( "domains saved to %s" % self.mFilenameDomains)
                else:
                    self.warn( "saving domains to %s failed" % self.mFilenameDomains)

                improvements.append( improvement )
                domains.append( ndomains )

                self.plotProgress( improvements,
                                   self.mOutputFilenameProgressImprovement,
                                   "progress: improvement" )
                self.plotProgress( domains,
                                   self.mOutputFilenameProgressDomains,
                                   "progress: domains" )
                # A real list (not a lazy map object) so the plotting code
                # gets the same kind of sequence on Python 2 and Python 3.
                domains_per_sequence = [ float( n ) / self.mNSequences for n in domains ]
                self.plotProgress( domains_per_sequence,
                                   self.mOutputFilenameProgressDomainsPerSequence,
                                   "progress: domains per sequence" )

                if improvement < self.mMinAbsImprovement:
                    self.info( "optimisation stopped because absolute improvement less than %f" %
                               (self.mMinAbsImprovement) )
                    break

                if rel_improvement < self.mMinRelImprovement:
                    self.info( "optimisation stopped because relative improvement less than %f" %
                               (self.mMinRelImprovement) )
                    break
            else:
                # the loop ran to completion without hitting a stop criterion
                self.info( "optimisation stopped because maximum iteration %i reached" %
                           (self.mMaxIterations) )
        finally:
            # Pair optimise_initialise() with optimise_destroy() even when an
            # iteration raised.  NOTE(review): presumably this releases state
            # held by cadda -- confirm against the cadda implementation.
            retval = cadda.optimise_destroy()

            if retval == 0:
                self.warn( "destruction failed" )
            else:
                self.info( "destruction success" )