Example #1
0
    def __init__(self, *args, **kwargs ):
        """Configure file names, cadda settings and the graph iterator.

        All positional and keyword arguments are forwarded unchanged to
        ``AddaModuleBlock.__init__``.

        Raises:
            ValueError: if the configured ``graph_format`` is not one of
                ``pairsdb``, ``pairsdb-old``, ``simap`` or
                ``pairsdb-realign``.
        """
        AddaModuleBlock.__init__( self, *args, **kwargs )

        config = self.mConfig

        # File names come from the "files" section, with built-in defaults.
        self.mFilenameInputGraph = config.get( "files", "input_graph", "pairsdb_40x40.links.gz" )
        self.mFilenameOutputGraph = config.get( "files", "output_graph", "adda.graph" )
        self.mFilenameOutputIndex = config.get( "files", "output_index", "adda.graph.index" )

        # Hand log level and report step to cadda, then let it dump its parameters.
        cadda.setLogLevel( self.mLogLevel )
        cadda.setReportStep( config.get( "adda", "report_step", 1000 ) )
        cadda.dump_parameters()

        self.mFilenames = ( self.mFilenameOutputGraph, self.mFilenameOutputIndex )

        graph_format = config.get( "files", "graph_format", "pairsdb" )
        self.mAlignmentFormat = graph_format

        # The values are thunks so that only the iterator belonging to the
        # selected format is actually looked up on its module.
        resolvers = {
            "pairsdb": lambda: cadda.PairsDBNeighbourIterator,
            "pairsdb-old": lambda: cadda.PairsDBNeighbourIteratorOldFormat,
            "simap": lambda: AddaIO.NeighbourIteratorSimap,
            "pairsdb-realign": lambda: AddaIO.NeighbourRecordPairsdbRealign,
        }
        if graph_format not in resolvers:
            raise ValueError ("unknown record type %s" % graph_format)
        self.mGraphIterator = resolvers[graph_format]()

        self.info("indexing graph in format %s" % (graph_format, ))
Example #2
0
    def applyMethod(self ):
        """Sort the domain graph as part of minimum spanning tree construction.

        Returns immediately if ``self.isComplete()`` is true.  Otherwise a new
        temporary directory is created below ``self.mTemporaryDirectory`` and
        ``self.mFilenameDomainGraph`` is piped through ``gunzip | sort | gzip``
        (numeric sort on the third field) into a file called ``sorted`` in
        that directory.

        Problems while sorting (non-zero exit status, termination by a signal,
        or an ``OSError`` raised when launching the command) are reported via
        ``self.warn``; they are not raised.
        """
        
        if self.isComplete(): 
            return

        self.info( "construction of minimum spanning tree started" )
                
        cadda.dump_parameters()
        
        tmpdir = tempfile.mkdtemp( dir = self.mTemporaryDirectory )
        tmpfile = os.path.join( tmpdir, "sorted" )

        # NOTE(review): tmpdir has just been created by mkdtemp, so this check
        # is expected to always pass; it is kept as in the original code.
        if not os.path.exists( tmpfile ):
            # sort also uses tmpdir for its own temporary files (-T).
            # NOTE(review): the paths are interpolated into the shell command
            # unquoted, so they must not contain shell metacharacters.
            statement = "gunzip < %s | sort -T%s -k3,3n | gzip > %s" % ( self.mFilenameDomainGraph, 
                                                                         tmpdir,
                                                                         tmpfile ) 
        
            self.info( "sorting started" )
                        
            # "except ... as ..." is valid on Python 2.6+ and Python 3; the
            # former "except OSError, e" form is a syntax error on Python 3.
            try:
                retcode = subprocess.call( statement , shell=True)
            except OSError as e:
                self.warn( "sorting failed with message: %s" % (e) )
            else:
                if retcode < 0:
                    # a negative return code is the negated signal number
                    self.warn( "sorting was terminated by signal %i" % (-retcode) )
                elif retcode > 0:
                    self.warn( "sorting returned %i" % (retcode) )
            
            self.info( "sorting finished" )
Example #3
0
    def __init__(self, *args, **kwargs):
        """Read file names from the configuration and pick the graph iterator.

        Positional and keyword arguments are passed straight through to
        ``AddaModuleBlock.__init__``.

        Raises:
            ValueError: if ``graph_format`` in the ``files`` section is not
                ``pairsdb``, ``pairsdb-old``, ``simap`` or
                ``pairsdb-realign``.
        """
        AddaModuleBlock.__init__(self, *args, **kwargs)

        lookup = self.mConfig.get

        self.mFilenameInputGraph = lookup("files", "input_graph",
                                          "pairsdb_40x40.links.gz")
        self.mFilenameOutputGraph = lookup("files", "output_graph",
                                           "adda.graph")
        self.mFilenameOutputIndex = lookup("files", "output_index",
                                           "adda.graph.index")

        # cadda receives the log level and report step before it dumps
        # its parameters.
        cadda.setLogLevel(self.mLogLevel)
        cadda.setReportStep(lookup("adda", "report_step", 1000))
        cadda.dump_parameters()

        self.mFilenames = (self.mFilenameOutputGraph,
                           self.mFilenameOutputIndex)

        fmt = lookup("files", "graph_format", "pairsdb")
        self.mAlignmentFormat = fmt

        # Each candidate carries a thunk, so an iterator attribute is only
        # resolved for the format that actually matches.
        candidates = (
            ("pairsdb", lambda: cadda.PairsDBNeighbourIterator),
            ("pairsdb-old", lambda: cadda.PairsDBNeighbourIteratorOldFormat),
            ("simap", lambda: AddaIO.NeighbourIteratorSimap),
            ("pairsdb-realign", lambda: AddaIO.NeighbourRecordPairsdbRealign),
        )
        for name, resolve in candidates:
            if fmt == name:
                self.mGraphIterator = resolve()
                break
        else:
            # no candidate matched the configured format
            raise ValueError("unknown record type %s" % fmt)

        self.info("indexing graph in format %s" % (fmt, ))
Example #4
0
    def applyMethod(self):
        """Sort the domain graph for minimum spanning tree construction.

        Does nothing when ``self.isComplete()`` is true.  Otherwise creates a
        fresh temporary directory under ``self.mTemporaryDirectory`` and runs
        a ``gunzip | sort | gzip`` shell pipeline that sorts
        ``self.mFilenameDomainGraph`` numerically on its third field into
        the file ``sorted`` inside that directory.

        Sorting problems (non-zero exit status, termination by a signal, or
        an ``OSError`` when launching the command) are logged with
        ``self.warn`` and never raised.
        """

        if self.isComplete():
            return

        self.info("construction of minimum spanning tree started")

        cadda.dump_parameters()

        tmpdir = tempfile.mkdtemp(dir=self.mTemporaryDirectory)
        tmpfile = os.path.join(tmpdir, "sorted")

        # NOTE(review): mkdtemp has just created tmpdir, so tmpfile is not
        # expected to exist yet; the check is kept from the original code.
        if not os.path.exists(tmpfile):
            # tmpdir doubles as sort's scratch directory (-T).
            # NOTE(review): paths go into the shell command unquoted and
            # therefore must not contain shell metacharacters.
            statement = "gunzip < %s | sort -T%s -k3,3n | gzip > %s" % (
                self.mFilenameDomainGraph, tmpdir, tmpfile)

            self.info("sorting started")

            # "except OSError as e" replaces the Python-2-only
            # "except OSError, e"; it is accepted by Python 2.6+ and 3.
            try:
                retcode = subprocess.call(statement, shell=True)
            except OSError as e:
                self.warn("sorting failed with message: %s" % (e))
            else:
                if retcode < 0:
                    # negative return code: negated signal number
                    self.warn("sorting was terminated by signal %i" %
                              (-retcode))
                elif retcode > 0:
                    self.warn("sorting returned %i" % (retcode))

            self.info("sorting finished")
Example #5
0
    def applyMethod(self ):
        """Convert the sequence graph into the domain graph via cadda.

        Returns immediately if ``self.isComplete()`` is true.  Otherwise
        ``cadda.convert`` is called with ``self.mFilenameDomainGraph``; a
        return value of 0 is logged as a failure, anything else as success.
        """
        if self.isComplete():
            return

        self.info( "conversion of sequence graph to domain graph started" )
        cadda.dump_parameters()

        status = cadda.convert( self.mFilenameDomainGraph )

        # cadda.convert signals failure with 0
        if status != 0:
            self.info( "domain graph construction success" )
        else:
            self.warn( "domain graph construction failed" )
Example #6
0
    def applyMethod(self):
        """Build the domain graph from the sequence graph.

        A no-op when ``self.isComplete()`` is true.  Otherwise calls
        ``cadda.convert(self.mFilenameDomainGraph)`` and logs the outcome:
        a result of 0 is reported as a warning, any other result as success.
        """
        if self.isComplete():
            return

        self.info("conversion of sequence graph to domain graph started")
        cadda.dump_parameters()

        converted = cadda.convert(self.mFilenameDomainGraph)

        # Guard clause: 0 is the failure code of cadda.convert.
        if converted == 0:
            self.warn("domain graph construction failed")
            return

        self.info("domain graph construction success")
Example #7
0
    def applyMethod(self ):
        """Run the iterative optimisation through the cadda module.

        Returns immediately if ``self.isComplete()`` is true.  Otherwise:

        1. reads ``exponential_f`` and ``exponential_e`` from the
           ``optimise`` section of ``self.mFilenameFit``,
        2. passes file names and optimisation settings to cadda,
        3. iterates up to ``self.mMaxIterations`` times, saving the
           partitions and plotting progress after every iteration,
        4. stops early once the improvement of an iteration falls below
           ``self.mMinAbsImprovement`` or the improvement relative to the
           best earlier iteration falls below ``self.mMinRelImprovement``,
        5. calls ``cadda.optimise_destroy``.

        Failures reported by cadda (return value 0) are logged as warnings;
        the method does not raise for them.
        """

        if self.isComplete():
            return

        self.info( "setting parameters" )

        # The exponential parameters come from the fit file.
        fit = AddaIO.ConfigParser()
        fit.read( self.mFilenameFit )
        self.mExponentialF = float( fit.get( "optimise", "exponential_f" ) )
        self.mExponentialE = float( fit.get( "optimise", "exponential_e" ) )

        # Input/output files ...
        cadda.setFilenameGraph( self.mFilenameGraph )
        cadda.setFilenameIndex( self.mFilenameIndex )
        cadda.setFilenameTransfers( self.mFilenameTransfers )
        cadda.setFilenameDomains( self.mFilenameDomains )
        # ... logging ...
        cadda.setLogLevel( self.mLogLevel )
        cadda.setReportStep( 1000 )
        # ... and optimisation settings.
        cadda.setMaxIterations( self.mMaxIterations )
        cadda.setResolution( self.mResolution )
        cadda.setExponentialF( self.mExponentialF )
        cadda.setExponentialE( self.mExponentialE )

        self.info( "optimisation started" )

        cadda.dump_parameters()

        if cadda.optimise_initialise() == 0:
            self.warn( "initialisation failed" )
        else:
            self.info( "initialisation success" )

        gains = []
        # the domain history starts with the number of sequences
        domain_counts = [ self.mNSequences ]

        for step in range( self.mMaxIterations ):

            self.info( "iteration %i: started" % step )

            started = time.time()

            gain = cadda.optimise_iteration()
            # Relative to the best gain of all earlier iterations; the
            # first iteration has nothing to compare against and gets 1.
            relative_gain = gain / max( gains ) if gains else 1

            n_domains = cadda.optimise_get_num_partitions()
            elapsed = time.time() - started

            self.info( "iteration %i: finished in %i seconds: improvement=%f, relative improvement=%f, ndomains=%i" %
                       ( step, elapsed, gain, relative_gain, n_domains ) )

            saved = cadda.optimise_save_partitions( self.mFilenameDomains )
            if not saved:
                self.warn( "saving domains to %s failed" % self.mFilenameDomains)
            else:
                self.info( "domains saved to %s" % self.mFilenameDomains)

            gains.append( gain )
            domain_counts.append( n_domains )

            self.plotProgress( gains,
                               self.mOutputFilenameProgressImprovement,
                               "progress: improvement" )
            self.plotProgress( domain_counts,
                               self.mOutputFilenameProgressDomains,
                               "progress: domains" )
            self.plotProgress( map( lambda count: float( count ) / self.mNSequences, domain_counts ),
                               self.mOutputFilenameProgressDomainsPerSequence,
                               "progress: domains per sequence" )

            # The absolute threshold is checked before the relative one.
            if gain < self.mMinAbsImprovement:
                self.info( "optimisation stopped because absolute improvement less than %f" %
                           (self.mMinAbsImprovement) )
                break

            if relative_gain < self.mMinRelImprovement:
                self.info( "optimisation stopped because relative improvement less than %f" %
                           (self.mMinRelImprovement) )
                break
        else:
            # reached only when the loop ran through without a break
            self.info( "optimisation stopped because maximum iteration %i reached" %
                       (self.mMaxIterations) )

        if cadda.optimise_destroy() == 0:
            self.warn( "destruction failed" )
        else:
            self.info( "destruction success" )