def __init__(self, *args, **kwargs):
    """Read graph file names, configure cadda and pick the graph iterator.

    All arguments are forwarded unchanged to ``AddaModuleBlock.__init__``.

    Raises:
        ValueError: if the configured ``graph_format`` is not one of
            ``pairsdb``, ``pairsdb-old``, ``simap`` or ``pairsdb-realign``.
    """
    AddaModuleBlock.__init__(self, *args, **kwargs)

    config = self.mConfig

    # input graph plus the graph/index output names
    self.mFilenameInputGraph = config.get(
        "files", "input_graph", "pairsdb_40x40.links.gz")
    self.mFilenameOutputGraph = config.get(
        "files", "output_graph", "adda.graph")
    self.mFilenameOutputIndex = config.get(
        "files", "output_index", "adda.graph.index")

    # push logging / reporting settings into cadda and dump its parameters
    cadda.setLogLevel(self.mLogLevel)
    report_step = config.get("adda", "report_step", 1000)
    cadda.setReportStep(report_step)
    cadda.dump_parameters()

    self.mFilenames = (self.mFilenameOutputGraph, self.mFilenameOutputIndex)

    self.mAlignmentFormat = config.get("files", "graph_format", "pairsdb")

    # Format name -> iterator. The lookups are wrapped in lambdas so that
    # only the attribute for the selected format is ever resolved.
    # NOTE(review): "pairsdb-realign" maps to a name containing "Record"
    # rather than "Iterator" -- confirm this is the intended class.
    known_formats = (
        ("pairsdb", lambda: cadda.PairsDBNeighbourIterator),
        ("pairsdb-old", lambda: cadda.PairsDBNeighbourIteratorOldFormat),
        ("simap", lambda: AddaIO.NeighbourIteratorSimap),
        ("pairsdb-realign", lambda: AddaIO.NeighbourRecordPairsdbRealign),
    )
    for format_name, resolve in known_formats:
        if self.mAlignmentFormat == format_name:
            self.mGraphIterator = resolve()
            break
    else:
        raise ValueError("unknown record type %s" % self.mAlignmentFormat)

    self.info("indexing graph in format %s" % (self.mAlignmentFormat, ))
def finish(self):
    """Clean up: close both output files, then run the base-class finish.

    ``self.mOutfile`` is closed even when closing ``self.mOutfileFamilies``
    raises, so a failure on the first handle no longer leaks the second.
    The exception still propagates and, as before, the base-class
    ``finish`` is only reached when both closes succeeded.
    """
    try:
        self.mOutfileFamilies.close()
    finally:
        # always release the second handle, whatever happened above
        self.mOutfile.close()
    AddaModuleBlock.finish(self)
def __init__(self, *args, **kwargs):
    """Set up file names, cadda settings and the graph iterator class.

    Positional and keyword arguments go straight to
    ``AddaModuleBlock.__init__``.

    Raises:
        ValueError: for a ``graph_format`` value other than ``pairsdb``,
            ``pairsdb-old``, ``simap`` or ``pairsdb-realign``.
    """
    AddaModuleBlock.__init__(self, *args, **kwargs)

    get = self.mConfig.get

    self.mFilenameInputGraph = get(
        "files", "input_graph", "pairsdb_40x40.links.gz")
    self.mFilenameOutputGraph = get("files", "output_graph", "adda.graph")
    self.mFilenameOutputIndex = get(
        "files", "output_index", "adda.graph.index")

    # hand log level and report step to cadda, then dump its parameters
    cadda.setLogLevel(self.mLogLevel)
    cadda.setReportStep(get("adda", "report_step", 1000))
    cadda.dump_parameters()

    self.mFilenames = (self.mFilenameOutputGraph, self.mFilenameOutputIndex)

    self.mAlignmentFormat = get("files", "graph_format", "pairsdb")
    selected = self.mAlignmentFormat

    if selected == "pairsdb":
        iterator = cadda.PairsDBNeighbourIterator
    elif selected == "pairsdb-old":
        iterator = cadda.PairsDBNeighbourIteratorOldFormat
    elif selected == "simap":
        iterator = AddaIO.NeighbourIteratorSimap
    elif selected == "pairsdb-realign":
        # NOTE(review): the name says "Record", not "Iterator" -- confirm
        # that this is the class intended for mGraphIterator.
        iterator = AddaIO.NeighbourRecordPairsdbRealign
    else:
        raise ValueError("unknown record type %s" % self.mAlignmentFormat)
    self.mGraphIterator = iterator

    self.info("indexing graph in format %s" % (self.mAlignmentFormat, ))
def finish(self):
    """Clean up: close both output files, then run the base-class finish.

    ``self.mOutfile`` is closed even if ``self.mOutfileFamilies.close()``
    raises, so an error on the first handle cannot leak the second one.
    The error is still propagated, and the base-class ``finish`` is only
    called when both closes succeeded (unchanged from before).
    """
    try:
        self.mOutfileFamilies.close()
    finally:
        # release the second handle regardless of the outcome above
        self.mOutfile.close()
    AddaModuleBlock.finish(self)
def __init__(self, *args, **kwargs):
    """Read the pipeline file names from the ``"files"`` configuration.

    Arguments are forwarded unchanged to ``AddaModuleBlock.__init__``.
    Each attribute is fetched with ``self.mConfig.get``; the third argument
    is presumably the fallback used when the entry is missing.
    ``self.mFilenames`` contains only the summary file name.
    """
    AddaModuleBlock.__init__(self, *args, **kwargs)

    get = self.mConfig.get

    self.mFilenameClusters = get("files", "output_clusters", "adda.clusters")
    # Previously this entry was read and assigned twice with identical
    # arguments; a single assignment is sufficient.
    self.mFilenameAlignments = get("files", "output_align", "adda.align")
    self.mFilenameNids = get("files", "output_nids", "adda.nids")
    self.mFilenameGraph = get("files", "output_graph", "adda.graph")
    self.mFilenameIndex = get("files", "output_index", "adda.graph.index")
    self.mFilenameProfiles = get("files", "output_profiles", "adda.profiles")
    self.mFilenameTransfers = get(
        "files", "output_fit_transfer", "adda.fit.transfer")
    self.mFilenameDomains = get("files", "output_domains", "adda.domains")
    self.mFilenameSummary = get("files", "output_summary", "adda.summary")
    # NOTE(review): other initialisers in this file read the key
    # "output_domaingraph" (no underscore before "graph") for the same
    # fallback file name -- confirm which key the configuration uses.
    self.mFilenameDomainGraph = get(
        "files", "output_domain_graph", "adda.domaingraph.gz")
    self.mFilenameMst = get("files", "output_mst", "adda.mst")
    self.mFilenameResult = get("files", "output_result", "adda.result")
    self.mFilenameSegments = get("files", "output_segments", "adda.segments")

    self.mFilenames = (self.mFilenameSummary, )
def __init__(self, *args, **kwargs):
    """Load the nid file name and the ``"cluster"`` settings.

    Arguments are passed through to ``AddaModuleBlock.__init__``.
    """
    AddaModuleBlock.__init__(self, *args, **kwargs)

    config = self.mConfig

    self.mFilenamesNids = config.get("files", "output_nids", "adda.nids")

    # "cluster" settings
    self.mMinAlignedResidues = config.get(
        "cluster", "min_aligned_residues", 30)
    self.mPatternFamily = config.get("cluster", "pattern_family", "AD%06i")
def __init__(self, *args, **kwargs):
    """Read graph, index, nid and statistics file names from the config.

    Arguments are forwarded unchanged to ``AddaModuleBlock.__init__``.
    ``self.mFilenames`` holds the statistics file and its per-sequence
    companion (statistics name plus ``".persequence"``).
    """
    AddaModuleBlock.__init__(self, *args, **kwargs)

    config = self.mConfig

    self.mFilenameGraph = config.get("files", "output_graph", "adda.graph")
    # The fallback was "adda.graph.idx", which disagrees with the
    # "adda.graph.index" fallback every other initialiser in this file
    # passes for the same "output_index" entry. Use the shared name so an
    # unconfigured run refers to the same index file everywhere.
    self.mFilenameIndex = config.get(
        "files", "output_index", "adda.graph.index")
    self.mFilenameNids = config.get("files", "output_nids", "adda.nids")
    self.mFilenameStats = config.get("files", "output_stats", "adda.stats")

    # per-sequence statistics live next to the main statistics file
    self.mFilenameStatsSequences = self.mFilenameStats + ".persequence"

    self.mFilenames = (self.mFilenameStats, self.mFilenameStatsSequences)
def __init__(self, *args, **kwargs):
    """Read the fasta/blast file names and the ``"blast"`` settings.

    Arguments are handed on to ``AddaModuleBlock.__init__``.
    """
    AddaModuleBlock.__init__(self, *args, **kwargs)

    config = self.mConfig

    # file names
    self.mFilenameOutputFasta = config.get("files", "output_fasta", "adda")
    self.mBlastResults = config.get(
        "files", "output_blast", "adda.blast.gz")

    # "blast" settings
    self.mBlastDatabase = config.get("blast", "database", "adda")
    self.mBlastCPUs = config.get("blast", "num_cpus", 2)
    self.mBlastEvalue = config.get("blast", "evalue", 1.0)
    self.mBlastNumResults = config.get("blast", "num_results", 100000)
def __init__(self, *args, **kwargs):
    """Read nid/fasta file names and the maximum sequence length.

    Arguments are handed on to ``AddaModuleBlock.__init__``.
    ``self.mFilenames`` contains only the nid file name.
    """
    AddaModuleBlock.__init__(self, *args, **kwargs)

    config = self.mConfig

    self.mFilenameNids = config.get("files", "output_nids", "adda.nids")
    # "input_fasta" is requested without a third (fallback) argument
    self.mFilenameInputFasta = config.get("files", "input_fasta")
    self.mFilenameOutputFasta = config.get("files", "output_fasta", "adda")

    self.mMaxSequenceLength = config.get(
        "segments", "max_sequence_length", 10000)

    self.mFilenames = (self.mFilenameNids, )
def __init__(self, *args, **kwargs):
    """Read the domain-graph and mst file names and set cadda's log level.

    Arguments are handed on to ``AddaModuleBlock.__init__``.
    ``self.mFilenames`` contains only the mst file name.
    """
    AddaModuleBlock.__init__(self, *args, **kwargs)

    config = self.mConfig

    self.mFilenameDomainGraph = config.get(
        "output", "domaingraph", "adda.domaingraph.gz")
    self.mFilenameMst = config.get("output", "mst", "adda.mst")

    # Only the log level is forwarded to cadda; the report-step override
    # (cadda.setReportStep( 1 )) is disabled.
    cadda.setLogLevel(self.mLogLevel)

    self.mFilenames = (self.mFilenameMst, )
def __init__(self, *args, **kwargs):
    """Fetch the nid file name and the ``"cluster"`` settings.

    Arguments are forwarded unchanged to ``AddaModuleBlock.__init__``.
    """
    AddaModuleBlock.__init__(self, *args, **kwargs)

    get = self.mConfig.get

    self.mFilenamesNids = get("files", "output_nids", "adda.nids")

    # "cluster" settings
    self.mMinAlignedResidues = get("cluster", "min_aligned_residues", 30)
    self.mPatternFamily = get("cluster", "pattern_family", "AD%06i")
def __init__(self, *args, **kwargs):
    """Read the domain-graph and mst file names and set cadda's log level.

    Arguments are forwarded unchanged to ``AddaModuleBlock.__init__``.
    ``self.mFilenames`` contains only the mst file name.
    """
    AddaModuleBlock.__init__(self, *args, **kwargs)

    get = self.mConfig.get

    self.mFilenameDomainGraph = get(
        "files", "output_domaingraph", "adda.domaingraph.gz")
    self.mFilenameMst = get("files", "output_mst", "adda.mst")

    # Only the log level is forwarded to cadda; the report-step override
    # (cadda.setReportStep( 1 )) is disabled.
    cadda.setLogLevel(self.mLogLevel)

    self.mFilenames = (self.mFilenameMst, )
def __init__(self, *args, **kwargs):
    """Read cluster/family/domain file names and family settings.

    Arguments are handed on to ``AddaModuleBlock.__init__``.
    ``self.mFilenames`` holds the families and domains file names.
    """
    AddaModuleBlock.__init__(self, *args, **kwargs)

    config = self.mConfig

    # file names from "output"
    self.mFilenameClusters = config.get(
        "output", "clusters", "adda.clusters")
    self.mFilenameFamilies = config.get(
        "output", "families", "adda.families")
    self.mFilenameDomains = config.get("output", "adda", "adda.result")
    self.mFilenamesNids = config.get("output", "nids", "adda.nids")

    self.mFilenames = (self.mFilenameFamilies, self.mFilenameDomains)

    # family naming pattern and minimum domain size
    self.mPatternFamily = config.get("cluster", "pattern_family", "AD%06i")
    self.mMinDomainSize = config.get("adda", "min_domain_size", 30)
def __init__(self, *args, **kwargs):
    """Load fasta/blast file names and the ``"blast"`` settings.

    Arguments are forwarded unchanged to ``AddaModuleBlock.__init__``.
    """
    AddaModuleBlock.__init__(self, *args, **kwargs)

    get = self.mConfig.get

    # file names
    self.mFilenameOutputFasta = get("files", "output_fasta", "adda")
    self.mBlastResults = get("files", "output_blast", "adda.blast.gz")

    # "blast" settings
    self.mBlastDatabase = get("blast", "database", "adda")
    self.mBlastCPUs = get("blast", "num_cpus", 2)
    self.mBlastEvalue = get("blast", "evalue", 1.0)
    self.mBlastNumResults = get("blast", "num_results", 100000)
def __init__(self, *args, **kwargs):
    """Load nid/fasta file names and the maximum sequence length.

    Arguments are forwarded unchanged to ``AddaModuleBlock.__init__``.
    ``self.mFilenames`` contains only the nid file name.
    """
    AddaModuleBlock.__init__(self, *args, **kwargs)

    get = self.mConfig.get

    self.mFilenameNids = get("files", "output_nids", "adda.nids")
    # "input_fasta" is requested without a third (fallback) argument
    self.mFilenameInputFasta = get("files", "input_fasta")
    self.mFilenameOutputFasta = get("files", "output_fasta", "adda")

    self.mMaxSequenceLength = get("segments", "max_sequence_length", 10000)

    self.mFilenames = (self.mFilenameNids, )
def __init__(self, *args, **kwargs):
    """Read graph/domain file names and pass the settings on to cadda.

    Arguments are handed on to ``AddaModuleBlock.__init__``.
    The trusted-link e-value threshold is converted with ``float``.
    ``self.mFilenames`` contains only the domain graph file name.
    """
    AddaModuleBlock.__init__(self, *args, **kwargs)

    config = self.mConfig

    self.mFilenameGraph = config.get("output", "graph", "adda.graph")
    self.mFilenameDomains = config.get("output", "domains", "adda.domains")

    threshold = config.get("align", "evalue_threshold_trusted_links", -12.0)
    self.mEvalueThresholdTrustedLinks = float(threshold)

    self.mFilenameDomainGraph = config.get(
        "output", "domaingraph", "adda.domaingraph.gz")

    # mirror the settings into cadda
    cadda.setFilenameGraph(self.mFilenameGraph)
    cadda.setFilenameDomains(self.mFilenameDomains)
    cadda.setLogLevel(self.mLogLevel)
    cadda.setEvalueThresholdTrustedLinks(self.mEvalueThresholdTrustedLinks)

    self.mFilenames = (self.mFilenameDomainGraph, )
def __init__(self, *args, **kwargs):
    """Load cluster/family/domain file names and family settings.

    Arguments are forwarded unchanged to ``AddaModuleBlock.__init__``.
    ``self.mFilenames`` holds the families and domains file names.
    """
    AddaModuleBlock.__init__(self, *args, **kwargs)

    get = self.mConfig.get

    # file names from "files"
    self.mFilenameClusters = get("files", "output_clusters", "adda.clusters")
    self.mFilenameFamilies = get("files", "output_families", "adda.families")
    self.mFilenameDomains = get("files", "output_adda", "adda.result")
    self.mFilenamesNids = get("files", "output_nids", "adda.nids")

    self.mFilenames = (self.mFilenameFamilies, self.mFilenameDomains)

    # family naming pattern and minimum domain size
    self.mPatternFamily = get("cluster", "pattern_family", "AD%06i")
    self.mMinDomainSize = get("adda", "min_domain_size", 30)
def __init__(self, *args, **kwargs):
    """Read the pipeline file names from the ``"output"`` configuration.

    Arguments are forwarded unchanged to ``AddaModuleBlock.__init__``.
    Each attribute is fetched with ``self.mConfig.get``; the third argument
    is presumably the fallback used when the entry is missing.
    ``self.mFilenames`` contains only the summary file name.
    """
    AddaModuleBlock.__init__(self, *args, **kwargs)

    get = self.mConfig.get

    self.mFilenameClusters = get("output", "clusters", "adda.clusters")
    # Previously this entry was read and assigned twice with identical
    # arguments; a single assignment is sufficient.
    self.mFilenameAlignments = get("output", "align", "adda.align")
    self.mFilenameNids = get("output", "nids", "adda.nids")
    self.mFilenameGraph = get("output", "graph", "adda.graph")
    self.mFilenameIndex = get("output", "index", "adda.graph.index")
    self.mFilenameProfiles = get("output", "profiles", "adda.profiles")
    self.mFilenameTransfers = get(
        "output", "fit_transfer", "adda.fit.transfer")
    self.mFilenameDomains = get("output", "domains", "adda.domains")
    self.mFilenameSummary = get("output", "summary", "adda.summary")
    # NOTE(review): other initialisers in this file read the key
    # "domaingraph" (no underscore) from "output" for the same fallback
    # file name -- confirm which key the configuration uses.
    self.mFilenameDomainGraph = get(
        "output", "domain_graph", "adda.domaingraph.gz")
    self.mFilenameMst = get("output", "mst", "adda.mst")
    self.mFilenameResult = get("output", "result", "adda.result")
    self.mFilenameSegments = get("output", "segments", "adda.segments")

    self.mFilenames = (self.mFilenameSummary, )
def __init__(self, *args, **kwargs):
    """Load graph/domain file names and pass the settings on to cadda.

    Arguments are forwarded unchanged to ``AddaModuleBlock.__init__``.
    The trusted-link e-value threshold is converted with ``float``.
    ``self.mFilenames`` contains only the domain graph file name.
    """
    AddaModuleBlock.__init__(self, *args, **kwargs)

    get = self.mConfig.get

    self.mFilenameGraph = get("files", "output_graph", "adda.graph")
    self.mFilenameDomains = get("files", "output_domains", "adda.domains")

    raw_threshold = get("align", "evalue_threshold_trusted_links", -12.0)
    self.mEvalueThresholdTrustedLinks = float(raw_threshold)

    self.mFilenameDomainGraph = get(
        "files", "output_domaingraph", "adda.domaingraph.gz")

    # mirror the settings into cadda
    cadda.setFilenameGraph(self.mFilenameGraph)
    cadda.setFilenameDomains(self.mFilenameDomains)
    cadda.setLogLevel(self.mLogLevel)
    cadda.setEvalueThresholdTrustedLinks(self.mEvalueThresholdTrustedLinks)

    self.mFilenames = (self.mFilenameDomainGraph, )
def __init__(self, *args, **kwargs):
    """Read file names and ``"optimise"`` settings from the configuration.

    Arguments are handed on to ``AddaModuleBlock.__init__``.
    The iteration count is converted with ``int`` and the resolution and
    improvement thresholds with ``float``. Three progress-plot file names
    are derived from the domains file name, and ``self.mFilenames``
    contains only the domains file name.
    """
    AddaModuleBlock.__init__(self, *args, **kwargs)

    config = self.mConfig

    # file names; only "output_fit" is given a third (fallback) argument
    self.mFilenameGraph = config.get("files", "output_graph")
    self.mFilenameIndex = config.get("files", "output_index")
    self.mFilenameTransfers = config.get("files", "output_fit_transfer")
    self.mFilenameFit = config.get("files", "output_fit", "adda.fit")
    self.mFilenameNids = config.get("files", "output_nids")
    self.mFilenameDomains = config.get("files", "output_domains")

    # "optimise" settings, converted to numbers
    iterations = config.get("optimise", "iterations")
    self.mMaxIterations = int(iterations)
    resolution = config.get("optimise", "resolution")
    self.mResolution = float(resolution)
    min_abs = config.get("optimise", "min_abs_improvement")
    self.mMinAbsImprovement = float(min_abs)
    min_rel = config.get("optimise", "min_rel_improvement")
    self.mMinRelImprovement = float(min_rel)

    # progress plots are named after the domains file
    prefix = self.mFilenameDomains
    self.mOutputFilenameProgressImprovement = (
        prefix + "_progress_improvement.png")
    self.mOutputFilenameProgressDomains = prefix + "_progress_domains.png"
    self.mOutputFilenameProgressDomainsPerSequence = (
        prefix + "_progress_domains_per_sequence.png")

    # self.mFasta is not assigned here; presumably the base initialiser
    # provides it -- TODO confirm.
    self.mNSequences = len(self.mFasta)

    self.mFilenames = (self.mFilenameDomains, )
def finish(self):
    """Log the sequence counters, then run the base-class finish."""
    counters = (self.mInput, self.mOutput, self.mRemoved, self.mDuplicates)
    summary = (
        "sequences: %i input, %i output, %i removed, %i duplicates"
        % counters
    )
    self.info(summary)
    AddaModuleBlock.finish(self)
def finish(self):
    """Report input/output/removed/duplicate counts and finish the module.

    The counts are written through ``self.info`` before
    ``AddaModuleBlock.finish`` is called.
    """
    template = "sequences: %i input, %i output, %i removed, %i duplicates"
    values = (self.mInput, self.mOutput, self.mRemoved, self.mDuplicates)
    self.info(template % values)
    AddaModuleBlock.finish(self)