def initialize(self):
    """Read the strategy keyword and its options from the command line.

    The first command-line argument is taken as the strategy keyword. The
    options declared for that strategy in ``OptionConstants.OPTION_LIST``
    are registered on an ``OptionParser``, parsed, and stored as a
    dictionary in ``self.optionDict``.

    Raises:
        RainetException: if no keyword was given, or if the keyword is not
            listed in ``OptionConstants.STRATEGIES_LIST``.
    """
    # Fail with an explicit message rather than a bare IndexError when the
    # program is started without any argument
    if len(sys.argv) < 2:
        raise RainetException(
            "OptionManager.initialize() : The main keyword is missing. Should be one of "
            + str(OptionConstants.STRATEGIES_LIST))

    # Get the main keyword that defines the strategy
    self.strategy = sys.argv[1]
    Logger.get_instance().info("Chosen strategy = " + self.strategy)

    # If the strategy is not known, raise an exception
    if self.strategy not in OptionConstants.STRATEGIES_LIST:
        raise RainetException(
            "OptionManager.initialize() : The main keyword is not correct : '"
            + self.strategy + "'. Should be one of "
            + str(OptionConstants.STRATEGIES_LIST))

    # Build an option parser to collect the option values. Each entry is
    # (short flag, long flag, action, type, dest, default, help).
    option_parser = OptionParser()
    for current_prop_list in OptionConstants.OPTION_LIST[self.strategy]:
        option_parser.add_option(current_prop_list[0],
                                 current_prop_list[1],
                                 action=current_prop_list[2],
                                 type=current_prop_list[3],
                                 dest=current_prop_list[4],
                                 default=current_prop_list[5],
                                 help=current_prop_list[6])

    # Get the various option values into a dictionary; the positional
    # arguments (including the strategy keyword) are not needed here
    opts, _ = option_parser.parse_args()
    self.optionDict = vars(opts)
def write_output(self):
    """Write the protein / OMIM disease associations to the output file.

    The file is ``self.outputFolder + OMIMProteinDisease.OUTPUT_FILE`` and
    holds one tab-separated line per (protein, disease) pair:
    protein ID, OMIM ID, OMIM description. Only proteins with at least one
    associated disease appear, and a protein with several diseases gets one
    line per disease.

    OMIM IDs missing from ``self.minTitlesDict`` are logged as warnings and
    left out of the file. Two summary lines are printed at the end.
    """
    notFound = set()
    nEntries = 0

    # The context manager closes the file even if a write or a log call fails
    with open(self.outputFolder + OMIMProteinDisease.OUTPUT_FILE, "w") as outFile:
        for prot in self.proteinOMIMDict:
            for omimID in self.proteinOMIMDict[prot]:
                # Important: all OMIM IDs should be found if using updated mimTitles.txt file
                if omimID in self.minTitlesDict:
                    outFile.write("%s\t%s\t%s\n" %
                                  (prot, omimID, self.minTitlesDict[omimID]))
                    nEntries += 1
                else:
                    Logger.get_instance().warning(
                        "write_output: %s OMIM ID not found in mimTitles.txt"
                        % omimID)
                    notFound.add(omimID)

    # Single parenthesised argument: prints the same text whether `print`
    # is a statement or a function
    print("write_output: %s OMIM IDs not found." % len(notFound))
    print("write_output: wrote %s protein-disease association entries." % nEntries)
def read_rainet_db(self):
    """Load the transcript-to-gene mapping from the RNA table.

    Queries ``RNA.transcriptID`` and ``RNA.geneID`` through
    ``self.sql_session`` and stores the mapping in
    ``self.rnaCrossReference`` (transcript ID -> gene ID as a string).
    The numbers of transcripts and genes read are logged.

    Raises:
        RainetException: if the same transcript ID is returned twice; the
            correspondence must be many-to-1 (a transcript has one gene,
            a gene can have many transcripts).
    """
    rnaCrossReference = {}  # key -> transcript ID, val -> ensembl Gene ID
    geneSet = set()

    # Query RNA table, which contains gene ID
    query = self.sql_session.query(RNA.transcriptID, RNA.geneID).all()

    for transcriptID, geneID in query:
        if transcriptID in rnaCrossReference:
            # Name the offending ID: the original message ended with a
            # dangling space and gave no clue about which transcript it was
            raise RainetException(
                "LncRNAGroupOddsRatio.read_rainet_db: duplicate transcript ID "
                + str(transcriptID))
        rnaCrossReference[transcriptID] = str(geneID)
        geneSet.add(geneID)

    Logger.get_instance().info(
        "LncRNAGroupOddsRatio.read_rainet_db : Number transcripts read %s" %
        (len(rnaCrossReference)))
    Logger.get_instance().info(
        "LncRNAGroupOddsRatio.read_rainet_db : Number genes read %s" %
        (len(geneSet)))

    self.rnaCrossReference = rnaCrossReference
def parse_file(file_path,
               class_tag,
               class_regex,
               protein_tag,
               annotation_tag,
               comment_char,
               clean_table=True):
    """Parse a network-module annotation file and commit the result.

    Optionally empties the table handled by the
    ``INTERACTOME_NETWORK_PARTITION_ANNOTATION_CLASS`` data factory, hands
    the file to ``NetworkModuleAnnotationParser.process_modules`` and then
    commits the SQLAlchemy session.

    Returns:
        ``Constants.STATUS_OK``.
    """
    # The factory is built in every case; it is only used to clean the table
    table_factory = DataFactory(
        DataConstants.INTERACTOME_NETWORK_PARTITION_ANNOTATION_CLASS)
    if clean_table:
        table_factory.clean_table()

    # Parse the file to get the modules
    NetworkModuleAnnotationParser.process_modules(
        file_path, class_tag, class_regex, protein_tag, annotation_tag,
        comment_char)

    # Commit the SQLAlchemy session
    logger = Logger.get_instance()
    logger.info(
        "NetworkModuleAnnotationParser.parse_file : Committing SQL session.")
    sql_manager = SQLManager.get_instance()
    sql_manager.commit()

    return Constants.STATUS_OK
def read_protein_rna_interactions_file(self):
    """Read the experimental transcript-protein interactions file.

    Each line of ``self.listTxProteinInteractions`` is tab-separated and
    its first column is ``transcriptID|proteinID``, e.g.::

        ENST00000600002|Q9UKA9  -1.7  1  162

    Only the first column is used. The result is stored in
    ``self.transcriptExperimentalInteractions`` (transcript ID -> set of
    interacting protein IDs). The number of lines read and the number of
    transcripts with interactions are logged.
    """
    # key -> transcript ID, value -> set of interacting proteins
    transcriptExperimentalInteractions = {}
    count = 0

    with open(self.listTxProteinInteractions, "r") as inFile:
        for line in inFile:
            line = line.strip()
            # Skip blank lines (typically a trailing empty line); they
            # would otherwise break the "transcript|protein" unpacking
            if not line:
                continue

            pair = line.split("\t")[0]
            transcriptID, proteinID = pair.split("|")

            transcriptExperimentalInteractions.setdefault(
                transcriptID, set()).add(proteinID)
            count += 1

    Logger.get_instance().info(
        "read_protein_rna_interactions_file: read %s interactions" % count)
    Logger.get_instance().info(
        "read_protein_rna_interactions_file: %s transcripts with interactions"
        % len(transcriptExperimentalInteractions))

    self.transcriptExperimentalInteractions = transcriptExperimentalInteractions
def read_properties(self, file_path):
    """Load every option of every section of an INI file.

    The file is read with ``ConfigParser`` and each option is stored in
    ``self.propertiesDict`` under its lower-cased name. Sections are
    flattened: an option defined in several sections keeps the value read
    last.

    Raises:
        RainetException: with an "Abnormal property definition" message if
            reading or storing an option fails, or a "Wrong property
            definition" message if the value read is None.
    """
    Logger.get_instance().info("Reading properties from file : " + file_path)

    config_parser = ConfigParser()
    config_parser.read(file_path)

    for section in config_parser.sections():
        for option in config_parser.options(section):
            option = option.lower()
            try:
                value = config_parser.get(section, option)
                self.propertiesDict[option] = value
            # Only ordinary errors are converted; a bare "except:" here used
            # to swallow KeyboardInterrupt and also replaced the more
            # precise "Wrong property" exception raised below
            except Exception as ex:
                raise RainetException(
                    "PropertyManager.readProperties : Abnormal property definition for key = "
                    + option + " in section " + section + " of file " +
                    file_path, ex)

            if value is None:
                raise RainetException(
                    "PropertyManager.readProperties : Wrong property definition for key = "
                    + option + " in section " + section + " of file " +
                    file_path)
def parse_file(file_path, go_id_tag, go_name_tag, go_namespace_tag, clean_table=True):
    """Parse a Gene Ontology OBO file and insert its terms into the database.

    The terms returned by ``OboParser.processGOOBO`` are turned into
    ``GeneOntology`` objects and added to the SQL session, which is then
    committed. A term is inserted only if its ID, name and namespace are
    all neither None nor the empty string.

    Returns:
        ``Constants.STATUS_OK``.
    """
    # Parse the file
    parsed_terms = OboParser.processGOOBO(file_path)

    # Build the Factory to clean the table
    table_factory = DataFactory(DataConstants.GENE_ONTOLOGY_CLASS)
    if clean_table:
        table_factory.clean_table()

    # Get the SQL session
    session = SQLManager.get_instance().get_session()

    # Values considered as "missing" for any of the three fields
    missing = (None, '')

    # Build the GeneOntology objects from the parsing result
    for term in parsed_terms:
        go_id = term[go_id_tag]
        go_name = term[go_name_tag]
        go_namespace = term[go_namespace_tag]

        if go_id in missing or go_name in missing or go_namespace in missing:
            continue

        Logger.get_instance().debug("OboParser.parse_file : inserting GO " +
                                    go_id + " " + go_name + " " + go_namespace)
        session.add(GeneOntology(go_id, go_name, go_namespace))

    # Commit the SQLAlchemy session
    Logger.get_instance().info("OboParser.parse_file : Committing SQL session.")
    SQLManager.get_instance().commit()

    return Constants.STATUS_OK
def calculate_protein_degree(self):
    """Compute the degree of every protein of the graph.

    Each vertex of ``self.graph`` is mapped to its protein name through
    ``self.proteinGraphIDDict`` (graph index -> protein name). Two
    attributes are set:

    * ``self.degreeDict``: protein name -> degree
    * ``self.proteinsPerDegreeDict``: degree -> set of protein names

    Raises:
        RainetException: if a vertex other than index 0 has no protein
            name, or if the same protein name is met twice.
    """
    degreeDict = {}  # key -> protein name, val -> degree of protein
    proteinsPerDegreeDict = {}  # key -> degree, val -> set of proteins with that degree

    # for each vertex in graph, look for the protein name using its index
    for v in self.graph.vs:
        try:
            proteinName = self.proteinGraphIDDict[v.index]
        except KeyError:
            # Index 0 is tolerated without a name.
            # NOTE(review): presumably because the network reader numbers
            # proteins from 1, leaving vertex 0 as a placeholder - confirm.
            if v.index == 0:
                continue
            raise RainetException(
                "NetworkScoreAnalysis.calculate_protein_degree : Could not find protein name for graph index %s"
                % (v.index))

        if proteinName in degreeDict:
            raise RainetException(
                "NetworkScoreAnalysis.calculate_protein_degree : duplicate uniprotAC %s"
                % (proteinName))
        degreeDict[proteinName] = v.degree()

    # The former "proteins not found ... were discarded" log was removed:
    # nothing ever filled the set it printed, so it always reported an
    # empty list while suggesting that proteins had been dropped.

    # create dictionary with the set of proteins per degree
    for prot, degree in degreeDict.items():
        proteinsPerDegreeDict.setdefault(degree, set()).add(prot)

    self.degreeDict = degreeDict
    self.proteinsPerDegreeDict = proteinsPerDegreeDict
def remove_database_file(self, path):
    """Delete the file at ``path`` on a best-effort basis.

    The outcome is logged at info level. A failed deletion (missing file,
    permission problem, invalid path...) is logged with its reason and is
    not propagated to the caller.
    """
    try:
        remove(path)
    # Deliberately broad, deletion stays best-effort; unlike a bare
    # "except:" this does not trap KeyboardInterrupt / SystemExit
    except Exception as err:
        Logger.get_instance().info('Can not delete file : ' + str(path) +
                                   ' (' + str(err) + ')')
    else:
        Logger.get_instance().info('File : "' + str(path) + '" deleted.')
def find_protein(self, uniprot_ac_list):
    """Return the unique protein matching a list of UniProt accessions.

    Delegates to ``SQLUtil.find_unique_protein_from_uniprotAC_list``. If
    that lookup raises a ``RainetException``, the error is logged at debug
    level and None is returned instead.
    """
    try:
        protein = SQLUtil.find_unique_protein_from_uniprotAC_list(
            uniprot_ac_list)
    except RainetException as lookup_error:
        Logger.get_instance().debug(lookup_error.to_string())
        protein = None
    return protein
def pick_top_proteins(self):
    """Select, for every RNA, its top-scoring proteins found in the network.

    ``self.rnaTargets`` maps transcript ID -> score -> proteins. For each
    transcript and each requested size in ``self.topPartners``, proteins
    are taken by decreasing score until the requested number is reached.
    Proteins that are not keys of ``self.dictNames`` (i.e. not present in
    the provided graph) are skipped and remembered. A top that cannot be
    filled is dropped with a warning, as the transcript would not be
    comparable to the others.

    The result is stored in ``self.rnaTops``:
    transcript ID -> {top size -> list of proteins}.
    """
    # proteins that are top interactors but absent from the network
    # (accumulated over all transcripts)
    missing_from_network = set()
    tops_by_rna = {}

    for rna in self.rnaTargets:
        targets_by_score = self.rnaTargets[rna]
        scores_desc = sorted(targets_by_score.keys(), reverse=True)
        tops_for_rna = tops_by_rna.setdefault(rna, {})

        for topVal in self.topPartners:
            selected = tops_for_rna.setdefault(topVal, [])

            # walk the proteins from best to worst score and stop as soon
            # as the top is full
            for score in scores_desc:
                if len(selected) >= topVal:
                    break
                for prot in targets_by_score[score]:
                    if len(selected) >= topVal:
                        break
                    # only proteins present in the provided graph are kept,
                    # otherwise the next protein is tried
                    if prot in self.dictNames:
                        selected.append(prot)
                    else:
                        missing_from_network.add(prot)

            # Warn if there are not enough proteins to fill the top
            if len(selected) < topVal:
                Logger.get_instance().warning(
                    "NetworkScoreAnalysis.pick_top_proteins : %s does not have enough interactions (has %s) to fill provided top %s. No results for this transcript."
                    % (rna, len(selected), topVal))
                del tops_for_rna[topVal]

        # Warn about how many top proteins were skipped for not being in the
        # PPI network. Absence from the network does not mean the protein
        # has no known interaction, only that no metric can easily be
        # applied to it, so those cases are ignored.
        # NOTE(review): emitted once per transcript because the message
        # starts with the transcript ID; the nesting level could not be
        # recovered from the flattened source - confirm.
        Logger.get_instance().warning(
            "NetworkScoreAnalysis.pick_top_proteins : %s. 'Top' proteins skipped for not being present in PPI network: %s. \nList: %s"
            % (rna, len(missing_from_network), ",".join(missing_from_network)))

    self.rnaTops = tops_by_rna
def open_text_w(path):
    """Open ``path`` for writing in text mode and return the file handle.

    If the file cannot be opened, the error is logged as critical and the
    program is terminated with a non-zero exit status.

    Raises:
        SystemExit: when the file cannot be opened.
    """
    try:
        file_handle = open(path, 'w')
    except IOError as detail:
        Logger.get_instance().critical(
            "IOError: Unable to open " + path + " : " + str(detail), detail)
        # A bare exit() reported success (status 0) after a critical
        # failure, and exit() is only an interactive helper installed by
        # the "site" module. Signal the failure explicitly instead.
        raise SystemExit(1)
    return file_handle
def open_text_r(path):
    """Open ``path`` for reading in text mode and return the file handle.

    Raises:
        RainetException: if the file cannot be opened; the failure is also
            logged as critical and the IOError is passed along as cause.
    """
    try:
        return open(path, 'r')
    except IOError as ioe:
        reason = str(ioe)
        Logger.get_instance().critical(
            "FileUtils.open_text_r : IOError: Unable to open '" + path +
            "' : " + reason)
        raise RainetException(
            "Unable to open file '" + path + "' : " + reason, ioe)
def perform_query(self, sql_session, query_string):
    """Evaluate ``query_string`` as a call chain on ``sql_session``.

    The text ``'sql_session.' + query_string`` is logged and evaluated as
    a Python expression; its value is returned.

    Raises:
        RainetException: wrapping any exception raised by the evaluation.
    """
    full_query = 'sql_session.' + query_string
    Logger.get_instance().info(
        "InteractiveQueryStrategy.perform_query : query is '" + full_query +
        "'")

    try:
        # SECURITY: eval() executes arbitrary Python taken from the query
        # text; only run it on trusted query files.
        # The parameter must keep the name `sql_session`: eval() resolves
        # that name from this function's local scope.
        return eval(full_query)
    except Exception as query_error:
        raise RainetException(
            "InteractiveQueryStrategy.perform_query : Exception occurred during query on DB",
            query_error)
def run(self):
    """Entry point of the template script.

    Logs the start, records a timer step and calls ``self.function_one()``.
    """
    logger = Logger.get_instance()
    logger.info("TemplateScriptWithClass.run: Starting...")

    # Initialising datasets
    timer = Timer.get_instance()
    timer.step("Initialising interaction datasets..")

    self.function_one()
def setUp(self):
    """Prepare options, logging, SQL session and strategy for each test.

    When running from the command line the OptionManager gets the default
    values, but in unit tests every argument must be set explicitly,
    whether optional or not. Individual tests may override these values.
    """
    option_manager = OptionManager.get_instance()

    # (option name, value) pairs, applied in this order
    test_options = (
        (OptionConstants.OPTION_VERBOSITY, "debug"),
        (OptionConstants.OPTION_DB_NAME,
         "/home/diogo/Documents/RAINET_data/TAGC/rainetDatabase/db_testing/rainet_testing_DB.sqlite"),
        (OptionConstants.OPTION_SPECIES, "human"),
        (OptionConstants.OPTION_OUTPUT_FOLDER,
         "/home/diogo/workspace/tagc-rainet-RNA/test/fr/tagc/rainet/core/test_results/enrichmentAnalysis/"),
        (OptionConstants.OPTION_ANNOTATION_TABLE, "NetworkModule"),
        (OptionConstants.OPTION_MINIMUM_PROTEIN_ANNOTATION,
         OptionConstants.DEFAULT_MINIMUM_PROTEIN_ANNOTATION),
        (OptionConstants.OPTION_MINIMUM_PROTEIN_INTERACTION,
         OptionConstants.DEFAULT_MINIMUM_PROTEIN_INTERACTION),
        (OptionConstants.OPTION_NUMBER_RANDOMIZATIONS,
         OptionConstants.DEFAULT_NUMBER_RANDOMIZATIONS),
        (OptionConstants.OPTION_EXPRESSION_WARNING,
         OptionConstants.DEFAULT_EXPRESSION_WARNING),
        (OptionConstants.OPTION_MINIMUM_EXPRESSION,
         OptionConstants.DEFAULT_MINIMUM_EXPRESSION),
        (OptionConstants.OPTION_LOWER_TAIL,
         OptionConstants.DEFAULT_LOWER_TAIL),
    )
    for option_name, option_value in test_options:
        option_manager.set_option(option_name, option_value)

    # Set the level of verbosity
    Logger.get_instance().set_level(
        option_manager.get_option(OptionConstants.OPTION_VERBOSITY))

    # Setting up SQL manager
    sql_manager = SQLManager.get_instance()
    sql_manager.set_DBpath(
        option_manager.get_option(OptionConstants.OPTION_DB_NAME))
    self.sql_session = sql_manager.get_session()

    # setting up internal test folder paths
    self.expectedFolder = "/home/diogo/workspace/tagc-rainet-RNA/test/fr/tagc/rainet/core/enrichmentAnalysis/test_expected"
    self.outputFolder = option_manager.get_option(
        OptionConstants.OPTION_OUTPUT_FOLDER)

    # create instance of strategy
    # NOTE(review): if this is a unittest.TestCase, the attribute name
    # `run` hides TestCase.run on the instance - confirm this is intended.
    self.run = EnrichmentAnalysisStrategy()
    self.run.sql_session = sql_manager.get_session()
def stop_chrono(self, message):
    """Log the last step and total durations, then reset the chronometer.

    The step duration is measured from ``self.lastTime`` and the total
    duration from ``self.start_time``; both attributes are set back to 0
    afterwards.
    """
    now = time.time()
    since_last_step = now - self.lastTime
    since_start = now - self.start_time

    logger = Logger.get_instance()
    logger.info("Step duration : " + Timer.format_duration(since_last_step))
    logger.info("\n\nSTOP CHRONO : " + message + ". Total duration " +
                Timer.format_duration(since_start))

    # Reset both reference times
    self.lastTime = self.start_time = 0
def read_network_file(self):
    """Read the protein-protein interaction file and build the graph.

    Every line of ``self.networkFile`` holds two tab-separated protein
    names. Each new name receives the next integer ID, starting at 1, and
    every line becomes one edge of an ``igraph.Graph``.

    Sets ``self.graph``, ``self.proteinGraphIDDict`` (graph index ->
    protein name) and ``self.dictNames`` (protein name -> graph index).

    Returns:
        tuple: ``(graph, listOfNames, listOfTuples, dictNames)``, returned
        for testing purposes.
    """
    listOfTuples = []  # e.g. [ (0,1), (1,2), (0,2) ]
    listOfNames = []  # e.g. [('PRRT3_HUMAN', 'TMM17_HUMAN')]
    dictNames = {}  # key -> protein name, value -> graph index (internal id)
    idCounter = 0

    with open(self.networkFile, "r") as inFile:
        for line in inFile:
            node1, node2 = line.strip().split("\t")

            # Number each protein the first time it is seen (IDs start at 1)
            for node in (node1, node2):
                if node not in dictNames:
                    idCounter += 1
                    dictNames[node] = idCounter

            listOfNames.append((node1, node2))
            listOfTuples.append((dictNames[node1], dictNames[node2]))

    assert len(listOfNames) == len(listOfTuples)

    graph = igraph.Graph(listOfTuples)

    Logger.get_instance().info(
        "NetworkScoreAnalysis.read_network_file : Number of PPIs %s" %
        (len(listOfNames)))
    Logger.get_instance().info(
        "NetworkScoreAnalysis.read_network_file : Number of unique proteins %s"
        % (len(dictNames)))

    # swap dictionary to have graph index as keys
    # key -> graph index (internal id), value -> protein name
    proteinGraphIDDict = {index: prot for prot, index in dictNames.items()}

    # The correspondence must be 1-to-1. The previous check compared
    # dictNames with itself and therefore could never fail.
    assert len(proteinGraphIDDict) == len(dictNames)

    self.graph = graph
    self.proteinGraphIDDict = proteinGraphIDDict
    self.dictNames = dictNames

    # for testing purposes
    return graph, listOfNames, listOfTuples, dictNames
def setUp(self):
    """Prepare options, logging, SQL session and strategy for each test.

    When running from the command line the OptionManager gets the default
    values, but in unit tests every argument must be set explicitly,
    whether optional or not. Individual tests may override these values.
    """
    option_manager = OptionManager.get_instance()

    # (option name, value) pairs, applied in this order
    test_options = (
        (OptionConstants.OPTION_VERBOSITY, "debug"),
        (OptionConstants.OPTION_DB_NAME, DB_PATH),
        (OptionConstants.OPTION_SPECIES, "human"),
        (OptionConstants.OPTION_OUTPUT_FOLDER,
         "/home/diogo/workspace/tagc-rainet-RNA/test/fr/tagc/rainet/core/test_results/"),
        (OptionConstants.OPTION_MINIMUM_INTERACTION_SCORE,
         OptionConstants.DEFAULT_INTERACTION_SCORE),
        (OptionConstants.OPTION_RNA_BIOTYPES,
         OptionConstants.DEFAULT_RNA_BIOTYPES),
        (OptionConstants.OPTION_GENCODE, OptionConstants.DEFAULT_GENCODE),
        (OptionConstants.OPTION_EXPRESSION_VALUE_CUTOFF,
         OptionConstants.DEFAULT_EXPRESSION_VALUE_CUTOFF),
        (OptionConstants.OPTION_EXPRESSION_TISSUE_CUTOFF,
         OptionConstants.DEFAULT_EXPRESSION_TISSUE_CUTOFF),
        (OptionConstants.OPTION_LOW_MEMORY,
         OptionConstants.DEFAULT_LOW_MEMORY),
    )
    for option_name, option_value in test_options:
        option_manager.set_option(option_name, option_value)

    # Set the level of verbosity
    Logger.get_instance().set_level(
        option_manager.get_option(OptionConstants.OPTION_VERBOSITY))

    # Setting up SQL manager
    sql_manager = SQLManager.get_instance()
    sql_manager.set_DBpath(DB_PATH)
    self.sql_session = sql_manager.get_session()

    # setting up internal test folder paths
    self.expectedFolder = "/home/diogo/workspace/tagc-rainet-RNA/test/fr/tagc/rainet/core/test_expected/Report"
    self.outputFolder = option_manager.get_option(
        OptionConstants.OPTION_OUTPUT_FOLDER) + "/Report/"

    # create instance of strategy
    self.strategy = AnalysisStrategy()

    # report only written for selected tests
    self.strategy.writeReportFile = 0
def execute(self):
    """Run the query stored in the query file and export its result.

    Steps: read the options (DB path, query file, species), open a SQL
    session on the database, check that the database species matches the
    requested one, read the query, run it and export the result.

    Raises:
        RainetException: if no species was provided.
    """
    # Get the option values
    option_manager = OptionManager.get_instance()
    self.DBPath = option_manager.get_option(OptionConstants.OPTION_DB_NAME)
    query_file_path = option_manager.get_option(
        OptionConstants.OPTION_QUERY_FILE)
    species = option_manager.get_option(OptionConstants.OPTION_SPECIES)

    # Check if a species has been provided
    if species is None or len(species) == 0:
        raise RainetException(
            "InteractiveQueryStrategy.execute: You must specify a species in your command line. Please check help."
        )

    # Build a SQL session to DB
    sql_manager = SQLManager.get_instance()
    sql_manager.set_DBpath(self.DBPath)
    sql_session = sql_manager.get_session()

    # Check if the species in the DB correspond to the species given by the user
    SQLManager.check_species(sql_session, species, True)

    logger = Logger.get_instance()

    # Read the query from file
    logger.info("InteractiveQueryStrategy.execute : Reading query...")
    query_string = self.read_query(query_file_path)

    # Get the result of the query on DB
    logger.info("InteractiveQueryStrategy.execute : Querying database...")
    query_result = self.perform_query(sql_session, query_string)

    # Export query result to file
    logger.info("InteractiveQueryStrategy.execute : Exporting result...")
    self.export_query_result(query_result, query_file_path)

    logger.info("InteractiveQueryStrategy.execute : Finished.")
def read_query(self, query_file_path):
    """Return the content of the query file as a single-line string.

    The lines of the file are concatenated without any separator after
    their line terminator has been removed.

    Raises:
        RainetException: if the query file cannot be opened.
    """
    try:
        file_handler = FileUtils.open_text_r(query_file_path)
    except RainetException:
        Logger.get_instance().error(
            "InteractiveQueryStrategy.read_query : Unable to open query file : "
            + query_file_path)
        raise RainetException(
            "InteractiveQueryStrategy.read_query : Unable to open query file : "
            + query_file_path)

    try:
        # Strip only the line terminator. Slicing off the last character
        # unconditionally truncated the query when the final line had no
        # trailing newline.
        return ''.join(line.rstrip('\r\n') for line in file_handler)
    finally:
        # The handle was previously never closed
        file_handler.close()
def __init__(self, transcript_id):
    """Create the object for a transcript known to the DataManager.

    The transcript ID is looked up in the data stored under
    ``DataConstants.RNA_ALL_KW``.

    Raises:
        NotRequiredInstantiationException: if the transcript ID is not
            found there (a warning is logged first).
    """
    # Query RNA transcript ID in respective DataManager object
    # (imported here rather than at module level)
    from fr.tagc.rainet.core.util.data.DataManager import DataManager

    known_rnas = DataManager.get_instance().get_data(DataConstants.RNA_ALL_KW)

    if transcript_id not in known_rnas:
        Logger.get_instance().warning(
            "\n InteractingRNA.init : Transcript ID not found:\t" +
            str(transcript_id))
        raise NotRequiredInstantiationException(
            "InteractingRNA.init : no insertion needed, RNA object not found.")

    self.transcriptID = transcript_id
def _get_new_degree(self, degree, proteinsPerDegreeDictCopy):
    """Return the available degree closest to ``degree``, excluding itself.

    Candidates are the keys of ``proteinsPerDegreeDictCopy`` other than
    ``degree``. When two candidates are equally close, the smaller one is
    returned (candidates are examined in ascending order). A warning
    announcing the replacement degree is logged.

    Raises:
        ValueError: from ``min`` if there is no other degree available.
    """
    # all degrees except the current one, in ascending order
    candidates = [
        candidate for candidate in sorted(proteinsPerDegreeDictCopy.keys())
        if candidate != degree
    ]

    # closest candidate in absolute distance
    newDegree = min(candidates, key=lambda candidate: abs(candidate - degree))

    Logger.get_instance().warning(
        "NetworkScoreAnalysis._get_new_degree : No more proteins with degree %s. Sampling closest degree: %s."
        % (degree, newDegree))

    return newDegree
def set_option(self, option_name, option_value):
    """Store ``option_value`` under ``option_name`` in ``self.optionDict``.

    The dictionary is created if it is still None. Nothing is stored, and
    a warning is logged, when the name is None or empty or when the value
    is None.
    """
    if self.optionDict is None:
        self.optionDict = {}

    # Reject a missing key
    if option_name is None or len(option_name) == 0:
        Logger.get_instance().warning(
            "OptionManager.set_option: Trying to pass a None or void key on option dictionary."
        )
        return

    # Reject a missing value
    if option_value is None:
        Logger.get_instance().warning(
            "OptionManager.set_option: Trying to pass a None or void value on option dictionary for key : "
            + option_name)
        return

    self.optionDict[option_name] = option_value
def perform_query(self, keyword, query_string):
    """Run a query on the current SQL session and keep its result.

    The text ``'sql_session.' + query_string`` is logged and evaluated as
    a Python expression; its value is stored in ``self.data[keyword]``.

    Raises:
        RainetException: wrapping any exception raised by the evaluation.
    """
    # The local must keep the name `sql_session`: eval() resolves that
    # name from this function's local scope.
    sql_session = SQLManager.get_instance().get_session()
    full_query = 'sql_session.' + query_string
    Logger.get_instance().info("DataManager.init : query is '" + full_query +
                               "'")

    try:
        # SECURITY: eval() executes arbitrary Python taken from the query
        # text; only pass trusted query strings.
        query_result = eval(full_query)
    except Exception as query_error:
        raise RainetException(
            "DataManager.init : Exception occurred during query on DB",
            query_error)
    else:
        self.data[keyword] = query_result
def launch_insertion_TSV(self,
                         file_path,
                         has_headers,
                         headers,
                         class_name,
                         params,
                         default_values,
                         comment_char,
                         table_extension="",
                         clean_table=True):
    """Insert a TSV file into the database and record the insertion status.

    The insertion is run through ``TSVParser.parse_file`` only when
    ``SQLUtil.insert_data_required`` says so for ``class_name``,
    ``self.forceOverride`` and ``table_extension``. Whenever a status is
    available (successful insertion or error), it is written to the
    ``TableStatus`` row named ``class_name + table_extension`` and
    committed.

    Raises:
        RainetException: re-raised as is when raised by the insertion
            code, or wrapping any other exception.
    """
    composite_table_name = class_name + table_extension

    # Bound before the try block so that the finally clause can always
    # read it, even if an unexpected BaseException interrupts the insertion
    status = None

    try:
        # Start timing
        Timer.get_instance().step("Inserting " + composite_table_name + ":")

        # Check whether the insertion is required, depending on whether the
        # data were already inserted and on the forceOverride option
        if SQLUtil.insert_data_required(class_name, self.forceOverride,
                                        table_extension):
            Logger.get_instance().info("|--Starting insertion...")
            status = TSVParser.parse_file(file_path, has_headers, headers,
                                          class_name, params, default_values,
                                          comment_char, clean_table)
        else:
            Logger.get_instance().info(
                "|--Data already inserted: insertion bypassed.")
            status = None

    # If an exception is raised by controlled code, status of insertion is set to "Rainet Error"
    except RainetException as rainet_error:
        Logger.get_instance().error(rainet_error.to_string())
        status = Constants.STATUS_RAINET_ERROR
        raise

    # If an exception is raised by uncontrolled code, status of insertion is set to "Error"
    except Exception as e:
        # str(e) instead of the deprecated e.message, which is empty for
        # many exception types and does not exist on Python 3
        Logger.get_instance().error(str(e))
        status = Constants.STATUS_ERROR
        raise RainetException(
            "Abnormal Exception during insertion of " + class_name, e)

    # In all cases insert or update the status of the insertion to TableStatus table in DB
    finally:
        if status is not None:
            sql_session = SQLManager.get_instance().get_session()
            db_status_list = sql_session.query(TableStatus).filter(
                TableStatus.tableName == composite_table_name).all()
            if not db_status_list:
                sql_session.add(
                    TableStatus(composite_table_name, status, file_path))
            else:
                db_status = db_status_list[0]
                db_status.tableStatus = status
                sql_session.add(db_status)
            SQLManager.get_instance().commit()
def __init__(self, peptide_id):
    """Resolve the protein ID of a peptide through the cross references.

    The peptide ID is looked up in the data stored under
    ``DataConstants.PROTEIN_ENSP_XREF_KW``; when found, the first
    associated entry becomes ``self.proteinID``. When not found, a debug
    message is logged and ``self.proteinID`` is not assigned here.

    Note: it is assumed that a uniprot_ac present in the protein cross
    reference table also exists in the protein table.
    """
    # Imported here rather than at module level
    from fr.tagc.rainet.core.util.data.DataManager import DataManager

    proteinXrefs = DataManager.get_instance().get_data(
        DataConstants.PROTEIN_ENSP_XREF_KW)

    if peptide_id not in proteinXrefs:
        Logger.get_instance().debug("\n MRNA.init : Peptide ID not found:\t" +
                                    str(peptide_id))
        return

    self.proteinID = proteinXrefs[peptide_id][0]
def associate_proteins(module, line):
    """Attach to ``module`` every protein named on a space-separated line.

    Each token is searched first in the Protein table (as UniProt ID or
    UniProt AC), then through the protein cross references. Tokens with no
    match are reported at debug level and skipped.

    Raises:
        RainetException: if a token matches more than one protein.
    """
    token_list = line.split(" ")
    sql_session = SQLManager.get_instance().get_session()

    # NOTE(review): str.split(" ") always returns at least one element, so
    # this guard looks unreachable - confirm before relying on it.
    if token_list is None or len(token_list) == 0:
        raise RainetException(
            "NetworkModuleParser.associate_proteins : list of proteins is empty or None : "
            + line)

    for token in token_list:
        # Search first in Protein table both with Uniprot AC and Uniprot ID
        matches = sql_session.query(Protein).filter(
            or_(Protein.uniprotID == token,
                Protein.uniprotAC == token)).all()

        # ... then with CrossReference
        if matches is None or len(matches) == 0:
            matches = sql_session.query(Protein).filter(
                Protein.uniprotAC == ProteinCrossReference.protein_id,
                ProteinCrossReference.crossReferenceID == token).all()

        # No protein at all: report and move on to the next token
        if matches is None or len(matches) == 0:
            Logger.get_instance().debug(
                "NetworkModuleParser.associate_proteins : No Protein found for ID = "
                + token)
            continue

        # More than one protein: the ID is ambiguous
        if len(matches) != 1:
            raise RainetException(
                "NetworkModuleParser.associate_proteins : Abnormal number of Protein found for ID = "
                + token + " : " + str(len(matches)))

        protein = matches[0]
        sql_session.add(protein)

        # if a protein is found, associate it to the module
        if protein is None:
            raise RainetException(
                "NetworkModuleParser.associate_proteins : Protein found is none for ID = "
                + token)
        module.add_associated_protein(protein)
def filter_by_specificity(self, filtered_enrichment_results, annot_dict,
                          lnc_dict):
    """Keep the enrichment lines that are specific enough.

    ``filtered_enrichment_results`` is a list of tab-separated lines whose
    first item is a header (always kept). In the other lines, column 0 is
    the transcript ID and column 1 the annotation ID.

    A line is kept when both conditions hold:

    * ``self.annotSpecificityFilter`` is -1 (filter off) or
      ``len(annot_dict[annotID])`` does not exceed it;
    * ``self.transcriptSpecificityFilter`` is -1 (filter off) or
      ``len(lnc_dict[transcriptID])`` does not exceed it.

    The number of rejected lines is logged.

    Returns:
        list: the header followed by the lines passing the filters.
    """
    annot_limit = self.annotSpecificityFilter
    transcript_limit = self.transcriptSpecificityFilter

    # keep header
    filteredEnrichmentResults = [filtered_enrichment_results[0]]
    notPassingFilters = 0

    for enrich in filtered_enrichment_results[1:]:
        spl = enrich.split("\t")
        transcriptID = spl[0]
        annotID = spl[1]

        # Each filter is checked independently. A disabled filter (-1)
        # short-circuits, so its dictionary is not looked up at all.
        annot_ok = (annot_limit == -1
                    or len(annot_dict[annotID]) <= annot_limit)
        if annot_ok and (transcript_limit == -1
                         or len(lnc_dict[transcriptID]) <= transcript_limit):
            filteredEnrichmentResults.append(enrich)
        else:
            notPassingFilters += 1

    Logger.get_instance().info(
        "filter_by_specificity : Filtered out %s enrichments" %
        (notPassingFilters))

    return filteredEnrichmentResults
def create_object_from_tsv(self, parameter_value_list):
    """Instantiate ``self.className`` from one row of TSV values.

    Every value is passed to the class constructor as a string, in order,
    and the new object's ``add_to_session()`` is called.

    Returns:
        The new instance, or None when the constructor raised
        ``NotRequiredInstantiationException`` (object not needed, e.g.
        already in database).

    Raises:
        RainetException: if no value is provided, or wrapping any other
            exception raised during the creation.
    """
    # Test if the parameter list contains elements
    if parameter_value_list is None or len(parameter_value_list) == 0:
        raise RainetException(
            "DataFactory.create_object_from_tsv: No value provided for the object creation of class "
            + self.className)

    # The values are handed to the constructor directly. They used to be
    # spliced into a Python source string with only double quotes escaped,
    # so a backslash in a value was read as an escape sequence (corrupting
    # the value) or broke the generated code.
    constructor_args = [str(value) for value in parameter_value_list]

    # Textual form of the call, built for the debug log only
    constructor_command = self.className + "(" + ",".join(
        "\"" + arg.replace("\"", "\\\"") + "\""
        for arg in constructor_args) + ")"

    # Create the object and add it to SQLAlchemy session if required
    new_instance = None
    try:
        Logger.get_instance().debug("command = " + constructor_command)
        # NOTE(security): the class is still resolved by name through
        # eval(); className must never come from untrusted input.
        constructor = eval(self.className)
        new_instance = constructor(*constructor_args)
        new_instance.add_to_session()
    # Possibly the object does not have to be created because it is already in database
    # In that case, a NotRequiredInstantiationException is raised by the class constructor
    except NotRequiredInstantiationException:
        pass
    # If any other Exception occurred, something went wrong
    except Exception as excep:
        raise RainetException(
            "DataFactory.create_object_from_tsv : An issue occurred in the object creation.",
            excep)

    return new_instance