Beispiel #1
0
    def initialize(self):
        """Read the strategy keyword and its options from the command line.

        The first command-line argument is taken as the strategy keyword and
        stored in ``self.strategy``. The options declared for that strategy in
        ``OptionConstants.OPTION_LIST`` are then parsed and stored, as a
        dictionary, in ``self.optionDict``.

        Raises:
            RainetException: if no keyword is given, or if the keyword is not
                listed in ``OptionConstants.STRATEGIES_LIST``.
        """

        # Without this guard a missing keyword surfaces as a bare IndexError
        if len(sys.argv) < 2:
            raise RainetException(
                "OptionManager.initialize() : No main keyword provided. Should be one of "
                + str(OptionConstants.STRATEGIES_LIST))

        # Get the main keyword that defines the strategy
        self.strategy = sys.argv[1]
        Logger.get_instance().info("Chosen strategy = " + self.strategy)

        # If the strategy is not known, raise an exception
        if self.strategy not in OptionConstants.STRATEGIES_LIST:
            raise RainetException(
                "OptionManager.initialize() : The main keyword is not correct : '"
                + self.strategy + "'. Should be one of " +
                str(OptionConstants.STRATEGIES_LIST))

        # Build an option parser to collect the option values.
        # Each entry is indexed as:
        # short flag, long flag, action, type, dest, default, help
        option_parser = OptionParser()
        for current_prop_list in OptionConstants.OPTION_LIST[self.strategy]:
            option_parser.add_option(current_prop_list[0],
                                     current_prop_list[1],
                                     action=current_prop_list[2],
                                     type=current_prop_list[3],
                                     dest=current_prop_list[4],
                                     default=current_prop_list[5],
                                     help=current_prop_list[6])

        # Get the various option values into a dictionary; the positional
        # arguments returned by the parser are not used here
        opts, _ = option_parser.parse_args()
        self.optionDict = vars(opts)
Beispiel #2
0
    def write_output(self):
        """Write the protein-disease associations to the output file.

        The file is ``self.outputFolder + OMIMProteinDisease.OUTPUT_FILE`` and
        holds one tab-separated line per (protein, disease) pair:
        proteinID, OMIM_ID, OMIM_description.

        Only proteins with at least one associated disease appear, and a
        protein with several diseases produces one line per disease.
        OMIM IDs missing from ``self.minTitlesDict`` are logged as warnings and
        left out of the file.
        """

        notFound = set()
        nEntries = 0

        # "with" guarantees the file is closed even if a write fails
        with open(self.outputFolder + OMIMProteinDisease.OUTPUT_FILE,
                  "w") as outFile:
            for prot in self.proteinOMIMDict:
                for omimID in self.proteinOMIMDict[prot]:

                    # Important: all OMIM IDs should be found if using updated mimTitles.txt file
                    if omimID in self.minTitlesDict:
                        outFile.write(
                            "%s\t%s\t%s\n" %
                            (prot, omimID, self.minTitlesDict[omimID]))
                        nEntries += 1
                    else:
                        Logger.get_instance().warning(
                            "write_output: %s OMIM ID not found in mimTitles.txt"
                            % omimID)
                        notFound.add(omimID)

        # Parenthesised single-argument form prints the same text under
        # Python 2 and Python 3
        print("write_output: %s OMIM IDs not found." % len(notFound))
        print("write_output: wrote %s protein-disease association entries." %
              nEntries)
Beispiel #3
0
    def read_rainet_db(self):
        """Load the transcript-to-gene correspondence from the RNA table.

        Queries ``RNA.transcriptID`` and ``RNA.geneID`` through
        ``self.sql_session`` and stores the result in
        ``self.rnaCrossReference`` (key: transcript ID, value: gene ID as a
        string).

        Raises:
            RainetException: if the same transcript ID is returned twice.
        """

        rnaCrossReference = {}  # key -> transcript ID, val -> ensembl Gene ID

        #===============================================================================
        # Query RNA table, which contains gene ID
        #===============================================================================
        query = self.sql_session.query(RNA.transcriptID, RNA.geneID).all()

        # Correspondence should be many-to-1 (a transcript can only have one
        # associated gene, a gene can have many transcripts)

        geneSet = set()

        for transcriptID, geneID in query:
            if transcriptID in rnaCrossReference:
                # Name the offending ID so the duplicate can be tracked down
                raise RainetException(
                    "LncRNAGroupOddsRatio.read_rainet_db: duplicate transcript ID "
                    + str(transcriptID))

            rnaCrossReference[transcriptID] = str(geneID)

            geneSet.add(geneID)

        Logger.get_instance().info(
            "LncRNAGroupOddsRatio.read_rainet_db : Number transcripts read %s"
            % (len(rnaCrossReference)))
        Logger.get_instance().info(
            "LncRNAGroupOddsRatio.read_rainet_db : Number genes read %s" %
            (len(geneSet)))

        self.rnaCrossReference = rnaCrossReference
Beispiel #4
0
    def parse_file(file_path,
                   class_tag,
                   class_regex,
                   protein_tag,
                   annotation_tag,
                   comment_char,
                   clean_table=True):
        """Parse a network module annotation file and commit the result.

        A DataFactory for the partition annotation class is always built; its
        table is emptied only when ``clean_table`` is true. The file is then
        handed to ``NetworkModuleAnnotationParser.process_modules`` and the SQL
        session is committed.

        Returns:
            ``Constants.STATUS_OK``.
        """

        # Optionally empty the target table before inserting
        table_factory = DataFactory(
            DataConstants.INTERACTOME_NETWORK_PARTITION_ANNOTATION_CLASS)
        if clean_table:
            table_factory.clean_table()

        # Delegate the actual parsing of the modules
        NetworkModuleAnnotationParser.process_modules(
            file_path, class_tag, class_regex, protein_tag, annotation_tag,
            comment_char)

        # Persist what the parser produced
        logger = Logger.get_instance()
        logger.info(
            "NetworkModuleAnnotationParser.parse_file : Committing SQL session."
        )
        SQLManager.get_instance().commit()

        return Constants.STATUS_OK
Beispiel #5
0
    def read_protein_rna_interactions_file(self):
        """Read the transcript-protein interactions file.

        The file at ``self.listTxProteinInteractions`` is tab-separated; only
        its first column, of the form ``transcriptID|proteinID``, is used.
        Example lines:
            ENST00000600002|Q9UKA9  -1.7    1       162
            ENST00000602495|Q92804  -3.22   1       34000

        The result is stored in ``self.transcriptExperimentalInteractions``
        (key: transcript ID, value: set of interacting protein IDs).
        Blank lines are ignored.
        """

        transcriptExperimentalInteractions = {
        }  # key -> transcript ID, value -> set of interacting proteins

        count = 0
        with open(self.listTxProteinInteractions, "r") as inFile:
            for line in inFile:
                line = line.strip()

                # A blank (e.g. trailing) line holds no pair and would
                # otherwise break the two-value unpacking below
                if not line:
                    continue

                pair = line.split("\t")[0]
                transcriptID, proteinID = pair.split("|")

                transcriptExperimentalInteractions.setdefault(
                    transcriptID, set()).add(proteinID)

                count += 1

        Logger.get_instance().info(
            "read_protein_rna_interactions_file: read %s interactions" % count)

        Logger.get_instance().info(
            "read_protein_rna_interactions_file: %s transcripts with interactions"
            % len(transcriptExperimentalInteractions))

        self.transcriptExperimentalInteractions = transcriptExperimentalInteractions
Beispiel #6
0
    def read_properties(self, file_path):
        """Load every option of every section of a properties file.

        Each option name is lower-cased and stored, with its value, in
        ``self.propertiesDict``. Sections are flattened: an option appearing in
        several sections keeps the value read last.

        Raises:
            RainetException: if an option cannot be read ("Abnormal property
                definition") or its value is None ("Wrong property
                definition").
        """

        Logger.get_instance().info("Reading properties from file : " +
                                   file_path)
        config_parser = ConfigParser()

        config_parser.read(file_path)

        for section in config_parser.sections():
            for option in config_parser.options(section):
                option = option.lower()

                # Only the parser call is guarded, so that the specific
                # "Wrong property" error below is not caught and replaced by
                # the generic one
                try:
                    self.propertiesDict[option] = config_parser.get(
                        section, option)
                except Exception as ex:
                    raise RainetException(
                        "PropertyManager.readProperties : Abnormal property definition for key = "
                        + option + " in section " + section + " of file " +
                        file_path, ex)

                if self.propertiesDict[option] is None:
                    raise RainetException(
                        "PropertyManager.readProperties : Wrong property definition for key = "
                        + option + " in section " + section + " of file " +
                        file_path)
Beispiel #7
0
    def parse_file( file_path, go_id_tag, go_name_tag, go_namespace_tag, clean_table = True):
        """Parse a GO OBO file and insert its terms as GeneOntology objects.

        The file is parsed with ``OboParser.processGOOBO``. Each resulting
        term is read through the three given tags; a term is inserted only if
        its id, name and namespace are all neither None nor empty. The SQL
        session is committed at the end.

        Returns:
            ``Constants.STATUS_OK``.
        """

        # Parse first, so that a parsing failure leaves the table untouched
        parsed_terms = OboParser.processGOOBO(file_path)

        # Optionally empty the target table
        table_factory = DataFactory(DataConstants.GENE_ONTOLOGY_CLASS)
        if clean_table:
            table_factory.clean_table()

        session = SQLManager.get_instance().get_session()

        for term in parsed_terms:
            fields = (term[go_id_tag], term[go_name_tag],
                      term[go_namespace_tag])

            # Incomplete terms are silently skipped
            if any(value is None or value == '' for value in fields):
                continue

            go_id, go_name, go_namespace = fields
            Logger.get_instance().debug(
                "OboParser.parse_file : inserting GO " + go_id + " " +
                go_name + " " + go_namespace)
            session.add(GeneOntology(go_id, go_name, go_namespace))

        # Commit the SQLAlchemy session
        Logger.get_instance().info(
            "OboParser.parse_file : Committing SQL session.")
        SQLManager.get_instance().commit()

        return Constants.STATUS_OK
Beispiel #8
0
    def calculate_protein_degree(self):
        """Compute the degree of every named vertex of ``self.graph``.

        Sets two attributes:
            ``self.degreeDict``: protein name -> degree of its vertex.
            ``self.proteinsPerDegreeDict``: degree -> set of protein names.

        Vertex index 0 is skipped when it has no entry in
        ``self.proteinGraphIDDict``.

        Raises:
            RainetException: if any other vertex has no protein name, or if
                two vertices map to the same protein name.
        """

        degreeByProtein = {}  # key -> protein name, val -> degree of protein

        # Nothing fills this set any more (the name-to-uniprotAC conversion
        # that did is disabled); it is kept because the log below prints it
        unmappedProteins = set()

        for vertex in self.graph.vs:
            vertexIndex = vertex.index

            # look for protein name using index
            if vertexIndex not in self.proteinGraphIDDict:
                if vertexIndex == 0:
                    continue
                raise RainetException(
                    "NetworkScoreAnalysis.calculate_protein_degree : Could not find protein name for graph index %s"
                    % (vertexIndex))

            proteinName = self.proteinGraphIDDict[vertexIndex]

            if proteinName in degreeByProtein:
                raise RainetException(
                    "NetworkScoreAnalysis.calculate_protein_degree : duplicate uniprotAC %s"
                    % (proteinName))

            degreeByProtein[proteinName] = vertex.degree()

        Logger.get_instance().info(
            "NetworkScoreAnalysis.calculate_protein_degree : Could not find proteins uniprotac for protein names %s. These were discarded."
            % (unmappedProteins))

        # group the protein names by degree
        proteinsByDegree = {}  # key -> degree, val -> set of proteins
        for proteinName, degree in degreeByProtein.items():
            proteinsByDegree.setdefault(degree, set()).add(proteinName)

        self.degreeDict = degreeByProtein
        self.proteinsPerDegreeDict = proteinsByDegree
Beispiel #9
0
    def remove_database_file(self, path):
        """Delete the file at ``path``, logging the outcome.

        Deletion is best-effort: a failure is logged, with its reason, and is
        not propagated to the caller.
        """

        try:
            remove(path)
        except Exception as e:
            # "Exception" rather than a bare except, so that
            # KeyboardInterrupt and SystemExit are not swallowed
            Logger.get_instance().info('Can not delete file : ' + str(path) +
                                       ' (' + str(e) + ')')
        else:
            Logger.get_instance().info('File : "' + str(path) + '" deleted.')
Beispiel #10
0
    def find_protein(self, uniprot_ac_list):
        """Return the unique protein matching a list of uniprot ACs, or None.

        The lookup is delegated to
        ``SQLUtil.find_unique_protein_from_uniprotAC_list``. If it raises a
        RainetException, the exception is logged at debug level and None is
        returned.
        """

        try:
            protein = SQLUtil.find_unique_protein_from_uniprotAC_list(
                uniprot_ac_list)
        except RainetException as lookup_error:
            Logger.get_instance().debug(lookup_error.to_string())
            protein = None

        return protein
Beispiel #11
0
    def pick_top_proteins(self):
        """Pick the top X interacting proteins of each RNA.

        For every RNA in ``self.rnaTargets`` (score -> proteins) and every
        value X in ``self.topPartners``, proteins are taken by decreasing
        score until X proteins present in ``self.dictNames`` are collected.
        Proteins absent from ``self.dictNames`` are skipped and reported once
        at the end.

        If fewer than X proteins are available, a warning is logged and no
        entry is kept for that (RNA, X) pair.

        The result is stored in ``self.rnaTops``
        (key: transcript ID, value: dict of X -> list of top X proteins).
        """

        # proteins that are top interactors but absent from the network,
        # and therefore skipped in the analysis
        skippedProteins = set()

        rnaTops = {}

        for rna in self.rnaTargets:

            scoresToProteins = self.rnaTargets[rna]
            sortedScores = sorted(scoresToProteins.keys(), reverse=True)

            topsOfRna = rnaTops.setdefault(rna, {})

            for topVal in self.topPartners:

                topProteins = topsOfRna.setdefault(topVal, [])

                for score in sortedScores:
                    # stop looking at lower scores once the top is full
                    if len(topProteins) >= topVal:
                        break

                    for prot in scoresToProteins[score]:
                        if len(topProteins) >= topVal:
                            break
                        # only proteins present in the provided graph count
                        if prot in self.dictNames:
                            topProteins.append(prot)
                        else:
                            skippedProteins.add(prot)

                # Warn if there are not enough proteins to fill the top:
                # the transcript is not analysed for this top, as it would
                # not be comparable to the others
                if len(topProteins) < topVal:
                    Logger.get_instance().warning(
                        "NetworkScoreAnalysis.pick_top_proteins : %s does not have enough interactions (has %s) to fill provided top %s. No results for this transcript."
                        % (rna, len(topProteins), topVal))
                    del topsOfRna[topVal]

        # Absence from the PPI network does not mean that a protein has no
        # known interactions, only that no metric can easily be applied.
        # The summary covers all RNAs, so it must not name a single one:
        # using the loop variable here failed on empty input and otherwise
        # named whichever RNA happened to be processed last.
        Logger.get_instance().warning(
            "NetworkScoreAnalysis.pick_top_proteins : 'Top' proteins skipped for not being present in PPI network: %s. List: %s"
            % (len(skippedProteins), ",".join(sorted(skippedProteins))))

        self.rnaTops = rnaTops
Beispiel #12
0
    def open_text_w(path):
        """Open ``path`` for writing in text mode and return the file object.

        If the file cannot be opened, the error is logged as critical and the
        program exits with a non-zero status.
        """
        try:
            file_handle = open(path, 'w')
        except IOError as detail:
            # The reason is already part of the message, so it is not passed
            # again as a second argument
            Logger.get_instance().critical(
                "IOError:  Unable to open " + path + " : " + str(detail))
            # A failure must not be reported to the shell as success
            exit(1)

        return file_handle
Beispiel #13
0
    def open_text_r(path):
        """Open ``path`` for reading in text mode and return the file object.

        Raises:
            RainetException: if the file cannot be opened. The failure is
                logged as critical first and the IOError is passed along.
        """
        try:
            return open(path, 'r')
        except IOError as ioe:
            reason = str(ioe)
            Logger.get_instance().critical(
                "FileUtils.open_text_r : IOError:  Unable to open '" + path +
                "' : " + reason)
            raise RainetException(
                "Unable to open file '" + path + "' : " + reason, ioe)
Beispiel #14
0
 def perform_query(self, sql_session, query_string):
     """Evaluate a query expression against the given SQL session.

     ``query_string`` is appended to the text ``sql_session.`` and the
     resulting expression is evaluated with ``eval``.

     Returns the value of the evaluated expression.
     Raises RainetException, wrapping the original exception, if the
     evaluation fails.
     """
     
     # The parameter name "sql_session" is part of the evaluated text below:
     # renaming it would break every query.
     full_query = 'sql_session.' + query_string
     Logger.get_instance().info( "InteractiveQueryStrategy.perform_query : query is '" + full_query + "'")
     
     try:
         # SECURITY NOTE(review): eval runs the text as arbitrary Python with
         # access to this scope. Only trusted query text must reach this call.
         query_result = eval( full_query)
     except Exception as ex:
         raise RainetException( "InteractiveQueryStrategy.perform_query : Exception occurred during query on DB", ex)
     
     return query_result
Beispiel #15
0
    def run(self):
        """Log the start of the script, record a timer step and call
        ``self.function_one``."""

        logger = Logger.get_instance()
        logger.info("TemplateScriptWithClass.run: Starting...")

        # ---- Initialising datasets ----
        timer = Timer.get_instance()
        timer.step("Initialising interaction datasets..")

        self.function_one()
Beispiel #16
0
    def setUp(self):
        """Prepare options, logging, SQL session and strategy for a test.

        When running from the command line the option manager receives default
        values, but in a unit test every argument has to be set explicitly,
        whether optional or not. Individual tests may override these values.
        """

        optionManager = OptionManager.get_instance()

        # (option name, value) pairs, applied in this order
        optionSettings = [
            (OptionConstants.OPTION_VERBOSITY, "debug"),
            (OptionConstants.OPTION_DB_NAME,
             "/home/diogo/Documents/RAINET_data/TAGC/rainetDatabase/db_testing/rainet_testing_DB.sqlite"
             ),
            (OptionConstants.OPTION_SPECIES, "human"),
            (OptionConstants.OPTION_OUTPUT_FOLDER,
             "/home/diogo/workspace/tagc-rainet-RNA/test/fr/tagc/rainet/core/test_results/enrichmentAnalysis/"
             ),
            (OptionConstants.OPTION_ANNOTATION_TABLE, "NetworkModule"),
            (OptionConstants.OPTION_MINIMUM_PROTEIN_ANNOTATION,
             OptionConstants.DEFAULT_MINIMUM_PROTEIN_ANNOTATION),
            (OptionConstants.OPTION_MINIMUM_PROTEIN_INTERACTION,
             OptionConstants.DEFAULT_MINIMUM_PROTEIN_INTERACTION),
            (OptionConstants.OPTION_NUMBER_RANDOMIZATIONS,
             OptionConstants.DEFAULT_NUMBER_RANDOMIZATIONS),
            (OptionConstants.OPTION_EXPRESSION_WARNING,
             OptionConstants.DEFAULT_EXPRESSION_WARNING),
            (OptionConstants.OPTION_MINIMUM_EXPRESSION,
             OptionConstants.DEFAULT_MINIMUM_EXPRESSION),
            (OptionConstants.OPTION_LOWER_TAIL,
             OptionConstants.DEFAULT_LOWER_TAIL),
        ]
        for optionName, optionValue in optionSettings:
            optionManager.set_option(optionName, optionValue)

        # Set the level of verbosity
        verbosity = OptionManager.get_instance().get_option(
            OptionConstants.OPTION_VERBOSITY)
        Logger.get_instance().set_level(verbosity)

        # Setting up SQL manager
        dbPath = OptionManager.get_instance().get_option(
            OptionConstants.OPTION_DB_NAME)
        SQLManager.get_instance().set_DBpath(dbPath)
        self.sql_session = SQLManager.get_instance().get_session()

        # setting up internal test folder paths
        self.expectedFolder = "/home/diogo/workspace/tagc-rainet-RNA/test/fr/tagc/rainet/core/enrichmentAnalysis/test_expected"
        self.outputFolder = OptionManager.get_instance().get_option(
            OptionConstants.OPTION_OUTPUT_FOLDER)

        # create instance of strategy
        # NOTE(review): if this class is a unittest.TestCase, the attribute
        # name "run" shadows TestCase.run on the instance - confirm it is
        # intended before renaming, since tests read self.run
        strategy = EnrichmentAnalysisStrategy()
        self.run = strategy
        self.run.sql_session = SQLManager.get_instance().get_session()
Beispiel #17
0
    def stop_chrono(self, message):
        """Log the last step duration and the total duration, then reset.

        Durations are measured from ``self.lastTime`` (step) and
        ``self.start_time`` (total) up to now. Both attributes are set back to
        0 afterwards.
        """

        now = time.time()
        since_last_step = now - self.lastTime
        since_start = now - self.start_time

        logger = Logger.get_instance()

        step_text = Timer.format_duration(since_last_step)
        logger.info("Step duration : " + step_text)

        total_text = Timer.format_duration(since_start)
        logger.info("\n\nSTOP CHRONO : " + message + ". Total duration " +
                    total_text)

        # Reset the chrono
        self.lastTime = 0
        self.start_time = 0
Beispiel #18
0
    def read_network_file(self):
        """Read the PPI network file and build the corresponding graph.

        Each non-blank line of ``self.networkFile`` holds two tab-separated
        protein names describing one interaction. Every distinct name gets an
        integer id, assigned in order of first appearance and starting at 1.

        Sets:
            ``self.graph``: ``igraph.Graph`` built from the id pairs.
            ``self.dictNames``: protein name -> graph id.
            ``self.proteinGraphIDDict``: graph id -> protein name.

        Returns:
            (graph, listOfNames, listOfTuples, dictNames), for testing
            purposes.
        """

        listOfTuples = []  #e.g. [ (0,1), (1,2), (0,2) ]
        listOfNames = []  #e.g. [('PRRT3_HUMAN', 'TMM17_HUMAN')]

        dictNames = {
        }  # key -> protein name, value -> graph index (internal id)
        idCounter = 0

        with open(self.networkFile, "r") as inFile:
            for line in inFile:
                line = line.strip()

                # A blank (e.g. trailing) line holds no interaction and would
                # otherwise break the two-value unpacking below
                if not line:
                    continue

                node1, node2 = line.split("\t")

                # ids start at 1: the counter is incremented before use
                for node in (node1, node2):
                    if node not in dictNames:
                        idCounter += 1
                        dictNames[node] = idCounter

                listOfNames.append((node1, node2))
                listOfTuples.append((dictNames[node1], dictNames[node2]))

        assert (len(listOfNames) == len(listOfTuples))

        graph = igraph.Graph(listOfTuples)

        Logger.get_instance().info(
            "NetworkScoreAnalysis.read_network_file : Number of PPIs %s" %
            (len(listOfNames)))
        Logger.get_instance().info(
            "NetworkScoreAnalysis.read_network_file : Number of unique proteins %s"
            % (len(dictNames)))

        # swap dictionary to have graph index as keys (note that there is 1-to-1 correspondence)
        proteinGraphIDDict = {
            dictNames[prot]: prot
            for prot in dictNames
        }  # key -> graph index (internal id), value -> protein name

        # The swap must not lose entries; comparing a dictionary with itself
        # could never detect that
        assert (len(proteinGraphIDDict) == len(dictNames))

        self.graph = graph
        self.proteinGraphIDDict = proteinGraphIDDict
        self.dictNames = dictNames

        # for testing purposes
        return graph, listOfNames, listOfTuples, dictNames
Beispiel #19
0
    def setUp(self):
        """Prepare options, logging, SQL session and strategy for a test.

        When running from the command line the option manager receives default
        values, but in a unit test every argument has to be set explicitly,
        whether optional or not. Individual tests may override these values.
        """

        optionManager = OptionManager.get_instance()

        # (option name, value) pairs, applied in this order
        optionSettings = [
            (OptionConstants.OPTION_VERBOSITY, "debug"),
            (OptionConstants.OPTION_DB_NAME, DB_PATH),
            (OptionConstants.OPTION_SPECIES, "human"),
            (OptionConstants.OPTION_OUTPUT_FOLDER,
             "/home/diogo/workspace/tagc-rainet-RNA/test/fr/tagc/rainet/core/test_results/"
             ),
            (OptionConstants.OPTION_MINIMUM_INTERACTION_SCORE,
             OptionConstants.DEFAULT_INTERACTION_SCORE),
            (OptionConstants.OPTION_RNA_BIOTYPES,
             OptionConstants.DEFAULT_RNA_BIOTYPES),
            (OptionConstants.OPTION_GENCODE, OptionConstants.DEFAULT_GENCODE),
            (OptionConstants.OPTION_EXPRESSION_VALUE_CUTOFF,
             OptionConstants.DEFAULT_EXPRESSION_VALUE_CUTOFF),
            (OptionConstants.OPTION_EXPRESSION_TISSUE_CUTOFF,
             OptionConstants.DEFAULT_EXPRESSION_TISSUE_CUTOFF),
            (OptionConstants.OPTION_LOW_MEMORY,
             OptionConstants.DEFAULT_LOW_MEMORY),
        ]
        for optionName, optionValue in optionSettings:
            optionManager.set_option(optionName, optionValue)

        # Set the level of verbosity
        verbosity = OptionManager.get_instance().get_option(
            OptionConstants.OPTION_VERBOSITY)
        Logger.get_instance().set_level(verbosity)

        # Setting up SQL manager
        SQLManager.get_instance().set_DBpath(DB_PATH)
        self.sql_session = SQLManager.get_instance().get_session()

        # setting up internal test folder paths
        self.expectedFolder = "/home/diogo/workspace/tagc-rainet-RNA/test/fr/tagc/rainet/core/test_expected/Report"
        baseOutputFolder = OptionManager.get_instance().get_option(
            OptionConstants.OPTION_OUTPUT_FOLDER)
        self.outputFolder = baseOutputFolder + "/Report/"

        # create instance of strategy
        self.strategy = AnalysisStrategy()

        # report only written for selected tests
        self.strategy.writeReportFile = 0
Beispiel #20
0
 def execute( self ):
     """Run a query read from a file against the database and export it.

     Steps: read the options, check that a species was given and matches
     the database, read the query file, perform the query and export the
     result.

     Raises RainetException if no species is provided.
     """

     # Get the option values
     option_manager = OptionManager.get_instance()
     self.DBPath = option_manager.get_option( OptionConstants.OPTION_DB_NAME )
     query_file_path = option_manager.get_option( OptionConstants.OPTION_QUERY_FILE )
     species = option_manager.get_option( OptionConstants.OPTION_SPECIES )

     # A species is mandatory
     if species is None or len( species) == 0:
         raise RainetException( "InteractiveQueryStrategy.execute: You must specify a species in your command line. Please check help.")

     # Build a SQL session to DB
     sql_manager = SQLManager.get_instance()
     sql_manager.set_DBpath( self.DBPath)
     sql_session = sql_manager.get_session()

     # Check if the species in the DB correspond to the species given by the user
     SQLManager.check_species( sql_session, species, True)

     logger = Logger.get_instance()

     # Read the query from file
     logger.info("InteractiveQueryStrategy.execute : Reading query...")
     query_string = self.read_query( query_file_path)

     # Get the result of the query on DB
     logger.info("InteractiveQueryStrategy.execute : Querying database...")
     query_result = self.perform_query( sql_session, query_string)

     # Export query result to file
     logger.info("InteractiveQueryStrategy.execute : Exporting result...")
     self.export_query_result( query_result, query_file_path)

     logger.info("InteractiveQueryStrategy.execute : Finished.")
Beispiel #21
0
 def read_query(self, query_file_path):
     """Read a query file and return its lines joined into one string.

     Line terminators are removed and the lines are concatenated without
     any separator.

     Raises RainetException if the query file cannot be opened.
     """

     try:
         file_handler = FileUtils.open_text_r( query_file_path)
     except RainetException as open_error:
         Logger.get_instance().error( "InteractiveQueryStrategy.read_query : Unable to open query file : " + query_file_path)
         raise RainetException( "InteractiveQueryStrategy.read_query : Unable to open query file : " + query_file_path, open_error)

     try:
         # Strip only the line terminator: blindly dropping the last
         # character would eat a real character of a final line that has
         # no trailing newline
         query_string = ''.join( line.rstrip( '\r\n') for line in file_handler)
     finally:
         # The handle must be released whatever happens while reading
         file_handler.close()

     return query_string
Beispiel #22
0
    def __init__( self, transcript_id ):
        """Build the object for a transcript ID known to the DataManager.

        The transcript ID is looked up in the DataManager data stored under
        ``DataConstants.RNA_ALL_KW``.

        Raises:
            NotRequiredInstantiationException: if the transcript ID is not
                found there; a warning is logged first.
        """

        # Imported here rather than at file level, as in the original code
        from fr.tagc.rainet.core.util.data.DataManager import DataManager

        knownRNAs = DataManager.get_instance().get_data(DataConstants.RNA_ALL_KW)

        # Unknown transcript: refuse the instantiation
        if transcript_id not in knownRNAs:
            Logger.get_instance().warning( "\n InteractingRNA.init : Transcript ID not found:\t" + str( transcript_id) )
            raise NotRequiredInstantiationException( "InteractingRNA.init : no insertion needed, RNA object not found.")

        self.transcriptID = transcript_id
Beispiel #23
0
    def _get_new_degree(self, degree, proteinsPerDegreeDictCopy):
        """Return the available degree closest to ``degree``, excluding it.

        The candidates are the keys of ``proteinsPerDegreeDictCopy`` other
        than ``degree``. A warning naming both degrees is logged.
        """

        # Ascending order matters: when two candidates are equally close,
        # min() keeps the first one, i.e. the lowest degree
        candidates = [
            candidate for candidate in sorted(proteinsPerDegreeDictCopy.keys())
            if candidate != degree
        ]

        newDegree = min(candidates,
                        key=lambda candidate: abs(candidate - degree))

        Logger.get_instance().warning(
            "NetworkScoreAnalysis._get_new_degree : No more proteins with degree %s. Sampling closest degree: %s."
            % (degree, newDegree))

        return newDegree
Beispiel #24
0
    def set_option(self, option_name, option_value):
        """Store ``option_value`` under ``option_name`` in ``self.optionDict``.

        The dictionary is created if it is still None. A None or empty name,
        or a None value, is not stored: a warning is logged instead.
        """

        if self.optionDict is None:
            self.optionDict = {}

        # Reject an unusable key first
        if option_name is None or len(option_name) == 0:
            Logger.get_instance().warning(
                "OptionManager.set_option: Trying to pass a None or void key on option dictionary."
            )
            return

        # Then reject a missing value
        if option_value is None:
            Logger.get_instance().warning(
                "OptionManager.set_option: Trying to pass a None or void value on option dictionary for key : "
                + option_name)
            return

        self.optionDict[option_name] = option_value
Beispiel #25
0
    def perform_query(self, keyword, query_string):
        """Evaluate a query expression and store its result under a keyword.

        ``query_string`` is appended to the text ``sql_session.`` and the
        resulting expression is evaluated with ``eval``. The value obtained is
        stored in ``self.data[keyword]``.

        Raises:
            RainetException: wrapping the original exception, if the
                evaluation fails.
        """

        # This local looks unused but is not: its name is part of the text
        # evaluated below, so it must exist and keep this exact name.
        sql_session = SQLManager.get_instance().get_session()

        full_query = 'sql_session.' + query_string

        Logger.get_instance().info("DataManager.init : query is '" +
                                   full_query + "'")

        try:
            # SECURITY NOTE(review): eval runs the text as arbitrary Python
            # with access to this scope. Only trusted query text must reach
            # this call.
            query_result = eval(full_query)
        except Exception as ex:
            raise RainetException(
                "DataManager.init : Exception occurred during query on DB", ex)

        self.data[keyword] = query_result
Beispiel #26
0
    def launch_insertion_TSV(self,
                             file_path,
                             has_headers,
                             headers,
                             class_name,
                             params,
                             default_values,
                             comment_char,
                             table_extension="",
                             clean_table=True):
        """Insert a TSV file into a table, unless already done, and record
        the insertion status.

        The insertion is performed by ``TSVParser.parse_file`` when
        ``SQLUtil.insert_data_required`` says it is needed (which takes
        ``self.forceOverride`` into account). Whenever a status is available,
        it is written to the TableStatus entry named
        ``class_name + table_extension``, even if the insertion failed.

        Raises:
            RainetException: re-raised as is when raised by the insertion, or
                wrapping any other exception.
        """

        composite_table_name = class_name + table_extension

        # Bound before the try so that the finally clause can always read it,
        # whatever interrupts the insertion
        status = None

        try:
            # Start timing
            Timer.get_instance().step("Inserting " + composite_table_name +
                                      ":")
            # Check whether the insertion is required or not, depending on
            # whether the data were already inserted and on the
            # forceOverride option
            if SQLUtil.insert_data_required(class_name, self.forceOverride,
                                            table_extension):
                Logger.get_instance().info("|--Starting insertion...")
                status = TSVParser.parse_file(file_path, has_headers, headers,
                                              class_name, params,
                                              default_values, comment_char,
                                              clean_table)
            else:
                Logger.get_instance().info(
                    "|--Data already inserted: insertion bypassed.")
        # If an exception is raised by controlled code, status of insertion is set to "Rainet Error"
        except RainetException as rainet_error:
            Logger.get_instance().error(rainet_error.to_string())
            status = Constants.STATUS_RAINET_ERROR
            # bare raise keeps the original traceback
            raise
        # If an exception is raised by uncontrolled code, status of insertion is set to "Error"
        except Exception as e:
            # str(e) is available on every exception, unlike e.message
            Logger.get_instance().error(str(e))
            status = Constants.STATUS_ERROR
            raise RainetException(
                "Abnormal Exception during insertion of " + class_name, e)
        # In all cases insert or update the status of the insertion in the TableStatus table
        finally:
            if status is not None:
                sql_session = SQLManager.get_instance().get_session()
                db_status_list = sql_session.query(TableStatus).filter(
                    TableStatus.tableName == composite_table_name).all()
                if not db_status_list:
                    sql_session.add(
                        TableStatus(composite_table_name, status, file_path))
                else:
                    db_status = db_status_list[0]
                    db_status.tableStatus = status
                    sql_session.add(db_status)
                SQLManager.get_instance().commit()
Beispiel #27
0
    def __init__(self, peptide_id):
        """Resolve a peptide ID to a protein ID through the cross references.

        The peptide ID is looked up in the DataManager data stored under
        ``DataConstants.PROTEIN_ENSP_XREF_KW``; when found, the first element
        of its entry becomes ``self.proteinID``. When not found, a debug
        message is logged and ``self.proteinID`` is not assigned here.

        Note: this assumes that a uniprot_ac present in the protein cross
        reference table also exists in the protein table.
        """

        # Imported here rather than at file level, as in the original code
        from fr.tagc.rainet.core.util.data.DataManager import DataManager

        xrefsByPeptide = DataManager.get_instance().get_data(
            DataConstants.PROTEIN_ENSP_XREF_KW)

        if peptide_id not in xrefsByPeptide:
            Logger.get_instance().debug(
                "\n MRNA.init : Peptide ID not found:\t" + str(peptide_id))
        else:
            matchingEntries = xrefsByPeptide[peptide_id]
            self.proteinID = matchingEntries[0]
Beispiel #28
0
    def associate_proteins(module, line):
        """Associate to a module every protein named on a line.

        The line is split on single spaces. Each token is searched first in
        the Protein table (as uniprotID or uniprotAC) and, if nothing
        matches, through ProteinCrossReference.crossReferenceID. A token
        that resolves to exactly one Protein has that protein added to the
        SQL session and passed to module.add_associated_protein(). A token
        that resolves to no Protein is logged at debug level and skipped.

        Raises RainetException when a token resolves to more than one
        Protein, when the single protein found is None, or when the token
        list is empty or None.
        """
        tokens = line.split(" ")

        session = SQLManager.get_instance().get_session()

        # NOTE(review): str.split(" ") always returns at least one element,
        # so for a str input this guard looks unreachable - confirm.
        if not tokens:
            raise RainetException(
                "NetworkModuleParser.associate_proteins : list of proteins is empty or None : "
                + line)

        for token in tokens:
            # First attempt: the token is a Uniprot ID or a Uniprot AC
            direct_match = or_(Protein.uniprotID == token,
                               Protein.uniprotAC == token)
            matches = session.query(Protein).filter(direct_match).all()

            # Second attempt: the token is a cross reference of a protein
            if not matches:
                matches = session.query(Protein).filter(
                    Protein.uniprotAC == ProteinCrossReference.protein_id,
                    ProteinCrossReference.crossReferenceID == token).all()

            # Nothing found at all: report it and move on to the next token
            if not matches:
                Logger.get_instance().debug(
                    "NetworkModuleParser.associate_proteins : No Protein found for ID = "
                    + token)
                continue

            # An ambiguous token is an error
            if len(matches) != 1:
                raise RainetException(
                    "NetworkModuleParser.associate_proteins : Abnormal number of Protein found for ID = "
                    + token + " : " + str(len(matches)))

            protein = matches[0]
            session.add(protein)

            # NOTE(review): defensive check kept from the original; a query
            # result is not expected to contain None - confirm.
            if protein is None:
                raise RainetException(
                    "NetworkModuleParser.associate_proteins : Protein found is none for ID = "
                    + token)

            module.add_associated_protein(protein)
Beispiel #29
0
    def filter_by_specificity(self, filtered_enrichment_results, annot_dict,
                              lnc_dict):
        """Keep the enrichment lines that satisfy the specificity filters.

        A line is kept when both of the following hold:
          - self.annotSpecificityFilter is -1 (filter off), or
            len(annot_dict[annotID]) <= self.annotSpecificityFilter
          - self.transcriptSpecificityFilter is -1 (filter off), or
            len(lnc_dict[transcriptID]) <= self.transcriptSpecificityFilter

        A dictionary is only consulted when its filter is switched on, so a
        disabled filter can never fail on an ID missing from its dictionary.

        filtered_enrichment_results -- list of tab-separated lines; the first
            item is the header and is always kept. In the other lines
            column 0 is the transcript ID and column 1 the annotation ID.
        annot_dict -- maps an annotation ID to a sized collection
            (presumably the transcripts enriched for it - confirm).
        lnc_dict -- maps a transcript ID to a sized collection
            (presumably the annotations enriched for it - confirm).

        Returns a new list holding the header followed by the kept lines, in
        their original order. The number of rejected lines is logged.
        """
        # The thresholds do not change while filtering: read them only once
        annot_limit = self.annotSpecificityFilter
        transcript_limit = self.transcriptSpecificityFilter
        annot_filter_on = annot_limit != -1
        transcript_filter_on = transcript_limit != -1

        notPassingFilters = 0

        # write header
        filteredEnrichmentResults = [filtered_enrichment_results[0]]

        for enrich in filtered_enrichment_results[1:]:

            # Always split, so malformed lines are still reported (IndexError)
            # even when both filters are off
            spl = enrich.split("\t")

            annotID = spl[1]
            transcriptID = spl[0]

            if annot_filter_on and len(annot_dict[annotID]) > annot_limit:
                notPassingFilters += 1
                continue

            if transcript_filter_on and len(
                    lnc_dict[transcriptID]) > transcript_limit:
                notPassingFilters += 1
                continue

            filteredEnrichmentResults.append(enrich)

        Logger.get_instance().info(
            "filter_by_specificity : Filtered out %s enrichments" %
            (notPassingFilters))

        return filteredEnrichmentResults
Beispiel #30
0
    def create_object_from_tsv(self, parameter_value_list):
        """Create an instance of self.className from a list of values.

        Every value is converted with str() and passed positionally to the
        constructor of the class named by self.className. The new object's
        add_to_session() method is then called.

        The values are handed to the constructor as real arguments. They are
        not pasted into a source string that is then evaluated: that approach
        only escaped double quotes, so a value containing a backslash or a
        newline was corrupted or made the generated code invalid, and data
        could end up being executed as code.

        parameter_value_list -- non-empty sequence of constructor arguments.

        Returns the created instance, or None when the class constructor
        raised NotRequiredInstantiationException.

        Raises RainetException when parameter_value_list is empty or None, or
        when anything else goes wrong during the creation of the object.
        """
        # Test if the parameter list contains elements
        if not parameter_value_list:
            raise RainetException(
                "DataFactory.create_object_from_tsv: No value provided for the object creation of class "
                + self.className)

        # The constructor receives strings only, as with the generated code
        arguments = [str(parameter_value)
                     for parameter_value in parameter_value_list]

        #=======================================================================
        # Call the constructor and add the object to SQLAlchemy session if required
        #=======================================================================
        new_instance = None
        try:
            # Textual form of the call, built for the debug log only
            Logger.get_instance().debug(
                "command = " + self.className + "(" + ",".join(
                    "\"" + argument.replace("\"", "\\\"") + "\""
                    for argument in arguments) + ")")
            # SECURITY NOTE(review): eval() is kept to resolve the class name
            # in this scope; self.className must never come from untrusted
            # input.
            instance_class = eval(self.className)
            new_instance = instance_class(*arguments)
            new_instance.add_to_session()
        # Possibly the object does not have to be created because it is already in database
        # In that case, a NotRequiredInstantiationException is raised by the class constructor
        except NotRequiredInstantiationException:
            pass
        # If any other Exception occurred, something went wrong
        except Exception as excep:
            raise RainetException(
                "DataFactory.create_object_from_tsv : An issue occurred in the object creation.",
                excep)

        return new_instance