Example #1
    def run(self):

        Timer.get_instance().step("Reading RAINET DB..")
        self.read_rainet_db()

        Timer.get_instance().step("Reading enrichment file..")
        self.read_enrichment_file()
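
All of these snippets drive the same Timer singleton. Its RAINET implementation is not shown here; the following is a minimal sketch of a compatible stand-in, inferred only from the calls that appear in these examples (get_instance, start_chrono, step, time_point, stop_chrono), not the project's actual code.

import time


class Timer(object):
    """Hypothetical stand-in for the RAINET Timer singleton."""

    _instance = None

    def __init__(self):
        self._start = self._last = time.time()

    @classmethod
    def get_instance(cls):
        # Lazily create the single shared instance
        if cls._instance is None:
            cls._instance = cls()
        return cls._instance

    def start_chrono(self):
        # Reset the global start time
        self._start = time.time()
        self._last = self._start

    def step(self, label):
        # Print the label with the time elapsed since the previous step
        now = time.time()
        print("%s (+%.2fs)" % (label, now - self._last))
        self._last = now

    def time_point(self):
        # Report elapsed time without starting a new step
        print("elapsed: %.2fs" % (time.time() - self._start))

    def stop_chrono(self, message):
        print("%s (total %.2fs)" % (message, time.time() - self._start))
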
Example #2
    def launch_insertion_TSV(self,
                             file_path,
                             has_headers,
                             headers,
                             class_name,
                             params,
                             default_values,
                             comment_char,
                             table_extension="",
                             clean_table=True):

        composite_table_name = class_name + table_extension

        try:
            # Start timing
            Timer.get_instance().step("Inserting " + composite_table_name +
                                      ":")
            # Check whether the insertion is required, depending on whether the
            # data were already inserted and on the forceOverride option
            if SQLUtil.insert_data_required(class_name, self.forceOverride,
                                            table_extension):
                Logger.get_instance().info("|--Starting insertion...")
                status = TSVParser.parse_file(file_path, has_headers, headers,
                                              class_name, params,
                                              default_values, comment_char,
                                              clean_table)
            else:
                Logger.get_instance().info(
                    "|--Data already inserted: insertion bypassed.")
                status = None
        # If an exception is raised by controlled code, the insertion status is set to "Rainet Error"
        except RainetException as re:
            Logger.get_instance().error(re.to_string())
            status = Constants.STATUS_RAINET_ERROR
            raise re
        # If an exception is raised by uncontrolled code, the insertion status is set to "Error"
        except Exception as e:
            Logger.get_instance().error(str(e))
            status = Constants.STATUS_ERROR
            raise RainetException(
                "Abnormal Exception during insertion of " + class_name, e)
        # In all cases, insert or update the insertion status in the TableStatus table in the DB
        finally:
            if status is not None:
                sql_session = SQLManager.get_instance().get_session()
                db_status_list = sql_session.query(TableStatus).filter(
                    TableStatus.tableName == composite_table_name).all()
                if not db_status_list:
                    sql_session.add(
                        TableStatus(composite_table_name, status, file_path))
                else:
                    db_status = db_status_list[0]
                    db_status.tableStatus = status
                    sql_session.add(db_status)
                SQLManager.get_instance().commit()
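
For orientation, here is a hedged call sketch for launch_insertion_TSV. Only the signature is taken from the example above; the instance name, file path, class name, and column lists below are illustrative placeholders.

# Hypothetical invocation of the method shown above; all values are illustrative
strategy.launch_insertion_TSV(
    "/data/protein_domains.tsv",  # file_path
    True,                         # has_headers: the file carries a header line
    None,                         # headers: None means use the file's own header
    "ProteinDomain",              # class_name: mapped class / table to fill
    ["proteinID", "domainID"],    # params: columns mapped to constructor arguments
    None,                         # default_values: no overrides
    "#",                          # comment_char: skip lines starting with '#'
    table_extension="SMART",      # status tracked under class_name + extension
    clean_table=False)            # keep existing rows in a shared table
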
Example #3
    def run(self):

        Logger.get_instance().info("TemplateScriptWithClass.run: Starting...")

        #===================================================================
        # Initialising datasets
        #===================================================================

        Timer.get_instance().step("Initialising interaction datasets..")

        self.function_one()
Example #4
    def run(self):
        
        #===============================================================================
        # Run analysis / processing
        #===============================================================================

        Timer.get_instance().step( "Read RainetDB table..")
        self.read_rainet_db( )

        Timer.get_instance().step( "Read enrichment file..")            
        pairsToWrite, enrichmentsDict = self.read_enrichment_data()

        return pairsToWrite, enrichmentsDict
Example #5
    def launch_insertion_Fasta(self,
                               file_path,
                               class_name,
                               regex,
                               groups,
                               params,
                               params_values,
                               comment_char,
                               clean_table=True):

        try:
            Timer.get_instance().step("Inserting " + class_name + ":")
            if SQLUtil.insert_data_required(class_name, self.forceOverride):
                Logger.get_instance().info("|--Starting insertion...")
                status = FastaParser.parse_file(file_path, class_name, regex,
                                                groups, params, params_values,
                                                comment_char, clean_table)
            else:
                Logger.get_instance().info(
                    "|--Data already inserted: insertion bypassed.")
                status = None
        except RainetException as re:
            Logger.get_instance().error(re.to_string())
            status = Constants.STATUS_RAINET_ERROR
            raise re
        except Exception as e:
            Logger.get_instance().error(str(e))
            status = Constants.STATUS_ERROR
            raise RainetException(
                "Abnormal Exception during insertion of " + class_name, e)
        finally:
            if status is not None:
                sql_session = SQLManager.get_instance().get_session()
                db_status_list = sql_session.query(TableStatus).filter(
                    TableStatus.tableName == class_name).all()
                if not db_status_list:
                    sql_session.add(TableStatus(class_name, status, file_path))
                else:
                    db_status = db_status_list[0]
                    db_status.tableStatus = status
                    sql_session.add(db_status)
                SQLManager.get_instance().commit()
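
Examples #2 and #5 end with the same finally-block pattern: query TableStatus for an existing row, then either insert a new record or update the old one before committing. Factored out as a generic SQLAlchemy sketch (the TableStatus columns and SQLManager come from the examples; the helper itself is hypothetical):

def upsert_table_status(sql_session, table_name, status, file_path):
    # Fetch at most one existing status row for this table name
    db_status = sql_session.query(TableStatus).filter(
        TableStatus.tableName == table_name).first()
    if db_status is None:
        # No previous record: insert a new one
        sql_session.add(TableStatus(table_name, status, file_path))
    else:
        # A record exists: update its status in place
        db_status.tableStatus = status
    SQLManager.get_instance().commit()
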
Example #6
    def run(self):

        Timer.get_instance().step("Reading annotation file..")
        run.read_annotation_file()

        Timer.get_instance().step("Reading external files..")
        run.read_external_files()

        Timer.get_instance().step("Reading background file..")
        run.read_background_list()

        Timer.get_instance().step("Creating output file..")
        run.calculate_odds_ratio()
Example #7
    def run(self):

        Logger.get_instance().info("NetworkScoreAnalysis.run: Starting...")

        #===================================================================
        # Initialising datasets
        #===================================================================

        Timer.get_instance().step("Reading network file..")

        #self.protein_cross_references()
        self.read_network_file()
        self.calculate_protein_degree()

        Timer.get_instance().step("Reading catrapid file..")

        self.read_catrapid_file()
        self.pick_top_proteins()

        Timer.get_instance().step("Calculate metrics..")

        # calculate metrics for each of the wanted topPartner values
        for topVal in self.topPartners:
            self.calculate_metrics(topVal)
Example #8
    def run(self):

        # Approach: filters are applied through the filteredEnrichmentResults variable, which changes depending on the filtering applied

        # Read row annotation file, if present
        if self.rowAnnotationFile != "":
            Timer.get_instance().step("Read row annotation file..")
            self.rowAnnotation = self.read_annotation_file(
                self.rowAnnotationFile)

        # Read col annotation file, if present
        if self.colAnnotationFile != "":
            Timer.get_instance().step("Read column annotation file..")
            self.colAnnotation = self.read_annotation_file(
                self.colAnnotationFile)

        Timer.get_instance().step("Read Enrichment per RNA file..")
        listRNASignificantEnrich, countRealEnrichments, countRandomEnrichments = self.read_enrichment_per_rna_file(
        )

        Timer.get_instance().step("Read Enrichment results file..")
        dictPairs, filteredEnrichmentResults = self.read_enrichment_results_file(
            listRNASignificantEnrich, countRealEnrichments,
            countRandomEnrichments)

        Timer.get_instance().step("Write specificity ranking..")
        annotDict, lncDict = self.rank_by_specificity(dictPairs,
                                                      self.filterWarningValue)

        # Filter by specificity, if wanted
        if self.annotSpecificityFilter != -1 or self.transcriptSpecificityFilter != -1:
            Timer.get_instance().step(
                "Write enrichment results file filtered by specificity..")
            filteredEnrichmentResults = self.filter_by_specificity(
                filteredEnrichmentResults, annotDict, lncDict)

        # Filter by top enrichments (observed interactions), if wanted
        if self.topEnrichmentsPerComplex != -1:
            Timer.get_instance().step(
                "Write enrichment results file filtered by top enrichments per complex.."
            )
            _, filteredEnrichmentResults = self.filter_by_observed_interactions(
                filteredEnrichmentResults)

        Timer.get_instance().step("Write filtered enrichment results file..")
        self.write_enrichment_results(filteredEnrichmentResults, dictPairs,
                                      countRealEnrichments,
                                      countRandomEnrichments)
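
The comment at the top of this example describes the design: a single variable, filteredEnrichmentResults, is threaded through optional filter steps so that only the final value is written. The skeleton of that pattern, stripped of domain detail (all names below are placeholders):

results = read_results()            # initial, unfiltered data
if specificity_filter_enabled:      # hypothetical flag mirroring the checks above
    results = filter_by_specificity(results)
if top_filter_enabled:              # each step rebinds the same variable
    results = filter_by_top(results)
write_results(results)              # only the fully filtered value is written
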
Example #9
        statistic, pvalue = stats.ttest_ind(sample1, sample2, equal_var=False)

        return statistic, pvalue

    # #
    # Run multipletest correction and return pvalues
    def multiple_test_correction(self, pvalues, meth="fdr_bh"):
        return multipletests(pvalues, method=meth)[1]


if __name__ == "__main__":

    try:

        # Start chrono
        Timer.get_instance().start_chrono()
        print "STARTING " + SCRIPT_NAME

        #===============================================================================
        # Get input arguments
        #===============================================================================
        parser = argparse.ArgumentParser(description=DESC_COMMENT)

        # positional args
        parser.add_argument('interactionFile',
                            metavar='interactionFile',
                            type=str,
                            help='Catrapid interactions file.')
        parser.add_argument(
            'transcriptTypesFile',
            metavar='transcriptTypesFile',
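
Example #9 pairs SciPy's Welch t-test (equal_var=False, so no equal-variance assumption) with the Benjamini-Hochberg correction from statsmodels. A self-contained sketch of that combination, using made-up numbers:

from scipy import stats
from statsmodels.stats.multitest import multipletests

# Two illustrative samples (made-up data)
sample1 = [1.2, 1.9, 2.4, 2.0, 1.7]
sample2 = [2.8, 3.1, 2.6, 3.4, 2.9]

# Welch's t-test: does not assume equal variances
statistic, pvalue = stats.ttest_ind(sample1, sample2, equal_var=False)

# multipletests returns (reject, corrected_pvalues, alphacSidak, alphacBonf);
# index [1] extracts the corrected p-values, as in multiple_test_correction above
pvalues = [pvalue, 0.03, 0.2, 0.0004]
corrected = multipletests(pvalues, method="fdr_bh")[1]
print(corrected)
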
Example #10
            args.outputFolder += "/"

        # Initialise class
        run = ProcessGTExData(args.annotationFile, args.expressionFile,
                              args.tissueLevel, args.minimumSamples,
                              args.outputFolder, args.writeReport)

        #===============================================================================
        # Run analysis / processing
        #===============================================================================

        # Create Logger instance by using the first log action.
        Logger.get_instance().info("ProcessGTExData : Starting...")

        # Start chrono
        Timer.get_instance().start_chrono()

        # Wipe output and log from previous runs
        run.clean_files()

        # Read annotation file
        Timer.get_instance().step("reading annotation file..")
        sampleTissue, tissueSample, problematicSamples = run.read_tissue_annotations(
        )

        Timer.get_instance().step("reading expression file..")
        # Read expression file, using annotations and Process sample data into a single value
        # Note: nested function so that resource-heavy dictionary object is not duplicated
        run.average_sample_values(
            run.read_transcript_expression(sampleTissue, tissueSample,
                                           problematicSamples))
Example #11
                        genes = self.geneDict[liuGeneID]
                        for ge in genes:
                            outFile2.write(ge + "\n")

        print "read_tableS10: read %s lines." % nLines

        outFile1.close()
        outFile2.close()


if __name__ == "__main__":

    try:

        # Start chrono
        Timer.get_instance().start_chrono()

        print "STARTING " + SCRIPT_NAME

        #===============================================================================
        # Get input arguments, initialise class
        #===============================================================================
        parser = argparse.ArgumentParser(description=DESC_COMMENT)

        # positional args
        parser.add_argument('rainetDB',
                            metavar='rainetDB',
                            type=str,
                            help='Rainet database to be used for ID mapping.')
        parser.add_argument(
            'tableS1',
Example #12
    def parse_file(file_path,
                   has_headers,
                   group_list,
                   class_name,
                   parameter_name_list,
                   parameter_value_list,
                   comment_symbol,
                   clean_table=False):

        # Initialize the status of the insertion
        status = Constants.STATUS_OK

        # Open the file to parse in 'read' mode
        input_file = FileUtils.open_text_r(file_path)

        # If required, look for the header line (first non-empty line)
        if has_headers:
            line = input_file.readline()
            while line != '' and line.strip() == '':
                line = input_file.readline()
            # If no headers are provided by the user, use the file headers
            if group_list is None:
                group_list = line.rstrip("\n").split("\t")

        # Build the map of parameter name to header index
        # If a parameter name is not in the header the index is set to -1
        # and a warning message is sent
        parameter_to_header_index_map = {}
        for parameter_name in parameter_name_list:
            try:
                index = group_list.index(parameter_name)
            except ValueError:
                index = -1
                Logger.get_instance().warning(
                    "TSVParser.parse_file : The parameter '" + parameter_name +
                    "' is not in the header list : " + str(group_list) +
                    ".\n Check whether it is normal or not.")
                status = Constants.STATUS_WARNING
            parameter_to_header_index_map[parameter_name] = index

        # Build the Factory to be used to create the objects
        data_factory = DataFactory(class_name)
        if clean_table:
            data_factory.clean_table()

        # Parse the lines in the files and create one object of the requested type per line
        instance_list = []
        line = input_file.readline()
        counter = 0
        while line != '':

            # Ignore the empty and comment lines
            if line.strip() != '' and not line.startswith(comment_symbol):
                line_value_list = line.split("\t")
                # Check that the line does not have more columns than headers
                if len(line_value_list) > len(group_list):
                    raise RainetException(
                        "TSVParser.parse_file : Bad line format in file. Line does not have the right number of values.\n"
                        + " Number of values = " + str(len(line_value_list)) +
                        " != Number of headers = " + str(len(group_list)) +
                        ".\n" + " Line = " + line)

                # Build the ordered list of values required for object creation.
                # If no parameter_value_list is provided, create a new 'None' one;
                # if one is provided, override only the values corresponding
                # to a parameter found in the file header
                if parameter_value_list is None:
                    parameter_value_list = [None] * len(parameter_name_list)
                value_index = 0
                for parameter_name in parameter_name_list:
                    parameter_index = parameter_to_header_index_map[
                        parameter_name]
                    if parameter_index >= 0:
                        parameter_value_list[value_index] = line_value_list[
                            parameter_index].strip()
                    value_index += 1

                #Call for the Object creation
                new_instance = data_factory.create_object_from_tsv(
                    parameter_value_list)
                if new_instance is not None:
                    instance_list.append(new_instance)

            # Read a new line
            line = input_file.readline()

            # For large files: log progress and commit to the database every 100000 lines
            counter += 1
            if counter % 100000 == 0:
                Logger.get_instance().info(
                    "TSVParser.parse_file : %s Read %i lines.." %
                    (file_path, counter))
                Timer.get_instance().time_point()
                SQLManager.get_instance().commit()
                instance_list = []

        # Close the input file
        input_file.close()

        # Commit the SQLAlchemy session
        Logger.get_instance().info(
            "TSVParser.parse_file : Committing SQL session.")
        SQLManager.get_instance().commit()

        # Return the status of the insertion
        return status
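
The counter logic above keeps memory bounded on very large files by committing (and dropping the accumulated instance list) every 100000 lines rather than only at the end. The chunked-commit idea in isolation (session and make_object are placeholders, not RAINET names):

CHUNK_SIZE = 100000  # commit frequency used in the parser above

def insert_in_chunks(lines, session, make_object):
    # make_object stands in for per-line object creation
    pending = 0
    for line in lines:
        session.add(make_object(line))
        pending += 1
        if pending % CHUNK_SIZE == 0:
            # Flush accumulated objects so memory does not grow unbounded
            session.commit()
    # Commit the final partial chunk
    session.commit()
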
Example #13
    ### write filtered table to output file
    filteredTable.to_csv(bedFile + FITLERED_OUTPUT_SUFFIX,
                         sep="\t",
                         header=False,
                         index=False)
    # Note: pandas may change float precision when writing
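    # If exact formatting matters, to_csv also accepts a float_format argument
    # (standard pandas API), e.g. float_format="%.10g"; left as a comment here
    # because the original code does not use it.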


if __name__ == "__main__":

    try:

        # Start chrono
        Timer.get_instance().start_chrono()
        print "STARTING " + SCRIPT_NAME

        #===============================================================================
        # Get input arguments
        #===============================================================================
        parser = argparse.ArgumentParser(description=DESC_COMMENT)

        # positional args
        parser.add_argument('bedFile',
                            metavar='bedFile',
                            type=str,
                            help='Input bed file to be processed.')
        parser.add_argument(
            '--minPval',
            metavar='minPval',
Example #14
        # Get the arguments
        args = parser.parse_args()

        # Initialise class
        run = KnownScaffoldValidation(
            args.catRAPIDFile, args.validatedFile, args.wantedRNAFile,
            args.rainetDB, args.outputFolder, args.discriminativePowerCutoff,
            args.zscoreCutoff, args.topProportion, args.npinter,
            args.columnForPlot, args.searchSpaceFile, args.zscoreToPropensity)

        #===============================================================================
        # Run analysis / processing
        #===============================================================================

        # Start chrono
        Timer.get_instance().start_chrono()

        Timer.get_instance().step("reading catRAPID file..")

        # Read CatRAPID
        catRAPIDInteractingProteins, catRAPIDNonInteractingProteins, scoreDict, scoreDictMean = run.read_catRAPID_file(
        )

        # Read NPInter or given list of proteins

        if run.npinter:
            experimentallyValidatedProteins = run.read_NPInter_file()
        else:
            experimentallyValidatedProteins = run.read_manual_list_file()

        #===============================================================================
Example #15
    def insert_data(self):

        # Create Logger instance by using the first log action.
        Logger.get_instance().info(
            "InsertionStrategy.insert_data: Starting...")

        #         # Backup the database file
        #         try:
        #             Logger.get_instance().info( "InsertionStrategy.insert_data:   Backuping DB file..." )
        #             shutil.copyfile(self.DBPath, self.DBPath + ".back")
        #         except IOError as ioe:
        #             Logger.get_instance().info( " warning : Unable to backup database file : " + self.DBPath + " : " + str( ioe))

        # Create database sqlite file at the provided path
        SQLManager.get_instance().build_database(self.DBPath,
                                                 self.forceOverride)

        self.check_database_tables()

        # Retrieve the insertion properties
        PropertyManager.get_instance().read_properties(
            OptionManager.get_instance().get_option(
                OptionConstants.OPTION_INSERTION_PROPERTIES_PATH, True))

        # Start chrono
        Timer.get_instance().start_chrono()

        # Indicate insertion mode
        if self.forceOverride:
            Logger.get_instance().info(" -- MODE FORCE OVERRIDE -- ")
        else:
            Logger.get_instance().info(" -- MODE RESUME -- ")

        #=======================================================================
        # INSERTION OF DATA
        #=======================================================================
        try:

            #===================================================================
            # PROTEIN DEFINITION
            #===================================================================

            # Parse the protein file
            input_file = PropertyManager.get_instance().get_property(
                DataConstants.PROTEIN_UNIPROT_DEFINITION_PROPERTY, True)
            self.launch_insertion_TSV(input_file, True,
                                      DataConstants.PROTEIN_HEADERS,
                                      DataConstants.PROTEIN_CLASS,
                                      DataConstants.PROTEIN_PARAMS, None,
                                      DataConstants.PROTEIN_COMMENT_CHAR)

            # Parse the protein cross references file
            input_file = PropertyManager.get_instance().get_property(
                DataConstants.PROTEIN_CROSSREFERENCES_PROPERTY, True)
            self.launch_insertion_TSV(
                input_file, False,
                DataConstants.PROTEIN_CROSS_REFERENCE_HEADERS,
                DataConstants.PROTEIN_CROSS_REFERENCE_CLASS,
                DataConstants.PROTEIN_CROSS_REFERENCE_PARAMS, None,
                DataConstants.PROTEIN_CROSS_REFERENCE_COMMENT_CHAR)

            # Parse the protein isoform file
            input_file = PropertyManager.get_instance().get_property(
                DataConstants.PROTEIN_ISOFORMS_PROPERTY, True)
            self.launch_insertion_Fasta(
                input_file, DataConstants.ISOFORM_CLASS,
                DataConstants.ISOFORM_REGULAR_EXPRESSION,
                DataConstants.ISOFORM_GROUPS, DataConstants.ISOFORM_PARAMS,
                DataConstants.ISOFORM_PARAMS_VALUE_ALTERNATIVE,
                DataConstants.ISOFORM_COMMENT_CHAR)

            # Parse the protein domain file of SMART DB
            input_file = PropertyManager.get_instance().get_property(
                DataConstants.PROTEIN_DOMAIN_SMART_PROPERTY, True)
            self.launch_insertion_TSV(
                input_file, True, DataConstants.PROTEIN_DOMAIN_HEADERS_SMART,
                DataConstants.PROTEIN_DOMAIN_CLASS,
                DataConstants.PROTEIN_DOMAIN_PARAM_SMART,
                DataConstants.PROTEIN_DOMAIN_VALUE_SMART,
                DataConstants.PROTEIN_DOMAIN_COMMENT_CHAR, "SMART", False)

            # Parse the protein domain file of PFAM DB
            input_file = PropertyManager.get_instance().get_property(
                DataConstants.PROTEIN_DOMAIN_PFAM_PROPERTY, True)
            self.launch_insertion_TSV(
                input_file, False, DataConstants.PROTEIN_DOMAIN_HEADERS_PFAM,
                DataConstants.PROTEIN_DOMAIN_CLASS,
                DataConstants.PROTEIN_DOMAIN_PARAM_PFAM,
                DataConstants.PROTEIN_DOMAIN_VALUE_PFAM,
                DataConstants.PROTEIN_DOMAIN_COMMENT_CHAR, "PFAM", False)

            #===================================================================
            # FUNCTION AND PATHWAY ANNOTATIONS
            #===================================================================

            # Parse the Gene Ontology file
            input_file = PropertyManager.get_instance().get_property(
                DataConstants.GENE_ONTOLOGY_DEFINITION_PROPERTY, True)
            self.launch_insertion_Obo(
                input_file, DataConstants.GENE_ONTOLOGY_CLASS,
                DataConstants.GENE_ONTOLOGY_ID_TAG,
                DataConstants.GENE_ONTOLOGY_NAME_TAG,
                DataConstants.GENE_ONTOLOGY_NAMESPACE_TAG)

            # Parse the Protein Gene Ontology annotation file
            input_file = PropertyManager.get_instance().get_property(
                DataConstants.GENE_ONTOLOGY_ANNOTATION_PROPERTY, True)
            self.launch_insertion_TSV(
                input_file, False, DataConstants.PROTEIN_GO_ANNOTATION_HEADERS,
                DataConstants.PROTEIN_GO_ANNOTATION_CLASS,
                DataConstants.PROTEIN_GO_ANNOTATION_PARAMS, None,
                DataConstants.PROTEIN_GO_ANNOTATION_COMMENT_CHAR)

            # Parse the KEGG pathway file
            input_file = PropertyManager.get_instance().get_property(
                DataConstants.KEGG_PATHWAY_DEFINITION_PROPERTY, True)
            self.launch_insertion_TSV(input_file, True,
                                      DataConstants.KEGG_PATHWAY_HEADERS,
                                      DataConstants.KEGG_PATHWAY_CLASS,
                                      DataConstants.KEGG_PATHWAY_PARAMS, None,
                                      DataConstants.KEGG_PATHWAY_COMMENT_CHAR)

            # Parse the Protein KEGG Pathway annotation file
            input_file = PropertyManager.get_instance().get_property(
                DataConstants.KEGG_PATHWAY_ANNOTATION_PROPERTY, True)
            self.launch_insertion_TSV(
                input_file, True,
                DataConstants.KEGG_PATHWAY_ANNOTATION_HEADERS,
                DataConstants.KEGG_PATHWAY_ANNOTATION_CLASS,
                DataConstants.KEGG_PATHWAY_ANNOTATION_PARAMS, None,
                DataConstants.KEGG_PATHWAY_ANNOTATION_COMMENT_CHAR)

            #===================================================================
            # REACTOME
            #===================================================================

            # Parse the Reactome pathway file
            input_file = PropertyManager.get_instance().get_property(
                DataConstants.REACTOME_PATHWAY_DEFINITION_PROPERTY, True)
            self.launch_insertion_TSV(
                input_file, False, DataConstants.REACTOME_PATHWAY_HEADERS,
                DataConstants.REACTOME_PATHWAY_CLASS,
                DataConstants.REACTOME_PATHWAY_PARAMS, None,
                DataConstants.REACTOME_PATHWAY_COMMENT_CHAR)

            # Parse the Protein Reactome Pathway annotation file
            input_file = PropertyManager.get_instance().get_property(
                DataConstants.REACTOME_PATHWAY_ANNOTATION_PROPERTY, True)
            self.launch_insertion_TSV(
                input_file, False,
                DataConstants.REACTOME_PATHWAY_ANNOTATION_HEADERS,
                DataConstants.REACTOME_PATHWAY_ANNOTATION_CLASS,
                DataConstants.REACTOME_PATHWAY_ANNOTATION_PARAMS, None,
                DataConstants.REACTOME_PATHWAY_ANNOTATION_COMMENT_CHAR)

            #===================================================================
            # BIOPLEX
            #===================================================================

            # Parse the file listing Bioplex clusters
            input_file = PropertyManager.get_instance().get_property(
                DataConstants.BIOPLEX_CLUSTER_DEFINITION_PROPERTY, True)
            self.launch_insertion_TSV(
                input_file, False, DataConstants.BIOPLEX_CLUSTER_HEADERS,
                DataConstants.BIOPLEX_CLUSTER_CLASS,
                DataConstants.BIOPLEX_CLUSTER_PARAMS, None,
                DataConstants.BIOPLEX_CLUSTER_COMMENT_CHAR)

            # Parse the file with Bioplex annotations
            input_file = PropertyManager.get_instance().get_property(
                DataConstants.BIOPLEX_ANNOTATION_PROPERTY, True)
            self.launch_insertion_TSV(
                input_file, False, DataConstants.BIOPLEX_ANNOTATION_HEADERS,
                DataConstants.BIOPLEX_ANNOTATION_CLASS,
                DataConstants.BIOPLEX_ANNOTATION_PARAMS, None,
                DataConstants.BIOPLEX_ANNOTATION_COMMENT_CHAR)

            #===================================================================
            # WAN CLUSTERS
            #===================================================================

            # Parse the file listing Wan clusters
            input_file = PropertyManager.get_instance().get_property(
                DataConstants.WAN_CLUSTER_DEFINITION_PROPERTY, True)
            self.launch_insertion_TSV(input_file, False,
                                      DataConstants.WAN_CLUSTER_HEADERS,
                                      DataConstants.WAN_CLUSTER_CLASS,
                                      DataConstants.WAN_CLUSTER_PARAMS, None,
                                      DataConstants.WAN_CLUSTER_COMMENT_CHAR)

            # Parse the file with Wan annotations
            input_file = PropertyManager.get_instance().get_property(
                DataConstants.WAN_ANNOTATION_PROPERTY, True)
            self.launch_insertion_TSV(
                input_file, False, DataConstants.WAN_ANNOTATION_HEADERS,
                DataConstants.WAN_ANNOTATION_CLASS,
                DataConstants.WAN_ANNOTATION_PARAMS, None,
                DataConstants.WAN_ANNOTATION_COMMENT_CHAR)

            #===================================================================
            # CORUM CLUSTERS
            #===================================================================

            # Parse the file listing Corum clusters
            input_file = PropertyManager.get_instance().get_property(
                DataConstants.CORUM_CLUSTER_DEFINITION_PROPERTY, True)
            self.launch_insertion_TSV(input_file, False,
                                      DataConstants.CORUM_CLUSTER_HEADERS,
                                      DataConstants.CORUM_CLUSTER_CLASS,
                                      DataConstants.CORUM_CLUSTER_PARAMS, None,
                                      DataConstants.CORUM_CLUSTER_COMMENT_CHAR)

            # Parse the file with Corum annotations
            input_file = PropertyManager.get_instance().get_property(
                DataConstants.CORUM_ANNOTATION_PROPERTY, True)
            self.launch_insertion_TSV(
                input_file, False, DataConstants.CORUM_ANNOTATION_HEADERS,
                DataConstants.CORUM_ANNOTATION_CLASS,
                DataConstants.CORUM_ANNOTATION_PARAMS, None,
                DataConstants.CORUM_ANNOTATION_COMMENT_CHAR)

            #===================================================================
            # CUSTOM CLUSTERS
            #===================================================================

            # Parse the file listing Custom clusters
            input_file = PropertyManager.get_instance().get_property(
                DataConstants.CUSTOM_CLUSTER_DEFINITION_PROPERTY, True)
            self.launch_insertion_TSV(
                input_file, False, DataConstants.CUSTOM_CLUSTER_HEADERS,
                DataConstants.CUSTOM_CLUSTER_CLASS,
                DataConstants.CUSTOM_CLUSTER_PARAMS, None,
                DataConstants.CUSTOM_CLUSTER_COMMENT_CHAR)

            # Parse the file with Custom annotations
            input_file = PropertyManager.get_instance().get_property(
                DataConstants.CUSTOM_ANNOTATION_PROPERTY, True)
            self.launch_insertion_TSV(
                input_file, False, DataConstants.CUSTOM_ANNOTATION_HEADERS,
                DataConstants.CUSTOM_ANNOTATION_CLASS,
                DataConstants.CUSTOM_ANNOTATION_PARAMS, None,
                DataConstants.CUSTOM_ANNOTATION_COMMENT_CHAR)

            #===================================================================
            # INTERACTOME
            #===================================================================

            # Parse the protein interaction file
            input_file = PropertyManager.get_instance().get_property(
                DataConstants.INTERACTOME_DEFINITION_PROPERTY, True)
            self.launch_insertion_TSV(input_file, False,
                                      DataConstants.INTERACTOME_HEADER,
                                      DataConstants.INTERACTOME_CLASS,
                                      DataConstants.INTERACTOME_PARAMS, None,
                                      DataConstants.INTERACTOME_COMMENT_CHAR)

            # Parse the protein interaction network file
            input_file = PropertyManager.get_instance().get_property(
                DataConstants.INTERACTOME_NETWORK_DEFINITION_PROPERTY, True)
            ppi_default_values = [None, None, os.path.basename(input_file)]
            self.launch_insertion_TSV(
                input_file, False, DataConstants.INTERACTOME_NETWORK_HEADER,
                DataConstants.INTERACTOME_NETWORK_CLASS,
                DataConstants.INTERACTOME_NETWORK_PARAMS, ppi_default_values,
                DataConstants.INTERACTOME_NETWORK_COMMENT_CHAR)

            # Parse the Network Module file
            input_file = PropertyManager.get_instance().get_property(
                DataConstants.
                INTERACTOME_NETWORK_PARTITION_DEFINITION_PROPERTY, True)
            self.launch_insertion_NetworkModule(
                input_file, DataConstants.INTERACTOME_NETWORK_PARTITION_CLASS,
                DataConstants.INTERACTOME_NETWORK_PARTITION_CLASS_TAG,
                DataConstants.INTERACTOME_NETWORK_PARTITION_COMMENT_CHAR)

            # Parse the Network Module Annotation file
            input_file = PropertyManager.get_instance().get_property(
                DataConstants.
                INTERACTOME_NETWORK_PARTITION_ANNOTATION_PROPERTY, True)
            self.launch_insertion_NetworkModuleAnnotation(
                input_file,
                DataConstants.INTERACTOME_NETWORK_PARTITION_ANNOTATION_CLASS,
                DataConstants.
                INTERACTOME_NETWORK_PARTITION_ANNOTATION_CLASS_TAG,
                DataConstants.
                INTERACTOME_NETWORK_PARTITION_ANNOTATION_CLASS_REGEX,
                DataConstants.
                INTERACTOME_NETWORK_PARTITION_ANNOTATION_PROTEIN_TAG,
                DataConstants.
                INTERACTOME_NETWORK_PARTITION_ANNOTATION_ANNOTATION_TAG,
                DataConstants.INTERACTOME_NETWORK_PARTITION_COMMENT_CHAR)

            # Parse the protein redundancy file
            input_file = PropertyManager.get_instance().get_property(
                DataConstants.
                INTERACTOME_NETWORK_REDUNDANCY_DEFINITION_PROPERTY, True)
            interactome_network_redundancy_definition_value = [
                None, basename(input_file), None
            ]
            self.launch_insertion_TSV(
                input_file, False, DataConstants.
                INTERACTOME_NETWORK_REDUNDANCY_DEFINITION_HEADERS,
                DataConstants.INTERACTOME_NETWORK_REDUNDANCY_DEFINITION_CLASS,
                DataConstants.INTERACTOME_NETWORK_REDUNDANCY_DEFINITION_PARAMS,
                interactome_network_redundancy_definition_value, DataConstants.
                INTERACTOME_NETWORK_REDUNDANCY_DEFINITION_COMMENT_CHAR,
                "Redundancy", False)

            #===================================================================
            # RNA DEFINITION
            #===================================================================

            # Query a specific type of protein cross reference to speed up insertion
            DataManager.get_instance().perform_query(
                DataConstants.PROTEIN_ENSP_XREF_KW,
                "query( ProteinCrossReference.protein_id,ProteinCrossReference.crossReferenceID ).filter(ProteinCrossReference.sourceDB == DataConstants.PROTEIN_ENSP_XREF_DB).all()"
            )
            # Convert query into a dictionary
            DataManager.get_instance().query_to_dict(
                DataConstants.PROTEIN_ENSP_XREF_KW, 1, 0)

            # Parse the RNA file
            input_file = PropertyManager.get_instance().get_property(
                DataConstants.RNA_DEFINITION_PROPERTY, True)
            self.launch_insertion_TSV(input_file, True,
                                      DataConstants.RNA_HEADERS,
                                      DataConstants.RNA_CLASS,
                                      DataConstants.RNA_PARAMS, None,
                                      DataConstants.RNA_COMMENT_CHAR)

            # Parse the RNA cross references file
            input_file = PropertyManager.get_instance().get_property(
                DataConstants.RNA_CROSS_REFERENCE_PROPERTY, True)
            self.launch_insertion_TSV(
                input_file, False, DataConstants.RNA_CROSS_REFERENCE_HEADERS,
                DataConstants.RNA_CROSS_REFERENCE_CLASS,
                DataConstants.RNA_CROSS_REFERENCE_PARAMS, None,
                DataConstants.RNA_CROSS_REFERENCE_COMMENT_CHAR)

            #===================================================================
            # RNA TISSUE EXPRESSION
            #===================================================================

            # Make query of all RNA IDs to speed up insertion
            DataManager.get_instance().perform_query(DataConstants.RNA_ALL_KW,
                                                     "query( RNA ).all()")
            # Format query into dict data structure
            DataManager.get_instance().query_to_object_dict(
                DataConstants.RNA_ALL_KW, "transcriptID")

            # Make query of all Protein IDs (uniprotAC) to speed up insertion
            DataManager.get_instance().perform_query(DataConstants.PROT_ALL_KW,
                                                     "query( Protein ).all()")
            # Format query into dict data structure
            DataManager.get_instance().query_to_object_dict(
                DataConstants.PROT_ALL_KW, "uniprotAC")

            # Parse the RNA tissue expression file
            input_file = PropertyManager.get_instance().get_property(
                DataConstants.RNA_TISSUE_EXPRESSION_PROPERTY, True)
            self.launch_insertion_TSV(
                input_file, True, DataConstants.RNA_TISSUE_EXPRESSION_HEADERS,
                DataConstants.RNA_TISSUE_EXPRESSION_CLASS,
                DataConstants.RNA_TISSUE_EXPRESSION_PARAMS,
                DataConstants.RNA_TISSUE_EXPRESSION_VALUE,
                DataConstants.RNA_TISSUE_EXPRESSION_COMMENT_CHAR)

            #===================================================================
            # PROTEIN RNA INTERACTION
            #===================================================================

            self.forceOverride = 1

            # Parse the file listing RNA with catRAPID data
            input_file = PropertyManager.get_instance().get_property(
                DataConstants.INTERACTING_RNA_DEFINITION_PROPERTY, True)
            self.launch_insertion_TSV(
                input_file, True,
                DataConstants.INTERACTING_RNA_DEFINITION_HEADERS,
                DataConstants.INTERACTING_RNA_DEFINITION_CLASS,
                DataConstants.INTERACTING_RNA_DEFINITION_PARAMS, None,
                DataConstants.INTERACTING_RNA_DEFINITION_COMMENT_CHAR)

            # Parse the file listing Proteins with catRAPID data
            input_file = PropertyManager.get_instance().get_property(
                DataConstants.INTERACTING_PROTEIN_DEFINITION_PROPERTY, True)
            self.launch_insertion_TSV(
                input_file, True,
                DataConstants.INTERACTING_PROTEIN_DEFINITION_HEADERS,
                DataConstants.INTERACTING_PROTEIN_DEFINITION_CLASS,
                DataConstants.INTERACTING_PROTEIN_DEFINITION_PARAMS, None,
                DataConstants.INTERACTING_PROTEIN_DEFINITION_COMMENT_CHAR)

            # Initialize data items to store missing interactions
            if DataConstants.PROTEIN_RNA_INTERACTION_CATRAPID_MISSING_RNA_KW not in DataManager.get_instance(
            ).data:
                DataManager.get_instance().store_data(
                    DataConstants.
                    PROTEIN_RNA_INTERACTION_CATRAPID_MISSING_RNA_KW, [])
            if DataConstants.PROTEIN_RNA_INTERACTION_CATRAPID_MISSING_PROT_KW not in DataManager.get_instance(
            ).data:
                DataManager.get_instance().store_data(
                    DataConstants.
                    PROTEIN_RNA_INTERACTION_CATRAPID_MISSING_PROT_KW, [])

            # Parse the ProteinRNAInteractionCatRAPID file
            input_file = PropertyManager.get_instance().get_property(
                DataConstants.
                PROTEIN_RNA_INTERACTION_CATRAPID_DEFINITION_PROPERTY, True)
            self.launch_insertion_TSV(
                input_file, False,
                DataConstants.PROTEIN_RNA_INTERACTION_CATRAPID_HEADERS,
                DataConstants.PROTEIN_RNA_INTERACTION_CATRAPID_CLASS,
                DataConstants.PROTEIN_RNA_INTERACTION_CATRAPID_PARAMS, None,
                DataConstants.PROTEIN_RNA_INTERACTION_CATRAPID_COMMENT_CHAR)

            self.forceOverride = 0

            # Remove data that will no longer be used to reduce memory usage
            DataManager.get_instance().delete_data(
                DataConstants.PROTEIN_ENSP_XREF_KW)
            DataManager.get_instance().delete_data(DataConstants.RNA_ALL_KW)
            DataManager.get_instance().delete_data(DataConstants.PROT_ALL_KW)

        except RainetException as re:
            Logger.get_instance().error(re.to_string())
            Timer.get_instance().stop_chrono("ERROR : Data insertion FAILED")
            return

        # # Report on potential missing data
        # self.check_missing_data()

        # Stop the chrono
        Timer.get_instance().stop_chrono("Data insertion finished")
Example #16
                            help='Folder where to write output files.')

        # Get the arguments
        args = parser.parse_args()

        # Initialise class
        run = ExploreInteractingRNAs(args.rainetDB, args.transcriptList,
                                     args.interactingTranscriptList,
                                     args.outputFolder)

        #===============================================================================
        # Run analysis / processing
        #===============================================================================

        # Start chrono
        Timer.get_instance().start_chrono()

        transcriptList = run.read_transcript_list(args.transcriptList)
        interactingTranscriptList = run.read_transcript_list(
            args.interactingTranscriptList)

        run.query_database(transcriptList, interactingTranscriptList)

    # Use RainetException to catch errors
    except RainetException as rainet:
        Logger.get_instance().error(
            "Error during execution of ExploreInteractingRNAs. Aborting :\n" +
            rainet.to_string())

    # Stop the chrono
    Timer.get_instance().stop_chrono("ExploreInteractingRNAs : Finished")
Example #17
                        omimID)

                    notFound.add(omimID)

        print "write_output: %s OMIM IDs not found." % len(notFound)
        print "write_output: wrote %s protein-disease association entries." % nEntries

        outFile.close()


if __name__ == "__main__":

    try:

        # Start chrono
        Timer.get_instance().start_chrono()

        print "STARTING " + SCRIPT_NAME

        #===============================================================================
        # Get input arguments, initialise class
        #===============================================================================
        parser = argparse.ArgumentParser(description=DESC_COMMENT)

        # positional args
        parser.add_argument(
            'mimTitlesFile',
            metavar='mimTitlesFile',
            type=str,
            help='Path to mimTitles.txt file downloadable from OMIM database.')
        parser.add_argument('outputFolder',
Example #18
from fr.tagc.rainet.core.util.subprocess.SubprocessUtil import SubprocessUtil

#===============================================================================
# Started 03-Oct-2016
# Diogo Ribeiro
DESC_COMMENT = "Wrapper of NetworkScoreAnalysis to run for several top numbers."
SCRIPT_NAME = "networkAnalysisWrapper.py"
#===============================================================================

if __name__ == "__main__":

    try:

        # Start chrono
        Timer.get_instance().start_chrono()
        print "STARTING " + SCRIPT_NAME

        #===============================================================================
        # Get input arguments
        #===============================================================================
        parser = argparse.ArgumentParser(description=DESC_COMMENT)

        # positional args
        parser.add_argument('script',
                            metavar='script',
                            type=str,
                            help='Script to run.')
        parser.add_argument('networkFile',
                            metavar='networkFile',
                            type=str,
Example #19
        # Get the arguments
        args = parser.parse_args()

        # Initialise class
        run = NPInterPredictionValidation(args.catRAPIDFile, args.NPInterFile,
                                          args.noncodeTx2noncodeGene,
                                          args.noncode2Ensembl, args.rainetDB,
                                          args.outputFolder)

        #===============================================================================
        # Run analysis / processing
        #===============================================================================

        # Start chrono
        Timer.get_instance().start_chrono()

        Timer.get_instance().step("reading RAINET DB file..")

        # Build RNA cross references
        run.rnaXrefDict = run.rna_cross_references()

        # Build Protein cross references
        run.uniprotACs, run.xrefDict = run.proteins_in_rainet()

        Timer.get_instance().step("reading NONCODE file..")

        run.read_noncode_conversion_file()

        Timer.get_instance().step("reading NPInter file..")
Example #20
                outputText = "%s\t%s\n" % (line, annotation)

                outFile.write(outputText)

        outFile.close()

        print "read_interaction_file: read %s lines.." % lineCount


if __name__ == "__main__":

    try:

        # Start chrono
        Timer.get_instance().start_chrono()

        print "STARTING " + SCRIPT_NAME

        #===============================================================================
        # Get input arguments, initialise class
        #===============================================================================
        parser = argparse.ArgumentParser(description=DESC_COMMENT)

        # positional args
        parser.add_argument(
            'annotationFile',
            metavar='annotationFile',
            type=str,
            help=
            'TSV file with annotation per transcript. No header. Can have several annotations for same transcript, one per line. E.g. transcriptID\tannotation.'
Example #21
        set2 = set(group2)

        overlap = set1.intersection(set2)

        perc1 = len(overlap) * 100.0 / len(group1)
        perc2 = len(overlap) * 100.0 / len(group2)

        return perc1, perc2


if __name__ == "__main__":

    try:

        # Start chrono
        Timer.get_instance().start_chrono()

        print "STARTING " + SCRIPT_NAME

        #===============================================================================
        # Get input arguments, initialise class
        #===============================================================================
        parser = argparse.ArgumentParser(description=DESC_COMMENT)

        # positional args
        parser.add_argument(
            'rainetDB',
            metavar='rainetDB',
            type=str,
            help=
            'Rainet database to be used. Needs to have protein annotation datasets and interactingProteins tables up-to-date.'
Example #22
                complexedProteins = self.complexAnnotDict[complexID]

                # overlap between proteins in complex and experimental interactions
                intersection = interactingProteins.intersection(
                    complexedProteins)

                if len(intersection) > 0:
                    outFile.write("%s\t%s\t%s\n" %
                                  (txID, complexID, ",".join(intersection)))

if __name__ == "__main__":

    try:

        # Start chrono
        Timer.get_instance().start_chrono()

        print "STARTING " + SCRIPT_NAME

        #===============================================================================
        # Get input arguments, initialise class
        #===============================================================================
        parser = argparse.ArgumentParser(description=DESC_COMMENT)

        # positional args
        parser.add_argument('rainetDB',
                            metavar='rainetDB',
                            type=str,
                            help='Rainet database to be used.')
        parser.add_argument('complexTable',
                            metavar='complexTable',
Example #23
        args = parser.parse_args()

        # Initialise class
        run = StarBasePredictionValidation(args.catRAPIDFile,
                                           args.starBaseFile,
                                           args.starBaseProteinConversionFile,
                                           args.rainetDB, args.outputFolder,
                                           args.minimumBioComplex,
                                           args.minimumClipReadNumber)

        #===============================================================================
        # Run analysis / processing
        #===============================================================================

        # Start chrono
        Timer.get_instance().start_chrono()

        Timer.get_instance().step("reading RAINET DB and conversion files..")

        # Build starbase RNA cross references
        run.starbaseRNAXrefDict = run.rna_cross_references()

        # Build starbase Protein cross references
        run.starbaseProtXrefDict = run.read_starbase_protein_conversion_file()

        # Build Protein cross references
        run.xrefDict = run.protein_cross_references()

        Timer.get_instance().step("reading starBase file..")

        starbasePairs, starbasePairsBiocomplex = run.read_starbase_file()
Example #24
    outFile.close()

    # clean output
    os.system("mkdir %s/processed" % (outputFolder))
    os.system("mv %s/*.bed %s/processed" % (outputFolder, outputFolder))

    print "produce_interactions_file: produced interactions for %s proteins.." % len(
        interactionsPerProtein)


if __name__ == "__main__":

    try:

        # Start chrono
        Timer.get_instance().start_chrono()
        print "STARTING " + SCRIPT_NAME

        #===============================================================================
        # Get input arguments
        #===============================================================================
        parser = argparse.ArgumentParser(description=DESC_COMMENT)

        # positional args
        parser.add_argument(
            'bedFolder',
            metavar='bedFolder',
            type=str,
            help='Input folder with all bed files to be processed.')
        parser.add_argument('outputFolder',
                            metavar='outputFolder',
Example #25
            else:
                # neither lncRNA nor mRNA
                continue

        outFile.close()

        print "read_data_file: number of lines in input data:", len(newTable)
        print "read_data_file: number of lncRNAs per group", numbersPerGroup


if __name__ == "__main__":

    try:

        # Start chrono
        Timer.get_instance().start_chrono()

        print "STARTING " + SCRIPT_NAME

        #===============================================================================
        # Get input arguments, initialise class
        #===============================================================================
        parser = argparse.ArgumentParser(description=DESC_COMMENT)

        # positional args
        parser.add_argument(
            'annotationFile',
            metavar='annotationFile',
            type=str,
            help=
            'TSV file with annotation per transcript (gene). No header. Can have several annotations for same transcript, one per line. E.g. transcriptID\tannotation.'
Example #26
        print "read_lncrnadisease: read %s lines: " % nLines

        print "read_lncrnadisease: IDs found: %s" % len(found)

        print "read_lncrnadisease: IDs not found: %s" % len(notFound)
        print "List of transcripts not found: "
        print notFound


if __name__ == "__main__":

    try:

        # Start chrono
        Timer.get_instance().start_chrono()

        print "STARTING " + SCRIPT_NAME

        #===============================================================================
        # Get input arguments, initialise class
        #===============================================================================
        parser = argparse.ArgumentParser(description=DESC_COMMENT)

        # positional args
        parser.add_argument('rainetDB',
                            metavar='rainetDB',
                            type=str,
                            help='Rainet database to be used.')
        parser.add_argument(
            'lncrnadiseaseData',
Example #27
                            type=str,
                            help='Folder where to write output files.')

        # Get the arguments
        args = parser.parse_args()

        # Initialise class
        run = ECLIPPredictionValidation(args.catRAPIDFile, args.eClipFile,
                                        args.outputFolder)

        #===============================================================================
        # Run analysis / processing
        #===============================================================================

        # Start chrono
        Timer.get_instance().start_chrono()

        Timer.get_instance().step("reading eCLIP file..")

        run.read_eclip_file()

        Timer.get_instance().step("reading catRAPID file..")

        run.read_catrapid_file()

    # Use RainetException to catch errors
    except RainetException as rainet:
        Logger.get_instance().error("Error during execution. Aborting :\n" +
                                    rainet.to_string())

    # Stop the chrono