def run(self):

    Timer.get_instance().step("Reading RAINET DB..")
    self.read_rainet_db()

    Timer.get_instance().step("Reading enrichment file..")
    self.read_enrichment_file()
def launch_insertion_TSV(self, file_path, has_headers, headers, class_name, params,
                         default_values, comment_char, table_extension="", clean_table=True):

    composite_table_name = class_name + table_extension

    try:
        # Start timing
        Timer.get_instance().step("Inserting " + composite_table_name + ":")

        # Check whether the insertion is required or not, depending on whether the data
        # were already inserted and on the forceOverride option
        if SQLUtil.insert_data_required(class_name, self.forceOverride, table_extension):
            Logger.get_instance().info("|--Starting insertion...")
            status = TSVParser.parse_file(file_path, has_headers, headers, class_name,
                                          params, default_values, comment_char, clean_table)
        else:
            Logger.get_instance().info("|--Data already inserted: insertion bypassed.")
            status = None
    # If an exception is raised by controlled code, the insertion status is set to "Rainet Error"
    except RainetException as re:
        Logger.get_instance().error(re.to_string())
        status = Constants.STATUS_RAINET_ERROR
        raise re
    # If an exception is raised by uncontrolled code, the insertion status is set to "Error"
    except Exception as e:
        Logger.get_instance().error(e.message)
        status = Constants.STATUS_ERROR
        raise RainetException("Abnormal Exception during insertion of " + class_name, e)
    # In all cases, insert or update the status of the insertion in the TableStatus table of the DB
    finally:
        if status != None:
            sql_session = SQLManager.get_instance().get_session()
            db_status_list = sql_session.query(TableStatus).filter(
                TableStatus.tableName == composite_table_name).all()
            if db_status_list == None or len(db_status_list) == 0:
                sql_session.add(TableStatus(composite_table_name, status, file_path))
            else:
                db_status = db_status_list[0]
                db_status.tableStatus = status
                sql_session.add(db_status)
            SQLManager.get_instance().commit()
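# Aside (illustration only, not part of the RAINET code): the finally-block above follows an
# insert-or-update pattern with SQLAlchemy -- query the status row by table name, create it if
# absent, otherwise update it in place. A minimal, self-contained sketch of that pattern with a
# hypothetical Status model; the import path matches older SQLAlchemy releases (newer versions
# also expose declarative_base from sqlalchemy.orm).

from sqlalchemy import Column, String, create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker

Base = declarative_base()

class Status(Base):
    __tablename__ = "status"
    tableName = Column(String, primary_key=True)
    tableStatus = Column(String)

engine = create_engine("sqlite:///:memory:")
Base.metadata.create_all(engine)
session = sessionmaker(bind=engine)()

def upsert_status(session, table_name, status):
    # Query for an existing row; insert if missing, update otherwise
    existing = session.query(Status).filter(Status.tableName == table_name).all()
    if not existing:
        session.add(Status(tableName=table_name, tableStatus=status))
    else:
        existing[0].tableStatus = status
    session.commit()

upsert_status(session, "ProteinSMART", "OK")
upsert_status(session, "ProteinSMART", "WARNING")  # updates the same row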
def run(self):

    Logger.get_instance().info("TemplateScriptWithClass.run: Starting...")

    #===================================================================
    # Initialising datasets
    #===================================================================

    Timer.get_instance().step("Initialising interaction datasets..")

    self.function_one()
def run(self):

    #===============================================================================
    # Run analysis / processing
    #===============================================================================

    Timer.get_instance().step("Read RainetDB table..")
    self.read_rainet_db()

    Timer.get_instance().step("Read enrichment file..")
    pairsToWrite, enrichmentsDict = self.read_enrichment_data()

    return pairsToWrite, enrichmentsDict
def launch_insertion_Fasta(self, file_path, class_name, regex, groups, params,
                           params_values, comment_char, clean_table=True):

    try:
        Timer.get_instance().step("Inserting " + class_name + ":")

        if SQLUtil.insert_data_required(class_name, self.forceOverride):
            Logger.get_instance().info("|--Starting insertion...")
            status = FastaParser.parse_file(file_path, class_name, regex, groups,
                                            params, params_values, comment_char, clean_table)
        else:
            Logger.get_instance().info("|--Data already inserted: insertion bypassed.")
            status = None
    except RainetException as re:
        Logger.get_instance().error(re.to_string())
        status = Constants.STATUS_RAINET_ERROR
        raise re
    except Exception as e:
        Logger.get_instance().error(e.message)
        status = Constants.STATUS_ERROR
        raise RainetException("Abnormal Exception during insertion of " + class_name, e)
    finally:
        if status != None:
            sql_session = SQLManager.get_instance().get_session()
            db_status_list = sql_session.query(TableStatus).filter(
                TableStatus.tableName == class_name).all()
            if db_status_list == None or len(db_status_list) == 0:
                sql_session.add(TableStatus(class_name, status, file_path))
            else:
                db_status = db_status_list[0]
                db_status.tableStatus = status
                sql_session.add(db_status)
            SQLManager.get_instance().commit()
def run(self):

    Timer.get_instance().step("Reading annotation file..")
    self.read_annotation_file()

    Timer.get_instance().step("Reading external files..")
    self.read_external_files()

    Timer.get_instance().step("Reading background file..")
    self.read_background_list()

    Timer.get_instance().step("Creating output file..")
    self.calculate_odds_ratio()
def run(self):

    Logger.get_instance().info("NetworkScoreAnalysis.run: Starting...")

    #===================================================================
    # Initialising datasets
    #===================================================================

    Timer.get_instance().step("Reading network file..")

    #self.protein_cross_references()
    self.read_network_file()
    self.calculate_protein_degree()

    Timer.get_instance().step("Reading catrapid file..")

    self.read_catrapid_file()
    self.pick_top_proteins()

    Timer.get_instance().step("Calculate metrics..")

    # calculate metrics for each of the wanted topPartner values
    for topVal in self.topPartners:
        self.calculate_metrics(topVal)
def run(self):

    # Approach: filters are applied through the filteredEnrichmentResults variable,
    # which changes depending on the filtering applied

    # Read row annotation file, if present
    if self.rowAnnotationFile != "":
        Timer.get_instance().step("Read row annotation file..")
        self.rowAnnotation = self.read_annotation_file(self.rowAnnotationFile)

    # Read column annotation file, if present
    if self.colAnnotationFile != "":
        Timer.get_instance().step("Read column annotation file..")
        self.colAnnotation = self.read_annotation_file(self.colAnnotationFile)

    Timer.get_instance().step("Read Enrichment per RNA file..")
    listRNASignificantEnrich, countRealEnrichments, countRandomEnrichments = self.read_enrichment_per_rna_file()

    Timer.get_instance().step("Read Enrichment results file..")
    dictPairs, filteredEnrichmentResults = self.read_enrichment_results_file(
        listRNASignificantEnrich, countRealEnrichments, countRandomEnrichments)

    Timer.get_instance().step("Write specificity ranking..")
    annotDict, lncDict = self.rank_by_specificity(dictPairs, self.filterWarningValue)

    # Filter by specificity, if wanted
    if self.annotSpecificityFilter != -1 or self.transcriptSpecificityFilter != -1:
        Timer.get_instance().step("Write enrichment results file filtered by specificity..")
        filteredEnrichmentResults = self.filter_by_specificity(
            filteredEnrichmentResults, annotDict, lncDict)

    # Filter by top enrichments (observed interactions), if wanted
    if self.topEnrichmentsPerComplex != -1:
        Timer.get_instance().step(
            "Write enrichment results file filtered by top enrichments per complex..")
        _, filteredEnrichmentResults = self.filter_by_observed_interactions(
            filteredEnrichmentResults)

    Timer.get_instance().step("Write filtered enrichment results file..")
    self.write_enrichment_results(filteredEnrichmentResults, dictPairs,
                                  countRealEnrichments, countRandomEnrichments)
        statistic, pvalue = stats.ttest_ind(sample1, sample2, equal_var=False)

        return statistic, pvalue

    # #
    # Run multipletest correction and return pvalues
    def multiple_test_correction(self, pvalues, meth="fdr_bh"):

        return multipletests(pvalues, method=meth)[1]


if __name__ == "__main__":

    try:
        # Start chrono
        Timer.get_instance().start_chrono()

        print "STARTING " + SCRIPT_NAME

        #===============================================================================
        # Get input arguments
        #===============================================================================
        parser = argparse.ArgumentParser(description=DESC_COMMENT)

        # positional args
        parser.add_argument('interactionFile', metavar='interactionFile', type=str,
                            help='Catrapid interactions file.')
        parser.add_argument('transcriptTypesFile', metavar='transcriptTypesFile',
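# Aside (illustration only): the two helpers above wrap a Welch's t-test (unequal variances)
# and a Benjamini-Hochberg multiple-test correction. A minimal, self-contained example with
# made-up samples and p-values, using the same scipy / statsmodels calls:

from scipy import stats
from statsmodels.stats.multitest import multipletests

sample1 = [1.2, 1.9, 2.4, 2.0, 1.7]
sample2 = [2.8, 3.1, 2.6, 3.4, 2.9]

# Welch's t-test: equal_var=False avoids assuming equal variances between the two groups
statistic, pvalue = stats.ttest_ind(sample1, sample2, equal_var=False)

# Correct a set of p-values for multiple testing; index [1] holds the corrected p-values
pvalues = [pvalue, 0.04, 0.20, 0.001]
corrected = multipletests(pvalues, method="fdr_bh")[1]

print(pvalue)
print(corrected)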
        args.outputFolder += "/"

        # Initialise class
        run = ProcessGTExData(args.annotationFile, args.expressionFile, args.tissueLevel,
                              args.minimumSamples, args.outputFolder, args.writeReport)

        #===============================================================================
        # Run analysis / processing
        #===============================================================================

        # Create Logger instance by using the first log action.
        Logger.get_instance().info("ProcessGTExData : Starting...")

        # Start chrono
        Timer.get_instance().start_chrono()

        # Wipe output and log from previous runs
        run.clean_files()

        # Read annotation file
        Timer.get_instance().step("reading annotation file..")
        sampleTissue, tissueSample, problematicSamples = run.read_tissue_annotations()

        Timer.get_instance().step("reading expression file..")
        # Read expression file using the annotations, and process sample data into a single value.
        # Note: nested call so that the resource-heavy dictionary object is not duplicated.
        run.average_sample_values(
            run.read_transcript_expression(sampleTissue, tissueSample, problematicSamples))
            genes = self.geneDict[liuGeneID]
            for ge in genes:
                outFile2.write(ge + "\n")

        print "read_tableS10: read %s lines." % nLines

        outFile1.close()
        outFile2.close()


if __name__ == "__main__":

    try:
        # Start chrono
        Timer.get_instance().start_chrono()

        print "STARTING " + SCRIPT_NAME

        #===============================================================================
        # Get input arguments, initialise class
        #===============================================================================
        parser = argparse.ArgumentParser(description=DESC_COMMENT)

        # positional args
        parser.add_argument('rainetDB', metavar='rainetDB', type=str,
                            help='Rainet database to be used for ID mapping.')
        parser.add_argument('tableS1',
def parse_file(file_path, has_headers, group_list, class_name, parameter_name_list,
               parameter_value_list, comment_symbol, clean_table=False):

    # Initialize the status of the insertion
    status = Constants.STATUS_OK

    # Open the file to parse in 'read' mode
    input_file = FileUtils.open_text_r(file_path)

    # If required, look for the header line (first non-empty line)
    if has_headers:
        line = ''
        while line == '':
            line = input_file.readline()
        # If no headers are provided by the user, use the file headers
        if group_list == None:
            group_list = line.split("\t")

    # Build the map of parameter name to header index.
    # If a parameter name is not in the header, the index is set to -1
    # and a warning message is logged.
    parameter_to_header_index_map = {}
    for parameter_name in parameter_name_list:
        try:
            index = group_list.index(parameter_name)
        except ValueError:
            index = -1
            Logger.get_instance().warning(
                "TSVParser.parse_file : The parameter '" + parameter_name +
                "' is not in the header list : " + str(group_list) +
                ".\n Check whether it is normal or not.")
            status = Constants.STATUS_WARNING
        parameter_to_header_index_map[parameter_name] = index

    # Build the Factory to be used to create the objects
    data_factory = DataFactory(class_name)
    if clean_table:
        data_factory.clean_table()

    # Parse the lines in the file and create one object of the requested type per line
    instance_list = []
    line = input_file.readline()
    counter = 0
    while line != None and line != '':
        # Ignore empty and comment lines
        if line != '' and not line.startswith(comment_symbol):
            line_value_list = line.split("\t")
            # Check that the line has as many columns as headers
            if len(line_value_list) > len(group_list):
                raise RainetException(
                    "TSVParser.parse_file : Bad line format in file. Line does not have the right number of values.\n" +
                    " Number of values = " + str(len(line_value_list)) +
                    " != Number of headers = " + str(len(group_list)) + ".\n" +
                    " Line = " + line)
            # Build the ordered list of values required for object creation.
            # If no parameter_value_list is provided, create a new 'None' one.
            # If a parameter_value_list is provided, override only the values
            # corresponding to a parameter found in the file header.
            if (parameter_value_list == None):
                parameter_value_list = [None] * len(parameter_name_list)
            value_index = 0
            for parameter_name in parameter_name_list:
                parameter_index = parameter_to_header_index_map[parameter_name]
                if parameter_index >= 0:
                    parameter_value_list[value_index] = line_value_list[parameter_index].strip()
                value_index = value_index + 1
            # Call for the object creation
            new_instance = data_factory.create_object_from_tsv(parameter_value_list)
            if new_instance != None:
                instance_list.append(new_instance)
        # Read a new line
        line = input_file.readline()
        # Counter for large files: commit to the database every 100000 lines
        counter += 1
        if counter % 100000 == 0:
            Logger.get_instance().info(
                "TSVParser.parse_file : %s Read %i lines.." % (file_path, counter))
            Timer.get_instance().time_point()
            SQLManager.get_instance().commit()
            instance_list = []

    # Close the input file
    input_file.close()

    # Commit the SQLAlchemy session
    Logger.get_instance().info("TSVParser.parse_file : Committing SQL session.")
    SQLManager.get_instance().commit()

    # Return the insertion status
    return status
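# Aside (illustration only, not part of TSVParser): the header-to-index map built above locates
# each requested parameter in the file header and flags missing ones with -1, so that later only
# the columns actually present in the file override the default values. A minimal, self-contained
# example with made-up headers, parameters and one data line:

headers = ["uniprotAC", "geneSymbol", "sequence"]
parameters = ["uniprotAC", "sequence", "organism"]

index_map = {}
for name in parameters:
    # -1 flags a parameter that is absent from the file header
    index_map[name] = headers.index(name) if name in headers else -1

line_values = ["P04637", "TP53", "MEEPQSDPSV"]
values = [line_values[index_map[name]].strip() if index_map[name] >= 0 else None
          for name in parameters]
# values -> ['P04637', 'MEEPQSDPSV', None]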
    ### write filtered table to output file
    pd.DataFrame.to_csv(filteredTable, bedFile + FITLERED_OUTPUT_SUFFIX,
                        sep="\t", header=False, index=False)
    # Note: float precision is changed when using pandas


if __name__ == "__main__":

    try:
        # Start chrono
        Timer.get_instance().start_chrono()

        print "STARTING " + SCRIPT_NAME

        #===============================================================================
        # Get input arguments
        #===============================================================================
        parser = argparse.ArgumentParser(description=DESC_COMMENT)

        # positional args
        parser.add_argument('bedFile', metavar='bedFile', type=str,
                            help='Input bed file to be processed.')
        parser.add_argument('--minPval', metavar='minPval',
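# Aside (illustration only): the note above observes that float precision changes when the bed
# table goes through pandas; to_csv accepts a float_format argument that makes the written
# precision explicit. A minimal, self-contained sketch with a toy table (not the pipeline data):

import pandas as pd

table = pd.DataFrame({"chrom": ["chr1", "chr2"], "pval": [0.000123456789, 0.05]})
table.to_csv("toy_filtered.bed", sep="\t", header=False, index=False, float_format="%.10g")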
        #gets the arguments
        args = parser.parse_args()

        # Initialise class
        run = KnownScaffoldValidation(args.catRAPIDFile, args.validatedFile, args.wantedRNAFile,
                                      args.rainetDB, args.outputFolder,
                                      args.discriminativePowerCutoff, args.zscoreCutoff,
                                      args.topProportion, args.npinter, args.columnForPlot,
                                      args.searchSpaceFile, args.zscoreToPropensity)

        #===============================================================================
        # Run analysis / processing
        #===============================================================================

        # Start chrono
        Timer.get_instance().start_chrono()

        Timer.get_instance().step("reading catRAPID file..")

        # Read CatRAPID
        catRAPIDInteractingProteins, catRAPIDNonInteractingProteins, scoreDict, scoreDictMean = run.read_catRAPID_file()

        # Read NPInter or given list of proteins
        if run.npinter:
            experimentallyValidatedProteins = run.read_NPInter_file()
        else:
            experimentallyValidatedProteins = run.read_manual_list_file()

        #===============================================================================
def insert_data(self):

    # Create Logger instance by using the first log action.
    Logger.get_instance().info("InsertionStrategy.insert_data: Starting...")

    # # Backup the database file
    # try:
    #     Logger.get_instance().info( "InsertionStrategy.insert_data: Backuping DB file..." )
    #     shutil.copyfile(self.DBPath, self.DBPath + ".back")
    # except IOError as ioe:
    #     Logger.get_instance().info( " warning : Unable to backup database file : " + self.DBPath + " : " + str( ioe))

    # Create database sqlite file at the provided path
    SQLManager.get_instance().build_database(self.DBPath, self.forceOverride)
    self.check_database_tables()

    # Retrieve the insertion properties
    PropertyManager.get_instance().read_properties(
        OptionManager.get_instance().get_option(
            OptionConstants.OPTION_INSERTION_PROPERTIES_PATH, True))

    # Start chrono
    Timer.get_instance().start_chrono()

    # Indicate insertion mode
    if self.forceOverride:
        Logger.get_instance().info(" -- MODE FORCE OVERRIDE -- ")
    else:
        Logger.get_instance().info(" -- MODE RESUME -- ")

    #=======================================================================
    # INSERTION OF DATA
    #=======================================================================
    try:
        #===================================================================
        # PROTEIN DEFINITION
        #===================================================================

        # Parse the protein file
        input_file = PropertyManager.get_instance().get_property(
            DataConstants.PROTEIN_UNIPROT_DEFINITION_PROPERTY, True)
        self.launch_insertion_TSV(input_file, True,
                                  DataConstants.PROTEIN_HEADERS,
                                  DataConstants.PROTEIN_CLASS,
                                  DataConstants.PROTEIN_PARAMS, None,
                                  DataConstants.PROTEIN_COMMENT_CHAR)

        # Parse the protein cross references file
        input_file = PropertyManager.get_instance().get_property(
            DataConstants.PROTEIN_CROSSREFERENCES_PROPERTY, True)
        self.launch_insertion_TSV(
            input_file, False,
            DataConstants.PROTEIN_CROSS_REFERENCE_HEADERS,
            DataConstants.PROTEIN_CROSS_REFERENCE_CLASS,
            DataConstants.PROTEIN_CROSS_REFERENCE_PARAMS, None,
            DataConstants.PROTEIN_CROSS_REFERENCE_COMMENT_CHAR)

        # Parse the protein isoform file
        input_file = PropertyManager.get_instance().get_property(
            DataConstants.PROTEIN_ISOFORMS_PROPERTY, True)
        self.launch_insertion_Fasta(
            input_file, DataConstants.ISOFORM_CLASS,
            DataConstants.ISOFORM_REGULAR_EXPRESSION,
            DataConstants.ISOFORM_GROUPS,
            DataConstants.ISOFORM_PARAMS,
            DataConstants.ISOFORM_PARAMS_VALUE_ALTERNATIVE,
            DataConstants.ISOFORM_COMMENT_CHAR)

        # Parse the protein domain file of SMART DB
        input_file = PropertyManager.get_instance().get_property(
            DataConstants.PROTEIN_DOMAIN_SMART_PROPERTY, True)
        self.launch_insertion_TSV(
            input_file, True,
            DataConstants.PROTEIN_DOMAIN_HEADERS_SMART,
            DataConstants.PROTEIN_DOMAIN_CLASS,
            DataConstants.PROTEIN_DOMAIN_PARAM_SMART,
            DataConstants.PROTEIN_DOMAIN_VALUE_SMART,
            DataConstants.PROTEIN_DOMAIN_COMMENT_CHAR, "SMART", False)

        # Parse the protein domain file of PFAM DB
        input_file = PropertyManager.get_instance().get_property(
            DataConstants.PROTEIN_DOMAIN_PFAM_PROPERTY, True)
        self.launch_insertion_TSV(
            input_file, False,
            DataConstants.PROTEIN_DOMAIN_HEADERS_PFAM,
            DataConstants.PROTEIN_DOMAIN_CLASS,
            DataConstants.PROTEIN_DOMAIN_PARAM_PFAM,
            DataConstants.PROTEIN_DOMAIN_VALUE_PFAM,
            DataConstants.PROTEIN_DOMAIN_COMMENT_CHAR, "PFAM", False)

        #===================================================================
        # FUNCTION AND PATHWAY ANNOTATIONS
        #===================================================================

        # Parse the Gene Ontology file
        input_file = PropertyManager.get_instance().get_property(
            DataConstants.GENE_ONTOLOGY_DEFINITION_PROPERTY, True)
        self.launch_insertion_Obo(
            input_file, DataConstants.GENE_ONTOLOGY_CLASS,
            DataConstants.GENE_ONTOLOGY_ID_TAG,
            DataConstants.GENE_ONTOLOGY_NAME_TAG,
            DataConstants.GENE_ONTOLOGY_NAMESPACE_TAG)

        # Parse the Protein Gene Ontology annotation file
        input_file = PropertyManager.get_instance().get_property(
            DataConstants.GENE_ONTOLOGY_ANNOTATION_PROPERTY, True)
        self.launch_insertion_TSV(
            input_file, False,
            DataConstants.PROTEIN_GO_ANNOTATION_HEADERS,
            DataConstants.PROTEIN_GO_ANNOTATION_CLASS,
            DataConstants.PROTEIN_GO_ANNOTATION_PARAMS, None,
            DataConstants.PROTEIN_GO_ANNOTATION_COMMENT_CHAR)

        # Parse the KEGG pathway file
        input_file = PropertyManager.get_instance().get_property(
            DataConstants.KEGG_PATHWAY_DEFINITION_PROPERTY, True)
        self.launch_insertion_TSV(input_file, True,
                                  DataConstants.KEGG_PATHWAY_HEADERS,
                                  DataConstants.KEGG_PATHWAY_CLASS,
                                  DataConstants.KEGG_PATHWAY_PARAMS, None,
                                  DataConstants.KEGG_PATHWAY_COMMENT_CHAR)

        # Parse the Protein KEGG Pathway annotation file
        input_file = PropertyManager.get_instance().get_property(
            DataConstants.KEGG_PATHWAY_ANNOTATION_PROPERTY, True)
        self.launch_insertion_TSV(
            input_file, True,
            DataConstants.KEGG_PATHWAY_ANNOTATION_HEADERS,
            DataConstants.KEGG_PATHWAY_ANNOTATION_CLASS,
            DataConstants.KEGG_PATHWAY_ANNOTATION_PARAMS, None,
            DataConstants.KEGG_PATHWAY_ANNOTATION_COMMENT_CHAR)

        #===================================================================
        # REACTOME
        #===================================================================

        # Parse the Reactome pathway file
        input_file = PropertyManager.get_instance().get_property(
            DataConstants.REACTOME_PATHWAY_DEFINITION_PROPERTY, True)
        self.launch_insertion_TSV(
            input_file, False,
            DataConstants.REACTOME_PATHWAY_HEADERS,
            DataConstants.REACTOME_PATHWAY_CLASS,
            DataConstants.REACTOME_PATHWAY_PARAMS, None,
            DataConstants.REACTOME_PATHWAY_COMMENT_CHAR)

        # Parse the Protein Reactome Pathway annotation file
        input_file = PropertyManager.get_instance().get_property(
            DataConstants.REACTOME_PATHWAY_ANNOTATION_PROPERTY, True)
        self.launch_insertion_TSV(
            input_file, False,
            DataConstants.REACTOME_PATHWAY_ANNOTATION_HEADERS,
            DataConstants.REACTOME_PATHWAY_ANNOTATION_CLASS,
            DataConstants.REACTOME_PATHWAY_ANNOTATION_PARAMS, None,
            DataConstants.REACTOME_PATHWAY_ANNOTATION_COMMENT_CHAR)

        #===================================================================
        # BIOPLEX
        #===================================================================

        # Parse the file listing Bioplex clusters
        input_file = PropertyManager.get_instance().get_property(
            DataConstants.BIOPLEX_CLUSTER_DEFINITION_PROPERTY, True)
        self.launch_insertion_TSV(
            input_file, False,
            DataConstants.BIOPLEX_CLUSTER_HEADERS,
            DataConstants.BIOPLEX_CLUSTER_CLASS,
            DataConstants.BIOPLEX_CLUSTER_PARAMS, None,
            DataConstants.BIOPLEX_CLUSTER_COMMENT_CHAR)

        # Parse the file with Bioplex annotations
        input_file = PropertyManager.get_instance().get_property(
            DataConstants.BIOPLEX_ANNOTATION_PROPERTY, True)
        self.launch_insertion_TSV(
            input_file, False,
            DataConstants.BIOPLEX_ANNOTATION_HEADERS,
            DataConstants.BIOPLEX_ANNOTATION_CLASS,
            DataConstants.BIOPLEX_ANNOTATION_PARAMS, None,
            DataConstants.BIOPLEX_ANNOTATION_COMMENT_CHAR)

        #===================================================================
        # WAN CLUSTERS
        #===================================================================

        # Parse the file listing Wan clusters
        input_file = PropertyManager.get_instance().get_property(
            DataConstants.WAN_CLUSTER_DEFINITION_PROPERTY, True)
        self.launch_insertion_TSV(input_file, False,
                                  DataConstants.WAN_CLUSTER_HEADERS,
                                  DataConstants.WAN_CLUSTER_CLASS,
                                  DataConstants.WAN_CLUSTER_PARAMS, None,
                                  DataConstants.WAN_CLUSTER_COMMENT_CHAR)

        # Parse the file with Wan annotations
        input_file = PropertyManager.get_instance().get_property(
            DataConstants.WAN_ANNOTATION_PROPERTY, True)
        self.launch_insertion_TSV(
            input_file, False,
            DataConstants.WAN_ANNOTATION_HEADERS,
            DataConstants.WAN_ANNOTATION_CLASS,
            DataConstants.WAN_ANNOTATION_PARAMS, None,
            DataConstants.WAN_ANNOTATION_COMMENT_CHAR)

        #===================================================================
        # CORUM CLUSTERS
        #===================================================================

        # Parse the file listing Corum clusters
        input_file = PropertyManager.get_instance().get_property(
            DataConstants.CORUM_CLUSTER_DEFINITION_PROPERTY, True)
        self.launch_insertion_TSV(input_file, False,
                                  DataConstants.CORUM_CLUSTER_HEADERS,
                                  DataConstants.CORUM_CLUSTER_CLASS,
                                  DataConstants.CORUM_CLUSTER_PARAMS, None,
                                  DataConstants.CORUM_CLUSTER_COMMENT_CHAR)

        # Parse the file with Corum annotations
        input_file = PropertyManager.get_instance().get_property(
            DataConstants.CORUM_ANNOTATION_PROPERTY, True)
        self.launch_insertion_TSV(
            input_file, False,
            DataConstants.CORUM_ANNOTATION_HEADERS,
            DataConstants.CORUM_ANNOTATION_CLASS,
            DataConstants.CORUM_ANNOTATION_PARAMS, None,
            DataConstants.CORUM_ANNOTATION_COMMENT_CHAR)

        #===================================================================
        # CUSTOM CLUSTERS
        #===================================================================

        # Parse the file listing Custom clusters
        input_file = PropertyManager.get_instance().get_property(
            DataConstants.CUSTOM_CLUSTER_DEFINITION_PROPERTY, True)
        self.launch_insertion_TSV(
            input_file, False,
            DataConstants.CUSTOM_CLUSTER_HEADERS,
            DataConstants.CUSTOM_CLUSTER_CLASS,
            DataConstants.CUSTOM_CLUSTER_PARAMS, None,
            DataConstants.CUSTOM_CLUSTER_COMMENT_CHAR)

        # Parse the file with Custom annotations
        input_file = PropertyManager.get_instance().get_property(
            DataConstants.CUSTOM_ANNOTATION_PROPERTY, True)
        self.launch_insertion_TSV(
            input_file, False,
            DataConstants.CUSTOM_ANNOTATION_HEADERS,
            DataConstants.CUSTOM_ANNOTATION_CLASS,
            DataConstants.CUSTOM_ANNOTATION_PARAMS, None,
            DataConstants.CUSTOM_ANNOTATION_COMMENT_CHAR)

        #===================================================================
        # INTERACTOME
        #===================================================================

        # Parse the protein interaction file
        input_file = PropertyManager.get_instance().get_property(
            DataConstants.INTERACTOME_DEFINITION_PROPERTY, True)
        self.launch_insertion_TSV(input_file, False,
                                  DataConstants.INTERACTOME_HEADER,
                                  DataConstants.INTERACTOME_CLASS,
                                  DataConstants.INTERACTOME_PARAMS, None,
                                  DataConstants.INTERACTOME_COMMENT_CHAR)

        # Parse the protein interaction network file
        input_file = PropertyManager.get_instance().get_property(
            DataConstants.INTERACTOME_NETWORK_DEFINITION_PROPERTY, True)
        ppi_default_values = [None, None, os.path.basename(input_file)]
        self.launch_insertion_TSV(
            input_file, False,
            DataConstants.INTERACTOME_NETWORK_HEADER,
            DataConstants.INTERACTOME_NETWORK_CLASS,
            DataConstants.INTERACTOME_NETWORK_PARAMS,
            ppi_default_values,
            DataConstants.INTERACTOME_NETWORK_COMMENT_CHAR)

        # Parse the Network Module file
        input_file = PropertyManager.get_instance().get_property(
            DataConstants.INTERACTOME_NETWORK_PARTITION_DEFINITION_PROPERTY, True)
        self.launch_insertion_NetworkModule(
            input_file,
            DataConstants.INTERACTOME_NETWORK_PARTITION_CLASS,
            DataConstants.INTERACTOME_NETWORK_PARTITION_CLASS_TAG,
            DataConstants.INTERACTOME_NETWORK_PARTITION_COMMENT_CHAR)

        # Parse the Network Module Annotation file
        input_file = PropertyManager.get_instance().get_property(
            DataConstants.INTERACTOME_NETWORK_PARTITION_ANNOTATION_PROPERTY, True)
        self.launch_insertion_NetworkModuleAnnotation(
            input_file,
            DataConstants.INTERACTOME_NETWORK_PARTITION_ANNOTATION_CLASS,
            DataConstants.INTERACTOME_NETWORK_PARTITION_ANNOTATION_CLASS_TAG,
            DataConstants.INTERACTOME_NETWORK_PARTITION_ANNOTATION_CLASS_REGEX,
            DataConstants.INTERACTOME_NETWORK_PARTITION_ANNOTATION_PROTEIN_TAG,
            DataConstants.INTERACTOME_NETWORK_PARTITION_ANNOTATION_ANNOTATION_TAG,
            DataConstants.INTERACTOME_NETWORK_PARTITION_COMMENT_CHAR)

        # Parse the protein redundancy file
        input_file = PropertyManager.get_instance().get_property(
            DataConstants.INTERACTOME_NETWORK_REDUNDANCY_DEFINITION_PROPERTY, True)
        interactome_network_redundancy_definition_value = [None, basename(input_file), None]
        self.launch_insertion_TSV(
            input_file, False,
            DataConstants.INTERACTOME_NETWORK_REDUNDANCY_DEFINITION_HEADERS,
            DataConstants.INTERACTOME_NETWORK_REDUNDANCY_DEFINITION_CLASS,
            DataConstants.INTERACTOME_NETWORK_REDUNDANCY_DEFINITION_PARAMS,
            interactome_network_redundancy_definition_value,
            DataConstants.INTERACTOME_NETWORK_REDUNDANCY_DEFINITION_COMMENT_CHAR,
            "Redundancy", False)

        #===================================================================
        # RNA DEFINITION
        #===================================================================

        # Make a query of a specific type of protein cross references to speed up insertion
        DataManager.get_instance().perform_query(
            DataConstants.PROTEIN_ENSP_XREF_KW,
            "query( ProteinCrossReference.protein_id,ProteinCrossReference.crossReferenceID ).filter(ProteinCrossReference.sourceDB == DataConstants.PROTEIN_ENSP_XREF_DB).all()")
        # Convert query into a dictionary
        DataManager.get_instance().query_to_dict(DataConstants.PROTEIN_ENSP_XREF_KW, 1, 0)

        # Parse the RNA file
        input_file = PropertyManager.get_instance().get_property(
            DataConstants.RNA_DEFINITION_PROPERTY, True)
        self.launch_insertion_TSV(input_file, True,
                                  DataConstants.RNA_HEADERS,
                                  DataConstants.RNA_CLASS,
                                  DataConstants.RNA_PARAMS, None,
                                  DataConstants.RNA_COMMENT_CHAR)

        # Parse the RNA cross references file
        input_file = PropertyManager.get_instance().get_property(
            DataConstants.RNA_CROSS_REFERENCE_PROPERTY, True)
        self.launch_insertion_TSV(
            input_file, False,
            DataConstants.RNA_CROSS_REFERENCE_HEADERS,
            DataConstants.RNA_CROSS_REFERENCE_CLASS,
            DataConstants.RNA_CROSS_REFERENCE_PARAMS, None,
            DataConstants.RNA_CROSS_REFERENCE_COMMENT_CHAR)

        #===================================================================
        # RNA TISSUE EXPRESSION
        #===================================================================

        # Make a query of all RNA IDs to speed up insertion
        DataManager.get_instance().perform_query(DataConstants.RNA_ALL_KW, "query( RNA ).all()")
        # Format query into dict data structure
        DataManager.get_instance().query_to_object_dict(DataConstants.RNA_ALL_KW, "transcriptID")

        # Make a query of all Protein IDs (uniprotAC) to speed up insertion
        DataManager.get_instance().perform_query(DataConstants.PROT_ALL_KW, "query( Protein ).all()")
        # Format query into dict data structure
        DataManager.get_instance().query_to_object_dict(DataConstants.PROT_ALL_KW, "uniprotAC")

        # Parse the RNA tissue expression file
        input_file = PropertyManager.get_instance().get_property(
            DataConstants.RNA_TISSUE_EXPRESSION_PROPERTY, True)
        self.launch_insertion_TSV(
            input_file, True,
            DataConstants.RNA_TISSUE_EXPRESSION_HEADERS,
            DataConstants.RNA_TISSUE_EXPRESSION_CLASS,
            DataConstants.RNA_TISSUE_EXPRESSION_PARAMS,
            DataConstants.RNA_TISSUE_EXPRESSION_VALUE,
            DataConstants.RNA_TISSUE_EXPRESSION_COMMENT_CHAR)

        #===================================================================
        # PROTEIN RNA INTERACTION
        #===================================================================

        self.forceOverride = 1

        # Parse the file listing RNAs with catRAPID data
        input_file = PropertyManager.get_instance().get_property(
            DataConstants.INTERACTING_RNA_DEFINITION_PROPERTY, True)
        self.launch_insertion_TSV(
            input_file, True,
            DataConstants.INTERACTING_RNA_DEFINITION_HEADERS,
            DataConstants.INTERACTING_RNA_DEFINITION_CLASS,
            DataConstants.INTERACTING_RNA_DEFINITION_PARAMS, None,
            DataConstants.INTERACTING_RNA_DEFINITION_COMMENT_CHAR)

        # Parse the file listing Proteins with catRAPID data
        input_file = PropertyManager.get_instance().get_property(
            DataConstants.INTERACTING_PROTEIN_DEFINITION_PROPERTY, True)
        self.launch_insertion_TSV(
            input_file, True,
            DataConstants.INTERACTING_PROTEIN_DEFINITION_HEADERS,
            DataConstants.INTERACTING_PROTEIN_DEFINITION_CLASS,
            DataConstants.INTERACTING_PROTEIN_DEFINITION_PARAMS, None,
            DataConstants.INTERACTING_PROTEIN_DEFINITION_COMMENT_CHAR)

        # Initialize data items to store missing interactions
        if DataConstants.PROTEIN_RNA_INTERACTION_CATRAPID_MISSING_RNA_KW not in DataManager.get_instance().data:
            DataManager.get_instance().store_data(
                DataConstants.PROTEIN_RNA_INTERACTION_CATRAPID_MISSING_RNA_KW, [])
        if DataConstants.PROTEIN_RNA_INTERACTION_CATRAPID_MISSING_PROT_KW not in DataManager.get_instance().data:
            DataManager.get_instance().store_data(
                DataConstants.PROTEIN_RNA_INTERACTION_CATRAPID_MISSING_PROT_KW, [])

        # Parse the ProteinRNAInteractionCatRAPID file
        input_file = PropertyManager.get_instance().get_property(
            DataConstants.PROTEIN_RNA_INTERACTION_CATRAPID_DEFINITION_PROPERTY, True)
        self.launch_insertion_TSV(
            input_file, False,
            DataConstants.PROTEIN_RNA_INTERACTION_CATRAPID_HEADERS,
            DataConstants.PROTEIN_RNA_INTERACTION_CATRAPID_CLASS,
            DataConstants.PROTEIN_RNA_INTERACTION_CATRAPID_PARAMS, None,
            DataConstants.PROTEIN_RNA_INTERACTION_CATRAPID_COMMENT_CHAR)

        self.forceOverride = 0

        # Remove data that will no longer be used, to reduce memory usage
        DataManager.get_instance().delete_data(DataConstants.PROTEIN_ENSP_XREF_KW)
        DataManager.get_instance().delete_data(DataConstants.RNA_ALL_KW)
        DataManager.get_instance().delete_data(DataConstants.PROT_ALL_KW)

    except RainetException as re:
        Logger.get_instance().error(re.to_string())
        Timer.get_instance().stop_chrono("ERROR : Data insertion FAILED")
        return

    # # Report on potential missing data
    # self.check_missing_data()

    # Stop the chrono
    Timer.get_instance().stop_chrono("Data insertion finished")
                            help='Folder where to write output files.')

        #gets the arguments
        args = parser.parse_args()

        # Initialise class
        run = ExploreInteractingRNAs(args.rainetDB, args.transcriptList,
                                     args.interactingTranscriptList, args.outputFolder)

        #===============================================================================
        # Run analysis / processing
        #===============================================================================

        # Start chrono
        Timer.get_instance().start_chrono()

        transcriptList = run.read_transcript_list(args.transcriptList)
        interactingTranscriptList = run.read_transcript_list(args.interactingTranscriptList)

        run.query_database(transcriptList, interactingTranscriptList)

    # Use RainetException to catch errors
    except RainetException as rainet:
        Logger.get_instance().error(
            "Error during execution of ExploreInteractingRNAs. Aborting :\n" + rainet.to_string())

    # Stop the chrono
    Timer.get_instance().stop_chrono("ExploreInteractingRNAs : Finished")
                    omimID)
                notFound.add(omimID)

        print "write_output: %s OMIM IDs not found." % len(notFound)
        print "write_output: wrote %s protein-disease association entries." % nEntries

        outFile.close()


if __name__ == "__main__":

    try:
        # Start chrono
        Timer.get_instance().start_chrono()

        print "STARTING " + SCRIPT_NAME

        #===============================================================================
        # Get input arguments, initialise class
        #===============================================================================
        parser = argparse.ArgumentParser(description=DESC_COMMENT)

        # positional args
        parser.add_argument('mimTitlesFile', metavar='mimTitlesFile', type=str,
                            help='Path to mimTitles.txt file downloadable from OMIM database.')
        parser.add_argument('outputFolder',
from fr.tagc.rainet.core.util.subprocess.SubprocessUtil import SubprocessUtil

#===============================================================================
# Started 03-Oct-2016
# Diogo Ribeiro
DESC_COMMENT = "Wrapper of NetworkScoreAnalysis to run for several top numbers."
SCRIPT_NAME = "networkAnalysisWrapper.py"
#===============================================================================

if __name__ == "__main__":

    try:
        # Start chrono
        Timer.get_instance().start_chrono()

        print "STARTING " + SCRIPT_NAME

        #===============================================================================
        # Get input arguments
        #===============================================================================
        parser = argparse.ArgumentParser(description=DESC_COMMENT)

        # positional args
        parser.add_argument('script', metavar='script', type=str,
                            help='Script to run.')
        parser.add_argument('networkFile', metavar='networkFile', type=str,
        #gets the arguments
        args = parser.parse_args()

        # Initialise class
        run = NPInterPredictionValidation(args.catRAPIDFile, args.NPInterFile,
                                          args.noncodeTx2noncodeGene, args.noncode2Ensembl,
                                          args.rainetDB, args.outputFolder)

        #===============================================================================
        # Run analysis / processing
        #===============================================================================

        # Start chrono
        Timer.get_instance().start_chrono()

        Timer.get_instance().step("reading RAINET DB file..")

        # Build RNA cross references
        run.rnaXrefDict = run.rna_cross_references()
        # Build Protein cross references
        run.uniprotACs, run.xrefDict = run.proteins_in_rainet()

        Timer.get_instance().step("reading NONCODE file..")
        run.read_noncode_conversion_file()

        Timer.get_instance().step("reading NPInter file..")
            outputText = "%s\t%s\n" % (line, annotation)
            outFile.write(outputText)

        outFile.close()

        print "read_interaction_file: read %s lines.." % lineCount


if __name__ == "__main__":

    try:
        # Start chrono
        Timer.get_instance().start_chrono()

        print "STARTING " + SCRIPT_NAME

        #===============================================================================
        # Get input arguments, initialise class
        #===============================================================================
        parser = argparse.ArgumentParser(description=DESC_COMMENT)

        # positional args
        parser.add_argument(
            'annotationFile', metavar='annotationFile', type=str,
            help='TSV file with annotation per transcript. No header. Can have several annotations for same transcript, one per line. E.g. transcriptID\tannotation.'
        set2 = set(group2)

        overlap = set1.intersection(set2)

        perc1 = len(overlap) * 100.0 / len(group1)
        perc2 = len(overlap) * 100.0 / len(group2)

        return perc1, perc2


if __name__ == "__main__":

    try:
        # Start chrono
        Timer.get_instance().start_chrono()

        print "STARTING " + SCRIPT_NAME

        #===============================================================================
        # Get input arguments, initialise class
        #===============================================================================
        parser = argparse.ArgumentParser(description=DESC_COMMENT)

        # positional args
        parser.add_argument(
            'rainetDB', metavar='rainetDB', type=str,
            help='Rainet database to be used. Needs to have protein annotation datasets and interactingProteins tables up-to-date.'
            complexedProteins = self.complexAnnotDict[complexID]

            # overlap between proteins in complex and experimental interactions
            intersection = interactingProteins.intersection(complexedProteins)

            if len(intersection) > 0:
                outFile.write("%s\t%s\t%s\n" % (txID, complexID, ",".join(intersection)))


if __name__ == "__main__":

    try:
        # Start chrono
        Timer.get_instance().start_chrono()

        print "STARTING " + SCRIPT_NAME

        #===============================================================================
        # Get input arguments, initialise class
        #===============================================================================
        parser = argparse.ArgumentParser(description=DESC_COMMENT)

        # positional args
        parser.add_argument('rainetDB', metavar='rainetDB', type=str,
                            help='Rainet database to be used.')
        parser.add_argument('complexTable', metavar='complexTable',
        args = parser.parse_args()

        # Initialise class
        run = StarBasePredictionValidation(args.catRAPIDFile, args.starBaseFile,
                                           args.starBaseProteinConversionFile, args.rainetDB,
                                           args.outputFolder, args.minimumBioComplex,
                                           args.minimumClipReadNumber)

        #===============================================================================
        # Run analysis / processing
        #===============================================================================

        # Start chrono
        Timer.get_instance().start_chrono()

        Timer.get_instance().step("reading RAINET DB and conversion files..")

        # Build starBase RNA cross references
        run.starbaseRNAXrefDict = run.rna_cross_references()
        # Build starBase Protein cross references
        run.starbaseProtXrefDict = run.read_starbase_protein_conversion_file()
        # Build Protein cross references
        run.xrefDict = run.protein_cross_references()

        Timer.get_instance().step("reading starBase file..")
        starbasePairs, starbasePairsBiocomplex = run.read_starbase_file()
    outFile.close()

    # clean output
    os.system("mkdir %s/processed" % (outputFolder))
    os.system("mv %s/*.bed %s/processed" % (outputFolder, outputFolder))

    print "produce_interactions_file: produced interactions for %s proteins.." % len(interactionsPerProtein)


if __name__ == "__main__":

    try:
        # Start chrono
        Timer.get_instance().start_chrono()

        print "STARTING " + SCRIPT_NAME

        #===============================================================================
        # Get input arguments
        #===============================================================================
        parser = argparse.ArgumentParser(description=DESC_COMMENT)

        # positional args
        parser.add_argument(
            'bedFolder', metavar='bedFolder', type=str,
            help='Input folder with all bed files to be processed.')
        parser.add_argument('outputFolder', metavar='outputFolder',
        else:
            # neither lncRNA nor mRNA
            continue

    outFile.close()

    print "read_data_file: number of lines in input data:", len(newTable)
    print "read_data_file: number of lncRNAs per group", numbersPerGroup


if __name__ == "__main__":

    try:
        # Start chrono
        Timer.get_instance().start_chrono()

        print "STARTING " + SCRIPT_NAME

        #===============================================================================
        # Get input arguments, initialise class
        #===============================================================================
        parser = argparse.ArgumentParser(description=DESC_COMMENT)

        # positional args
        parser.add_argument(
            'annotationFile', metavar='annotationFile', type=str,
            help='TSV file with annotation per transcript (gene). No header. Can have several annotations for same transcript, one per line. E.g. transcriptID\tannotation.'
print "read_lncrnadisease: read %s lines: " % nLines print "read_lncrnadisease: IDs found: %s" % len(found) print "read_lncrnadisease: IDs not found: %s" % len(notFound) print "List of transcripts not found: " print notFound if __name__ == "__main__": try: # Start chrono Timer.get_instance().start_chrono() print "STARTING " + SCRIPT_NAME #=============================================================================== # Get input arguments, initialise class #=============================================================================== parser = argparse.ArgumentParser(description=DESC_COMMENT) # positional args parser.add_argument('rainetDB', metavar='rainetDB', type=str, help='Rainet database to be used.') parser.add_argument( 'lncrnadiseaseData',
                            type=str,
                            help='Folder where to write output files.')

        #gets the arguments
        args = parser.parse_args()

        # Initialise class
        run = ECLIPPredictionValidation(args.catRAPIDFile, args.eClipFile, args.outputFolder)

        #===============================================================================
        # Run analysis / processing
        #===============================================================================

        # Start chrono
        Timer.get_instance().start_chrono()

        Timer.get_instance().step("reading eCLIP file..")
        run.read_eclip_file()

        Timer.get_instance().step("reading catRAPID file..")
        run.read_catrapid_file()

    # Use RainetException to catch errors
    except RainetException as rainet:
        Logger.get_instance().error("Error during execution. Aborting :\n" + rainet.to_string())

    # Stop the chrono