def parse_table(reference, key, args_dict=None):
    """Parse an analyte table into a dictionary keyed by analyte ID
    """
    if 'source_id' in reference[key].columns.tolist():
        column_names = [
            'analyte_id',
            'analyte_name',
            'reaction_id',
            'reaction_name',
            'source_id']
    else:
        column_names = [
            'analyte_id',
            'analyte_name',
            'reaction_id',
            'reaction_name']

    reference_parsed = reference[key][column_names].copy()

    # Analyte names follow the pattern "name [compartment]"; use raw
    # strings so the escaped brackets are valid regex patterns
    reference_parsed['analyte'] = reference_parsed[
        'analyte_name'].str.split(r' \[').str[0]
    reference_parsed['compartment'] = reference_parsed[
        'analyte_name'].str.split(r' \[').str[1].str.split(r'\]').str[0]

    reference_dictionary = {}
    counter = 0
    total = len(reference_parsed.index)
    feed_step = max(total // 15, 1)  # guard against modulo-by-zero on small tables
    has_source = 'source_id' in reference[key].columns.tolist()

    for index, row in reference_parsed.iterrows():
        # Label-based access avoids deprecated positional Series indexing
        analyte_id = row['analyte_id']
        reference_dictionary[analyte_id] = {}
        reference_dictionary[analyte_id]['analyte_id'] = analyte_id
        reference_dictionary[analyte_id]['reaction_id'] = row['reaction_id']
        reference_dictionary[analyte_id]['reaction_name'] = \
            row['reaction_name']
        if has_source:
            reference_dictionary[analyte_id]['source_id'] = row['source_id']
        reference_dictionary[analyte_id]['analyte'] = row['analyte']
        reference_dictionary[analyte_id]['compartment'] = row['compartment']

        if counter % feed_step == 0 and args_dict is not None:
            progress_feed(args_dict, "reactions")
        counter += 1

    return reference_dictionary
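# A minimal usage sketch for parse_table(). The table layout (a dict of
# pandas DataFrames keyed by table name) is inferred from the
# reference[key] access above; the values here are illustrative only.
import pandas as pd

_example_reference = {
    'pe_reactions': pd.DataFrame({
        'analyte_id': ['A1', 'A2'],
        'analyte_name': ['ATP [cytosol]', 'ADP [mitochondrial matrix]'],
        'reaction_id': ['R1', 'R2'],
        'reaction_name': ['hydrolysis', 'transport']})}

# With args_dict=None, the progress_feed() calls are skipped entirely
_parsed = parse_table(_example_reference, 'pe_reactions')
assert _parsed['A1']['compartment'] == 'cytosol'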
def __main__(args_dict):
    """Analyze data on network model
    """
    # Get network curation info
    network = read_network(network_url=args_dict['network'])
    progress_feed(args_dict, "model", 2)

    if args_dict['organism_curation'] != 'None':
        args_dict['species_id'] = network['species_id']

    # Read in data (if any)
    if str(args_dict['transcriptomics']).lower() != 'none' \
            or str(args_dict['proteomics']).lower() != 'none' \
            or str(args_dict['metabolomics']).lower() != 'none':
        data, stats = prepare_data(
            network=network,
            transcriptomics_url=args_dict['transcriptomics'],
            proteomics_url=args_dict['proteomics'],
            metabolomics_url=args_dict['metabolomics'])
        progress_feed(args_dict, "model", 3)
        flag_data = False
    else:
        # No omics data provided; build placeholder tables so the
        # downstream mapping steps still run
        data = pd.DataFrame()
        data['NoSample'] = [0, 0, 0]
        data.index = ['dummy_index1', 'dummy_index2', 'dummy_index3']

        stats = pd.DataFrame()
        stats['NoSample'] = [0, 0, 0]
        stats.index = ['dummy_index1', 'dummy_index2', 'dummy_index3']

        progress_feed(args_dict, "model", 3)
        flag_data = True

    # Generate graph
    graph_name = model(
        args_dict=args_dict,
        network=network,
        data=data,
        stats=stats,
        species_id=args_dict['species_id'],
        output_file=args_dict['output_file'],
        flag_data=flag_data)

    # Search network for motifs
    motif_search(model_file=graph_name)
    progress_feed(args_dict, "model", 10)
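# A sketch of the args_dict this entry point expects, inferred from the
# keys accessed above (all paths and the species code are placeholders):
_example_args = {
    'network': '/data/HSA_metaboverse_db.pickle',
    'organism_curation': 'None',
    'transcriptomics': 'None',
    'proteomics': 'None',
    'metabolomics': '/data/metabolomics.tsv',
    'species_id': 'HSA',
    'output_file': '/data/HSA_global_reactions.json'}
# __main__(_example_args)  # reads the network, maps the metabolomics
#                          # table onto it, then runs the motif search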
def __main__(species_id, output_dir, args_dict):
    """Fetch all reactions for a given organism
    """
    # TODO: Make pathway IDs and reaction IDs non R-HSA-etc

    # Get pathways files
    pathways_dir = unpack_pathways(output_dir=output_dir)
    progress_feed(args_dict, "curate", 10)

    pathways_list = get_pathways(
        species_id=species_id,
        pathways_dir=pathways_dir)
    progress_feed(args_dict, "curate", 7)

    # Get list of reaction files to use for populating database
    pathway_database, reaction_database, species_database, \
        name_database, compartment_database, compartment_dictionary, \
        components_database = process_components(
            output_dir=output_dir,
            pathways_dir=pathways_dir,
            pathways_list=pathways_list,
            species_id=species_id,
            args_dict=args_dict)
    progress_feed(args_dict, "curate", 5)

    # Clean up the unpacked pathway files once processing is complete
    if 'sbml' in pathways_dir:
        shutil.rmtree(pathways_dir)
    else:
        print(
            'Could not find SBML file directory, skipping removal of this'
            ' directory...')

    return (pathway_database, reaction_database, species_database,
            name_database, compartment_dictionary, components_database)
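# Usage sketch (paths are illustrative; species codes follow Reactome's
# convention, e.g. 'HSA' for Homo sapiens). Note that compartment_database
# is consumed internally and is not part of the return tuple.
# pathway_database, reaction_database, species_database, name_database, \
#     compartment_dictionary, components_database = __main__(
#         species_id='HSA',
#         output_dir='/data/curation/',
#         args_dict=args_dict)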
def __main__(args_dict):
    """Curate Reactome database
    """
    # Load reactions
    print(
        'Curating Reactome network database. Please be patient, this will'
        ' take several minutes...')
    print('Loading reactions...')
    progress_feed(args_dict, "curate", 3)
    pathway_database, reaction_database, species_database, \
        name_database, compartment_dictionary, \
        components_database = load_reactions(
            species_id=args_dict['species_id'],
            output_dir=args_dict['output'],
            args_dict=args_dict)

    print('Loading complex database...')
    complexes_reference = load_complexes(output_dir=args_dict['output'])
    progress_feed(args_dict, "curate", 3)

    print('Parsing complex database...')
    complexes_reference['complex_dictionary'] = parse_complexes(
        complexes_reference)
    progress_feed(args_dict, "curate", 2)

    print('Finalizing complex database...')
    complexes_reference['complex_dictionary'] = reference_complex_species(
        reference=complexes_reference['complex_dictionary'],
        name_database=name_database)
    progress_feed(args_dict, "curate", 2)

    print('Parsing Ensembl database...')
    ensembl_reference = parse_ensembl_synonyms(
        output_dir=args_dict['output'],
        species_id=args_dict['species_id'])
    progress_feed(args_dict, "curate", 3)

    print('Adding gene IDs to name database...')
    name_database = add_genes(
        name_database=name_database,
        ensembl_reference=ensembl_reference)
    progress_feed(args_dict, "curate", 2)

    print('Parsing UniProt database...')
    uniprot_reference = parse_uniprot_synonyms(
        output_dir=args_dict['output'],
        species_id=args_dict['species_id'])
    progress_feed(args_dict, "curate", 3)

    print('Parsing ChEBI database...')
    chebi_reference, uniprot_metabolites = parse_chebi_synonyms(
        output_dir=args_dict['output'])
    progress_feed(args_dict, "curate", 5)

    metaboverse_db = {
        'species_id': args_dict['species_id'],
        'pathway_database': pathway_database,
        'reaction_database': reaction_database,
        'species_database': species_database,
        'name_database': name_database,
        'ensembl_synonyms': ensembl_reference,
        'uniprot_synonyms': uniprot_reference,
        'chebi_synonyms': chebi_reference,
        'uniprot_metabolites': uniprot_metabolites,
        'complex_dictionary': complexes_reference['complex_dictionary'],
        'compartment_dictionary': compartment_dictionary,
        'components_database': components_database}

    # Write database to file
    print('Writing metaboverse database to file...')
    args_dict['curation'] = args_dict['species_id'] + '_metaboverse_db.pickle'
    write_database(
        output=args_dict['output'],
        file=args_dict['curation'],
        database=metaboverse_db)
    progress_feed(args_dict, "curate", 5)
    print('Metaboverse database curation complete.')

    return args_dict
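# Loading the curated database back in (a minimal sketch, assuming
# write_database() pickles the metaboverse_db dict, as the .pickle
# extension above suggests):
import os
import pickle

def load_metaboverse_db(output_dir, species_id):
    """Load a <species_id>_metaboverse_db.pickle written by __main__()."""
    path = os.path.join(output_dir, species_id + '_metaboverse_db.pickle')
    with open(path, 'rb') as handle:
        return pickle.load(handle)

# db = load_metaboverse_db('/data/curation/', 'HSA')
# sorted(db.keys())  # species_id, pathway_database, reaction_database, ...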
def __main__(args_dict, network, data, stats, species_id, output_file,
             flag_data=False):
    """Generate graph object for visualization
    """
    print('Preparing metadata...')

    # Generate output file name
    graph_name = name_graph(
        output_file=output_file,
        species_id=species_id)

    # Prepare UniProt to Ensembl name mapper
    reverse_genes = {v: k for k, v in network['ensembl_synonyms'].items()}
    protein_dictionary = uniprot_ensembl_reference(
        uniprot_reference=network['uniprot_synonyms'],
        ensembl_reference=reverse_genes)
    progress_feed(args_dict, "model", 1)

    reverse_metabolite_dictionary = make_metabolite_synonym_dictionary(
        network=network)

    # Generate graph with name mapping
    print('Building network...')
    G, network['reaction_database'] = build_graph(
        network=network['reaction_database'],
        species_reference=network['species_database'],
        name_reference=network['name_database'],
        protein_reference=protein_dictionary,
        uniprot_reference=network['uniprot_synonyms'],
        complexes=network['complex_dictionary'],
        species_id=species_id,
        gene_reference=network['ensembl_synonyms'],
        compartment_reference=network['compartment_dictionary'],
        component_database=network['components_database'],
        reverse_metabolite_dictionary=reverse_metabolite_dictionary)
    progress_feed(args_dict, "model", 9)

    # TODO: For gene and protein components, add a section to the reaction
    # database for additional_components, then pull those in with
    # everything else in the JS front-end

    # Overlay data and stats; calculate heatmap values for p-value
    # and expression value
    print('Mapping user data...')
    degree_dictionary = compile_node_degrees(graph=G)

    # Build a combined synonym lookup across Ensembl, UniProt, and ChEBI
    name_reference = {}
    for k, v in network['ensembl_synonyms'].items():
        name_reference[v] = k
        name_reference[k] = k
    for k, v in network['uniprot_synonyms'].items():
        name_reference[v] = k
        name_reference[k] = k
    for k, v in network['chebi_synonyms'].items():
        name_reference[k] = v
        name_reference[v] = v

    G, max_value, max_stat = map_attributes(
        graph=G,
        data=data,
        stats=stats,
        name_reference=name_reference,
        degree_dictionary=degree_dictionary)
    progress_feed(args_dict, "graph", 5)

    if flag_data:
        # Placeholder scaling bounds when no user data were provided
        max_value = 5
        max_stat = 1

    print('Broadcasting values where available...')
    categories = data.columns.tolist()
    G = broadcast_values(
        graph=G,
        categories=categories,
        max_value=max_value,
        max_stat=max_stat)
    progress_feed(args_dict, "graph", 10)

    print('Compiling collapsed reaction reference...')
    # Collapse reactions
    G, updated_reactions, changed_reactions = collapse_nodes(
        graph=G,
        reaction_dictionary=network['reaction_database'],
        samples=len(categories))
    updated_pathway_dictionary = generate_updated_dictionary(
        original_database=network['pathway_database'],
        update_dictionary=changed_reactions)
    progress_feed(args_dict, "graph", 8)

    # Generate list of super pathways: those whose reaction count exceeds
    # the scale factor (roughly 200 for a human-sized network)
    print('Compiling super pathways...')
    scale_factor = int(len(network['reaction_database'].keys()) * 0.0157)
    super_pathways = compile_pathway_degree(
        pathways=network['pathway_database'],
        scale_factor=scale_factor)

    motif_reaction_dictionary = make_motif_reaction_dictionary(
        network=network,
        updated_reactions=updated_reactions,
        updated_pathway_dictionary=updated_pathway_dictionary)

    mod_collapsed_pathways = {}
    for k, v in updated_pathway_dictionary.items():
        mod_collapsed_pathways[v['id']] = v

    # Export graph, pathway membership, pathway degree, other refs
    print('Exporting graph...')
    output_graph(
        graph=G,
        output_name=graph_name,
        pathway_dictionary=network['pathway_database'],
        collapsed_pathway_dictionary=updated_pathway_dictionary,
        super_pathways=super_pathways,
        reaction_dictionary=network['reaction_database'],
        collapsed_reaction_dictionary=updated_reactions,
        motif_reaction_dictionary=motif_reaction_dictionary,
        mod_collapsed_pathways=mod_collapsed_pathways,
        max_value=max_value,
        max_stat=max_stat,
        categories=categories)
    print('Graphing complete.')
    progress_feed(args_dict, "graph", 2)

    return graph_name
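# A minimal sketch of the bidirectional synonym lookup assembled above:
# both the display name and the identifier resolve to the same key, so
# user data labeled either way map to the same node (values illustrative):
_ensembl_synonyms = {'ENSG00000111640': 'GAPDH'}
_name_reference = {}
for _k, _v in _ensembl_synonyms.items():
    _name_reference[_v] = _k
    _name_reference[_k] = _k
assert _name_reference['GAPDH'] == _name_reference['ENSG00000111640']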
def main(args=None):
    check_dependencies()

    # Read in arguments
    args, args_dict = parse_arguments(args, __version__)

    if args_dict['cmd'] == 'preprocess':
        # Run metaboverse-preprocess
        print('Preprocessing ' + args_dict['type'] + ' dataset...')
        preprocess(args_dict)
        sys.stdout.flush()
        sys.exit(0)  # exit with status 0 so the shell reads this as success

    elif args_dict['cmd'] == 'curate':
        # Run metaboverse-curate
        print(args_dict)
        if str(args_dict['organism_curation']) != 'None':
            progress_feed(args_dict=args_dict, process="curate", amount=50)

            # Update args_dict with path for network model
            with open(args_dict['organism_curation'], 'rb') as network_file:
                network = pickle.load(network_file)
            args_dict['species_id'] = network['species_id']
            args_dict['output_file'] = args_dict['output'] \
                + args_dict['species_id'] \
                + '_global_reactions.json'
            args_dict['network'] = args_dict['organism_curation']

            # Add variables back to session data JSON file
            session_file = args_dict['session_data']
            update_session(
                session_file=session_file,
                key='species_id',
                value=args_dict['species_id'])
            update_session(
                session_file=session_file,
                key='output_file',
                value=args_dict['output_file'])
            update_session(
                session_file=session_file,
                key='database_url',
                value=args_dict['output_file'])
            print('Skipping organism network modeling as one was provided'
                  ' by the user...')
            sys.stdout.flush()
        else:
            print('Curating network model...')
            args_dict['network'] = args_dict['model_file']
            args_dict = curate(args_dict)
            sys.stdout.flush()

        print('Curating data onto the network model...')
        analyze(args_dict)
        sys.stdout.flush()
        sys.exit(0)  # exit with status 0 so the shell reads this as success

    else:
        # Invalid sub-module selected
        raise Exception('Invalid sub-module selected')

    # Check log file for errors and exceptions
    # get_dependencies(args_dict)
    sys.stdout.flush()
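# Standard entry point; parse_arguments() is expected to read sys.argv
# when args is None:
if __name__ == '__main__':
    main()

# Example invocation from a shell (the sub-command names come from the
# args_dict['cmd'] checks above; the CLI name and flag spellings are
# assumptions based on the args_dict keys used in this module):
#   metaboverse curate --output /data/curation/ --species_id HSA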