def enhance_metabolite_references(
    name, references_novel, summary_hmdb
):
    """
    Supplement a metabolite's references with those of matching HMDB entries.

    HMDB entries are matched by the metabolite's current HMDB identifiers and
    by its name. The references collected from those entries replace the
    'hmdb' list and are merged into the 'pubchem', 'chebi' and 'kegg' lists.

    arguments:
        name: name of the metabolite, passed to the matcher as a one-element
            list
        references_novel: dictionary with list entries 'hmdb', 'pubchem',
            'chebi' and 'kegg'; it is modified in place
        summary_hmdb: summary of HMDB passed through to the helper functions

    returns:
        the same references_novel dictionary, after modification

    """

    # Find the HMDB entries that correspond to this metabolite.
    matched_keys = match_hmdb_entries_by_identifiers_names(
        identifiers=references_novel['hmdb'],
        names=[name],
        summary_hmdb=summary_hmdb
    )
    # Gather the references that those entries carry.
    supplement = collect_hmdb_entries_references(
        keys=matched_keys,
        summary_hmdb=summary_hmdb
    )
    # The HMDB list is taken entirely from the matched entries.
    references_novel['hmdb'] = collect_unique_elements(supplement['hmdb'])
    # The other sources keep their original values and gain the supplement.
    for source in ('pubchem', 'chebi', 'kegg'):
        references_novel[source] = collect_unique_elements(
            references_novel[source] + supplement[source]
        )
    return references_novel
def convert_reactions_text(reactions):
    """
    Convert reaction records to flat records with semicolon-joined lists.

    arguments:
        reactions: dictionary of reaction records; only its values are read

    returns:
        list with one flat dictionary per reaction, in iteration order of
        the dictionary's values

    """

    # Sources of references, in the order of the columns they produce.
    reference_sources = (
        'metanetx', 'recon', 'gene', 'enzyme', 'kegg', 'reactome',
        'metacyc', 'bigg', 'rhea', 'sabiork', 'seed',
    )
    rows = []
    for reaction in reactions.values():
        # Participants
        participants = reaction['participants']
        compartments_unique = collect_unique_elements(
            elements_original=collect_value_from_records(
                key='compartment', records=participants
            )
        )
        metabolites_unique = collect_unique_elements(
            elements_original=collect_value_from_records(
                key='metabolite', records=participants
            )
        )
        # Transports
        transports = reaction['transports']
        transport_metabolites = collect_value_from_records(
            key='metabolite', records=transports
        )
        transport_compartments_unique = collect_unique_elements(
            elements_original=collect_values_from_records(
                key='compartments', records=transports
            )
        )
        # Compile information
        row = {
            'identifier': reaction['identifier'],
            'name': reaction['name'],
            'equation': reaction['equation'],
            'metabolites': ';'.join(metabolites_unique),
            'compartments': ';'.join(compartments_unique),
            'processes': ';'.join(reaction['processes']),
            'reversibility': reaction['reversibility'],
            'conversion': reaction['conversion'],
            'dispersal': reaction['dispersal'],
            'transport': reaction['transport'],
            'transport_metabolites': ';'.join(transport_metabolites),
            'transport_compartments': ';'.join(
                transport_compartments_unique
            ),
            'replication': reaction['replication'],
            'replicates': ';'.join(reaction['replicates']),
        }
        # One 'reference_<source>' column per source, appended in order.
        for source in reference_sources:
            row['reference_' + source] = ';'.join(
                reaction['references'][source]
            )
        rows.append(row)
    return rows
def access_reactions_summary(reactions_interest, reactions, directory):
    """
    Summarize name and selected references for reactions of interest.

    arguments:
        reactions_interest: records with an 'identifier' entry for each
            reaction of interest
        reactions: dictionary of reaction records, keyed by identifier
        directory: not used by this function

    returns:
        list of dictionaries with keys 'identifier', 'name', 'metanetx',
        'gene' and 'enzyme', one per unique identifier of interest

    """

    # Unique identifiers of the reactions of interest.
    identifiers_unique = collect_unique_elements(
        collect_value_from_records(
            key='identifier', records=reactions_interest
        )
    )

    def summarize(identifier):
        # Reference lists are flattened to semicolon-delimited text.
        reaction = reactions[identifier]
        return {
            'identifier': identifier,
            'name': reaction['name'],
            'metanetx': ';'.join(reaction['references']['metanetx']),
            'gene': ';'.join(reaction['references']['gene']),
            'enzyme': ';'.join(reaction['references']['enzyme']),
        }

    return [summarize(identifier) for identifier in identifiers_unique]
def curate_metabolites_references(metabolite_curation, metabolites):
    """
    Apply one curation record to a metabolite's HMDB and PubChem references.

    For each of the two sources, a non-empty novel reference is added to the
    metabolite's list and then a non-empty erroneous reference is removed
    from it. Nothing changes if the metabolite is not in the collection.

    arguments:
        metabolite_curation: dictionary with entries "identifier_novel",
            "hmdb_novel", "hmdb_error", "pubchem_novel" and "pubchem_error"
        metabolites: dictionary of metabolite records keyed by identifier;
            it is modified in place

    returns:
        the same metabolites dictionary, after modification

    """

    # Interpretation
    identifier = metabolite_curation["identifier_novel"]
    changes = (
        (
            "hmdb",
            metabolite_curation["hmdb_novel"],
            metabolite_curation["hmdb_error"],
        ),
        (
            "pubchem",
            metabolite_curation["pubchem_novel"],
            metabolite_curation["pubchem_error"],
        ),
    )
    if identifier not in metabolites:
        return metabolites
    for source, addition, removal in changes:
        if len(addition) > 0:
            # The existing list is extended in place before de-duplication.
            current = metabolites[identifier]["references"][source]
            current.append(addition)
            metabolites[identifier]["references"][source] = (
                collect_unique_elements(current)
            )
        if len(removal) > 0:
            # Keep every reference other than the erroneous one.
            current = metabolites[identifier]["references"][source]
            metabolites[identifier]["references"][source] = [
                reference for reference in current if reference != removal
            ]
    return metabolites
def define_reaction_node(reaction_candidacy_identifier, reactions_candidacy,
                         reactions, metabolites, compartments, processes):
    """
    Compile the information that describes a candidate reaction as a node.

    arguments:
        reaction_candidacy_identifier: key of the candidate reaction
        reactions_candidacy: dictionary of candidate reactions; the selected
            record supplies 'identifier', 'reaction', 'name',
            'reversibility' and 'replicates'
        reactions: dictionary of reaction records, keyed by the candidate's
            'reaction' entry
        metabolites: reference in which to look up names of metabolites
        compartments: reference in which to look up names of compartments
        processes: reference in which to look up names of processes

    returns:
        dictionary that describes the reaction node; names of metabolites,
        compartments and processes are semicolon-delimited text

    """

    # Access information
    candidacy = reactions_candidacy[reaction_candidacy_identifier]
    reaction = reactions[candidacy['reaction']]

    def unique_participant_values(key):
        # Unique values of one attribute across the reaction's participants.
        return collect_unique_elements(
            elements_original=collect_value_from_records(
                key=key, records=reaction['participants']
            )
        )

    def lookup_names(identifiers, reference):
        # Names of the identified records in a reference collection.
        return collect_values_from_records_in_reference(
            key='name', identifiers=identifiers, reference=reference
        )

    compartments_names = lookup_names(
        unique_participant_values('compartment'), compartments
    )
    processes_names = lookup_names(reaction['processes'], processes)
    metabolites_names = lookup_names(
        unique_participant_values('metabolite'), metabolites
    )
    # Compile information
    return {
        'identifier': candidacy['identifier'],
        'type': 'reaction',
        'entity': reaction['identifier'],
        'name': candidacy['name'],
        'reversibility': candidacy['reversibility'],
        'metabolites': ';'.join(metabolites_names),
        'compartments': ';'.join(compartments_names),
        'processes': ';'.join(processes_names),
        'replicates': candidacy['replicates'],
    }
def collect_reaction_transports(reaction):
    """
    Collect the metabolites that a reaction moves between compartments.

    A metabolite counts as transported when it participates as both reactant
    and product and its compartments in those two roles do not include each
    other mutually.

    arguments:
        reaction: reaction record with a 'participants' entry

    returns:
        list of dictionaries, each with the 'metabolite' and the unique
        'compartments' in which it occurs as reactant or product

    """

    participants = reaction['participants']

    def select(key, criteria):
        # Values of one attribute from participants that match criteria.
        return collect_reaction_participants_value(
            key=key, criteria=criteria, participants=participants
        )

    reactants = select('metabolite', {'roles': ['reactant']})
    products = select('metabolite', {'roles': ['product']})
    # Metabolites that participate as both reactants and products
    shared = filter_common_elements(list_one=products, list_two=reactants)
    transports = []
    for metabolite in shared:
        # Compartments of the metabolite in each of its two roles
        origins = select(
            'compartment',
            {'metabolites': [metabolite], 'roles': ['reactant']}
        )
        destinations = select(
            'compartment',
            {'metabolites': [metabolite], 'roles': ['product']}
        )
        # Identical compartments on both sides mean there is no transport.
        if compare_lists_by_mutual_inclusion(
            list_one=origins, list_two=destinations
        ):
            continue
        transports.append({
            'metabolite': metabolite,
            'compartments': collect_unique_elements(
                elements_original=origins + destinations
            ),
        })
    return transports
def include_reaction_transport_processes(
    reaction_original, processes_transports
):
    """
    Add to a reaction the processes in which it participates by transport.

    arguments:
        reaction_original: reaction record with a 'processes' list; it is
            not modified
        processes_transports: information about transports in processes,
            passed through to collect_reaction_transport_processes

    returns:
        deep copy of the reaction whose 'processes' entry holds the unique
        union of the original processes and the transport processes

    """

    # Processes in which the reaction participates by transport
    by_transport = collect_reaction_transport_processes(
        reaction=reaction_original,
        processes_transports=processes_transports
    )
    combined = collect_unique_elements(
        elements_original=reaction_original['processes'] + by_transport
    )
    # Compile information on a copy so that the original stays intact.
    reaction_novel = copy.deepcopy(reaction_original)
    reaction_novel['processes'] = combined
    return reaction_novel
def convert_reactions_export_text(reactions, metabolites, compartments,
                                  processes):
    """
    Convert reaction records to flat records with names for export as text.

    arguments:
        reactions: dictionary of reaction records; only its values are read
        metabolites: reference in which to look up names of metabolites
        compartments: reference in which to look up names of compartments
        processes: reference in which to look up names of processes

    returns:
        list with one flat dictionary per reaction, in iteration order of
        the dictionary's values

    """

    # Sources of references, in the order of the columns they produce.
    reference_sources = (
        'metanetx', 'recon', 'gene', 'enzyme', 'kegg', 'reactome',
        'metacyc', 'bigg',
    )

    def lookup_names(identifiers, reference):
        # Names of the identified records in a reference collection.
        return collect_values_from_records_in_reference(
            key='name', identifiers=identifiers, reference=reference
        )

    rows = []
    for reaction in reactions.values():
        participants = reaction['participants']
        # Compartments
        compartments_names = lookup_names(
            collect_unique_elements(
                elements_original=collect_value_from_records(
                    key='compartment', records=participants
                )
            ),
            compartments
        )
        # Processes
        processes_names = lookup_names(reaction['processes'], processes)
        # Metabolites, separately for each side of the reaction
        reactants_names = lookup_names(
            collect_reaction_participants_value(
                key='metabolite',
                criteria={'roles': ['reactant']},
                participants=participants
            ),
            metabolites
        )
        products_names = lookup_names(
            collect_reaction_participants_value(
                key='metabolite',
                criteria={'roles': ['product']},
                participants=participants
            ),
            metabolites
        )
        # Compile information
        row = {
            'identifier': reaction['identifier'],
            'name': reaction['name'],
            'reactants': '; '.join(reactants_names),
            'products': '; '.join(products_names),
            'compartments': '; '.join(compartments_names),
            # NOTE(review): this delimiter has no space, unlike the other
            # columns - confirm whether that is intentional.
            'processes': ';'.join(processes_names),
            'reversibility': reaction['reversibility'],
        }
        # One 'reference_<source>' column per source, appended in order.
        for source in reference_sources:
            row['reference_' + source] = '; '.join(
                reaction['references'][source]
            )
        rows.append(row)
    return rows
def curate_processes(processes_curation, processes_original,
                     reactions_original):
    """
    Apply curation records to processes and to reactions' process lists.

    Each curation record either removes a process (novel identifier
    'null'), changes its identifier, changes its name, or both of the
    latter. The originals are deep-copied first and are never modified.

    Removing a process also removes its identifier from the 'processes'
    list of every reaction, so that no reaction refers to a process that no
    longer exists. The reactions themselves are kept.

    arguments:
        processes_curation: iterable of dictionaries with entries
            'identifier_original', 'identifier_novel', 'name_original' and
            'name_novel'
        processes_original: dictionary of process records keyed by
            identifier
        reactions_original: dictionary of reaction records, each with a
            'processes' list

    returns:
        dictionary with the curated copies under 'processes' and
        'reactions'

    """

    # Copy information
    processes_novel = copy.deepcopy(processes_original)
    reactions_novel = copy.deepcopy(reactions_original)
    for record in processes_curation:
        # Interpretation
        identifier_original = record['identifier_original']
        identifier_novel = record['identifier_novel']
        name_original = record['name_original']
        name_novel = record['name_novel']
        if identifier_novel == 'null':
            # Remove the process.
            if identifier_original in processes_novel:
                del processes_novel[identifier_original]
                # Removal of a process does not justify removal of any
                # reactions that participate in it, but those reactions
                # must not keep a reference to the removed process.
                for reaction in reactions_novel.values():
                    processes = reaction.get('processes')
                    if processes and identifier_original in processes:
                        reaction['processes'] = [
                            process for process in processes
                            if process != identifier_original
                        ]
            continue
        if identifier_original != identifier_novel:
            # Change identifier
            # Remove original
            if identifier_original in processes_novel:
                del processes_novel[identifier_original]
            # Replace with novel
            # NOTE(review): reactions are redirected only when the novel
            # identifier already names a process; otherwise they keep the
            # original identifier - confirm that this is intended.
            if identifier_novel in processes_novel:
                for reaction in reactions_novel.values():
                    processes = reaction['processes']
                    if identifier_original in processes:
                        replaced = [
                            identifier_novel
                            if process == identifier_original else process
                            for process in processes
                        ]
                        # The novel identifier may already be in the list.
                        # The record is updated through the loop variable;
                        # writing to the dictionary while iterating over it
                        # is unnecessary and unsafe.
                        reaction['processes'] = collect_unique_elements(
                            replaced
                        )
        if name_original != name_novel:
            # Change name
            if identifier_novel in processes_novel:
                processes_novel[identifier_novel]['name'] = name_novel
    return {'processes': processes_novel, 'reactions': reactions_novel}
def extract_hmdb_record_summary(hmdb_element, name_space):
    """
    Summarize identifiers, names and references of a single HMDB element.

    arguments:
        hmdb_element: element for a single HMDB entry
        name_space: name space passed through to the extraction helpers

    returns:
        dictionary with entries 'identifier', 'references_hmdb', 'synonyms',
        'name', 'reference_pubchem', 'reference_chebi' and 'reference_kegg'

    """

    def value_of(tag):
        # Value of a direct subelement of the HMDB element.
        return extract_subelement_value(
            element=hmdb_element, tag=tag, name_space=name_space
        )

    def values_within(container_tag, item_tag):
        # Values of the items within a container subelement.
        container = extract_subelement(
            element=hmdb_element, tag=container_tag, name_space=name_space
        )
        return extract_subelement_values(
            element=container, tag=item_tag, name_space=name_space
        )

    # HMDB identifiers: the primary accession joins the secondary ones.
    hmdb_primary = value_of(accession_tag)
    accessions = values_within(secondary_accessions_tag, accession_tag)
    accessions.append(hmdb_primary)
    references_hmdb = collect_unique_elements(accessions)
    # Synonyms: the name itself counts as one of the synonyms.
    name = value_of(name_tag)
    names = values_within(synonyms_tag, synonym_tag)
    names.append(name)
    synonyms = collect_unique_elements(names)
    # PubChem reference: '0' is a non-sense identifier and is discarded.
    pubchem_tentative = value_of(pubchem_tag)
    reference_pubchem = (
        None if pubchem_tentative == '0' else pubchem_tentative
    )
    # References to other sources
    reference_chebi = value_of(chebi_tag)
    reference_kegg = value_of(kegg_tag)
    return {
        'identifier': hmdb_primary,
        'references_hmdb': references_hmdb,
        'synonyms': synonyms,
        'name': name,
        'reference_pubchem': reference_pubchem,
        'reference_chebi': reference_chebi,
        'reference_kegg': reference_kegg,
    }