def load_gene_synonym(session, gene_db, synonym, data_source_id):
    """Load the synonym for this gene from the given genome."""
    data_source_id = get_or_create_data_source(session, data_source_id)
    synonym_db, _ = get_or_create(session, Synonym,
                                  type='gene',
                                  ome_id=gene_db.id,
                                  synonym=synonym,
                                  data_source_id=data_source_id)
    return synonym_db.id
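# Usage sketch for load_gene_synonym (illustrative only): `gene_db` is any
# already-loaded gene row, and 'talB' / 'refseq_name' are placeholder values
# standing in for whatever synonym and data source name the caller passes;
# none of these are defined in this module.
#
#     session = Session()
#     synonym_id = load_gene_synonym(session, gene_db, 'talB', 'refseq_name')
#     session.commit()
#     session.close()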
def __init__(self, name, data_source_id=None, group_name=None, attributes=None):
    session = Session()
    if data_source_id is None:
        data_source, exists = get_or_create(session, DataSource,
                                             cobra_id='-1',
                                             name='generic',
                                             url_prefix='')
        data_source_id = data_source.id
    session.close()
    self.name = name
    self.data_source_id = data_source_id
    self.group_name = group_name
    self.attributes = attributes
def load_reactions(session, model_db_id, model, old_reaction_ids,
                   comp_comp_db_ids, final_metabolite_ids):
    """Load the reactions and stoichiometries into the model.

    TODO if the reaction is already loaded, we need to check the stoichiometry
    hash. If that doesn't match, then add a new reaction with an incremented ID
    (e.g. ACALD_1).

    Arguments
    ---------

    session: An SQLAlchemy session.

    model_db_id: The database ID for the model.

    model: The COBRApy model.

    old_reaction_ids: A dictionary where keys are new IDs and values are old
    IDs for reactions.

    comp_comp_db_ids: A dictionary where keys are the original
    compartmentalized metabolite ids and the values are the database IDs for
    the compartmentalized components.

    final_metabolite_ids: A new dictionary where keys are original
    compartmentalized metabolite IDs from the model and values are the new
    compartmentalized metabolite IDs.

    Returns
    -------

    A dictionary with keys for reaction BiGG IDs in the model and values for
    the associated ModelReaction.id in the database.

    """
    # only grab this once
    data_source_id = get_or_create_data_source(session, 'old_bigg_id')

    # get reaction hash_prefs
    hash_prefs = load_tsv(settings.reaction_hash_prefs)

    def _check_hash_prefs(a_hash, is_pseudoreaction):
        """Return the preferred BiGG ID for a_hash, or None."""
        for row in hash_prefs:
            marked_pseudo = len(row) > 2 and row[2] == 'pseudoreaction'
            if row[0] == a_hash and marked_pseudo == is_pseudoreaction:
                return row[1]
        return None

    # Generate reaction hashes, and find reactions in the same model in
    # opposite directions.
    reaction_hashes = {r.id: parse.hash_reaction(r, final_metabolite_ids)
                       for r in model.reactions}
    reverse_reaction_hashes = {r.id: parse.hash_reaction(r, final_metabolite_ids, reverse=True)
                               for r in model.reactions}
    reverse_reaction_hashes_rev = {v: k for k, v in six.iteritems(reverse_reaction_hashes)}
    reactions_not_to_reverse = set()
    for r_id, h in six.iteritems(reaction_hashes):
        if h in reverse_reaction_hashes_rev:
            reactions_not_to_reverse.add(r_id)
            reactions_not_to_reverse.add(reverse_reaction_hashes_rev[h])

    model_db_rxn_ids = {}
    for reaction in model.reactions:
        # Drop duplicates label
        reaction_id = parse.remove_duplicate_tag(reaction.id)

        # Get the reaction
        reaction_db = (session
                       .query(Reaction)
                       .filter(Reaction.bigg_id == reaction_id)
                       .first())

        # check for pseudoreaction
        is_pseudoreaction = check_pseudoreaction(reaction_id)

        # calculate the hash
        reaction_hash = reaction_hashes[reaction.id]
        hash_db = (session
                   .query(Reaction)
                   .filter(Reaction.reaction_hash == reaction_hash)
                   .filter(Reaction.pseudoreaction == is_pseudoreaction)
                   .first())

        # If there wasn't a match for the forward hash, also check the reverse
        # hash. Do not check the reverse hash for reactions with both
        # directions defined in the same model (e.g. SUCDi and FRD7).
        if not hash_db and reaction.id not in reactions_not_to_reverse:
            reverse_hash_db = (session
                               .query(Reaction)
                               .filter(Reaction.reaction_hash == reverse_reaction_hashes[reaction.id])
                               .filter(Reaction.pseudoreaction == is_pseudoreaction)
                               .first())
        else:
            reverse_hash_db = None

        # bigg_id match  hash match  b==h  pseudoreaction  example                    function
        # n              n           -     n               first GAPD                 _new_reaction (1)
        # n              n           -     y               first EX_glc_e             _new_reaction (1)
        # y              n           -     n               incorrect GAPD             _new_reaction & increment (2)
        # y              n           -     y               incorrect EX_glc_e         _new_reaction & increment (2)
        # n              y           -     n               GAPDH after GAPD           reaction = hash_reaction (3a)
        # n              y           -     y               EX_glc__e after EX_glc_e   reaction = hash_reaction (3a)
        # y              y           n     n               ?                          reaction = hash_reaction (3a)
        # y              y           n     y               ?                          reaction = hash_reaction (3a)
        # y              y           y     n               second GAPD                reaction = bigg_reaction (3b)
        # y              y           y     y               second EX_glc_e            reaction = bigg_reaction (3b)
        #
        # NOTE: only check pseudoreaction hash against other pseudoreactions
        #
        # 4a and 4b are 3a and 3b with a reversed reaction

        def _find_new_incremented_id(session, original_id):
            """Look for a reaction bigg_id that is not already taken."""
            new_id = increment_id(original_id)
            while True:
                # Check for existing and deprecated reaction ids
                if (session.query(Reaction).filter(Reaction.bigg_id == new_id).first() is None
                        and not _is_deprecated_reaction_id(session, new_id)):
                    return new_id
                new_id = increment_id(new_id)

        # Check for a preferred ID in the preferences, based on the forward
        # hash. Don't check the reverse hash in preferences.
        preferred_id = _check_hash_prefs(reaction_hash, is_pseudoreaction)

        # not reversed by default
        is_reversed = False
        is_new = False

        # (0) If there is a preferred ID, make that the new ID, and increment
        # any old IDs
        if preferred_id is not None:
            # if the reaction already matches, just continue
            if hash_db is not None and hash_db.bigg_id == preferred_id:
                reaction_db = hash_db
            # otherwise, make the new reaction
            else:
                # if an existing reaction matches the preferred reaction, find
                # a new, incremented id for the existing match
                preferred_id_db = session.query(Reaction).filter(Reaction.bigg_id == preferred_id).first()
                if preferred_id_db is not None:
                    new_id = _find_new_incremented_id(session, preferred_id)
                    logging.warning('Incrementing database reaction {} to {} and preferring {} (from model {}) based on hash preferences'
                                    .format(preferred_id, new_id, preferred_id, model.id))
                    preferred_id_db.bigg_id = new_id
                    session.commit()

                # make a new reaction for the preferred_id
                reaction_db = _new_reaction(session, reaction, preferred_id,
                                            reaction_hash, model_db_id, model,
                                            is_pseudoreaction, comp_comp_db_ids)
                is_new = True

        # (1) no bigg_id match and no stoichiometry match, so make a new
        # reaction
        elif reaction_db is None and hash_db is None and reverse_hash_db is None:
            # check that the id is not deprecated
            if _is_deprecated_reaction_id(session, reaction.id):
                logging.error(('Keeping bigg_id {} (hash {} - from model {}) '
                               'even though it is on the deprecated ID list. '
                               'You should add it to reaction-hash-prefs.txt')
                              .format(reaction_id, reaction_hash, model.id))
            reaction_db = _new_reaction(session, reaction, reaction_id,
                                        reaction_hash, model_db_id, model,
                                        is_pseudoreaction, comp_comp_db_ids)
            is_new = True

        # (2) bigg_id matches, but not the hash, so increment the bigg_id
        elif reaction_db is not None and hash_db is None and reverse_hash_db is None:
            # loop until we find a non-matching ID
            new_id = _find_new_incremented_id(session, reaction.id)
            logging.warning('Incrementing bigg_id {} to {} (from model {}) based on conflicting reaction hash'
                            .format(reaction_id, new_id, model.id))
            reaction_db = _new_reaction(session, reaction, new_id,
                                        reaction_hash, model_db_id, model,
                                        is_pseudoreaction, comp_comp_db_ids)
            is_new = True

        # (3) found a stoichiometry match, so use the hash reaction match
        elif hash_db is not None:
            # WARNING TODO this requires that loaded metabolites always match
            # on bigg_id, which should be the case.

            # (3a)
            if reaction_db is None or reaction_db.id != hash_db.id:
                reaction_db = hash_db
            # (3b) BiGG ID matches a reaction with the same hash, so just
            # continue
            else:
                pass

        # (4) found a reverse stoichiometry match, so use the reverse hash
        # reaction match
        elif reverse_hash_db is not None:
            # WARNING TODO this requires that loaded metabolites always match
            # on bigg_id, which should be the case.

            # Remember to switch upper and lower bounds
            is_reversed = True
            logging.info('Matched {} to {} based on reverse hash'
                         .format(reaction_id, reverse_hash_db.bigg_id))

            # (4a)
            if reaction_db is None or reaction_db.id != reverse_hash_db.id:
                reaction_db = reverse_hash_db
            # (4b) BiGG ID matches a reaction with the same hash, so just
            # continue
            else:
                pass

        else:
            raise Exception('Should not get here')

        # If the reaction is not new, consider improving the descriptive name
        if not is_new:
            new_name = scrub_name(check_none(getattr(reaction, 'name', None)))
            improve_name(session, reaction_db, new_name)

        # Add reaction to deprecated ID list if necessary
        if reaction_db.bigg_id != reaction_id:
            get_or_create(session, DeprecatedID,
                          deprecated_id=reaction_id,
                          type='reaction',
                          ome_id=reaction_db.id)

        # If the reaction is reversed, then switch upper and lower bounds
        lower_bound = -reaction.upper_bound if is_reversed else reaction.lower_bound
        upper_bound = -reaction.lower_bound if is_reversed else reaction.upper_bound

        # subsystem
        subsystem = check_none(reaction.subsystem.strip())

        # get the model reaction
        model_reaction_db = (session
                             .query(ModelReaction)
                             .filter(ModelReaction.reaction_id == reaction_db.id)
                             .filter(ModelReaction.model_id == model_db_id)
                             .filter(ModelReaction.lower_bound == lower_bound)
                             .filter(ModelReaction.upper_bound == upper_bound)
                             .filter(ModelReaction.gene_reaction_rule == reaction.gene_reaction_rule)
                             .filter(ModelReaction.objective_coefficient == reaction.objective_coefficient)
                             .filter(ModelReaction.subsystem == subsystem)
                             .first())
        if model_reaction_db is None:
            # get the number of existing copies of this reaction in the model
            copy_number = (session
                           .query(ModelReaction)
                           .filter(ModelReaction.reaction_id == reaction_db.id)
                           .filter(ModelReaction.model_id == model_db_id)
                           .count()) + 1
            # make a new model reaction
            model_reaction_db = ModelReaction(model_id=model_db_id,
                                              reaction_id=reaction_db.id,
                                              gene_reaction_rule=reaction.gene_reaction_rule,
                                              original_gene_reaction_rule=reaction.gene_reaction_rule,
                                              upper_bound=upper_bound,
                                              lower_bound=lower_bound,
                                              objective_coefficient=reaction.objective_coefficient,
                                              copy_number=copy_number,
                                              subsystem=subsystem)
            session.add(model_reaction_db)
            session.commit()

        # remember the changed ids
        model_db_rxn_ids[reaction.id] = model_reaction_db.id

        # add synonyms
        #
        # get the ids from the published model
        for old_bigg_id in old_reaction_ids[reaction.id]:
            # add a synonym
            synonym_db = (session
                          .query(Synonym)
                          .filter(Synonym.type == 'reaction')
                          .filter(Synonym.ome_id == reaction_db.id)
                          .filter(Synonym.synonym == old_bigg_id)
                          .filter(Synonym.data_source_id == data_source_id)
                          .first())
            if synonym_db is None:
                synonym_db = Synonym(type='reaction',
                                     ome_id=reaction_db.id,
                                     synonym=old_bigg_id,
                                     data_source_id=data_source_id)
                session.add(synonym_db)
                session.commit()

            # add OldIDSynonym
            old_id_db = (session
                         .query(OldIDSynonym)
                         .filter(OldIDSynonym.type == 'model_reaction')
                         .filter(OldIDSynonym.ome_id == model_reaction_db.id)
                         .filter(OldIDSynonym.synonym_id == synonym_db.id)
                         .first())
            if old_id_db is None:
                old_id_db = OldIDSynonym(type='model_reaction',
                                         ome_id=model_reaction_db.id,
                                         synonym_id=synonym_db.id)
                session.add(old_id_db)
                session.commit()

    return model_db_rxn_ids
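# Layout of the reaction-hash-prefs file consumed by _check_hash_prefs above
# (a sketch based on how the rows are read: column 0 is the reaction hash,
# column 1 the preferred BiGG ID, and an optional third column marks
# pseudoreactions). The hash values below are placeholders, not real hashes:
#
#     <hash-of-SUCDi-stoichiometry>    SUCDi
#     <hash-of-EX_glc__e>              EX_glc__e    pseudoreaction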
def load_metabolites(session, model_id, model, compartment_names,
                     old_metabolite_ids):
    """Load the metabolites as components and model components.

    Arguments
    ---------

    session: An SQLAlchemy session.

    model_id: The database ID for the model.

    model: The COBRApy model.

    compartment_names: A dictionary where keys are compartment BiGG IDs and
    values are compartment names.

    old_metabolite_ids: A dictionary where keys are new IDs and values are old
    IDs for compartmentalized metabolites.

    Returns
    -------

    comp_comp_db_ids: A dictionary where keys are the original
    compartmentalized metabolite ids and the values are the database IDs for
    the compartmentalized components.

    final_metabolite_ids: A new dictionary where keys are original
    compartmentalized metabolite IDs from the model and values are the new
    compartmentalized metabolite IDs.

    """
    comp_comp_db_ids = {}
    final_metabolite_ids = {}

    # only grab this once
    data_source_id = get_or_create_data_source(session, 'old_bigg_id')

    # get metabolite id duplicates
    met_dups = load_tsv(settings.metabolite_duplicates)

    def _check_metabolite_duplicates(bigg_id):
        """Return a new ID if there is a preferred ID, otherwise None."""
        for row in met_dups:
            if bigg_id in row[1:]:
                return row[0]
        return None

    # for each metabolite in the model
    for metabolite in model.metabolites:
        metabolite_id = parse.remove_duplicate_tag(metabolite.id)

        try:
            component_bigg_id, compartment_bigg_id = parse.split_compartment(metabolite_id)
        except Exception:
            logging.error('Could not find compartment for metabolite %s in model %s'
                          % (metabolite_id, model.id))
            continue

        preferred = _check_metabolite_duplicates(component_bigg_id)
        new_bigg_id = preferred if preferred else component_bigg_id

        # look for the formula in these places
        formula_fns = [lambda m: getattr(m, 'formula', None),  # support cobra v0.3 and 0.4
                       lambda m: m.notes.get('FORMULA', None),
                       lambda m: m.notes.get('FORMULA1', None)]
        # Cast to string, but not for None
        strip_str_or_none = lambda v: str(v).strip() if v is not None else None
        # Ignore the empty string
        ignore_empty_str = lambda s: s if s != '' else None
        # Use a generator for lazy evaluation
        values = (ignore_empty_str(strip_str_or_none(formula_fn(metabolite)))
                  for formula_fn in formula_fns)
        # Get the first non-null result. Otherwise _formula = None.
        _formula = format_formula(next(filter(None, values), None))
        # Check for invalid formulas
        if parse.invalid_formula(_formula):
            logging.warning('Invalid formula %s for metabolite %s in model %s'
                            % (_formula, metabolite_id, model.id))
            _formula = None

        # get the charge
        try:
            charge = int(metabolite.charge)
            # check for float charge
            if charge != metabolite.charge:
                logging.warning('Could not load charge {} for {} in model {}'
                                .format(metabolite.charge, metabolite_id, model.id))
                charge = None
        except Exception:
            if hasattr(metabolite, 'charge') and metabolite.charge is not None:
                logging.debug('Could not convert charge to integer for metabolite {} in model {}: {}'
                              .format(metabolite_id, model.id, metabolite.charge))
            charge = None

        # If there is no metabolite, add a new one.
        metabolite_db = (session
                         .query(Component)
                         .filter(Component.bigg_id == new_bigg_id)
                         .first())

        # if necessary, add the new metabolite, and keep track of the ID
        new_name = scrub_name(getattr(metabolite, 'name', None))
        if metabolite_db is None:
            # make the new metabolite
            metabolite_db = Component(bigg_id=new_bigg_id, name=new_name)
            session.add(metabolite_db)
            session.commit()
        else:
            # If the metabolite is not new, consider improving the descriptive
            # name
            improve_name(session, metabolite_db, new_name)

        # add the deprecated id if necessary
        if metabolite_db.bigg_id != component_bigg_id:
            get_or_create(session, DeprecatedID,
                          deprecated_id=component_bigg_id,
                          type='component',
                          ome_id=metabolite_db.id)

        # if there is no compartment, add a new one
        compartment_db = (session
                          .query(Compartment)
                          .filter(Compartment.bigg_id == compartment_bigg_id)
                          .first())
        if compartment_db is None:
            try:
                name = compartment_names[compartment_bigg_id]
            except KeyError:
                logging.warning('No name found for compartment %s' % compartment_bigg_id)
                name = ''
            compartment_db = Compartment(bigg_id=compartment_bigg_id, name=name)
            session.add(compartment_db)
            session.commit()

        # if there is no compartmentalized component, add a new one
        comp_component_db = (session
                             .query(CompartmentalizedComponent)
                             .filter(CompartmentalizedComponent.component_id == metabolite_db.id)
                             .filter(CompartmentalizedComponent.compartment_id == compartment_db.id)
                             .first())
        if comp_component_db is None:
            comp_component_db = CompartmentalizedComponent(component_id=metabolite_db.id,
                                                           compartment_id=compartment_db.id)
            session.add(comp_component_db)
            session.commit()

        # remember for adding the reactions
        comp_comp_db_ids[metabolite.id] = comp_component_db.id
        final_metabolite_ids[metabolite.id] = '%s_%s' % (new_bigg_id, compartment_bigg_id)

        # if there is no model compartmentalized component, add a new one
        model_comp_comp_db = (session
                              .query(ModelCompartmentalizedComponent)
                              .filter(ModelCompartmentalizedComponent.compartmentalized_component_id == comp_component_db.id)
                              .filter(ModelCompartmentalizedComponent.model_id == model_id)
                              .first())
        if model_comp_comp_db is None:
            model_comp_comp_db = ModelCompartmentalizedComponent(model_id=model_id,
                                                                 compartmentalized_component_id=comp_component_db.id,
                                                                 formula=_formula,
                                                                 charge=charge)
            session.add(model_comp_comp_db)
            session.commit()
        else:
            if model_comp_comp_db.formula is None:
                model_comp_comp_db.formula = _formula
            if model_comp_comp_db.charge is None:
                model_comp_comp_db.charge = charge
            session.commit()

        # add synonyms
        for old_bigg_id_c in old_metabolite_ids[metabolite.id]:
            # Add Synonym and OldIDSynonym
            synonym_db = (session
                          .query(Synonym)
                          .filter(Synonym.type == 'compartmentalized_component')
                          .filter(Synonym.ome_id == comp_component_db.id)
                          .filter(Synonym.synonym == old_bigg_id_c)
                          .filter(Synonym.data_source_id == data_source_id)
                          .first())
            if synonym_db is None:
                synonym_db = Synonym(type='compartmentalized_component',
                                     ome_id=comp_component_db.id,
                                     synonym=old_bigg_id_c,
                                     data_source_id=data_source_id)
                session.add(synonym_db)
                session.commit()

            old_id_db = (session
                         .query(OldIDSynonym)
                         .filter(OldIDSynonym.type == 'model_compartmentalized_component')
                         .filter(OldIDSynonym.ome_id == model_comp_comp_db.id)
                         .filter(OldIDSynonym.synonym_id == synonym_db.id)
                         .first())
            if old_id_db is None:
                old_id_db = OldIDSynonym(type='model_compartmentalized_component',
                                         ome_id=model_comp_comp_db.id,
                                         synonym_id=synonym_db.id)
                session.add(old_id_db)
                session.commit()

            # Also add Synonym and OldIDSynonym for the universal metabolite
            try:
                new_style_id = parse.id_for_new_id_style(
                    parse.fix_legacy_id(old_bigg_id_c, use_hyphens=False),
                    is_metabolite=True
                )
                old_bigg_id_c_without_compartment = parse.split_compartment(new_style_id)[0]
            except Exception as e:
                logging.warning(str(e))
            else:
                synonym_db_2 = (session
                                .query(Synonym)
                                .filter(Synonym.type == 'component')
                                .filter(Synonym.ome_id == metabolite_db.id)
                                .filter(Synonym.synonym == old_bigg_id_c_without_compartment)
                                .filter(Synonym.data_source_id == data_source_id)
                                .first())
                if synonym_db_2 is None:
                    synonym_db_2 = Synonym(type='component',
                                           ome_id=metabolite_db.id,
                                           synonym=old_bigg_id_c_without_compartment,
                                           data_source_id=data_source_id)
                    session.add(synonym_db_2)
                    session.commit()

                old_id_db = (session
                             .query(OldIDSynonym)
                             .filter(OldIDSynonym.type == 'model_compartmentalized_component')
                             .filter(OldIDSynonym.ome_id == model_comp_comp_db.id)
                             .filter(OldIDSynonym.synonym_id == synonym_db_2.id)
                             .first())
                if old_id_db is None:
                    old_id_db = OldIDSynonym(type='model_compartmentalized_component',
                                             ome_id=model_comp_comp_db.id,
                                             synonym_id=synonym_db_2.id)
                    session.add(old_id_db)
                    session.commit()

    return comp_comp_db_ids, final_metabolite_ids
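# How the two loaders chain together (a sketch, not the module's entry point;
# the session, model, database IDs, and old-ID dictionaries are assumed to be
# provided by the surrounding loading code):
#
#     comp_comp_db_ids, final_metabolite_ids = load_metabolites(
#         session, model_db_id, model, compartment_names, old_metabolite_ids)
#     model_db_rxn_ids = load_reactions(
#         session, model_db_id, model, old_reaction_ids,
#         comp_comp_db_ids, final_metabolite_ids)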