def setUp(self):
    """Build the rpSBML fixtures needed by the tests of this case."""
    super().setUp()
    # Uncomment to get verbose output while debugging:
    # self.logger.setLevel('DEBUG')

    # The fixtures are rebuilt for every test instance because some
    # tests modify them.
    empty_rpsbml = rpSBML()
    self.rpsbml_none = empty_rpsbml
    lycopene_rpsbml = rpSBML(
        inFile=self.rpsbml_lycopene_path,
        logger=self.logger,
    )
    self.rpsbml_lycopene = lycopene_rpsbml
def test_check_SBML_rxnid(self):
    """Check what check_SBML_rxnid returns for a known and an unknown ID."""
    rpsbml = rpSBML(self.e_coli_model_path)

    # Known reaction: the result is a string equal to the requested ID.
    known_id = check_SBML_rxnid(rpsbml=rpsbml, rxn_id='biomass')
    self.assertIsInstance(known_id, str)
    self.assertEqual(known_id, 'biomass')

    # Challenge - 1: an ID absent from the model gives None.
    unknown_id = check_SBML_rxnid(rpsbml=rpsbml, rxn_id='undefined')
    self.assertIsNone(unknown_id)
def test_is_boundary_type(self):
    """Compare rpSBML.is_boundary_type against cobra's boundary classification.

    The same E. coli model file is loaded both as an rpSBML object and as a
    cobra model. For each boundary kind ('exchange', 'demand', 'sink') the
    number of reactions flagged by ``is_boundary_type`` must equal the
    length of the corresponding cobra list (``exchanges``, ``demands``,
    ``sinks``).
    """
    # TODO: implement a test which doesn't account for SBO terms, to see how
    # compartment_id ... are managed

    # Load.
    rpsbml_ecoli = rpSBML(inFile=self.rpsbml_ecoli_path, logger=self.logger)
    reactions = rpsbml_ecoli.getModel().getListOfReactions()
    # The keyword is spelled 'use_fbc_package' (it used to be mistyped as
    # 'use_fbs_package').
    cobra_model = cobra_io.read_sbml_model(
        self.rpsbml_ecoli_path,
        use_fbc_package=True
    )

    def count_boundaries(boundary_type, compartment_id):
        # Number of libSBML reactions flagged as the given boundary type.
        return len([
            rxn for rxn in reactions
            if rpsbml_ecoli.is_boundary_type(rxn, boundary_type, compartment_id)
        ])

    # Return type.
    self.assertIsInstance(
        rpsbml_ecoli.is_boundary_type(reactions[0], 'exchange', ''),
        bool
    )

    # Exchange, with and without an explicit compartment ID.
    self.assertEqual(
        len(cobra_model.exchanges), count_boundaries('exchange', 'e'))
    self.assertEqual(
        len(cobra_model.exchanges), count_boundaries('exchange', ''))

    # Demand.
    self.assertEqual(
        len(cobra_model.demands), count_boundaries('demand', ''))

    # Sinks.
    self.assertEqual(
        len(cobra_model.sinks), count_boundaries('sink', ''))
def test_check_SBML_compartment(self):
    """Check what check_SBML_compartment returns for known and unknown IDs."""
    rpsbml = rpSBML(self.e_coli_model_path)

    # Return type and value for a compartment that is found.
    found = check_SBML_compartment(rpsbml=rpsbml, compartment_id='cytosol')
    self.assertIsInstance(found, str)
    self.assertEqual(found, 'cytosol')

    # Challenge - 1: another compartment that is found.
    found = check_SBML_compartment(rpsbml=rpsbml, compartment_id='periplasm')
    self.assertEqual(found, 'periplasm')

    # Challenge - 2: an unknown compartment gives None.
    missing = check_SBML_compartment(rpsbml=rpsbml, compartment_id='x')
    self.assertIsNone(missing)
def _removeDeadEnd(sbml_path):
    """Return an rpSBML built from the reduced version of an SBML model.

    The model is read with cobra, passed through ``_reduce_model``, written
    to a temporary SBML file and re-read from that file as an rpSBML object.

    :param sbml_path: Path to the input SBML file
    :return: rpSBML object loaded from the reduced model
    """
    full_model = cobra_io.read_sbml_model(sbml_path, use_fbc_package=True)
    reduced_model = _reduce_model(full_model)
    with TemporaryDirectory() as scratch_dir:
        # The file only has to live long enough to be parsed by rpSBML.
        reduced_path = os_path.join(scratch_dir, 'tmp.xml')
        cobra_io.write_sbml_model(reduced_model, reduced_path)
        return rpSBML(reduced_path)
def genSink(cache, input_sbml, output_sink, remove_dead_end=False, compartment_id=default_comp, logger: Logger = getLogger(__name__)):
    """Write a sink file (Name, InChI) for the species of one compartment.

    Every species of the SBML model located in ``compartment_id`` is looked
    up through its MIRIAM 'metanetx' annotation; when the cache holds a
    non-empty InChI for that MetaNetX ID, a ``[mnx, inchi]`` row is written.

    :param cache: Object whose ``get('cid_strc')`` returns a mapping from
        MetaNetX ID to a dict with an ``'inchi'`` entry
    :param input_sbml: Path to the input SBML file
    :param output_sink: Path of the sink file to write
    :param remove_dead_end: If True, load the model through
        ``_removeDeadEnd``; falls back to the whole model on TimeoutError
    :param compartment_id: ID of the SBML compartment of interest
    :param logger: Logger used for warnings and errors
    :rtype: bool
    :return: True once the sink file has been written, False when no species
        is found in the compartment (nothing is written in that case)
    """
    # cobrapy can loop forever on some input SBML models, hence the
    # TimeoutError fallback to the unreduced model.
    if remove_dead_end:
        try:
            rpsbml = _removeDeadEnd(input_sbml)
        except TimeoutError:
            logger.warning(
                'removeDeadEnd reached its timeout... parsing the whole model')
            rpsbml = rpSBML(input_sbml)
    else:
        rpsbml = rpSBML(input_sbml)

    # Keep only the species of the requested compartment.
    species = [
        spe for spe in rpsbml.getModel().getListOfSpecies()
        if spe.getCompartment() == compartment_id
    ]
    if not species:
        logger.error('Could not retreive any species in the compartment: ' + str(compartment_id))
        logger.error('Is the right compartment set?')
        return False

    with open(output_sink, 'w', encoding='utf-8') as outS:
        write(outS, ['Name', 'InChI'])
        for spe in species:
            res = rpsbml.readMIRIAMAnnotation(spe.getAnnotation())
            # Extract the MNX ID; a missing key and an empty list are both
            # treated as "no MetaNetX ID".
            try:
                mnx = res['metanetx'][0]
            except (KeyError, IndexError):
                logger.warning('Cannot find MetaNetX ID for ' + str(spe.getId()))
                continue
            try:
                inchi = cache.get('cid_strc')[mnx]['inchi']
            except KeyError:
                inchi = None
            # Species without a known (non-empty) InChI are left out.
            if inchi:
                write(outS, [mnx, inchi])

    # Explicit success value, so it differs from the False failure return.
    return True
def test_build_exchange_reaction(self):
    """Check the type, shape and content of the build_exchange_reaction table."""
    # Load.
    model = rpSBML(inFile=self.rpsbml_ecoli_path, logger=self.logger)
    exchange_df = model.build_exchange_reaction('c')

    # Return type.
    self.assertIsInstance(exchange_df, pd.DataFrame)

    # Dataframe layout.
    self.assertEqual(exchange_df.shape, (331, 2))
    for column in ('model_id', 'libsbml_reaction'):
        self.assertIn(column, exchange_df.columns)

    # Content of the first row.
    self.assertEqual(exchange_df.loc[0, 'model_id'], 'M_12ppd__R_e')
    self.assertIsInstance(
        exchange_df.loc[0, 'libsbml_reaction'],
        libsbml.Reaction
    )
def parse_all_pathways(input_files: list) -> tuple:
    """Merge the nodes, edges and info of several pathway files.

    Each file is loaded as an rpSBML, converted to an rpPathway and parsed
    with ``parse_one_pathway``. Nodes (resp. edges) sharing an ID across
    pathways are combined with ``_merge_nodes`` (resp. ``_merge_edges``).

    :param input_files: Paths to the SBML files to parse
    :return: Tuple ``(network, pathways_info)`` where ``network`` is
        ``{'elements': {'nodes': [{'data': ...}], 'edges': [{'data': ...}]}}``
        with each element's ``'path_ids'`` sorted, and ``pathways_info`` is a
        dict of pathway info keyed by path ID, inserted in sorted key order
    """
    all_nodes = {}
    all_edges = {}
    pathways_info = {}

    for sbml_path in input_files:
        rpsbml = rpSBML(str(sbml_path))
        pathway = rpPathway.from_rpSBML(rpsbml=rpsbml)
        nodes, edges, pathway = parse_one_pathway(pathway)

        # Store pathway
        pathways_info[pathway['path_id']] = pathway

        # Store nodes, merging with the one already seen under the same ID
        for node_id, node_dict in nodes.items():
            if node_id in all_nodes:
                all_nodes[node_id] = _merge_nodes(node_dict, all_nodes[node_id])
            else:
                all_nodes[node_id] = node_dict

        # Store edges, merging with the one already seen under the same ID
        for edge_id, edge_dict in edges.items():
            if edge_id in all_edges:
                all_edges[edge_id] = _merge_edges(edge_dict, all_edges[edge_id])
            else:
                all_edges[edge_id] = edge_dict

    # Sort the pathway IDs carried by every node and edge
    for node in all_nodes.values():
        node['path_ids'] = sorted(node['path_ids'])
    for edge in all_edges.values():
        edge['path_ids'] = sorted(edge['path_ids'])

    # Wrap nodes and edges into the network structure
    network = {
        'elements': {
            'nodes': [{'data': node} for node in all_nodes.values()],
            'edges': [{'data': edge} for edge in all_edges.values()],
        }
    }

    # Order pathways info by pathway ID
    pathways_info_ordered = {
        path_id: pathways_info[path_id]
        for path_id in sorted(pathways_info)
    }

    return network, pathways_info_ordered
def addInChiKey(self, input_sbml, output_sbml):
    """Add missing InChIKey annotations to the species of an SBML file.

    For every species without an 'inchikey' MIRIAM entry, the InChIKey is
    looked up in ``self.cid_strc`` from its MetaNetX IDs. If a KeyError
    occurs anywhere during that lookup, the ChEBI IDs are tried instead
    (translated through ``self.chebi_cid``). The document is then written
    to ``output_sbml``.

    :param input_sbml: SBML file input
    :param output_sbml: Output SBML file

    :type input_sbml: str
    :type output_sbml: str

    :rtype: bool
    :return: Always True once the output file has been written
    """
    # File name without directory and known extensions, logged for debugging.
    basename = input_sbml.split('/')[-1]
    for extension in ('.rpsbml', '.sbml', '.xml'):
        basename = basename.replace(extension, '')
    self.logger.debug(basename)

    rpsbml = rpSBML(inFile=input_sbml, logger=self.logger)
    for species in rpsbml.getModel().getListOfSpecies():
        annotations = rpsbml.readMIRIAMAnnotation(species.getAnnotation())
        if 'inchikey' in annotations:
            self.logger.info('The species '+str(species.id)+' already has an inchikey... skipping')
            continue
        try:
            # First choice: MetaNetX IDs.
            for mnx in annotations['metanetx']:
                inchikey = self.cid_strc[self.cache._checkCIDdeprecated(mnx, self.deprecatedCID_cid)]['inchikey']
                if not inchikey:
                    self.logger.warning('The inchikey is empty for: '+str(species.id))
                else:
                    rpsbml.addUpdateMIRIAM(species, 'species', {'inchikey': [inchikey]})
        except KeyError:
            try:
                # Fallback: ChEBI IDs.
                for chebi in annotations['chebi']:
                    inchikey = self.cid_strc[self.cache._checkCIDdeprecated(self.chebi_cid[chebi], self.deprecatedCID_cid)]['inchikey']
                    if not inchikey:
                        self.logger.warning('The inchikey is empty for: '+str(species.id))
                    else:
                        rpsbml.addUpdateMIRIAM(species, 'species', {'inchikey': [inchikey]})
            except KeyError:
                self.logger.warning('Cannot find the inchikey for: '+str(species.id))
    writeSBMLToFile(rpsbml.document, output_sbml)
    return True
def get_selenzyme_annotation(self, rpsbml_path: str) -> None:
    """Register the best-scored enzymes of each pathway reaction as CDS parts.

    The pathway is loaded from ``rpsbml_path``. For every reaction, the
    enzymes returned by ``get_selenzy()`` are ranked by decreasing 'score'
    and at most ``self._max_enz_per_rxn`` of them are kept. Each kept
    UniProt ID is stored in ``self._parts``: a new ``Part`` is created the
    first time the ID is met, otherwise the reaction ID is appended to the
    ``cds_steps`` of the existing part.

    Nothing is returned; results are only available through ``self._parts``.

    :param rpsbml_path: Path to the rpSBML file of the pathway
    :raises ValueError: If the pathway has more than
        ``self._max_rxn_per_construct`` reactions, or if a reaction has no
        enzyme annotation. Parts registered for earlier reactions are kept.
    """
    rpsbml = rpSBML(str(rpsbml_path))
    pathway = rpPathway.from_rpSBML(rpsbml=rpsbml)

    # Count from 1 (as the enzyme loop below does) so that exactly
    # `_max_rxn_per_construct` reactions are accepted, not one more.
    for idx_rxn, rxn_id in enumerate(pathway.get_reactions_ids(), start=1):
        # Stop if too many reactions
        if idx_rxn > self._max_rxn_per_construct:
            # Adjacent literals (no comma) so the exception carries a
            # single message string.
            raise ValueError(
                'Number of reactions exceed the defined allowed number of '
                f'enzymes : {self._max_rxn_per_construct}. Execution cancelled.'
            )

        rxn = pathway.get_reaction(rxn_id)
        enzymes = rxn.get_selenzy()

        # Stop if no enzyme available
        if not enzymes:
            raise ValueError(
                f'Missing UniProt IDs from selenzyme annotation for '
                f'reaction {rxn_id}. Execution cancelled.')

        # Collect enzymes ordered by score, the first is the best
        ranked_enzymes = sorted(
            enzymes.items(),
            key=lambda item: item[1]['score'],
            reverse=True
        )
        for idx_enz, (uniprot_id, _) in enumerate(ranked_enzymes, start=1):
            # Skip worst enzymes if too many
            if idx_enz > self._max_enz_per_rxn:
                logging.warning(
                    f'Max number of enzyme per reaction reached ({self._max_enz_per_rxn}) '
                    f'for reaction {rxn_id}. Only the best one(s) are kept.'
                )
                break
            if uniprot_id in self._parts:
                self._parts[uniprot_id].cds_steps.append(rxn_id)
            else:
                self._parts[uniprot_id] = Part(
                    id=uniprot_id,
                    basic_role='part',
                    biological_role='cds',
                    cds_steps=[rxn_id],
                    seq='atgc'
                )
def setUp(self):
    """Load the rpSBML fixture and build the rpGraph derived from it."""
    super().setUp()
    rpsbml = rpSBML(inFile=self.rpsbml_path, logger=self.logger)
    self.rpsbml = rpsbml
    # The graph is built on top of the rpSBML loaded just above.
    self.rpgraph = rpGraph(rpsbml=rpsbml, logger=self.logger)
def test_from_cobra(self):
    """rpSBML.from_cobra applied to a to_cobra() result gives an rpSBML."""
    source = rpSBML(inFile=self.rpsbml_ecoli_path, logger=self.logger)
    cobra_model = source.to_cobra()
    rebuilt = rpSBML.from_cobra(cobra_model)
    self.assertIsInstance(rebuilt, rpSBML)
def test_to_cobra(self):
    """rpSBML.to_cobra returns a cobra.Model."""
    source = rpSBML(inFile=self.rpsbml_ecoli_path, logger=self.logger)
    self.assertIsInstance(source.to_cobra(), cobra.Model)