def entry_point():
    parser = build_args_parser(
        prog='stats',
        description='Statistics on SBML file(s)',
        m_add_args=add_arguments
    )
    args = parser.parse_args()

    from rptools.__main__ import init
    logger = init(parser, args)

    # Build the list of pathways to process
    pathways = [
        rpPathway.from_rpSBML(
            infile=pathway_filename,
            logger=logger
        ) for pathway_filename in args.pathways
    ]

    # Compute statistics
    stats = counts(pathways)

    print_stats(
        pathways=pathways,
        reactions=stats['reactions'],
        species=stats['species'],
    )
def setUp(self):
    self.logger = create_logger(__name__, 'ERROR')
    self.pathway = rpPathway.from_rpSBML(
        infile=self.rpsbml_path,
        logger=self.logger
    )
def to_data_js(sbml_files: list, source_path: str, output_folder: str, verbose: bool = False, dev: bool = False):
    """Return a list of dictionaries parsed from SBML files.

    Parameters
    ----------
    sbml_files : list
        Name(s) of SBML files
    source_path : str
        Path pointing to the rpSBML file(s)
    output_folder : str
        Path to the directory where report file(s) will be generated
    verbose : bool, optional
        If True, turn on console verbose mode. By default False
    dev : bool, optional
        For dev purposes only: create supplementary files in a dev folder. By default False

    Returns
    -------
    list
        Relevant data of SBML file(s) that will be displayed on the report
    """
    # List where all necessary elements will be compiled
    rp_list = []

    # Loop and operations for each SBML (.xml) file found
    for name in sbml_files:
        if verbose:
            print("Parsing", name)
        pathway = rpPathway.from_rpSBML(infile=os.path.join(source_path, name))
        rp_name = pathway.get_id()
        if verbose:
            print("Path_id found:", rp_name)
        dfG_prime_m = pathway.get_thermo_dGm_prime()
        fba_obj_fraction = pathway.get_fba_fraction()
        nb_reactions = pathway.get_nb_reactions()
        reactions = pathway.get_reactions()
        global_score = pathway.get_global_score()
        mean_rule_score = pathway.get_mean_rule_score()
        # Add the necessary values to the list
        rp_list.append({
            'pathway_name': rp_name,
            'dfG_prime_m': dfG_prime_m.get('value'),
            'global_score': global_score,
            'fba_obj_fraction': fba_obj_fraction.get('value'),
            'mean_rule_score': mean_rule_score,
            'nb_reactions': nb_reactions,
            'reactions': get_reactions_data(reactions)
        })

    # Sort the list by pathway_name
    rp_list = sorted(rp_list, key=lambda k: k['pathway_name'])
    return rp_list
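A minimal usage sketch for to_data_js, assuming the function and its helpers (rpPathway, get_reactions_data) are importable from the report module; the folder names below are hypothetical placeholders.

import os

# Hypothetical input/output folders (not from the original code)
sbml_dir = 'out/sbml_files'
report_dir = 'out/report'

# Collect rpSBML file names and build the report data
sbml_files = [f for f in os.listdir(sbml_dir) if f.endswith('.xml')]
data = to_data_js(
    sbml_files=sbml_files,
    source_path=sbml_dir,
    output_folder=report_dir,
    verbose=True
)
# Each entry is a dict with pathway_name, thermodynamics, FBA, scores and
# per-reaction data, sorted by pathway_name
print(len(data), 'pathway(s) parsed')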
def _extract_res_from_file(filename):
    rp_pathway = rpPathway.from_rpSBML(
        infile=filename
    )
    res = {}
    res['pathway'] = rp_pathway.get_fba()
    res['reactions'] = {}
    for rid in rp_pathway.get_reactions():
        res['reactions'][rid] = rp_pathway.get_reaction(rid).get_fba()
    return res
def entry_point():
    parser = build_args_parser(
        prog='rpscore',
        description='Calculate global score by combining all scores (rules, FBA, Thermo)',
        m_add_args=add_arguments
    )
    args = parser.parse_args()

    from rptools.__main__ import init
    logger = init(parser, args)

    # if len(args.pathways) == 1:
    #     if args.outfile is None or args.outfile == '':
    #         logger.error('Option --outfile has to be set in case of single input pathway, exiting...')
    #         exit(1)
    # pathways = []
    # for pathway in args.pathways:
    #     pathways.append(
    #         rpPathway.from_rpSBML(
    #             infile=pathway,
    #             logger=logger
    #         )
    #     )
    pathway = rpPathway.from_rpSBML(infile=args.infile, logger=logger)

    score = predict_score(
        pathway=pathway,
        # data_train_file=args.data_train_file,
        # models_path=models_path,
        no_of_rxns_thres=args.no_of_rxns_thres
    )

    # if len(pathways) > 1:
    #     if not os_path.exists(args.outdir):
    #         makedirs(args.outdir)
    #     for i in range(len(pathways)):
    #         # Write results into the pathway
    #         pathways[i].set_global_score(scores[i])
    #         # Write pathway into file
    #         pathways[i].to_rpSBML().write_to_file(
    #             os_path.join(args.outdir, os_path.basename(args.pathways[i]))
    #         )
    # else:
    # Write results into the pathway
    pathway.set_global_score(score)
    # Write pathway into file
    pathway.to_rpSBML().write_to_file(args.outfile)
def test_rpSBML_file(self):
    with NamedTemporaryFile(delete=False) as tempf:
        self.pathway.to_rpSBML().write_to_file(tempf.name)
        tempf.close()
        self.assertEqual(
            self.pathway,
            rpPathway.from_rpSBML(
                infile=tempf.name
            )
        )
    remove(tempf.name)
def _cli():
    parser = build_args_parser(
        prog='rpthermo',
        description='Calculate score by processing thermodynamics',
        m_add_args=add_arguments
    )
    args = parser.parse_args()

    from rptools.__main__ import init
    logger = init(parser, args)

    msg = 'Parameters\n----------\n'
    for param in ['pH', 'ionic_strength', 'pMg']:
        value = getattr(args, param)
        msg += f'- {param}: {value}\n'
    logger.info(
        '{color}{msg}{rst}'.format(
            color=fg('light_cyan'),
            msg=msg,
            rst=attr('reset')
        )
    )

    ## READ PATHWAY FROM FILE
    pathway = rpPathway.from_rpSBML(
        infile=args.infile,
        logger=logger
    )

    # RUN THERMO
    results = runThermo(
        pathway=pathway,
        ph=args.pH,
        ionic_strength=args.ionic_strength,
        pMg=args.pMg,
        logger=logger
    )

    # Print results
    print_results(pathway, results, logger)

    # Write pathway into file
    pathway.to_rpSBML().write_to_file(args.outfile)
    logger.info(
        "{color}{typo}Written into file: {file}{rst}".format(
            color=fg('white'),
            typo=attr('bold'),
            rst=attr('reset'),
            file=args.outfile
        )
    )
def parse_all_pathways(input_files: list) -> tuple:
    network = {'elements': {'nodes': [], 'edges': []}}
    all_nodes = {}
    all_edges = {}
    pathways_info = {}

    for sbml_path in input_files:
        rpsbml = rpSBML(str(sbml_path))
        pathway = rpPathway.from_rpSBML(rpsbml=rpsbml)
        nodes, edges, pathway = parse_one_pathway(pathway)
        # Store pathway
        pathways_info[pathway['path_id']] = pathway
        # Store nodes
        for node_id, node_dict in nodes.items():
            if node_id in all_nodes:
                all_nodes[node_id] = _merge_nodes(node_dict, all_nodes[node_id])
            else:
                all_nodes[node_id] = node_dict
        # Store edges
        for edge_id, edge_dict in edges.items():
            if edge_id in all_edges:
                all_edges[edge_id] = _merge_edges(edge_dict, all_edges[edge_id])
            else:
                all_edges[edge_id] = edge_dict

    # Store nodes and edges into the network
    for node in all_nodes.values():
        network['elements']['nodes'].append({'data': node})
    for edge in all_edges.values():
        network['elements']['edges'].append({'data': edge})

    # Sort node and edge path IDs everywhere
    for node in network['elements']['nodes']:
        node['data']['path_ids'] = sorted(node['data']['path_ids'])
    for edge in network['elements']['edges']:
        edge['data']['path_ids'] = sorted(edge['data']['path_ids'])

    # Sort pathways_info by pathway ID
    pathways_info_ordered = {}
    path_ids_ordered = sorted(pathways_info.keys())
    for path_id in path_ids_ordered:
        pathways_info_ordered[path_id] = pathways_info[path_id]

    return network, pathways_info_ordered
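For orientation, the network dict returned by parse_all_pathways follows a Cytoscape.js-style elements layout; the sketch below only illustrates the shape, with made-up node and edge IDs (the real node/edge dicts carry more fields than path_ids).

# Illustrative shape of the returned network (IDs are hypothetical)
example_network = {
    'elements': {
        'nodes': [
            {'data': {'id': 'species_or_reaction_id', 'path_ids': ['rp_001', 'rp_002']}},
        ],
        'edges': [
            {'data': {'id': 'source_id__target_id', 'path_ids': ['rp_001']}},
        ],
    }
}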
def get_selenzyme_annotation(self, rpsbml_path: str) -> Dict:
    rpsbml = rpSBML(str(rpsbml_path))
    pathway = rpPathway.from_rpSBML(rpsbml=rpsbml)
    for idx_rxn, rxn_id in enumerate(pathway.get_reactions_ids()):
        # Stop if too many reactions
        if idx_rxn > self._max_rxn_per_construct:
            raise ValueError(
                f'Number of reactions exceeds the allowed number of '
                f'enzymes: {self._max_rxn_per_construct}. Execution cancelled.'
            )
        rxn = pathway.get_reaction(rxn_id)
        enzymes = rxn.get_selenzy()
        # Stop if no enzyme available
        if len(enzymes) == 0:
            raise ValueError(
                f'Missing UniProt IDs from selenzyme annotation for '
                f'reaction {rxn_id}. Execution cancelled.'
            )
        # Collect enzymes ordered by score, the first being the best
        for idx_enz, enz in enumerate(
            sorted(enzymes.items(), key=lambda x: getitem(x[1], 'score'), reverse=True),
            start=1
        ):
            # Skip the worst enzymes if there are too many
            if idx_enz > self._max_enz_per_rxn:
                logging.warning(
                    f'Max number of enzymes per reaction reached ({self._max_enz_per_rxn}) '
                    f'for reaction {rxn_id}. Only the best one(s) are kept.'
                )
                break
            uniprot_id, _ = enz
            if uniprot_id in self._parts:
                self._parts[uniprot_id].cds_steps.append(rxn_id)
            else:
                self._parts[uniprot_id] = Part(
                    id=uniprot_id,
                    basic_role='part',
                    biological_role='cds',
                    cds_steps=[rxn_id],
                    seq='atgc'
                )
def entry_point():
    parser = build_args_parser(
        prog='rprank',
        description='Rank pathways',
        m_add_args=add_arguments
    )
    args = parser.parse_args()

    from rptools.__main__ import init
    logger = init(parser, args)

    # Build the list of pathways to rank
    pathways = [
        rpPathway.from_rpSBML(infile=pathway_filename, logger=logger)
        for pathway_filename in args.pathways
    ]

    # Rank pathways
    sorted_pathways = rank(pathways)

    sorted_pathways_str = '\n'.join(
        args.delimiter.join(item) for item in sorted_pathways.items()
    )
    print(f'#Name{args.delimiter}Score')
    print(sorted_pathways_str)
def _test_file(self, infile: str, expected_result_file: str):
    # Build the list of pathways to rank
    pathway_filenames = glob(f'{infile}/*')
    pathways = [
        rpPathway.from_rpSBML(infile=pathway_filename)
        for pathway_filename in pathway_filenames
    ]

    # Rank pathways
    sorted_pathways = rank(pathways)

    # Group computed pathway names by score
    computed_scores = {}
    for name, score in sorted_pathways.items():
        if score in computed_scores.keys():
            computed_scores[score].update([name])
        else:
            computed_scores[score] = set([name])
    score_list = list(computed_scores.keys())
    name_list = list(computed_scores.values())

    # Read expected results from the reference CSV file
    with open(expected_result_file, mode='r') as result_file:
        reader = csv_reader(result_file)
        next(reader)
        expected_scores = {}
        for row in reader:
            if row[1] in expected_scores.keys():
                expected_scores[row[1]].update([row[0]])
            else:
                expected_scores[row[1]] = set([row[0]])

    self.assertListEqual(
        list(expected_scores.keys()),
        list(computed_scores.keys())
    )
    for score in expected_scores.keys():
        self.assertSetEqual(
            expected_scores[score],
            computed_scores[score]
        )
def test_runFBA(self):
    # TODO: take into account extra args, like medium

    def _extract_var(dirname):
        files = glob(os_path.join(dirname, '*xml'))
        basenames = [os_path.basename(x) for x in files]
        names = [x.split('.')[0] for x in basenames]
        sims = [x.split('.')[1] for x in basenames]
        assert len(files) == len(names) == len(sims)
        return (files, names, sims)

    def _extract_res_from_file(filename):
        rp_pathway = rpPathway.from_rpSBML(infile=filename)
        res = {}
        res['pathway'] = rp_pathway.get_fba()
        res['reactions'] = {}
        for rid in rp_pathway.get_reactions():
            res['reactions'][rid] = rp_pathway.get_reaction(rid).get_fba()
        return res

    def _format_dict(old, new=None):
        # Avoid a shared mutable default argument
        if new is None:
            new = {}
        for k, v in old.items():
            if isinstance(v, dict):
                new[k] = _format_dict(old.get(k, {}), v)
            else:
                if isinstance(v, str):
                    new[k] = v
                else:
                    new[k] = round(float(v), 2)
        return new

    files, names, sims = _extract_var(os_path.join(self.temp_d, 'lycopene_fba'))
    for ix in range(len(files)):
        pathway_cr = rpPathway.from_rpSBML(
            infile=os_path.join(self.temp_d, 'cr_fba', names[ix] + '.xml')
        )
        res = runFBA(
            pathway=pathway_cr,
            gem_sbml_path=self.e_coli_model_path,
            compartment_id='c',
            objective_rxn_id='rxn_target',
            biomass_rxn_id='biomass',
            sim_type=sims[ix]
        )
        res_previous = _format_dict(_extract_res_from_file(files[ix]))
        res_run_fba = _format_dict(
            {x: y for x, y in res.items() if x in ['pathway', 'reactions']}
        )
        self.assertDictEqual(res_previous, res_run_fba)
def test_rpSBML_rpsbml(self):
    self.assertEqual(
        self.pathway,
        rpPathway.from_rpSBML(rpsbml=self.pathway.to_rpSBML())
    )