def main():
    """Command-line entry point: parse the arguments and compare the padmets.

    The options are parsed with docopt from the module docstring. When
    ``--padmetRef`` is given, it is loaded into a PadmetRef object; otherwise
    no reference is passed to the comparison.
    """
    cli_args = docopt.docopt(__doc__)

    reference_path = cli_args["--padmetRef"]
    # Only build the reference object when the option was actually supplied.
    padmetRef = PadmetRef(reference_path) if reference_path else None

    compare_padmet.compare_padmet(
        cli_args["--padmet"],
        cli_args["--output"],
        padmetRef,
        cli_args["-v"],
    )
def analysis_on_group(group_name, groups, config_data, verbose):
    """Create reaction dendrogram and extract specific reactions using metabolic networks.

    Results are written to ``<analysis_path>/<group_name>``. If that folder
    already exists, nothing is computed and a message is printed instead.

    Args:
        group_name (str): Name of the group from group_template.tsv.
        groups (list): All the species inside the group.
        config_data (dict): Dictionary with all configuration paths. The keys
            'database_path', 'padmet_from_networks_path' and 'analysis_path'
            are read.
        verbose (bool): Verbose.

    Raises:
        SystemExit: If the group has fewer than two members, or if the padmet
            file of one of its members does not exist.
    """
    database_path = config_data['database_path']
    padmet_from_networks_path = config_data['padmet_from_networks_path']
    analysis_path = config_data['analysis_path']

    group_analysis_path = analysis_path + '/' + group_name

    # An existing output folder means the analysis was already run: do nothing.
    if os.path.isdir(group_analysis_path):
        print(group_analysis_path +
              ' already exists. Delete it if you want to relaunch the analysis.')
        return

    # A comparison needs at least two members; an empty group is rejected too.
    if len(groups) < 2:
        sys.exit('A group must contain more than one member.')

    all_padmet_path = [
        os.path.join(padmet_from_networks_path, name + ".padmet")
        for name in groups
    ]

    # Fail early, naming the first organism whose padmet file is missing.
    for padmet_path in all_padmet_path:
        if not os.path.exists(padmet_path):
            org_name = os.path.splitext(os.path.basename(padmet_path))[0]
            sys.exit(
                "Padmet file of organism %s from group %s not found in %s" %
                (org_name, group_name, padmet_from_networks_path))

    # Compare the padmet to create the reactions.csv file needed to create the reaction dendrogram.
    # NOTE(review): database_path is passed as-is for padmetRef; confirm that
    # compare_padmet accepts a path here rather than a loaded reference object.
    compare_padmet.compare_padmet(padmet_path=",".join(all_padmet_path),
                                  output=group_analysis_path,
                                  padmetRef=database_path,
                                  verbose=verbose)

    dendrogram_reactions_distance.reaction_figure_creation(
        reaction_file=group_analysis_path + '/reactions.csv',
        output=group_analysis_path + '/dendrogram_output',
        padmetRef=database_path,
        verbose=verbose)
def run_compare(run_id, nb_cpu_to_use, verbose):
    """Compare the groups specified by the user.

    Reads the group file of the run, compares the padmets of every species
    that belongs to a group other than 'all', then draws a supervenn figure of
    the reactions found in each group and creates a reaction dendrogram. All
    outputs go to ``<analysis_path>/compare_group``.

    Args:
        run_id (str): ID of the run
        nb_cpu_to_use (int): number of CPU for multiprocessing (currently not
            used by this function)
        verbose (boolean): verbose
    """
    if verbose:
        print('--- Running compare step ---')
    compare_start_time = time.time()

    config_data = parse_config_file(run_id)
    analysis_path = config_data['analysis_path']
    analysis_group_file_path = config_data['analysis_group_file_path']
    compare_output_path = analysis_path + '/compare_group'
    database_path = config_data['database_path']
    padmet_from_networks_path = config_data['padmet_from_networks_path']

    # Create a dictionary containing the group name and the species inside the group.
    group_data = {}
    padmets = set()
    with open(analysis_group_file_path, 'r') as group_file:
        group_reader = csv.reader(group_file, delimiter='\t')
        for row in group_reader:
            # Blank lines yield an empty row; skip them instead of failing on row[0].
            if not row:
                continue
            group_name = row[0]
            groups = [species for species in row[1:] if species != '']
            group_data[group_name] = groups
            if group_name != 'all':
                padmets.update(
                    padmet_from_networks_path + '/' + species + '.padmet'
                    for species in groups)

    # A species can appear in several groups: keep each padmet once, in a
    # sorted order so the comparison input is the same from one run to the next.
    padmets = sorted(padmets)

    if not os.path.isdir(compare_output_path):
        # makedirs also creates a missing parent folder.
        os.makedirs(compare_output_path)

    padmetref = PadmetRef(database_path)
    # Create the reactions.tsv file needed to create dendrogram.
    padmet_path = ','.join(padmets)
    compare_padmet.compare_padmet(padmet_path=padmet_path,
                                  output=compare_output_path,
                                  padmetRef=padmetref,
                                  verbose=verbose)

    # Read the reactions.tsv file and remove the column unused.
    reactions_file = compare_output_path + '/' + 'reactions.tsv'
    reactions_dataframe = pa.read_csv(reactions_file, sep='\t')
    columns = [
        column for column in reactions_dataframe.columns
        if '(sep=;)' not in column and '_formula' not in column
    ]
    reactions_dataframe = reactions_dataframe[columns].copy()
    reactions_dataframe.set_index('reaction', inplace=True)

    # For each group, extract the reactions present in its species to create supervenn sets.
    supervenn_sets = []
    supervenn_labels = []
    for group_name in group_data:
        if group_name == 'all':
            continue
        groups = group_data[group_name]
        reactions_temp = []
        for species in groups:
            # Rows whose value is 1 in the species column are kept for that species.
            species_reactions_dataframe = reactions_dataframe[
                reactions_dataframe[species] == 1]
            reactions_temp.extend(species_reactions_dataframe.index.tolist())
        supervenn_sets.append(set(reactions_temp))
        supervenn_labels.append(group_name)

    supervenn(supervenn_sets,
              supervenn_labels,
              chunks_ordering='occurence',
              sets_ordering='minimize gaps')
    plt.savefig(compare_output_path + '/compare_group.png',
                bbox_inches='tight')
    plt.clf()

    dendrogram_reactions_distance.reaction_figure_creation(
        reactions_file,
        os.path.join(compare_output_path, "dendrogram_output"),
        padmetRef_file=database_path,
        verbose=verbose)

    # Report the elapsed time with at most three decimals.
    compare_end_time = (time.time() - compare_start_time)
    integer_part, decimal_part = str(compare_end_time).split('.')
    compare_time = ".".join([integer_part, decimal_part[:3]])

    if verbose:
        print("--- compare step done in: %ss ---" % compare_time)
def test_compare_padmet():
    """Check the tables written by compare_padmet for two modified padmets.

    Two padmets are built from 'test_data/pgdb'; 'ACYLCOASYN-RXN' is deleted
    from the first and 'ACYLCOADEHYDROG-RXN' from the second. compare_padmet
    is run on both, and the genes, reactions, pathways and metabolites tables
    written to 'output' are checked against the FABO_* expectations.

    The generated padmet files and the output folder are removed at the end,
    even when an assertion fails.
    """
    try:
        fabo_1_padmetSpec = from_pgdb_to_padmet('test_data/pgdb',
                                                extract_gene=True)
        fabo_1_padmetSpec.delNode('ACYLCOASYN-RXN')
        fabo_1_padmetSpec.generateFile('fabo_1.padmet')

        fabo_2_padmetSpec = from_pgdb_to_padmet('test_data/pgdb',
                                                extract_gene=True)
        fabo_2_padmetSpec.delNode('ACYLCOADEHYDROG-RXN')
        fabo_2_padmetSpec.generateFile('fabo_2.padmet')

        compare_padmet('fabo_1.padmet,fabo_2.padmet',
                       'output',
                       padmetRef=None,
                       verbose=False)

        # Genes: columns 1 and 2 hold the '1' flag for fabo_1 and fabo_2.
        genes_fabo_1 = []
        genes_fabo_2 = []
        with open('output/genes.tsv', 'r') as genes_file:
            for row in csv.reader(genes_file, delimiter='\t'):
                if row[1] == '1':
                    genes_fabo_1.append(row[0])
                if row[2] == '1':
                    genes_fabo_2.append(row[0])

        assert set(FABO_GENES).issubset(set(genes_fabo_1))
        assert set(FABO_GENES).issubset(set(genes_fabo_2))

        # Reactions: same layout as the genes table.
        reactions_fabo_1 = []
        reactions_fabo_2 = []
        with open('output/reactions.tsv', 'r') as reactions_file:
            for row in csv.reader(reactions_file, delimiter='\t'):
                if row[1] == '1':
                    reactions_fabo_1.append(row[0])
                if row[2] == '1':
                    reactions_fabo_2.append(row[0])

        # Each padmet is expected to lack only the reaction deleted from it.
        expected_fabo_1_rxns = [
            rxn for rxn in FABO_RXNS if rxn != 'ACYLCOASYN-RXN'
        ]
        expected_fabo_2_rxns = [
            rxn for rxn in FABO_RXNS if rxn != 'ACYLCOADEHYDROG-RXN'
        ]

        assert set(expected_fabo_1_rxns).issubset(set(reactions_fabo_1))
        assert set(expected_fabo_2_rxns).issubset(set(reactions_fabo_2))

        # Pathways: the header row is recognised by its literal cell values.
        pathway_fabo_1 = []
        pathway_fabo_2 = []
        # Start empty so a table without data rows fails the assertions below
        # instead of raising NameError.
        pwy_reactions_fabo_1 = []
        pwy_reactions_fabo_2 = []
        with open('output/pathways.tsv', 'r') as pathways_file:
            for row in csv.reader(pathways_file, delimiter='\t'):
                if row[0] != 'pathway':
                    pathway_fabo_1.append(row[0])
                    pathway_fabo_2.append(row[0])
                if row[3] != 'fabo_1_rxn_assoc (sep=;)':
                    pwy_reactions_fabo_1 = row[3].split(';')
                if row[4] != 'fabo_2_rxn_assoc (sep=;)':
                    pwy_reactions_fabo_2 = row[4].split(';')

        assert pathway_fabo_1 == ['FAO-PWY']
        assert pathway_fabo_2 == ['FAO-PWY']

        assert set(expected_fabo_1_rxns).issubset(set(pwy_reactions_fabo_1))
        assert set(expected_fabo_2_rxns).issubset(set(pwy_reactions_fabo_2))

        # Metabolites: every metabolite listed in the table is collected for
        # both organisms.
        # NOTE(review): the previous per-organism guards had the form
        # `cell != header or cell != ''`, which is always true, so they never
        # filtered anything (and the fabo_2 "produce" guard read column 2
        # twice). That effective behavior is kept here; switching to real
        # per-organism filtering must be checked against the actual content of
        # metabolites.tsv and FABO_CPDS first.
        metabolites_fabo_1 = set()
        metabolites_fabo_2 = set()
        with open('output/metabolites.tsv', 'r') as metabolites_file:
            for row in csv.reader(metabolites_file, delimiter='\t'):
                if row[0] != 'metabolite':
                    metabolites_fabo_1.add(row[0])
                    metabolites_fabo_2.add(row[0])

        assert set(FABO_CPDS).issubset(metabolites_fabo_1)
        assert set(FABO_CPDS).issubset(metabolites_fabo_2)
    finally:
        # Always clean up, so a failing run does not leave files behind.
        for padmet_file in ('fabo_1.padmet', 'fabo_2.padmet'):
            if os.path.exists(padmet_file):
                os.remove(padmet_file)
        shutil.rmtree('output', ignore_errors=True)