Example #1
def parse_table(reference, key, args_dict=None):
    """Parse a reference table into a dictionary keyed by analyte ID."""

    column_names = [
        'analyte_id', 'analyte_name', 'reaction_id', 'reaction_name'
    ]
    if 'source_id' in reference[key].columns:
        column_names.append('source_id')

    reference_parsed = reference[key][column_names].copy()

    # Split "analyte_name [compartment]" into analyte and compartment
    reference_parsed['analyte'] = reference_parsed[
        'analyte_name'].str.split(r' \[').str[0]
    reference_parsed['compartment'] = reference_parsed[
        'analyte_name'].str.split(r' \[').str[1].str.split(r'\]').str[0]

    reference_dictionary = {}

    counter = 0
    total = len(reference_parsed.index)
    feed_interval = max(total // 15, 1)  # ~15 progress updates per table

    for _, row in reference_parsed.iterrows():

        analyte_id = row['analyte_id']
        reference_dictionary[analyte_id] = {
            'analyte_id': analyte_id,
            'reaction_id': row['reaction_id'],
            'reaction_name': row['reaction_name'],
            'analyte': row['analyte'],
            'compartment': row['compartment']
        }
        if 'source_id' in column_names:
            reference_dictionary[analyte_id]['source_id'] = row['source_id']

        if counter % feed_interval == 0 and args_dict is not None:
            progress_feed(args_dict, "reactions")

        counter += 1

    return reference_dictionary
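
A minimal usage sketch for parse_table; the toy table and its values below are illustrative only, not from the Metaboverse source, and the 'modifiers' key is arbitrary. With args_dict=None the progress_feed callback is never reached, so the snippet runs standalone next to the function definition:

import pandas as pd

toy_table = pd.DataFrame({
    'analyte_id': ['A1', 'A2'],
    'analyte_name': ['L-Glutamate [cytosol]', 'ATP [mitochondrial matrix]'],
    'reaction_id': ['R1', 'R2'],
    'reaction_name': ['glutamate transport', 'ATP synthesis'],
})

parsed = parse_table({'modifiers': toy_table}, 'modifiers')
print(parsed['A1']['compartment'])  # -> cytosol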
Example #2
def __main__(args_dict):
    """Analyze data on network model
    """

    # Get network curation info
    network = read_network(network_url=args_dict['network'])
    progress_feed(args_dict, "model", 2)

    if args_dict['organism_curation'] != 'None':
        args_dict['species_id'] = network['species_id']

    # Read in data (if any)
    if (str(args_dict['transcriptomics']).lower() != 'none'
            or str(args_dict['proteomics']).lower() != 'none'
            or str(args_dict['metabolomics']).lower() != 'none'):

        data, stats = prepare_data(
            network=network,
            transcriptomics_url=args_dict['transcriptomics'],
            proteomics_url=args_dict['proteomics'],
            metabolomics_url=args_dict['metabolomics'])
        progress_feed(args_dict, "model", 3)
        flag_data = False

    else:
        # No omics data were provided; build placeholder frames so the
        # downstream modeling steps still have something to operate on
        data = pd.DataFrame()
        data['NoSample'] = [0, 0, 0]
        data.index = ['dummy_index1', 'dummy_index2', 'dummy_index3']

        stats = pd.DataFrame()
        stats['NoSample'] = [0, 0, 0]
        stats.index = ['dummy_index1', 'dummy_index2', 'dummy_index3']

        progress_feed(args_dict, "model", 3)
        flag_data = True

    # Generate graph
    graph_name = model(args_dict=args_dict,
                       network=network,
                       data=data,
                       stats=stats,
                       species_id=args_dict['species_id'],
                       output_file=args_dict['output_file'],
                       flag_data=flag_data)

    # Search network for motifs
    motif_search(model_file=graph_name)
    progress_feed(args_dict, "model", 10)
Example #3
def __main__(species_id, output_dir, args_dict):
    """Fetch all reactions for a given organism
    """

    #############
    # TODO: make pathway and reaction IDs non-'R-HSA-...' style
    #############

    # Get pathways files
    pathways_dir = unpack_pathways(output_dir=output_dir)
    progress_feed(args_dict, "curate", 10)

    pathways_list = get_pathways(species_id=species_id,
                                 pathways_dir=pathways_dir)
    progress_feed(args_dict, "curate", 7)

    # Get list of reaction files to use for populating database
    pathway_database, reaction_database, species_database, \
    name_database, compartment_database, compartment_dictionary, \
    components_database = process_components(
        output_dir=output_dir,
        pathways_dir=pathways_dir,
        pathways_list=pathways_list,
        species_id=species_id,
        args_dict=args_dict)
    progress_feed(args_dict, "curate", 5)

    if 'sbml' in pathways_dir:
        shutil.rmtree(pathways_dir)
    else:
        print(
            'Could not find SBML file directory, skipping removal of this directory...'
        )

    return (pathway_database, reaction_database, species_database,
            name_database, compartment_dictionary, components_database)
Example #4
def __main__(args_dict):
    """Curate reactome database
    """

    # Load reactions
    print(
        'Curating Reactome network database. Please be patient, this will take several minutes...'
    )
    print('Loading reactions...')
    progress_feed(args_dict, "curate", 3)
    pathway_database, reaction_database, species_database, \
    name_database, compartment_dictionary, \
    components_database = load_reactions(
        species_id=args_dict['species_id'],
        output_dir=args_dict['output'],
        args_dict=args_dict)

    print('Loading complex database...')
    complexes_reference = load_complexes(output_dir=args_dict['output'])
    progress_feed(args_dict, "curate", 3)

    print('Parsing complex database...')
    complexes_reference['complex_dictionary'] = parse_complexes(
        complexes_reference)
    progress_feed(args_dict, "curate", 2)

    print('Finalizing complex database...')
    complexes_reference['complex_dictionary'] = reference_complex_species(
        reference=complexes_reference['complex_dictionary'],
        name_database=name_database)
    progress_feed(args_dict, "curate", 2)

    print('Parsing Ensembl database...')
    ensembl_reference = parse_ensembl_synonyms(
        output_dir=args_dict['output'], species_id=args_dict['species_id'])
    progress_feed(args_dict, "curate", 3)

    print('Adding gene IDs to name database...')
    name_database = add_genes(name_database=name_database,
                              ensembl_reference=ensembl_reference)
    progress_feed(args_dict, "curate", 2)

    print('Parsing UniProt database...')
    uniprot_reference = parse_uniprot_synonyms(
        output_dir=args_dict['output'], species_id=args_dict['species_id'])
    progress_feed(args_dict, "curate", 3)

    print('Parsing ChEBI database...')
    chebi_reference, uniprot_metabolites = parse_chebi_synonyms(
        output_dir=args_dict['output'])
    progress_feed(args_dict, "curate", 5)

    metaboverse_db = {
        'species_id': args_dict['species_id'],
        'pathway_database': pathway_database,
        'reaction_database': reaction_database,
        'species_database': species_database,
        'name_database': name_database,
        'ensembl_synonyms': ensembl_reference,
        'uniprot_synonyms': uniprot_reference,
        'chebi_synonyms': chebi_reference,
        'uniprot_metabolites': uniprot_metabolites,
        'complex_dictionary': complexes_reference['complex_dictionary'],
        'compartment_dictionary': compartment_dictionary,
        'components_database': components_database
    }

    # Write database to file
    print('Writing metaboverse database to file...')
    args_dict['curation'] = args_dict['species_id'] + '_metaboverse_db.pickle'
    write_database(output=args_dict['output'],
                   file=args_dict['curation'],
                   database=metaboverse_db)
    progress_feed(args_dict, "curate", 5)

    print('Metaboverse database curation complete.')

    return args_dict
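
None of these examples defines write_database. The sketch below is hypothetical: it assumes the helper simply pickles the curated database dict into the output directory, inferred from the '.pickle' file name above and the pickle.load call in Example #6; the project's real implementation may differ:

import os
import pickle

def write_database(output, file, database):
    # Hypothetical sketch: serialize the curated database dict to
    # <output>/<file>; the real Metaboverse helper may differ.
    with open(os.path.join(output, file), 'wb') as db_file:
        pickle.dump(database, db_file)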
Example #5
def __main__(args_dict,
             network,
             data,
             stats,
             species_id,
             output_file,
             flag_data=False):
    """Generate graph object for visualization
    """

    print('Preparing metadata...')
    # Generate output file name
    graph_name = name_graph(output_file=output_file, species_id=species_id)

    # Prepare uniprot to ensembl name mapper
    reverse_genes = {v: k for k, v in network['ensembl_synonyms'].items()}
    protein_dictionary = uniprot_ensembl_reference(
        uniprot_reference=network['uniprot_synonyms'],
        ensembl_reference=reverse_genes)
    progress_feed(args_dict, "model", 1)

    reverse_metabolite_dictionary = make_metabolite_synonym_dictionary(
        network=network)

    # Generate graph
    # Name mapping
    print('Building network...')
    G, network['reaction_database'] = build_graph(
        network=network['reaction_database'],
        species_reference=network['species_database'],
        name_reference=network['name_database'],
        protein_reference=protein_dictionary,
        uniprot_reference=network['uniprot_synonyms'],
        complexes=network['complex_dictionary'],
        species_id=species_id,
        gene_reference=network['ensembl_synonyms'],
        compartment_reference=network['compartment_dictionary'],
        component_database=network['components_database'],
        reverse_metabolite_dictionary=reverse_metabolite_dictionary)
    progress_feed(args_dict, "model", 9)

    # TODO: for gene and protein components, add a section to the reaction
    # database for additional_components, and pull those in with everything
    # else on the JavaScript side

    # Overlay data and stats, calculate heatmap values for p-value
    # and expression value
    print('Mapping user data...')
    degree_dictionary = compile_node_degrees(graph=G)

    # Map both synonyms and canonical IDs onto a single canonical key so
    # user-provided names can be matched in either form
    name_reference = {}
    for k, v in network['ensembl_synonyms'].items():
        name_reference[v] = k
        name_reference[k] = k
    for k, v in network['uniprot_synonyms'].items():
        name_reference[v] = k
        name_reference[k] = k
    for k, v in network['chebi_synonyms'].items():
        name_reference[k] = v
        name_reference[v] = v

    G, max_value, max_stat = map_attributes(
        graph=G,
        data=data,
        stats=stats,
        name_reference=name_reference,
        degree_dictionary=degree_dictionary)
    progress_feed(args_dict, "graph", 5)

    if flag_data:
        # No user data were provided (placeholder frames only), so fall
        # back to default display bounds
        max_value = 5
        max_stat = 1

    print('Broadcasting values where available...')
    categories = data.columns.tolist()
    G = broadcast_values(graph=G,
                         categories=categories,
                         max_value=max_value,
                         max_stat=max_stat)
    progress_feed(args_dict, "graph", 10)

    print('Compiling collapsed reaction reference...')
    # Collapse reactions
    G, updated_reactions, changed_reactions = collapse_nodes(
        graph=G,
        reaction_dictionary=network['reaction_database'],
        samples=len(categories))
    updated_pathway_dictionary = generate_updated_dictionary(
        original_database=network['pathway_database'],
        update_dictionary=changed_reactions)
    progress_feed(args_dict, "graph", 8)

    # Generate list of super pathways (those with an outsized number of
    # reactions; the scale factor works out to roughly 200 reactions on a
    # full Reactome reaction database)
    print('Compiling super pathways...')

    scale_factor = int(len(network['reaction_database']) * 0.0157)
    super_pathways = compile_pathway_degree(
        pathways=network['pathway_database'], scale_factor=scale_factor)

    motif_reaction_dictionary = make_motif_reaction_dictionary(
        network=network,
        updated_reactions=updated_reactions,
        updated_pathway_dictionary=updated_pathway_dictionary)

    # Re-key collapsed pathways by their pathway ID
    mod_collapsed_pathways = {
        v['id']: v for v in updated_pathway_dictionary.values()
    }

    # Export graph, pathway membership, pathway degree, other refs
    print('Exporting graph...')
    output_graph(graph=G,
                 output_name=graph_name,
                 pathway_dictionary=network['pathway_database'],
                 collapsed_pathway_dictionary=updated_pathway_dictionary,
                 super_pathways=super_pathways,
                 reaction_dictionary=network['reaction_database'],
                 collapsed_reaction_dictionary=updated_reactions,
                 motif_reaction_dictionary=motif_reaction_dictionary,
                 mod_collapsed_pathways=mod_collapsed_pathways,
                 max_value=max_value,
                 max_stat=max_stat,
                 categories=categories)
    print('Graphing complete.')
    progress_feed(args_dict, "graph", 2)

    return graph_name
Example #6
def main(args=None):

    check_dependencies()

    # Read in arguments
    args, args_dict = parse_arguments(args, __version__)

    if args_dict['cmd'] == 'preprocess':

        print('Preprocessing ' + args_dict['type'] + ' dataset...')
        preprocess(args_dict)
        sys.stdout.flush()
        sys.exit(1)

    # Run metaboverse-curate
    elif args_dict['cmd'] == 'curate':

        print(args_dict)
        if str(args_dict['organism_curation']) != 'None':
            progress_feed(args_dict=args_dict, process="curate", amount=50)
            # Update args_dict with path for network model
            with open(args_dict['organism_curation'], 'rb') as network_file:
                network = pickle.load(network_file)
                args_dict['species_id'] = network['species_id']
                args_dict['output_file'] = args_dict['output'] \
                    + args_dict['species_id'] \
                    + '_global_reactions.json'
                args_dict['network'] = args_dict['organism_curation']

                # add variables back to session data json file
                session_file = args_dict['session_data']
                update_session(session_file=session_file,
                               key='species_id',
                               value=args_dict['species_id'])
                update_session(session_file=session_file,
                               key='output_file',
                               value=args_dict['output_file'])
                update_session(session_file=session_file,
                               key='database_url',
                               value=args_dict['output_file'])


            print('Skipping organism network modeling as one was provided '
                  'by the user...')
            sys.stdout.flush()
        else:
            print('Curating network model...')
            args_dict['network'] = args_dict['model_file']
            args_dict = curate(args_dict)
            sys.stdout.flush()

        print('Curating data onto the network model...')
        analyze(args_dict)
        sys.stdout.flush()
        sys.exit(1)

    # Print some error messaging
    else:
        raise Exception('Invalid sub-module selected')

    # Exit
    # Check log file for errors and exceptions
    #get_dependencies(args_dict)
    sys.stdout.flush()
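
All six examples report progress through progress_feed, which none of them defines. The sketch below is hypothetical: its behavior is inferred only from the call sites (an args_dict, a process name, and an optional amount) and from the session-data JSON file handled in Example #6; the real Metaboverse helper may work differently:

import json

def progress_feed(args_dict, process, amount=1):
    # Hypothetical sketch: increment a per-process counter in the
    # session-data JSON; the key layout here is an assumption.
    if args_dict is not None and 'session_data' in args_dict:
        with open(args_dict['session_data']) as session_file:
            session = json.load(session_file)
        session[process] = session.get(process, 0) + amount
        with open(args_dict['session_data'], 'w') as session_file:
            json.dump(session, session_file)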