Python Metabolite Examples

Programming Language: Python

Namespace/Package Name: DB_schema

Class/Type: Metabolite

Examples at hotexamples.com: 6

Python Metabolite - 6 examples found. These are the top-rated real-world Python examples of DB_schema.Metabolite extracted from open-source projects. You can rate examples to help us improve their quality.

Frequently Used Methods

Show Hide

Metabolite(6)

chebi(1)

pubchem(1)

Frequently Used Methods

Metabolite (6)

chebi (1)

pubchem (1)

Example #1

Show file

def parse_ecocyc(strain, session):
    """Import EcoCyc pathway interactions as ortholog-derived interactions.

    For every pathway name in the module-level ``ecocyc_paths`` the matching
    ``<path>_interactions.txt`` file is read (missing files are skipped) and
    each row is processed as follows:

    * ``PARTICIPANT_A`` / ``PARTICIPANT_B`` found in the module-level
      ``ecocyc_compounds`` mapping are treated as metabolites; anything else
      is treated as a UniProt id and resolved through ``OrthologEcoli`` rows
      whose ``strain_protein`` equals ``strain``.
    * Metabolites not yet in the database are created only when the other
      participant resolved to at least one non-metabolite interactor.
    * For every resulting interactor pair an ``Interaction`` is looked up or
      created (marked ``ortholog_derived='Ecoli'``), the ``'EcoCyc'``
      ``InteractionSource`` is attached, and one ``InteractionReference`` per
      PubMed id in ``INTERACTION_PUBMED_ID`` is looked up or created.

    Args:
        strain: value matched against ``OrthologEcoli.strain_protein`` and
            stored on newly created ``Interaction`` rows.
        session: database session used for all queries, adds and commits
            (presumably a SQLAlchemy ``Session`` -- confirm against caller).

    Returns:
        None. Changes are committed to ``session`` and the total number of
        ``Interaction`` rows is printed at the end.
    """
    for path in ecocyc_paths:
        interaction_file_name = "Data/Ecoli/ecocyc_files/interactions_sif/" + path + "_interactions.txt"
        # if there was a problem obtaining the sif files for a pathway, the file may not exist
        if not exists(interaction_file_name): continue
        with open(interaction_file_name) as file:
            # NOTE(review): DictReader uses its default (comma) delimiter here -- confirm the
            # *_interactions.txt files are comma separated
            reader = csv.DictReader(file)
            for interaction_row in reader:
                # each entry of interactors_A/B is a two-element list: [db object, label used in references]
                interactors_A, interactors_B = [], []
                # ecocyc ids of metabolites that are referenced by this row but not yet in the database
                new_metabolite_A, new_metabolite_B = None, None

                id_A = interaction_row['PARTICIPANT_A']
                id_B = interaction_row['PARTICIPANT_B']

                # if id_A isn't in ecocyc_compounds, it's a uniprot id; search for ecoli orthologs matching id_A
                if id_A not in ecocyc_compounds:
                    for ortholog in session.query(OrthologEcoli).filter_by(
                            ortholog_uniprot=id_A,
                            strain_protein=strain).all():
                        if ortholog is not None:
                            # add both the pseudomonas protein and the ortholog id (will be needed later to
                            # create interaction reference) to interactors_A
                            interactors_A.append(
                                [ortholog.protein, ortholog.ortholog_id])
                # if id_A is in ecocyc_compounds, it means it's a metabolite id
                else:
                    A_ecocyc = ecocyc_compounds[id_A]['ecocyc']
                    # check if the metabolite already exists in database (only need to search ecocyc id since
                    # update_metabolites_ecocyc was called)
                    metabolite = session.query(Metabolite).filter_by(
                        ecocyc=A_ecocyc).first()
                    if metabolite is not None:
                        # if metabolite exists, add both the metabolite and its name (will be needed later
                        # to create interaction reference) to interactors_A
                        interactors_A.append([metabolite, metabolite.name])
                    else:
                        # if metabolite doesn't exist yet, store its id to create it later (don't create it now
                        # since if interactor_B is invalid, there is no need for new metabolite to be created)
                        new_metabolite_A = A_ecocyc

                # same as for id_A above, now with second interactor
                if id_B not in ecocyc_compounds:
                    for ortholog in session.query(OrthologEcoli).filter_by(
                            ortholog_uniprot=id_B,
                            strain_protein=strain).all():
                        if ortholog is not None:
                            interactors_B.append(
                                [ortholog.protein, ortholog.ortholog_id])
                else:
                    B_ecocyc = ecocyc_compounds[id_B]['ecocyc']
                    metabolite = session.query(Metabolite).filter_by(
                        ecocyc=B_ecocyc).first()
                    if metabolite is not None:
                        interactors_B.append([metabolite, metabolite.name])
                    else:
                        new_metabolite_B = B_ecocyc

                # store new interactor pairs from which to create interactions here
                interactor_pairs = []

                # case where no unknown metabolites were found
                if (new_metabolite_A is None) and (new_metabolite_B is None):
                    # iterate through known interactors, add them together to interactor_pairs
                    for interactor_A in interactors_A:
                        for interactor_B in interactors_B:
                            # only add the interactor pair if at least one of them is not a metabolite
                            # (type 'm' marks a metabolite)
                            if (interactor_A[0].type !=
                                    'm') | (interactor_B[0].type != 'm'):
                                interactor_pairs.append(
                                    [interactor_A, interactor_B])
                # case where interactor A is a new metabolite (new_metabolite_A); this branch is also taken
                # when both are new, but then interactors_B is empty and nothing is added
                elif new_metabolite_A is not None:
                    for interactor_B in interactors_B:
                        # don't add a new interactor pair if both are metabolites
                        if interactor_B[0].type != 'm':
                            # check if new metabolite exists in database (eg. if more than one ortholog was found for
                            # interactors_B, you don't want to create the same new metabolite twice)
                            metabolite = session.query(Metabolite).filter_by(
                                ecocyc=new_metabolite_A).first()
                            # create a new metabolite if it doesn't exist
                            if metabolite is None:
                                metabolite = Metabolite(
                                    id=new_metabolite_A,
                                    name=id_A,
                                    ecocyc=new_metabolite_A,
                                    pubchem=ecocyc_compounds[id_A]['pubchem'],
                                    kegg=ecocyc_compounds[id_A]['kegg'],
                                    cas=ecocyc_compounds[id_A]['cas'],
                                    chebi=ecocyc_compounds[id_A]['chebi'])
                                session.add(metabolite)
                            # add the interactor pair (for the new metabolite, make sure to add its name for
                            # reference later); note the pair is stored as [B, A] in this branch
                            interactor_pairs.append(
                                [interactor_B, [metabolite, id_A]])
                # same as previous case, but the new metabolite is new_metabolite_B
                elif new_metabolite_B is not None:
                    for interactor_A in interactors_A:
                        if interactor_A[0].type != 'm':
                            metabolite = session.query(Metabolite).filter_by(
                                ecocyc=new_metabolite_B).first()
                            if metabolite is None:
                                metabolite = Metabolite(
                                    id=new_metabolite_B,
                                    name=id_B,
                                    ecocyc=new_metabolite_B,
                                    pubchem=ecocyc_compounds[id_B]['pubchem'],
                                    kegg=ecocyc_compounds[id_B]['kegg'],
                                    cas=ecocyc_compounds[id_B]['cas'],
                                    chebi=ecocyc_compounds[id_B]['chebi'])
                                session.add(metabolite)
                            interactor_pairs.append(
                                [interactor_A, [metabolite, id_B]])

                # iterate through all interactor pairs and create new interactions
                # note interactor_pairs will be empty if:
                #   1) both interactors were new metabolites
                #   2) one or both ecoli interactors did not have orthologs in Pseudomonas
                for interactor_pair in interactor_pairs:
                    # an interaction is homogenous when both interactors are the same db object
                    homogenous = (
                        interactor_pair[0][0] == interactor_pair[1][0])
                    interaction = session.query(Interaction).filter(
                        Interaction.interactors.contains(
                            interactor_pair[0][0]),
                        Interaction.interactors.contains(
                            interactor_pair[1][0]),
                        Interaction.homogenous == homogenous).first()

                    # NOTE(review): source is None if no InteractionSource with data_source 'EcoCyc' exists;
                    # it is appended below without a check -- confirm the row is created beforehand
                    source = session.query(InteractionSource).filter_by(
                        data_source='EcoCyc').first()

                    # if interaction doesn't exist, add it, and EcoCyc as a source
                    if interaction is None:
                        # if this interaction is created for first time, mark it as ortholog derived from Ecoli
                        interaction = Interaction(
                            type=(interactor_pair[0][0].type + '-' +
                                  interactor_pair[1][0].type),
                            strain=strain,
                            homogenous=homogenous,
                            interactors=[
                                interactor_pair[0][0], interactor_pair[1][0]
                            ],
                            ortholog_derived='Ecoli')
                        interaction.sources.append(source)
                        session.add(interaction), session.commit()
                    # add EcoCyc as source for interaction if it isn't already
                    elif source not in interaction.sources:
                        interaction.sources.append(source)

                    # in case the interaction already existed, make sure interactor_a and interactor_b variables for
                    # new interaction reference match up with the first and second interactors of the existing
                    # interaction (so it's easy to see which Pseudomonas interactor matches up with which Ecoli
                    # ortholog)
                    interactor_a, interactor_b = None, None
                    if interaction.interactors[0] == interactor_pair[0][0]:
                        interactor_a = interactor_pair[0][1]
                        interactor_b = interactor_pair[1][1]
                    else:
                        interactor_b = interactor_pair[0][1]
                        interactor_a = interactor_pair[1][1]

                    # reference comment is "<label 0><interaction type><label 1>", concatenated without separators
                    comment = interactor_pair[0][1] + interaction_row[
                        "INTERACTION_TYPE"] + interactor_pair[1][1]

                    # iterate through all the pmids listed as reference for given interaction
                    # (';'-separated; an empty field yields a single empty-string pmid)
                    for pmid in interaction_row["INTERACTION_PUBMED_ID"].split(
                            ';'):
                        # check if interaction reference already exists in db
                        reference = session.query(
                            InteractionReference).filter_by(
                                pmid=pmid,
                                source_db='ecocyc',
                                comment=comment,
                                interactor_a=interactor_a,
                                interactor_b=interactor_b).first()
                        # if reference doesn't exist, create it, add it to the interaction's references, and the
                        # EcoCyc source to its sources
                        if reference is None:
                            reference = InteractionReference(
                                pmid=pmid,
                                source_db='ecocyc',
                                comment=comment,
                                interactor_a=interactor_a,
                                interactor_b=interactor_b)
                            interaction.references.append(reference)
                            reference.sources.append(source)
                        # if reference does exist, add interaction to its interactions and source to its sources
                        # (if it doesn't have them already)
                        else:
                            if interaction not in reference.interactions:
                                reference.interactions.append(interaction)
                            if source not in reference.sources:
                                reference.sources.append(source)
    # persist everything that was not committed inside the loop, then report the interaction total
    session.commit()
    print('ecocyc', session.query(Interaction).count())

Example #2

Show file

File: IMEx.py Project: solodova/PaintDB

def parse_ecoli_imex(session):
    """Import E. coli IMEx (PSICQUIC, tab-separated) rows as ortholog-derived interactions.

    For each row of ``Data/Ecoli/PSICQUIC/IMEx.txt``:

    * interactor B must resolve (by ``uniprotkb`` or ``refseq`` id) to at
      least one ``OrthologEcoli`` row, otherwise the row is skipped;
    * interactor A is resolved the same way, or -- for ``chebi`` ids -- to a
      ``Metabolite`` that is created (and committed) when missing;
    * protein/protein pairs are kept only when both orthologs share the same
      ``strain_protein``; a metabolite is paired with every ortholog of B;
    * an existing ``Interaction`` gets ``'cfe'`` recorded in
      ``ortholog_derived``; otherwise a new one is created with
      ``ortholog_derived='fe'``.

    Args:
        session: database session used for queries, adds and commits
            (presumably a SQLAlchemy ``Session`` -- confirm against caller).

    Returns:
        None. Prints the total number of ``Interaction`` rows when done.
    """
    with open('Data/Ecoli/PSICQUIC/IMEx.txt') as csvfile:
        reader = csv.DictReader(csvfile, delimiter='\t')
        for row in reader:
            # '-' marks a missing interactor id
            if row['#ID(s) interactor A'] == '-' or row['ID(s) interactor B'] == '-':
                continue

            # ids look like "<database>:<accession>"
            id_B = row['ID(s) interactor B'].split(':')
            orthologs_B = []
            if id_B[0] == 'uniprotkb':
                orthologs_B = session.query(OrthologEcoli).filter(
                    OrthologEcoli.ortholog_uniprot == id_B[1]).all()
            elif id_B[0] == 'refseq':
                orthologs_B = session.query(OrthologEcoli).filter(
                    OrthologEcoli.ortholog_refseq == id_B[1]).all()

            # without an ortholog for B no interaction can be derived
            if not orthologs_B:
                continue

            id_A = row['#ID(s) interactor A'].split(':')
            orthologs_A = []
            metabolite = None
            if id_A[0] == 'uniprotkb':
                orthologs_A = session.query(OrthologEcoli).filter(
                    OrthologEcoli.ortholog_uniprot == id_A[1]).all()
            elif id_A[0] == 'refseq':
                orthologs_A = session.query(OrthologEcoli).filter(
                    OrthologEcoli.ortholog_refseq == id_A[1]).all()
            elif id_A[0] == 'chebi':
                metabolite = session.query(Metabolite).filter(
                    Metabolite.chebi == id_A[1]).first()
                if metabolite is None:
                    # the chebi accession doubles as the metabolite id
                    metabolite = Metabolite(id=id_A[1], chebi=id_A[1])
                    session.add(metabolite)
                    session.commit()

            # each pair is [[object, reference label], [object, reference label]]
            interactors = []
            # .all() never yields None, so only the same-strain condition is checked
            for ortholog_A in orthologs_A:
                for ortholog_B in orthologs_B:
                    if ortholog_A.strain_protein == ortholog_B.strain_protein:
                        interactors.append(
                            [[ortholog_A.protein, ortholog_A.ortholog_id],
                             [ortholog_B.protein, ortholog_B.ortholog_id]])

            if metabolite is not None:
                for ortholog_B in orthologs_B:
                    interactors.append(
                        [[metabolite, metabolite.id],
                         [ortholog_B.protein, ortholog_B.ortholog_id]])

            for interactor_pair in interactors:
                first, second = interactor_pair[0][0], interactor_pair[1][0]
                homogenous = (first == second)
                interaction = session.query(Interaction).filter(
                    Interaction.interactors.contains(first),
                    Interaction.interactors.contains(second),
                    Interaction.homogenous == homogenous).first()

                if interaction is not None:
                    # record that the existing interaction is also supported by E. coli data
                    if interaction.ortholog_derived is None:
                        interaction.ortholog_derived = 'cfe'
                    elif 'fe' not in interaction.ortholog_derived:
                        interaction.ortholog_derived += ', cfe'
                    session.commit()
                else:
                    # take the strain from the protein side of the pair
                    # (type 'p'); the first element may be a metabolite
                    if first.type == 'p':
                        strain = first.strain
                    else:
                        strain = second.strain
                    interaction = Interaction(
                        strain=strain,
                        # stored explicitly because the lookup above filters on
                        # Interaction.homogenous; leaving it unset can keep the
                        # new row from ever being matched again
                        homogenous=homogenous,
                        interactors=[first, second],
                        type=(first.type + '-' + second.type),
                        ortholog_derived='fe')
                    session.add(interaction)
                    session.commit()

                # TODO: creation of InteractionReference / InteractionSource
                # ('IMEx') rows is disabled in this parser; the previous
                # commented-out implementation was removed from this function
                # and can be restored from version control if needed.
        session.commit()
        print(session.query(Interaction).count())

Example #3

Show file

def _psimi_code(field):
    """Return the 4 characters following the first 'MI:' in *field*, or None if 'MI:' is absent."""
    if 'MI:' not in field:
        return None
    return field.split('MI:')[1][:4]


def _parenthesized(field):
    """Return the text after the first '(' in *field* minus its last character, or None if no '('."""
    if '(' not in field:
        return None
    return field.split('(')[1][:-1]


def parse_ecoli_uniprot(session):
    """Import E. coli UniProt (PSICQUIC, tab-separated) rows as ortholog-derived interactions.

    For each row of ``Ecoli/PSICQUIC/UniProt.txt``:

    * interactor B must be a ``uniprotkb`` id with at least one
      ``OrthologEcoli`` row, otherwise the row is skipped;
    * interactor A is either a ``uniprotkb`` id (resolved the same way) or a
      ``chebi`` id, resolved to a ``Metabolite`` that is created when missing;
    * protein/protein pairs are kept only when both orthologs share the same
      ``strain_protein``; a metabolite is paired with every ortholog of B;
    * an existing ``Interaction`` gets ``'cfe'`` recorded in
      ``ortholog_derived``; otherwise a new one is created with
      ``ortholog_derived='fe'``;
    * an ``InteractionReference`` is added for every pair, and an
      ``InteractionSource`` with ``data_source='UniProt'`` is added if the
      interaction does not have one yet.

    Fields that cannot be parsed from a row (no ``MI:`` code, no
    parenthesised label, no ``pubmed:`` id) are stored as ``None`` instead of
    aborting the import with ``IndexError``.

    Args:
        session: database session used for queries, adds and commits
            (presumably a SQLAlchemy ``Session`` -- confirm against caller).

    Returns:
        None. Prints the total number of ``Interaction`` rows when done.
    """
    # NOTE(review): this path has no 'Data/' prefix, unlike the other
    # PSICQUIC parsers shown alongside it -- confirm the working directory.
    with open('Ecoli/PSICQUIC/UniProt.txt') as csvfile:
        reader = csv.DictReader(csvfile, delimiter='\t')
        for row in reader:
            # ids look like "<database>:<accession>"
            id_B = row['ID(s) interactor B'].split(':')
            orthologs_B = []
            if id_B[0] == 'uniprotkb':
                orthologs_B = session.query(OrthologEcoli).filter(
                    OrthologEcoli.ortholog_uniprot == id_B[1]).all()

            # without an ortholog for B no interaction can be derived
            if not orthologs_B:
                continue

            id_A = row['#ID(s) interactor A'].split(':')
            orthologs_A = []
            metabolite = None
            if id_A[0] == 'uniprotkb':
                orthologs_A = session.query(OrthologEcoli).filter(
                    OrthologEcoli.ortholog_uniprot == id_A[1]).all()
            elif id_A[0] == 'chebi':
                metabolite = session.query(Metabolite).filter(
                    Metabolite.chebi == id_A[1]).first()
                if metabolite is None:
                    # the chebi accession doubles as the metabolite id
                    metabolite = Metabolite(id=id_A[1], chebi=id_A[1])
                    session.add(metabolite)
                    session.commit()

            # each pair is [[object, reference label], [object, reference label]]
            interactors = []
            # .all() never yields None, so only the same-strain condition is checked
            for ortholog_A in orthologs_A:
                for ortholog_B in orthologs_B:
                    if ortholog_A.strain_protein == ortholog_B.strain_protein:
                        interactors.append(
                            [[ortholog_A.protein, ortholog_A.ortholog_id],
                             [ortholog_B.protein, ortholog_B.ortholog_id]])

            if metabolite is not None:
                for ortholog_B in orthologs_B:
                    interactors.append(
                        [[metabolite, metabolite.id],
                         [ortholog_B.protein, ortholog_B.ortholog_id]])

            if not interactors:
                continue

            # row-level values shared by every pair derived from this row
            detection_field = row['Interaction detection method(s)']
            type_field = row['Interaction type(s)']
            source_field = row['Source database(s)']
            author_field = row['Publication 1st author(s)']
            publication_field = row['Publication Identifier(s)']
            psimi_detection = _psimi_code(detection_field)
            pmid = None
            if 'pubmed:' in publication_field:
                pmid = publication_field.split('pubmed:')[1].split('|')[0]

            for interactor_pair in interactors:
                first, second = interactor_pair[0][0], interactor_pair[1][0]
                homogenous = (first == second)
                interaction = session.query(Interaction).filter(
                    Interaction.interactors.contains(first),
                    Interaction.interactors.contains(second),
                    Interaction.homogenous == homogenous).first()

                if interaction is not None:
                    # record that the existing interaction is also supported by E. coli data
                    if interaction.ortholog_derived is None:
                        interaction.ortholog_derived = 'cfe'
                    elif 'fe' not in interaction.ortholog_derived:
                        interaction.ortholog_derived += ', cfe'
                    session.commit()
                else:
                    # take the strain from the protein side of the pair
                    # (type 'p'); in metabolite pairs the first element is the
                    # metabolite
                    if first.type == 'p':
                        strain = first.strain
                    else:
                        strain = second.strain
                    interaction = Interaction(
                        strain=strain,
                        # stored explicitly because the lookup above filters on
                        # Interaction.homogenous
                        homogenous=homogenous,
                        interactors=[first, second],
                        # both sides contribute their type code; concatenating
                        # the interactor object itself raises TypeError
                        type=(first.type + '-' + second.type),
                        ortholog_derived='fe')
                    if (psimi_detection is not None
                            and is_experimental_psimi(psimi_detection)):
                        interaction.is_experimental = 1
                    session.add(interaction)
                    session.commit()

                # order the reference labels to match the stored interactor order
                if interaction.interactors[0] == first:
                    interactor_a = interactor_pair[0][1]
                    interactor_b = interactor_pair[1][1]
                else:
                    interactor_a = interactor_pair[1][1]
                    interactor_b = interactor_pair[0][1]

                reference = InteractionReference(
                    interaction_id=interaction.id,
                    psimi_detection=psimi_detection,
                    detection_method=_parenthesized(detection_field),
                    author_ln=author_field.split(' ')[0],
                    pub_date=_parenthesized(author_field),
                    pmid=pmid,
                    psimi_type=_psimi_code(type_field),
                    interaction_type=_parenthesized(type_field),
                    psimi_db=_psimi_code(source_field),
                    source_db=_parenthesized(source_field),
                    confidence_score=row['Confidence value(s)'],
                    interactor_a_id=interactor_a,
                    interactor_b_id=interactor_b)
                session.add(reference)

                source = session.query(InteractionSource).filter(
                    InteractionSource.interaction_id == interaction.id,
                    InteractionSource.data_source == 'UniProt').first()

                if source is None:
                    source = InteractionSource(interaction_id=interaction.id,
                                               data_source='UniProt')
                    session.add(source)

        session.commit()
        print(session.query(Interaction).count())

Example #4

Show file

File: BindingDB.py Project: solodova/PaintDB

def parse_ecoli_bindingdb(session):
    """Import E. coli BindingDB (PSICQUIC) rows as protein-metabolite interactions.

    For each row of ``Data/Ecoli/PSICQUIC/BindingDB.txt``:

    * interactor B must carry a ``uniprotkb`` id that has at least one
      ``OrthologEcoli`` row, otherwise the row is skipped;
    * interactor A is resolved to a ``Metabolite`` by ChEBI id, then by
      PubChem id (taken from the alternative ids), and is created when no
      match exists; rows that carry neither id are skipped;
    * for every ortholog an ``Interaction`` with the metabolite is looked up;
      an existing one gets ``'cfe'`` recorded in ``ortholog_derived``,
      otherwise a new ``'p-m'`` interaction with ``ortholog_derived='fe'`` is
      created;
    * an ``InteractionReference`` is added for every ortholog, and an
      ``InteractionSource`` with ``data_source='BindingDB'`` is added if the
      interaction does not have one yet.

    Args:
        session: database session used for queries, adds and commits
            (presumably a SQLAlchemy ``Session`` -- confirm against caller).

    Returns:
        None.
    """
    with open('Data/Ecoli/PSICQUIC/BindingDB.txt') as csvfile:
        # NOTE(review): default (comma) delimiter, unlike the tab-delimited
        # PSICQUIC parsers shown alongside this one -- confirm file format.
        reader = csv.DictReader(csvfile)

        # iterate through each interaction
        for row in reader:
            # interactor B must have a uniprot id
            if 'uniprotkb' not in row['ID(s) interactor B']:
                continue
            uniprot_protein = row['ID(s) interactor B'].split(
                'uniprotkb:')[1].split('|')[0]

            # [protein, ortholog id] for every ortholog of interactor B
            orthologs = [
                [ecoli_ortholog.protein, ecoli_ortholog.ortholog_id]
                for ecoli_ortholog in session.query(OrthologEcoli).filter(
                    OrthologEcoli.ortholog_uniprot == uniprot_protein).all()
            ]
            if not orthologs:
                continue

            chebi_metabolite, pubchem_metabolite = None, None

            # check if interactor A has a ChEBI id (the last one listed wins);
            # [1:-1] drops the first and last character of the accession
            for metabolite_id in row['#ID(s) interactor A'].split('|'):
                if metabolite_id.split(':')[0] == 'chebi':
                    chebi_metabolite = metabolite_id.split(':')[1][1:-1]

            metabolite = None

            # if interactor A has ChEBI id, query for matching metabolite
            if chebi_metabolite is not None:
                metabolite = session.query(Metabolite).filter(
                    Metabolite.chebi == chebi_metabolite).first()

            # if unable to identify metabolite based on ChEBI id, try using pubchem id
            if metabolite is None:
                for alt_id in row['Alt. ID(s) interactor A'].split('|'):
                    if alt_id.split(':')[0] == 'pubchem':
                        pubchem_metabolite = alt_id.split(':')[1]

                # only query when a pubchem id was actually found
                if pubchem_metabolite is not None:
                    metabolite = session.query(Metabolite).filter(
                        Metabolite.id == pubchem_metabolite).first()

            # if unable to find interactor A in database, create new metabolite
            if metabolite is None:
                # prefer the pubchem id as the metabolite id and fall back to
                # the ChEBI id, so a Metabolite is never created with id=None
                new_id = (pubchem_metabolite
                          if pubchem_metabolite is not None
                          else chebi_metabolite)
                if new_id is None:
                    # nothing identifies interactor A; skip the row
                    continue
                metabolite = Metabolite(id=new_id,
                                        pubchem=pubchem_metabolite,
                                        chebi=chebi_metabolite)
                session.add(metabolite)
                session.commit()

            for protein, ortholog_id in orthologs:
                interaction = session.query(Interaction).filter(
                    Interaction.interactors.contains(protein),
                    Interaction.interactors.contains(metabolite)).first()

                if interaction is not None:
                    # record that the existing interaction is also supported by E. coli data
                    if interaction.ortholog_derived is None:
                        interaction.ortholog_derived = 'cfe'
                    elif 'fe' not in interaction.ortholog_derived:
                        interaction.ortholog_derived += ', cfe'
                    session.commit()
                else:
                    interaction = Interaction(
                        # the strain lives on the protein object, not on the
                        # [protein, ortholog_id] list
                        strain=protein.strain,
                        interactors=[metabolite, protein],
                        type='p-m',
                        ortholog_derived='fe')
                    # should ortholog interactions be marked as experimental?
                    detection = row['Interaction detection method(s)']
                    if 'MI:' in detection and is_experimental_psimi(
                            detection.split('MI:')[1][:4]):
                        interaction.is_experimental = 1
                    session.add(interaction)
                    session.commit()

                # order the reference labels to match the stored interactor order
                if interaction.interactors[0] == metabolite:
                    interactor_a = metabolite.id
                    interactor_b = ortholog_id
                else:
                    interactor_a = ortholog_id
                    interactor_b = metabolite.id

                author, date, pmid = None, None, None

                # '-' marks a missing author field
                if row['Publication 1st author(s)'] != '-':
                    author = row['Publication 1st author(s)'].split(' ')[0]
                    date = row['Publication 1st author(s)'].split('(')[1][:-1]
                if 'pubmed:' in row['Publication Identifier(s)']:
                    # cut at the next '|' first so short pmids do not keep a
                    # trailing separator, then apply the 8-character limit
                    pmid = row['Publication Identifier(s)'].split(
                        'pubmed:')[1].split('|')[0][:8]

                reference = InteractionReference(
                    interaction_id=interaction.id,
                    detection_method=row['Interaction detection method(s)'].
                    split('(')[1][:-1],
                    author_ln=author,
                    pmid=pmid,
                    pub_date=date,
                    interaction_type=row['Interaction type(s)'].split(
                        '(')[1][:-1],
                    source_db=row['Source database(s)'].split('(')[1][:-1],
                    confidence=row['Confidence value(s)'].split('(')[0],
                    interactor_a=interactor_a,
                    interactor_b=interactor_b)

                source = session.query(InteractionSource).filter(
                    InteractionSource.interaction_id == interaction.id,
                    InteractionSource.data_source == 'BindingDB').first()

                if source is None:
                    source = InteractionSource(interaction_id=interaction.id,
                                               data_source='BindingDB')
                    session.add(source)
                session.add(reference)
        session.commit()

Example #5

Show file

def _get_or_create_kegg_metabolite(session, kegg_id):
    """Return the Metabolite whose ``kegg`` field equals *kegg_id*, creating it if needed.

    A newly created metabolite uses the KEGG id as its own id and takes its name,
    PubChem id and ChEBI id from ``kegg_compounds``. It is added to *session* but
    not committed here.

    Returns:
        The existing or new Metabolite, or None when no metabolite with that KEGG id
        is stored and *kegg_id* is not a key of ``kegg_compounds`` (so there is
        nothing to build it from).
    """
    metabolite = session.query(Metabolite).filter_by(kegg=kegg_id).first()
    if metabolite is None:
        if kegg_id not in kegg_compounds:
            return None
        compound = kegg_compounds[kegg_id]
        metabolite = Metabolite(id=kegg_id, name=compound['name'], pubchem=compound['pubchem'],
                                chebi=compound['chebi'], kegg=kegg_id)
        session.add(metabolite)
    return metabolite


def parse_kegg(org_id, strain, sourcedb, session):
    """Create interactions and interaction references from the KEGG pathways of one organism.

    For every pathway listed for *org_id*, each KGML relation (except ``maplink``
    relations) is turned into interactor pairs: entry 1 x entry 2, plus every
    non-metabolite interactor x every compound named in the relation's subtypes.
    Pairs made of two metabolites are never created. Each pair is stored as an
    Interaction (created when missing) together with a 'kegg' InteractionReference
    whose comment names the pathway.

    Args:
        org_id: KEGG organism code. Ids in a relation are kept when their prefix
            equals this code or when they are keys of ``kegg_compounds``. The value
            'eco' switches protein lookup to OrthologEcoli and marks new
            interactions as ``ortholog_derived = 'Ecoli'``.
        strain: strain stored on new interactions and used to filter OrthologEcoli rows.
        sourcedb: ``data_source`` value of the InteractionSource attached to every
            interaction and reference.
        session: database session used for all queries, adds and commits.

    Side effects:
        Commits after each newly created interaction and once at the end, then
        prints *sourcedb* and the total number of Interaction rows.
    """
    # get pathways for organism specified by org_id
    pathways = kegg_list(database='pathway', org=org_id).read().split('path:')
    # the first 8 characters of every non-empty chunk are the pathway id
    path_ids = [path[:8] for path in pathways if path != '']

    # The source row depends only on sourcedb, so look it up once instead of once per pair.
    # NOTE(review): this is None when no InteractionSource with this data_source exists yet,
    # and it is then appended as-is below - confirm the caller creates the row beforehand.
    source = session.query(InteractionSource).filter_by(data_source=sourcedb).first()

    # iterate through each path and obtain interactions
    for path in path_ids:
        # get kgml representation of path
        kgml_path = read(kegg_get(path, option='kgml'))
        reference_comment = 'in ' + kgml_path._getname() + ' path'
        # dictionary of compounds in current path (node_id: kegg_id)
        #   compound._getid() returns node id (only relevant in context of current path)
        #   compound._getname() returns kegg id (relevant in overall KEGG DB)
        compound_ids = {compound._getid(): compound._getname()[-6:] for compound in kgml_path.compounds}

        for relation in kgml_path.relations:
            # ignore maplink relations
            if relation.element.attrib['type'] == 'maplink':
                continue
            # relation._getentry1/2() returns protein id (locus) or compound id (KEGG id)
            entries = [relation._getentry1()._getname(), relation._getentry2()._getname()]
            # if one or both interactors are listed as undefined, move on to next interaction
            if 'undefined' in entries:
                continue

            # per entry: [interactor object, id to store on the interaction reference]
            interactors = [[], []]
            # per entry: kegg ids of metabolites which are not yet in the database
            new_metabolites = [[], []]
            for num, entry in enumerate(entries):
                # each entry may contain >1 id; go through all of them
                for entry_id in entry.split(' '):
                    if entry_id == '':
                        continue
                    id_parts = entry_id.split(':')
                    prefix, local_id = id_parts[0], id_parts[1]
                    is_compound = local_id in kegg_compounds
                    # if interactor is not protein or compound, continue
                    if prefix != org_id and not is_compound:
                        continue

                    if is_compound:
                        interactor = session.query(Metabolite).filter_by(kegg=local_id).first()
                        if interactor is None:
                            # created later, and only if it ends up with a protein partner
                            new_metabolites[num].append(local_id)
                        else:
                            interactors[num].append([interactor, interactor.id])
                    elif org_id == 'eco':
                        # parsing an E. coli path: add all orthologs, keeping the id of the
                        # E. coli protein for the interaction reference
                        for ortholog in session.query(OrthologEcoli).filter_by(ortholog_id=local_id,
                                                                               strain_protein=strain).all():
                            interactors[num].append([ortholog.protein, local_id])
                    else:
                        interactor = session.query(Interactor).get(local_id)
                        if interactor is not None:
                            # None (not the interactor id) is stored because the interactor
                            # is not an ortholog
                            interactors[num].append([interactor, None])

            interactor_pairs = []
            # pairs of interactors which already exist in the db (no m-m pairs)
            for interactor1 in interactors[0]:
                for interactor2 in interactors[1]:
                    if interactor1[0].type != 'm' or interactor2[0].type != 'm':
                        interactor_pairs.append([interactor1, interactor2])
            # pairs of an existing non-metabolite interactor and a metabolite from the opposite
            # entry which still has to be created
            for known, new_ids in ((interactors[0], new_metabolites[1]),
                                   (interactors[1], new_metabolites[0])):
                for interactor1 in known:
                    # ignore interactor pairs which would result in m-m interactions
                    if interactor1[0].type == 'm':
                        continue
                    for kegg_id in new_ids:
                        # never None here: ids in new_metabolites were checked against kegg_compounds
                        metabolite = _get_or_create_kegg_metabolite(session, kegg_id)
                        interactor_pairs.append([interactor1, [metabolite, metabolite.id]])

            # if no interactor pairs were found, move on to the next interaction
            if not interactor_pairs:
                continue

            # get all intermediates in reaction of type compound
            intermeds = []
            for subtype in relation.element.iter(tag='subtype'):
                if 'compound' not in subtype.attrib:
                    continue
                compound_node_id = subtype.attrib['compound']
                if compound_node_id is None:
                    continue
                # if the node id was not stored in the compound ids for this path, skip the subtype
                if int(compound_node_id) not in compound_ids:
                    continue
                metabolite = _get_or_create_kegg_metabolite(session, compound_ids[int(compound_node_id)])
                # compound is neither stored nor described in kegg_compounds: nothing to link
                if metabolite is None:
                    continue
                intermeds.append([metabolite, metabolite.id])

            # add protein - intermediate interactor pairs
            for interactor_list in interactors:
                for interactor in interactor_list:
                    if interactor[0].type != 'm':
                        for intermed in intermeds:
                            interactor_pairs.append([interactor, intermed])

            # go through each interaction pair and add interaction if it doesnt exist yet
            for (first, first_ref_id), (second, second_ref_id) in interactor_pairs:
                homogenous = (first == second)
                interaction = session.query(Interaction).filter(Interaction.interactors.contains(first),
                                                                Interaction.interactors.contains(second),
                                                                Interaction.homogenous == homogenous).first()

                # create interaction if it doesnt exist yet, add source to its sources if it isn't already
                if interaction is None:
                    interaction = Interaction(type=first.type + '-' + second.type,
                                              strain=strain, homogenous=homogenous,
                                              interactors=[first, second])
                    interaction.sources.append(source)
                    if org_id == 'eco':
                        interaction.ortholog_derived = 'Ecoli'
                    session.add(interaction)
                    session.commit()
                elif source not in interaction.sources:
                    interaction.sources.append(source)

                # in case the interaction already existed, make sure interactor_a and interactor_b for the
                # new interaction reference match up with the first and second interactors of the existing
                # interaction (so it's easy to see which Pseudomonas interactor matches up with which Ecoli
                # ortholog if the org id is eco)
                interactor_a, interactor_b = None, None
                if org_id == 'eco':
                    if interaction.interactors[0] == first:
                        interactor_a, interactor_b = first_ref_id, second_ref_id
                    else:
                        interactor_a, interactor_b = second_ref_id, first_ref_id

                # search for reference
                reference = session.query(InteractionReference).filter_by(source_db='kegg',
                                                                          comment=reference_comment,
                                                                          interactor_a=interactor_a,
                                                                          interactor_b=interactor_b).first()
                if reference is None:
                    # new reference: attach it to the interaction and record its source
                    reference = InteractionReference(source_db='kegg', comment=reference_comment,
                                                     interactor_a=interactor_a, interactor_b=interactor_b)
                    interaction.references.append(reference)
                    reference.sources.append(source)
                else:
                    # existing reference: link interaction and source unless already linked
                    if interaction not in reference.interactions:
                        reference.interactions.append(interaction)
                    if source not in reference.sources:
                        reference.sources.append(source)

    session.commit()
    print(sourcedb, session.query(Interaction).count())

Example #6

Show file

File: Parse_PSIMI_ecoli.py Project: solodova/PaintDB

def _psimi_protein_orthologs(session, interactor_field):
    """Look up OrthologEcoli rows for one PSI-MI interactor column.

    The uniprot id is tried first; the refseq id is used only when the uniprot
    lookup yields nothing.

    Returns:
        ``(orthologs, has_protein_id)``: *orthologs* is a non-empty list of
        OrthologEcoli rows or None when nothing matched; *has_protein_id* tells
        whether the column carried a uniprot or refseq id at all.
    """
    uniprot, refseq = None, None
    if 'uniprotkb' in interactor_field:
        uniprot = interactor_field.split('uniprotkb:')[1].split('|')[0]
    if 'refseq' in interactor_field:
        refseq = interactor_field.split('refseq:')[1].split('|')[0]

    orthologs = None
    # .all() returns an empty list (never None) when nothing matches, so an empty
    # result is mapped to None to let the "no orthologs" checks actually fire
    if uniprot is not None:
        orthologs = session.query(OrthologEcoli).filter_by(ortholog_uniprot=uniprot).all() or None
    if orthologs is None and refseq is not None:
        orthologs = session.query(OrthologEcoli).filter_by(ortholog_refseq=refseq).all() or None
    return orthologs, (uniprot is not None or refseq is not None)


def _psimi_metabolite_ids(interactor_field, alt_id_field):
    """Extract ``(chebi, pubchem)`` ids from an interactor column and its alternative-id column.

    Either value is None when the corresponding id is absent. The ChEBI id is read
    from the interactor column first and from the alternative ids otherwise; its
    last character is dropped, as in the original parsing.
    """
    chebi, pubchem = None, None
    if 'chebi' in interactor_field:
        chebi = interactor_field.split('CHEBI:')[1].split('|')[0][:-1]
    if 'pubchem' in alt_id_field:
        pubchem = alt_id_field.split('pubchem:')[1].split('|')[0]
    if chebi is None and 'chebi' in alt_id_field:
        chebi = alt_id_field.split('CHEBI:')[1].split('|')[0][:-1]
    return chebi, pubchem


def parse_psimi(session, file, source):
    """Import E. coli ortholog-derived interactions from a tab-separated PSI-MI file.

    Every row is mapped to protein-protein pairs (orthologs of A x orthologs of B
    with matching ``strain_protein``) or to metabolite-protein pairs when one side
    has no protein id but carries a ChEBI or PubChem id. Rows are skipped when a
    protein side has no orthologs or when no side is a protein with orthologs. For
    each pair the Interaction, its InteractionSource and its InteractionReferences
    are created when missing or linked when they already exist.

    Args:
        session: database session used for all queries, adds and commits.
        file: path of the PSI-MI file; columns are named by the module-level ``cols``.
        source: ``data_source`` value used for the InteractionSource rows.

    Side effects:
        Commits after each newly created interaction and once at the end, then
        prints *source* and the total number of Interaction rows.
    """
    with open(file) as csvfile:
        reader = csv.DictReader(csvfile, fieldnames=cols, delimiter='\t')

        # iterate through each interaction
        for row in reader:
            # a side with a uniprot/refseq id but no orthologs is an E. coli protein
            # without orthologs, so the whole interaction is skipped
            orthologs_A, is_protein_A = _psimi_protein_orthologs(session, row['interactor_A'])
            if orthologs_A is None and is_protein_A:
                continue
            orthologs_B, is_protein_B = _psimi_protein_orthologs(session, row['interactor_B'])
            if orthologs_B is None and is_protein_B:
                continue

            # if neither side has orthologs there is no protein interactor for this interaction
            if orthologs_A is None and orthologs_B is None:
                continue

            # if one of the interactors is a metabolite, chebi/pubchem hold its ids, metabolite
            # will be that metabolite and orthologs the protein ortholog(s) of the other side
            chebi, pubchem = None, None
            metabolite, orthologs = None, None
            if orthologs_A is None:
                chebi, pubchem = _psimi_metabolite_ids(row['interactor_A'], row['altID_A'])
                # no metabolite ids either: interactor A could not be identified
                if chebi is None and pubchem is None:
                    continue
                orthologs = orthologs_B
            elif orthologs_B is None:
                chebi, pubchem = _psimi_metabolite_ids(row['interactor_B'], row['altID_B'])
                if chebi is None and pubchem is None:
                    continue
                orthologs = orthologs_A

            # find the metabolite by chebi, then by pubchem; create it if it doesnt exist.
            # The other side is guaranteed to have orthologs here, so a new metabolite
            # always gets a protein interaction partner.
            if chebi is not None or pubchem is not None:
                metabolite_id = None
                if chebi is not None:
                    metabolite_id = chebi
                    metabolite = session.query(Metabolite).filter_by(chebi=chebi).first()
                # NOTE(review): when the chebi lookup fails and a pubchem id exists, a new
                # metabolite gets the pubchem id as its id even though a chebi id is known -
                # confirm this is intended (behaviour kept unchanged)
                if metabolite is None and pubchem is not None:
                    metabolite_id = pubchem
                    metabolite = session.query(Metabolite).filter_by(pubchem=pubchem).first()
                if metabolite is None:
                    metabolite = Metabolite(id=metabolite_id, chebi=chebi, pubchem=pubchem)
                    session.add(metabolite)
                else:
                    # fill in ids the stored metabolite is still missing
                    if metabolite.pubchem is None:
                        metabolite.pubchem = pubchem
                    if metabolite.chebi is None:
                        metabolite.chebi = chebi

            # list of interactor pairs; each interactor is [object, id stored on the reference]
            interactors = []
            if metabolite is None:
                # p-p interaction: only pair proteins whose strains match
                for ortholog_A in orthologs_A:
                    for ortholog_B in orthologs_B:
                        if ortholog_A.strain_protein == ortholog_B.strain_protein:
                            interactors.append([[ortholog_A.protein, ortholog_A.ortholog_id],
                                                [ortholog_B.protein, ortholog_B.ortholog_id]])
            else:
                # m-p interaction: pair the metabolite with every ortholog's protein
                for ortholog in orthologs:
                    interactors.append([[metabolite, metabolite.id],
                                        [ortholog.protein, ortholog.ortholog_id]])

            # for each interactor pair, create interaction if it doesnt exist, otherwise update attributes
            for (first, first_ref_id), (second, second_ref_id) in interactors:
                homogenous = (first == second)
                interaction = session.query(Interaction).filter(
                    Interaction.interactors.contains(first),
                    Interaction.interactors.contains(second),
                    Interaction.homogenous == homogenous).first()
                if interaction is None:
                    # since one of the interactors may be a metabolite, take the strain of the protein
                    strain = first.strain if first.type == 'p' else second.strain
                    # new interactions are marked as Ecoli ortholog derived
                    interaction = Interaction(strain=strain,
                                              interactors=[first, second],
                                              type=(first.type + '-' + second.type),
                                              ortholog_derived='Ecoli')
                    session.add(interaction)
                    session.commit()

                ref_parameter_list = get_psimi_ref_list(row)

                # in case the interaction already existed, make sure interactor_a and interactor_b for the
                # new interaction reference match up with the first and second interactors of the existing
                # interaction (so it's easy to see which Pseudomonas interactor matches up with which Ecoli
                # ortholog)
                if interaction.interactors[0] == first:
                    interactor_a, interactor_b = first_ref_id, second_ref_id
                else:
                    interactor_a, interactor_b = second_ref_id, first_ref_id

                is_experimental = is_experimental_interaction(row)

                # find or create the source, then make sure the interaction lists it
                nsource = session.query(InteractionSource).filter_by(
                    data_source=source,
                    is_experimental=is_experimental).first()
                if nsource is None:
                    nsource = InteractionSource(data_source=source, is_experimental=is_experimental)
                    interaction.sources.append(nsource)
                elif nsource not in interaction.sources:
                    interaction.sources.append(nsource)

                # go through each reference in the ref_parameter list, search for it, and create it if missing
                for ref in ref_parameter_list:
                    nref = session.query(InteractionReference).filter_by(
                        detection_method=ref[0],
                        author_ln=ref[1],
                        pub_date=ref[2],
                        pmid=ref[3],
                        interaction_type=ref[4],
                        source_db=ref[5],
                        confidence=ref[6],
                        interactor_a=interactor_a,
                        interactor_b=interactor_b).first()
                    if nref is None:
                        # new reference: attach it to the interaction and record its source
                        nref = InteractionReference(detection_method=ref[0],
                                                    author_ln=ref[1],
                                                    pub_date=ref[2],
                                                    pmid=ref[3],
                                                    interaction_type=ref[4],
                                                    source_db=ref[5],
                                                    confidence=ref[6],
                                                    interactor_a=interactor_a,
                                                    interactor_b=interactor_b)
                        interaction.references.append(nref)
                        nref.sources.append(nsource)
                    else:
                        # existing reference: link interaction and source unless already linked
                        if interaction not in nref.interactions:
                            nref.interactions.append(interaction)
                        if nsource not in nref.sources:
                            nref.sources.append(nsource)

    session.commit()
    print(source, session.query(Interaction).count())