Exemple #1
0
def parse(session):
    """Import the Zhang predicted PAO1 protein-protein interactions.

    Reads ``Data/PAO1/Zhang.csv``. Rows with a ``Confidence`` below 0.9, or
    whose ``Protein1``/``Protein2`` id does not resolve to an ``Interactor``,
    are skipped. For every remaining row the matching ``Interaction`` is
    looked up (or created with strain 'PAO1' and type 'p-p'), tagged with the
    'Zhang' source, and linked to an ``InteractionReference`` carrying the
    row's confidence and comment. The total interaction count is printed at
    the end.

    Args:
        session: ORM session used for every query, add and commit.
    """
    with open('Data/PAO1/Zhang.csv') as handle:
        records = csv.DictReader(handle)

        # A single source row is shared by everything imported from this file.
        zhang = InteractionSource(data_source='Zhang', is_experimental=0)
        session.add(zhang)
        session.commit()

        for record in records:
            # Drop predictions whose confidence is below the 0.9 cut-off.
            if float(record['Confidence']) < 0.9:
                continue

            protein_1 = session.query(Interactor).get(record['Protein1'])
            if protein_1 is None:
                continue

            protein_2 = session.query(Interactor).get(record['Protein2'])
            if protein_2 is None:
                continue

            # An interaction is "homogenous" when both partners are the same
            # interactor.
            self_pair = (protein_1 == protein_2)
            pair_query = session.query(Interaction).filter(
                Interaction.interactors.contains(protein_1),
                Interaction.interactors.contains(protein_2),
                Interaction.homogenous == self_pair)
            interaction = pair_query.first()

            if interaction is None:
                interaction = Interaction(
                    strain='PAO1',
                    homogenous=self_pair,
                    type='p-p',
                    interactors=[protein_1, protein_2])
                interaction.sources.append(zhang)
                session.add(interaction)
                session.commit()
            elif zhang not in interaction.sources:
                interaction.sources.append(zhang)

            # Fields that identify a reference; used both to search for an
            # existing one and to build a new one.
            reference_fields = {
                'detection_method': 'computational prediction',
                'pmid': '22848443',
                'interaction_type': 'predicted',
                'confidence': record['Confidence'],
                'comment': record['Comment'],
            }
            reference = session.query(InteractionReference).filter_by(
                **reference_fields).first()

            if reference is None:
                reference = InteractionReference(
                    author_ln='Zhang',
                    pub_date='2012',
                    **reference_fields)
                interaction.references.append(reference)
                reference.sources.append(zhang)
                continue

            # The reference already exists: only add the missing links.
            if reference not in interaction.references:
                interaction.references.append(reference)
            if zhang not in reference.sources:
                reference.sources.append(zhang)

    session.commit()
    print('zhang', session.query(Interaction).count())
Exemple #2
0
def parse(session):
    """Import the XLinkDB cross-linking interactions for PAO1.

    Reads the tab-delimited ``Data/PAO1/xlinkdb.txt``. Each row's ``proA`` and
    ``proB`` ids are resolved first as an ``Interactor`` primary key and then
    as a ``Protein.uniprotkb`` value; rows with an unresolved partner are
    skipped. Every resolved pair is attached to one shared 'XLinkDB' source
    and one shared ``InteractionReference``, creating the ``Interaction``
    (strain 'PAO1', type 'p-p') when it does not exist yet. The total
    interaction count is printed at the end.

    Args:
        session: ORM session used for every query, add and commit.
    """

    def resolve(identifier):
        # Primary-key lookup first, UniProtKB accession as the fallback.
        hit = session.query(Interactor).get(identifier)
        if hit is None:
            hit = session.query(Protein).filter_by(
                uniprotkb=identifier).first()
        return hit

    with open('Data/PAO1/xlinkdb.txt') as handle:
        records = csv.DictReader(handle, delimiter='\t')

        # One reference and one source cover the whole file.
        xlink_reference = InteractionReference(
            detection_method='chemical cross-linking mass spectrometry',
            interaction_type='physical association',
            author_ln='Navari',
            pub_date='2015',
            pmid='25800553',
            source_db='xlinkdb')
        xlink_source = InteractionSource(data_source='XLinkDB',
                                         is_experimental=1)
        xlink_source.references.append(xlink_reference)
        session.add(xlink_source)
        session.add(xlink_reference)
        session.commit()

        for record in records:
            partner_a = resolve(record['proA'])
            if partner_a is None:
                continue

            partner_b = resolve(record['proB'])
            if partner_b is None:
                continue

            self_pair = (partner_a == partner_b)
            interaction = session.query(Interaction).filter(
                Interaction.interactors.contains(partner_a),
                Interaction.interactors.contains(partner_b),
                Interaction.homogenous == self_pair).first()

            if interaction is not None:
                # Existing interaction: only add the links that are missing.
                if xlink_reference not in interaction.references:
                    interaction.references.append(xlink_reference)
                if xlink_source not in interaction.sources:
                    interaction.sources.append(xlink_source)
                continue

            interaction = Interaction(
                strain='PAO1',
                homogenous=self_pair,
                type='p-p',
                interactors=[partner_a, partner_b])
            interaction.references.append(xlink_reference)
            interaction.sources.append(xlink_source)
            session.add(interaction)
            session.commit()

    session.commit()
    print('xlinkdb', session.query(Interaction).count())
Exemple #3
0
def parse(session):
    """Import the Geoff Winsor PAO1 interactions.

    ``Data/PAO1/GeoffWinsor.csv`` is consumed two rows at a time: the first
    row of a pair supplies interactor A's ``locus_tag``; the second row
    supplies interactor B's ``locus_tag`` together with the
    ``experimental_type`` and ``pmid`` used for the reference. Pairs in which
    either locus tag does not resolve to an ``Interactor`` are skipped as a
    whole, and a trailing row without a partner is ignored. Each remaining
    pair is attached to the shared 'Geoff' source and to an
    ``InteractionReference`` (reused when one with the same detection method
    and pmid exists). The total interaction count is printed at the end.

    Args:
        session: ORM session used for every query, add and commit.
    """
    with open('Data/PAO1/GeoffWinsor.csv') as csvfile:
        reader = csv.DictReader(csvfile)

        # Every interaction from this file shares one source.
        source = InteractionSource(data_source='Geoff', is_experimental=1)
        session.add(source)
        session.commit()

        for row_a in reader:
            # Read the partner row before any skip decision. Skipping on a
            # missing interactor A without consuming its partner would shift
            # the A/B pairing for the rest of the file.
            row_b = next(reader, None)
            if row_b is None:
                # Odd number of data rows: the last row has no partner.
                break

            interactor_A = session.query(Interactor).get(row_a['locus_tag'])
            if interactor_A is None:
                continue
            interactor_B = session.query(Interactor).get(row_b['locus_tag'])
            if interactor_B is None:
                continue

            homogenous = (interactor_A == interactor_B)
            interaction = session.query(Interaction).filter(
                Interaction.interactors.contains(interactor_A),
                Interaction.interactors.contains(interactor_B),
                Interaction.homogenous == homogenous).first()
            if interaction is None:
                interaction = Interaction(
                    strain='PAO1',
                    homogenous=homogenous,
                    type='p-p',
                    interactors=[interactor_A, interactor_B])
                interaction.sources.append(source)
                session.add(interaction)
                session.commit()
            elif source not in interaction.sources:
                interaction.sources.append(source)

            # Reference details are taken from the second row of the pair.
            detection_method = row_b['experimental_type']
            pmid = row_b['pmid']
            reference = session.query(InteractionReference).filter_by(
                detection_method=detection_method, pmid=pmid).first()

            if reference is None:
                reference = InteractionReference(
                    detection_method=detection_method, pmid=pmid)
                interaction.references.append(reference)
                reference.sources.append(source)
            else:
                if interaction not in reference.interactions:
                    reference.interactions.append(interaction)
                if source not in reference.sources:
                    reference.sources.append(source)

    session.commit()
    print('geoff', session.query(Interaction).count())
Exemple #4
0
def parse(session):
    """Import RegulonDB (E. coli) regulatory interactions via orthologs.

    Reads ``Data/Ecoli/RegulonDB.csv``. For each row the ``TF name`` (with
    its first letter lower-cased) and the ``Regulated gene`` are looked up as
    ``OrthologEcoli.ortholog_name``. Every ortholog pair whose
    ``strain_protein`` values match yields an interaction between the two
    orthologs' ``protein`` objects; it is created when missing, tagged with
    the shared 'RegulonDB(Ecoli)' source, and given one
    ``InteractionReference`` per entry in the row's ``Evidence`` list. The
    total interaction count is printed at the end.

    Args:
        session: ORM session used for every query, add and commit.
    """
    with open('Data/Ecoli/RegulonDB.csv') as csvfile:
        reader = csv.DictReader(csvfile)
        # All interactions from this file use the same source, so create and
        # add it up front.
        # Note: since no references are available, is_experimental is set to 2
        source = InteractionSource(data_source='RegulonDB(Ecoli)',
                                   is_experimental=2)
        session.add(source)
        session.commit()

        for row in reader:
            tf_name = row['TF name']
            # An empty TF name cannot match an ortholog (and cannot be
            # indexed below), so skip the row.
            if not tf_name:
                continue

            # .all() returns a (possibly empty) list, never None, so test
            # for emptiness to skip rows without orthologs.
            orthologs_A = session.query(OrthologEcoli).filter_by(
                ortholog_name=tf_name[0].lower() + tf_name[1:]).all()
            if not orthologs_A:
                continue
            orthologs_B = session.query(OrthologEcoli).filter_by(
                ortholog_name=row['Regulated gene']).all()
            if not orthologs_B:
                continue

            # Pair up the Pseudomonas proteins of orthologs A and B, but only
            # when their strains match. The ortholog id is kept alongside
            # each protein for building the interaction reference later.
            interactor_pairs = []
            for ortholog_A in orthologs_A:
                for ortholog_B in orthologs_B:
                    if ortholog_A.strain_protein == ortholog_B.strain_protein:
                        interactor_pairs.append(
                            ((ortholog_A.protein, ortholog_A.ortholog_id),
                             (ortholog_B.protein, ortholog_B.ortholog_id)))

            for (protein_A, ortholog_id_A), (protein_B, ortholog_id_B) \
                    in interactor_pairs:
                homogenous = (protein_A == protein_B)
                interaction = session.query(Interaction).filter(
                    Interaction.interactors.contains(protein_A),
                    Interaction.interactors.contains(protein_B),
                    Interaction.homogenous == homogenous).first()

                if interaction is None:
                    # New interaction: mark it as derived from Ecoli and
                    # store `homogenous`, since the lookup above filters on
                    # that column.
                    interaction = Interaction(
                        strain=protein_A.strain,
                        homogenous=homogenous,
                        interactors=[protein_A, protein_B],
                        type='p-p',
                        ortholog_derived='Ecoli')
                    interaction.sources.append(source)
                    session.add(interaction)
                    session.commit()
                elif source not in interaction.sources:
                    interaction.sources.append(source)

                # If the interaction already existed, its interactors may be
                # stored in the opposite order. Line the ortholog ids up with
                # the stored order so it is easy to see which Pseudomonas
                # interactor matches which Ecoli ortholog.
                if interaction.interactors[0] == protein_A:
                    interactor_a, interactor_b = ortholog_id_A, ortholog_id_B
                else:
                    interactor_a, interactor_b = ortholog_id_B, ortholog_id_A

                interaction_type = ('TF/sigma-binding site (' +
                                    row['Regulatory effect'] + 'regulation)')
                comment = (ortholog_id_A + ' regulates(' +
                           row['Regulatory effect'] + ') ' + ortholog_id_B)

                # One reference per evidence entry; the first and last
                # characters of the Evidence field are stripped before
                # splitting (presumably enclosing brackets).
                for evidence in row['Evidence'][1:-1].split(', '):
                    reference = session.query(InteractionReference).filter_by(
                        detection_method=evidence,
                        interaction_type=interaction_type,
                        source_db='regulondb',
                        confidence=row['Evidence type'],
                        comment=comment,
                        interactor_a=interactor_a,
                        interactor_b=interactor_b).first()
                    if reference is None:
                        reference = InteractionReference(
                            detection_method=evidence,
                            interaction_type=interaction_type,
                            comment=comment,
                            source_db='regulondb',
                            confidence=row['Evidence type'],
                            interactor_a=interactor_a,
                            interactor_b=interactor_b)
                        interaction.references.append(reference)
                        reference.sources.append(source)
                    else:
                        # Existing reference: only add the missing links.
                        if interaction not in reference.interactions:
                            interaction.references.append(reference)
                        if source not in reference.sources:
                            reference.sources.append(source)

    session.commit()
    print('regulondb', session.query(Interaction).count())
Exemple #5
0
def parse_ecoli_imex(session):
    """Derive interactions from the E. coli IMEx PSICQUIC export.

    Reads the tab-delimited ``Data/Ecoli/PSICQUIC/IMEx.txt``. Interactor B
    must map to at least one ``OrthologEcoli`` (by ``uniprotkb`` or ``refseq``
    id). Interactor A is either mapped the same way or, for a ``chebi`` id,
    resolved to a ``Metabolite`` that is created when missing. Ortholog pairs
    with equal ``strain_protein`` (and metabolite/ortholog pairs) become
    interactions: an existing interaction gets 'cfe' recorded in
    ``ortholog_derived``; a new one is created with ``ortholog_derived='fe'``.
    The total interaction count is printed at the end.

    Args:
        session: ORM session used for every query, add and commit.
    """
    # Identifier prefix -> name of the OrthologEcoli column to match against.
    column_name_by_prefix = {
        'uniprotkb': 'ortholog_uniprot',
        'refseq': 'ortholog_refseq',
    }

    def orthologs_for(id_parts):
        # Return all orthologs matching a "prefix:value" id that was split on
        # ':'; ids with an unrecognised prefix give an empty list.
        column_name = column_name_by_prefix.get(id_parts[0])
        if column_name is None:
            return []
        column = getattr(OrthologEcoli, column_name)
        return session.query(OrthologEcoli).filter(
            column == id_parts[1]).all()

    with open('Data/Ecoli/PSICQUIC/IMEx.txt') as handle:
        records = csv.DictReader(handle, delimiter='\t')
        for record in records:
            raw_a = record['#ID(s) interactor A']
            raw_b = record['ID(s) interactor B']
            # '-' marks a missing interactor id.
            if raw_a == '-' or raw_b == '-':
                continue

            matches_b = orthologs_for(raw_b.split(':'))
            if not matches_b:
                continue

            parts_a = raw_a.split(':')
            matches_a = orthologs_for(parts_a)
            metabolite = None
            if parts_a[0] == 'chebi':
                metabolite = session.query(Metabolite).filter(
                    Metabolite.chebi == parts_a[1]).first()
                if metabolite is None:
                    metabolite = Metabolite(id=parts_a[1], chebi=parts_a[1])
                    session.add(metabolite)
                    session.commit()

            # Each entry is ((interactor, id), (interactor, id)).
            pairs = []
            for match_a in matches_a:
                for match_b in matches_b:
                    if match_a is None or match_b is None:
                        continue
                    # Only pair Pseudomonas proteins from the same strain.
                    if match_a.strain_protein == match_b.strain_protein:
                        pairs.append(
                            ((match_a.protein, match_a.ortholog_id),
                             (match_b.protein, match_b.ortholog_id)))

            if metabolite is not None:
                for match_b in matches_b:
                    pairs.append(
                        ((metabolite, metabolite.id),
                         (match_b.protein, match_b.ortholog_id)))

            for (first, _), (second, _) in pairs:
                self_pair = (first == second)
                interaction = session.query(Interaction).filter(
                    Interaction.interactors.contains(first),
                    Interaction.interactors.contains(second),
                    Interaction.homogenous == self_pair).first()

                if interaction is None:
                    # Take the strain from whichever side is a protein
                    # (type 'p'); the first interactor may be a metabolite.
                    strain_owner = first if first.type == 'p' else second
                    # NOTE(review): `homogenous` is used for the lookup above
                    # but is not set on the new row -- confirm the model
                    # populates it.
                    interaction = Interaction(
                        strain=strain_owner.strain,
                        interactors=[first, second],
                        type=(first.type + '-' + second.type),
                        ortholog_derived='fe')
                    session.add(interaction)
                    session.commit()
                    continue

                # Existing interaction: record 'cfe' unless an 'fe'-style
                # marker is already present.
                if interaction.ortholog_derived is None:
                    interaction.ortholog_derived = 'cfe'
                elif 'fe' not in interaction.ortholog_derived:
                    interaction.ortholog_derived += ', cfe'
                session.commit()

                # NOTE: creation of InteractionReference / InteractionSource
                # rows for IMEx was commented out in the original code and is
                # not performed here.
        session.commit()
        print(session.query(Interaction).count())
Exemple #6
0
def parse_ecoli_uniprot(session):
    """Derive interactions from the E. coli UniProt PSICQUIC export.

    Reads the tab-delimited ``Ecoli/PSICQUIC/UniProt.txt``. Interactor B must
    map to at least one ``OrthologEcoli`` by ``uniprotkb`` id. Interactor A
    is either mapped the same way or, for a ``chebi`` id, resolved to a
    ``Metabolite`` that is created when missing. Ortholog pairs with equal
    ``strain_protein`` (and metabolite/ortholog pairs) become interactions:
    an existing interaction gets 'cfe' recorded in ``ortholog_derived``; a
    new one is created with ``ortholog_derived='fe'``. For every pair an
    ``InteractionReference`` is added, plus a 'UniProt' ``InteractionSource``
    when the interaction has none. Reference fields that cannot be parsed
    from the row are stored as ``None``. The total interaction count is
    printed at the end.

    Args:
        session: ORM session used for every query, add and commit.
    """

    def psimi_code(field):
        # Four characters following 'MI:' in the field, or None when the
        # field carries no PSI-MI id.
        if 'MI:' not in field:
            return None
        return field.split('MI:')[1][:4]

    def parenthesised(field):
        # Text following the first '(' with the final character dropped, or
        # None when the field has no '('.
        if '(' not in field:
            return None
        return field.split('(')[1][:-1]

    with open('Ecoli/PSICQUIC/UniProt.txt') as csvfile:
        reader = csv.DictReader(csvfile, delimiter='\t')
        for row in reader:
            interactors = []
            orthologs_B = []
            id_B = row['ID(s) interactor B'].split(':')
            if id_B[0] == 'uniprotkb':
                orthologs_B = session.query(OrthologEcoli).filter(
                    OrthologEcoli.ortholog_uniprot == id_B[1]).all()

            if not orthologs_B:
                continue

            orthologs_A = []
            metabolite = None
            id_A = row['#ID(s) interactor A'].split(':')
            if id_A[0] == 'uniprotkb':
                orthologs_A = session.query(OrthologEcoli).filter(
                    OrthologEcoli.ortholog_uniprot == id_A[1]).all()
            elif id_A[0] == 'chebi':
                metabolite = session.query(Metabolite).filter(
                    Metabolite.chebi == id_A[1]).first()
                if metabolite is None:
                    metabolite = Metabolite(id=id_A[1], chebi=id_A[1])
                    session.add(metabolite)
                    session.commit()

            # Only pair Pseudomonas proteins from the same strain; keep the
            # ortholog id next to each protein for the reference below.
            for ortholog_A in orthologs_A:
                for ortholog_B in orthologs_B:
                    if ortholog_A.strain_protein == ortholog_B.strain_protein:
                        interactors.append(
                            ((ortholog_A.protein, ortholog_A.ortholog_id),
                             (ortholog_B.protein, ortholog_B.ortholog_id)))

            if metabolite is not None:
                for ortholog_B in orthologs_B:
                    interactors.append(
                        ((metabolite, metabolite.id),
                         (ortholog_B.protein, ortholog_B.ortholog_id)))

            detection_field = row['Interaction detection method(s)']
            type_field = row['Interaction type(s)']
            database_field = row['Source database(s)']
            author_field = row['Publication 1st author(s)']
            publication_field = row['Publication Identifier(s)']

            for (first, first_id), (second, second_id) in interactors:
                homogenous = (first == second)
                interaction = session.query(Interaction).filter(
                    Interaction.interactors.contains(first),
                    Interaction.interactors.contains(second),
                    Interaction.homogenous == homogenous).first()
                if interaction is not None:
                    # Existing interaction: record 'cfe' unless an 'fe'-style
                    # marker is already present.
                    if interaction.ortholog_derived is None:
                        interaction.ortholog_derived = 'cfe'
                    elif 'fe' not in interaction.ortholog_derived:
                        interaction.ortholog_derived += ', cfe'
                    session.commit()
                else:
                    # The first interactor may be a metabolite, so take the
                    # strain from whichever side is a protein (type 'p').
                    strain_owner = first if first.type == 'p' else second
                    # NOTE(review): `homogenous` is used for the lookup above
                    # but is not set on the new row -- confirm the model
                    # populates it.
                    interaction = Interaction(
                        strain=strain_owner.strain,
                        interactors=[first, second],
                        # Both sides contribute their `.type` string.
                        type=(first.type + '-' + second.type),
                        ortholog_derived='fe')
                    detection_code = psimi_code(detection_field)
                    if (detection_code is not None
                            and is_experimental_psimi(detection_code)):
                        interaction.is_experimental = 1
                    session.add(interaction)
                    session.commit()

                # Line the ortholog ids up with the order in which the
                # (possibly pre-existing) interaction stores its interactors.
                if interaction.interactors[0] == first:
                    interactor_a, interactor_b = first_id, second_id
                else:
                    interactor_a, interactor_b = second_id, first_id

                # '-' marks a missing author field.
                author, pub_date = None, None
                if author_field != '-':
                    author = author_field.split(' ')[0]
                    pub_date = parenthesised(author_field)

                pmid = None
                if 'pubmed:' in publication_field:
                    pmid = publication_field.split('pubmed:')[1].split('|')[0]

                reference = InteractionReference(
                    interaction_id=interaction.id,
                    psimi_detection=psimi_code(detection_field),
                    detection_method=parenthesised(detection_field),
                    author_ln=author,
                    pub_date=pub_date,
                    pmid=pmid,
                    psimi_type=psimi_code(type_field),
                    interaction_type=parenthesised(type_field),
                    psimi_db=psimi_code(database_field),
                    source_db=parenthesised(database_field),
                    confidence_score=row['Confidence value(s)'],
                    interactor_a_id=interactor_a,
                    interactor_b_id=interactor_b)
                session.add(reference)

                source = session.query(InteractionSource).filter(
                    InteractionSource.interaction_id == interaction.id,
                    InteractionSource.data_source == 'UniProt').first()

                if source is None:
                    source = InteractionSource(interaction_id=interaction.id,
                                               data_source='UniProt')
                    session.add(source)

        session.commit()
        print(session.query(Interaction).count())
Exemple #7
0
def parse(session):
    """Import the Galan-Vasquez regulatory network for PAO1 and PA14.

    Reads ``Data/PAO1_PA14/regulatory_network.csv``. A row may list several
    comma-separated strains; only 'PAO1' and 'PA14' are processed. For each
    such strain the ``Regulator`` and ``Target`` are resolved (by protein
    name within the strain, then as an ``Interactor`` id), the interaction is
    created when missing, tagged with the strain's source, and given one new
    ``InteractionReference`` per entry in the row's ``evidence`` field (or a
    single reference without detection method when that field is empty). The
    total interaction count is printed at the end.

    Args:
        session: ORM session used for every query, add and commit.
    """

    def resolve(identifier, strain_name):
        # Search by protein name within the strain first; the id may instead
        # be a gene locus, so fall back to an Interactor primary-key lookup.
        hit = session.query(Protein).filter_by(
            name=identifier, strain=strain_name).first()
        if hit is None:
            hit = session.query(Interactor).get(identifier)
        return hit

    # Create both sources up front, since every interaction of a strain
    # shares one.
    # Note: is_experimental is set to 2 because we cannot confirm that
    # detection method was experimental or not
    pao1_source = InteractionSource(data_source='Galan-Vasquez(PAO1)',
                                    is_experimental=2)
    pa14_source = InteractionSource(data_source='Galan-Vasquez(PA14)',
                                    is_experimental=2)
    session.add(pao1_source)
    session.add(pa14_source)
    session.commit()
    source_for_strain = {'PAO1': pao1_source, 'PA14': pa14_source}

    with open('Data/PAO1_PA14/regulatory_network.csv') as handle:
        for record in csv.DictReader(handle):
            # NOTE(review): strain names are not stripped, so an entry such
            # as ' PA14' (after a ", " separator) is skipped -- confirm the
            # file never uses spaces here.
            for strain_name in record['Strain'].split(','):
                if strain_name not in source_for_strain:
                    continue

                regulator = resolve(record['Regulator'], strain_name)
                if regulator is None:
                    continue

                target = resolve(record['Target'], strain_name)
                if target is None:
                    continue

                self_pair = (regulator == target)
                interaction = session.query(Interaction).filter(
                    Interaction.interactors.contains(regulator),
                    Interaction.interactors.contains(target),
                    Interaction.homogenous == self_pair).first()
                if interaction is None:
                    interaction = Interaction(
                        strain=strain_name,
                        type='p-p',
                        homogenous=self_pair,
                        interactors=[regulator, target])
                    session.add(interaction)
                    session.commit()

                strain_source = source_for_strain[strain_name]
                if strain_source not in interaction.sources:
                    interaction.sources.append(strain_source)

                # Empty fields mean "not available".
                if record['source_db'] != '':
                    source_db = record['source_db']
                else:
                    source_db = None
                if record['evidence'] != '':
                    detections = record['evidence'].split(', ')
                else:
                    detections = [None]

                # A new reference is created for every detection method and
                # linked to both the interaction and the strain's source.
                for detection in detections:
                    reference = InteractionReference(
                        detection_method=detection,
                        pmid=record['pmid'],
                        interaction_type=('TF/sigma-binding site (' +
                                          record['mode'] + 'regulation)'),
                        source_db=source_db,
                        comment=(regulator.id + ' regulates(' +
                                 record['mode'] + ') ' + target.id))
                    interaction.references.append(reference)
                    reference.sources.append(strain_source)

    session.commit()
    print('regnet', session.query(Interaction).count())
Exemple #8
0
def parse_mpidb(session):
    """Import the PAO1 interactions listed in the MPIDB PSICQUIC export.

    Reads 'PAO1/PSICQUIC/MPIDB.txt' (tab-delimited, header row supplies the
    column names) and keeps only rows where both taxid columns start with
    'taxid:208964(pseae)'. For every kept row whose two interactors can be
    resolved, the matching Interaction is looked up or created, a new
    InteractionReference is always added, and InteractionXref /
    InteractionSource rows are added when they do not exist yet. The total
    number of interactions is printed once the file has been processed.

    :param session: database session used for every query, add and commit
    """
    target_taxid = 'taxid:208964(pseae)'

    def resolve(identifier):
        # an Interactor with this id wins over a Protein with this uniprotkb;
        # None when neither is present
        by_id = session.query(Interactor).filter(Interactor.id == identifier)
        if by_id.first() is not None:
            return by_id.one()
        by_uniprot = session.query(Protein).filter(
            Protein.uniprotkb == identifier)
        if by_uniprot.first() is not None:
            return by_uniprot.one()
        return None

    with open('PAO1/PSICQUIC/MPIDB.txt') as handle:
        for record in csv.DictReader(handle, delimiter='\t'):
            taxid_a = record['Taxid interactor A'].split('|')[0]
            taxid_b = record['Taxid interactor B'].split('|')[0]
            if not (taxid_a == target_taxid and taxid_b == target_taxid):
                continue

            accession_a = record['#ID(s) interactor A'].split(':')[1]
            accession_b = record['ID(s) interactor B'].split(':')[1]

            # both lookups are always performed; a row needs both to succeed
            partners = [
                found
                for found in map(resolve, (accession_a, accession_b))
                if found is not None
            ]
            if len(partners) != 2:
                continue

            first, second = partners
            homogenous = (first == second)

            interaction = session.query(Interaction).filter(
                Interaction.interactors.contains(first),
                Interaction.interactors.contains(second),
                Interaction.homogenous == homogenous).first()

            if interaction is None:
                interaction = Interaction(strain='PAO1',
                                          type=first.type + '-' + second.type,
                                          homogenous=homogenous,
                                          interactors=partners)
                psimi_code = record['Interaction detection method(s)'].split(
                    'MI:')[1][:4]
                interaction.is_experimental = (
                    1 if is_experimental_psimi(psimi_code) else 0)
                session.add(interaction)
                session.commit()
            elif is_experimental_psimi(
                    record['Interaction detection method(s)'].split(
                        'MI:')[1][:4]):
                # an existing interaction is only ever upgraded to experimental
                interaction.is_experimental = 1

            # one reference per file row, no de-duplication
            session.add(InteractionReference(
                interaction_id=interaction.id,
                detection_method=record[
                    'Interaction detection method(s)'].split('(')[1][:-1],
                author_ln=record['Publication 1st author(s)'].split(' ')[0],
                pub_date=record['Publication 1st author(s)'].split(
                    '(')[1][:-1],
                pmid=record['Publication Identifier(s)'].split(
                    'pubmed:')[1][:8],
                confidence=record['Confidence value(s)'],
                interaction_type=record['Interaction type(s)'].split(
                    '(')[1][:-1],
                source_db=record['Source database(s)']))

            # each '|'-separated identifier is '<data source>:<accession>'
            for raw_xref in record['Interaction identifier(s)'].split('|'):
                parts = raw_xref.split(':')
                known_xref = session.query(InteractionXref).filter(
                    InteractionXref.accession == parts[1],
                    InteractionXref.interaction_id == interaction.id).first()
                if known_xref is None:
                    session.add(
                        InteractionXref(interaction_id=interaction.id,
                                        accession=parts[1],
                                        data_source=parts[0]))

            known_source = session.query(InteractionSource).filter(
                InteractionSource.interaction_id == interaction.id,
                InteractionSource.data_source == 'MPIDB').first()
            if known_source is None:
                session.add(
                    InteractionSource(interaction_id=interaction.id,
                                      data_source='MPIDB'))

        session.commit()
        print(session.query(Interaction).count())
def _psimi_first_id(field, prefix):
    """Return the first identifier following *prefix* in *field*, or None if absent."""
    if prefix not in field:
        return None
    return field.split(prefix)[1].split('|')[0]


def _psimi_chebi_id(field):
    """Return the ChEBI identifier found in *field*, or None if there is none."""
    if 'chebi' not in field or 'CHEBI:' not in field:
        return None
    # the last character is dropped - presumably the closing quote of
    # chebi:"CHEBI:<id>"; verify against the input files
    return field.split('CHEBI:')[1].split('|')[0][:-1]


def _psimi_metabolite_ids(id_field, alt_id_field):
    """Return (chebi, pubchem) for one interactor; either value may be None."""
    chebi = _psimi_chebi_id(id_field)
    pubchem = _psimi_first_id(alt_id_field, 'pubchem:')
    if chebi is None:
        chebi = _psimi_chebi_id(alt_id_field)
    return chebi, pubchem


def _psimi_find_orthologs(session, uniprot, refseq):
    """Return the OrthologEcoli rows for a protein, or None when nothing matches.

    The uniprot id is tried first and the refseq id is used as a fallback.
    Query.all() returns an empty list (never None) when nothing matches, so
    empty results are normalised to None for the callers' "not found" checks.
    """
    orthologs = None
    if uniprot is not None:
        orthologs = session.query(OrthologEcoli).filter_by(
            ortholog_uniprot=uniprot).all() or None
    if orthologs is None and refseq is not None:
        orthologs = session.query(OrthologEcoli).filter_by(
            ortholog_refseq=refseq).all() or None
    return orthologs


def parse_psimi(session, file, source):
    """Import ortholog-derived interactions from an E. coli PSI-MI TAB file.

    Each row of *file* (tab-delimited, column names taken from the externally
    defined ``cols``) describes an interaction between two interactors. Protein
    interactors are mapped onto OrthologEcoli rows; an interactor without a
    uniprot/refseq id is treated as a metabolite identified by ChEBI/PubChem
    ids. For every resulting protein-protein or metabolite-protein pair the
    Interaction is found or created (marked ``ortholog_derived='Ecoli'``), and
    the InteractionSource and InteractionReference rows are linked to it.
    Commits at the end and prints *source* with the total interaction count.

    :param session: database session used for all queries, adds and commits
    :param file: path of the PSI-MI TAB file to read
    :param source: data source name stored on InteractionSource.data_source
    """
    with open(file) as csvfile:
        reader = csv.DictReader(csvfile, fieldnames=cols, delimiter='\t')

        # iterate through each interaction
        for row in reader:
            # interactor A: a protein whose ids yield no orthologs cannot be
            # mapped, so the whole interaction is skipped
            uniprot_A = _psimi_first_id(row['interactor_A'], 'uniprotkb:')
            refseq_A = _psimi_first_id(row['interactor_A'], 'refseq:')
            orthologs_A = _psimi_find_orthologs(session, uniprot_A, refseq_A)
            if orthologs_A is None and (uniprot_A is not None
                                        or refseq_A is not None):
                continue

            # same as for interactor A above
            uniprot_B = _psimi_first_id(row['interactor_B'], 'uniprotkb:')
            refseq_B = _psimi_first_id(row['interactor_B'], 'refseq:')
            orthologs_B = _psimi_find_orthologs(session, uniprot_B, refseq_B)
            if orthologs_B is None and (uniprot_B is not None
                                        or refseq_B is not None):
                continue

            # neither interactor is a protein: nothing to import
            if orthologs_A is None and orthologs_B is None:
                continue

            # exactly one side may be a metabolite; `orthologs` then holds the
            # protein side of the metabolite-protein interaction
            chebi, pubchem, orthologs = None, None, None
            if orthologs_A is None:
                chebi, pubchem = _psimi_metabolite_ids(row['interactor_A'],
                                                       row['altID_A'])
                # no metabolite id either: interactor A is unidentified
                if chebi is None and pubchem is None:
                    continue
                orthologs = orthologs_B
            elif orthologs_B is None:
                chebi, pubchem = _psimi_metabolite_ids(row['interactor_B'],
                                                       row['altID_B'])
                if chebi is None and pubchem is None:
                    continue
                orthologs = orthologs_A

            # find the metabolite (ChEBI first, then PubChem) or create it;
            # reaching this point guarantees a non-empty protein partner list
            metabolite = None
            if chebi is not None or pubchem is not None:
                metabolite_id = None
                if chebi is not None:
                    metabolite_id = chebi
                    metabolite = session.query(Metabolite).filter_by(
                        chebi=chebi).first()
                if metabolite is None and pubchem is not None:
                    metabolite_id = pubchem
                    metabolite = session.query(Metabolite).filter_by(
                        pubchem=pubchem).first()
                if metabolite is None:
                    metabolite = Metabolite(id=metabolite_id,
                                            chebi=chebi,
                                            pubchem=pubchem)
                    session.add(metabolite)
                else:
                    # fill in whichever id the stored metabolite is missing
                    if metabolite.pubchem is None:
                        metabolite.pubchem = pubchem
                    if metabolite.chebi is None:
                        metabolite.chebi = chebi

            # each pair is ((interactor, E. coli id), (interactor, E. coli id))
            interactors = []
            if metabolite is None:
                # p-p interaction: only pair orthologs from the same strain
                for ortholog_A in orthologs_A:
                    for ortholog_B in orthologs_B:
                        if ortholog_A.strain_protein == ortholog_B.strain_protein:
                            interactors.append(
                                ((ortholog_A.protein, ortholog_A.ortholog_id),
                                 (ortholog_B.protein, ortholog_B.ortholog_id)))
            else:
                # pair the metabolite with every protein ortholog
                for ortholog in orthologs:
                    interactors.append(
                        ((metabolite, metabolite.id),
                         (ortholog.protein, ortholog.ortholog_id)))

            # for each interactor pair, create the interaction if it doesn't
            # exist, then attach source and references
            for (first, first_id), (second, second_id) in interactors:
                homogenous = (first == second)
                interaction = session.query(Interaction).filter(
                    Interaction.interactors.contains(first),
                    Interaction.interactors.contains(second),
                    Interaction.homogenous == homogenous).first()
                if interaction is None:
                    # one interactor may be a metabolite, so the strain is
                    # taken from the protein
                    strain = first.strain if first.type == 'p' else second.strain
                    # homogenous must be stored, otherwise the lookup above
                    # cannot match this interaction on later rows
                    interaction = Interaction(
                        strain=strain,
                        homogenous=homogenous,
                        interactors=[first, second],
                        type=first.type + '-' + second.type,
                        ortholog_derived='Ecoli')
                    session.add(interaction)
                    session.commit()

                ref_parameter_list = get_psimi_ref_list(row)

                # keep interactor_a / interactor_b aligned with the order of
                # the stored interaction's interactors so each E. coli id lines
                # up with its Pseudomonas counterpart
                if interaction.interactors[0] == first:
                    interactor_a, interactor_b = first_id, second_id
                else:
                    interactor_a, interactor_b = second_id, first_id

                is_experimental = is_experimental_interaction(row)

                # find or create the source and link it to the interaction
                nsource = session.query(InteractionSource).filter_by(
                    data_source=source,
                    is_experimental=is_experimental).first()
                if nsource is None:
                    nsource = InteractionSource(
                        data_source=source, is_experimental=is_experimental)
                    interaction.sources.append(nsource)
                elif nsource not in interaction.sources:
                    interaction.sources.append(nsource)

                # find or create every reference listed for this row
                for ref in ref_parameter_list:
                    ref_fields = dict(detection_method=ref[0],
                                      author_ln=ref[1],
                                      pub_date=ref[2],
                                      pmid=ref[3],
                                      interaction_type=ref[4],
                                      source_db=ref[5],
                                      confidence=ref[6],
                                      interactor_a=interactor_a,
                                      interactor_b=interactor_b)
                    nref = session.query(InteractionReference).filter_by(
                        **ref_fields).first()
                    if nref is None:
                        nref = InteractionReference(**ref_fields)
                        interaction.references.append(nref)
                        nref.sources.append(nsource)
                    else:
                        if interaction not in nref.interactions:
                            nref.interactions.append(interaction)
                        if nsource not in nref.sources:
                            nref.sources.append(nsource)

    session.commit()
    print(source, session.query(Interaction).count())
Exemple #10
0
def parse_mentha(file, strain, taxid, session):
    """Import the interactions of one strain from a mentha PSICQUIC export.

    Rows of the tab-delimited *file* are kept only when both taxid columns
    start with *taxid*. For each kept row whose two interactors can be
    resolved, the Interaction is looked up or created, a new
    InteractionReference is added, and the InteractionXref and the 'mentha'
    InteractionSource are added when missing. The total interaction count is
    printed at the end. Commits happen only when a new Interaction is created;
    no final commit is issued here.

    :param file: path of the mentha file to read
    :param strain: strain stored on newly created interactions
    :param taxid: value both taxid columns must start with (before any '|')
    :param session: database session used for all queries and adds
    """
    with open(file) as tsv:
        for entry in csv.DictReader(tsv, delimiter='\t'):
            organism_a = entry['Taxid interactor A'].split('|')[0]
            organism_b = entry['Taxid interactor B'].split('|')[0]
            if organism_a != taxid or organism_b != taxid:
                continue

            accession_a = entry['#ID(s) interactor A'].split(':')[1]
            accession_b = entry['ID(s) interactor B'].split(':')[1]

            # resolve each accession: Interactor id first, Protein uniprotkb second
            interactors = []
            for accession in (accession_a, accession_b):
                id_query = session.query(Interactor).filter(
                    Interactor.id == accession)
                if id_query.first() is not None:
                    interactors.append(id_query.one())
                    continue
                uniprot_query = session.query(Protein).filter(
                    Protein.uniprotkb == accession)
                if uniprot_query.first() is not None:
                    interactors.append(uniprot_query.one())

            if len(interactors) != 2:
                continue
            left, right = interactors
            homogenous = (left == right)

            interaction = session.query(Interaction).filter(
                Interaction.interactors.contains(left),
                Interaction.interactors.contains(right),
                Interaction.homogenous == homogenous).first()

            if interaction is None:
                pair_type = (left.type + '-' + right.type)
                interaction = Interaction(strain=strain,
                                          type=pair_type,
                                          homogenous=homogenous,
                                          interactors=interactors)
                psimi_code = entry['Interaction detection method(s)'].split(
                    'MI:')[1][:4]
                interaction.is_experimental = (
                    1 if is_experimental_psimi(psimi_code) else 0)
                session.add(interaction)
                session.commit()
            elif is_experimental_psimi(
                    entry['Interaction detection method(s)'].split(
                        'MI:')[1][:4]):
                # existing interactions are only ever upgraded to experimental
                interaction.is_experimental = 1

            # one reference per file row, no de-duplication
            session.add(InteractionReference(
                interaction_id=interaction.id,
                detection_method=entry[
                    'Interaction detection method(s)'].split('(')[1][:-1],
                pmid=entry['Publication Identifier(s)'].split(
                    'pubmed:')[1][:8],
                interaction_type=entry['Interaction type(s)'].split(
                    '(')[1][:-1],
                source_db=entry['Source database(s)'].split('(')[1][:-1],
                confidence_score=entry['Confidence value(s)']))

            # the identifier column is '<data source>:<accession>'
            xref_parts = entry['Interaction identifier(s)'].split(':')
            known_xref = session.query(InteractionXref).filter(
                InteractionXref.accession == xref_parts[1],
                InteractionXref.interaction_id == interaction.id).first()
            if known_xref is None:
                session.add(InteractionXref(interaction_id=interaction.id,
                                            accession=xref_parts[1],
                                            data_source=xref_parts[0]))

            known_source = session.query(InteractionSource).filter(
                InteractionSource.interaction_id == interaction.id,
                InteractionSource.data_source == 'mentha').first()
            if known_source is None:
                session.add(InteractionSource(interaction_id=interaction.id,
                                              data_source='mentha'))

        print(session.query(Interaction).count())
Exemple #11
0
def parse_irefindex(file, strain, taxid, session):
    """Import the interactions of one strain from an iRefIndex PSICQUIC export.

    Rows of the tab-delimited *file* are kept only when both taxid columns
    start with *taxid*. For each kept row whose two interactors can be
    resolved, the Interaction is looked up or created, one
    InteractionReference is added per (pmid, detection method) combination,
    and InteractionXref rows and the 'iRefIndex' InteractionSource are added
    when missing. The total interaction count is printed at the end.

    Commits happen only when a new Interaction is created; no final commit is
    issued here, so objects added after the last such commit stay pending in
    the session.

    :param file: path of the iRefIndex file to read
    :param strain: strain stored on newly created interactions
    :param taxid: value both taxid columns must start with (before any '|')
    :param session: database session used for all queries and adds
    """

    def find_interactor(id_field):
        # id_field is an id column split on ':' -> [database, accession, ...].
        # one_or_none() returns None for no match and raises for several,
        # exactly like the former first()-then-one() pair, in a single query.
        database = id_field[0]
        if database == 'uniprotkb':
            match = session.query(Interactor).filter(
                Interactor.id == id_field[1]).one_or_none()
            if match is None:
                match = session.query(Protein).filter(
                    Protein.uniprotkb == id_field[1]).one_or_none()
            return match
        if database == 'refseq':
            return session.query(Protein).filter(
                Protein.ncbi_acc == id_field[1]).one_or_none()
        return None

    with open(file) as csvfile:
        reader = csv.DictReader(csvfile, delimiter='\t')
        for row in reader:
            if (row['Taxid interactor A'].split('|')[0] != taxid
                    or row['Taxid interactor B'].split('|')[0] != taxid):
                continue

            interactor_a = find_interactor(
                row['#ID(s) interactor A'].split(':'))
            if interactor_a is None:
                continue
            interactor_b = find_interactor(
                row['ID(s) interactor B'].split(':'))
            if interactor_b is None:
                continue

            interactors = [interactor_a, interactor_b]
            homogenous = (interactor_a == interactor_b)

            # '-' means the file gives no detection method for this row; the
            # PSI-MI code may only be parsed when a method is present
            detection_field = row['Interaction detection method(s)']
            has_detection = detection_field != '-'
            experimental = has_detection and is_experimental_psimi(
                detection_field.split('MI:')[1][:4])

            interaction = session.query(Interaction).filter(
                Interaction.interactors.contains(interactor_a),
                Interaction.interactors.contains(interactor_b),
                Interaction.homogenous == homogenous).first()
            if interaction is None:
                interaction = Interaction(
                    strain=strain,
                    type=interactor_a.type + '-' + interactor_b.type,
                    homogenous=homogenous,
                    interactors=interactors)
                if has_detection:
                    interaction.is_experimental = 1 if experimental else 0
                # persist now so interaction.id is available for the
                # reference, xref and source rows created below
                session.add(interaction)
                session.commit()
            elif has_detection:
                if experimental:
                    interaction.is_experimental = 1
            elif interaction.is_experimental == 0:
                # no method given for an interaction so far marked
                # non-experimental: its status is reset to unknown
                interaction.is_experimental = None

            # [None] placeholders make sure one reference is still written
            # when the file lists no detection method / publication id
            author, date, interaction_type = None, None, None
            pmids, detections = [None], [None]
            if has_detection:
                detections = [method.split('(')[1][:-1]
                              for method in detection_field.split('|')]
            if row['Interaction type(s)'] != '-':
                interaction_type = row['Interaction type(s)'].split(
                    '(')[1][:-1]
            author_field = row['Publication 1st author(s)']
            if author_field != '-':
                # field is split on '-': first part is the author (first
                # letter upper-cased), second part is stored as the date
                author_parts = author_field.split('-')
                author = author_parts[0][:1].upper() + author_parts[0][1:]
                date = author_parts[1]
            publication_field = row['Publication Identifier(s)']
            if publication_field != '-':
                pmids = [identifier.split('pubmed:')[1][:8]
                         for identifier in publication_field.split('|')]

            source_db = row['Source database(s)'].split('(')[1][:-1]
            confidence = row['Confidence value(s)']
            for pmid in pmids:
                for detection in detections:
                    session.add(InteractionReference(
                        interaction_id=interaction.id,
                        detection_method=detection,
                        author_ln=author,
                        pub_date=date,
                        pmid=pmid,
                        interaction_type=interaction_type,
                        source_db=source_db,
                        confidence_score=confidence))

            # each '|'-separated identifier is '<data source>:<accession>'
            for raw_xref in row['Interaction identifier(s)'].split('|'):
                xref_field = raw_xref.split(':')
                xref = session.query(InteractionXref).filter(
                    InteractionXref.accession == xref_field[1],
                    InteractionXref.interaction_id == interaction.id).first()
                if xref is None:
                    session.add(InteractionXref(interaction_id=interaction.id,
                                                accession=xref_field[1],
                                                data_source=xref_field[0]))

            source = session.query(InteractionSource).filter(
                InteractionSource.interaction_id == interaction.id,
                InteractionSource.data_source == 'iRefIndex').first()
            if source is None:
                session.add(InteractionSource(interaction_id=interaction.id,
                                              data_source='iRefIndex'))

        print(session.query(Interaction).count())
Exemple #12
0
def parse_ecoli_bindingdb(session):
    """Import ortholog-derived metabolite-protein interactions from BindingDB.

    Reads 'Data/Ecoli/PSICQUIC/BindingDB.txt'. Interactor B of each row is an
    E. coli protein that is mapped onto OrthologEcoli rows through its uniprot
    id; interactor A is a metabolite identified by a ChEBI or PubChem id. For
    every ortholog the metabolite-protein Interaction is found or created, and
    an InteractionReference plus (when missing) the 'BindingDB'
    InteractionSource are added. Commits once more after the whole file.

    :param session: database session used for all queries, adds and commits
    """
    with open('Data/Ecoli/PSICQUIC/BindingDB.txt') as csvfile:
        # NOTE(review): no delimiter is passed, so the file is parsed as
        # comma-separated, while other PSICQUIC parsers in this file pass
        # delimiter='\t' - confirm the format of this export
        reader = csv.DictReader(csvfile)

        # iterate through each interaction
        for row in reader:
            # interactor B must carry a uniprot id to be mapped to orthologs
            protein_ids = row['ID(s) interactor B']
            if 'uniprotkb' not in protein_ids:
                continue
            uniprot_protein = protein_ids.split('uniprotkb:')[1].split('|')[0]

            # (protein, E. coli ortholog id) for every matching ortholog
            orthologs = [
                (ecoli_ortholog.protein, ecoli_ortholog.ortholog_id)
                for ecoli_ortholog in session.query(OrthologEcoli).filter(
                    OrthologEcoli.ortholog_uniprot == uniprot_protein).all()
            ]
            if not orthologs:
                continue

            chebi_metabolite, pubchem_metabolite = None, None

            # check if interactor A has ChEBI id
            # NOTE(review): keeps the text between the first and second ':'
            # minus its first and last character - presumably stripping the
            # quotes of chebi:"<id>"; verify against the file
            for identifier in row['#ID(s) interactor A'].split('|'):
                if identifier.split(':')[0] == 'chebi':
                    chebi_metabolite = identifier.split(':')[1][1:-1]

            metabolite = None

            # if interactor A has ChEBI id, query for matching metabolite
            if chebi_metabolite is not None:
                metabolite = session.query(Metabolite).filter(
                    Metabolite.chebi == chebi_metabolite).first()

            # if unable to identify metabolite based on ChEBI id, try the
            # pubchem id (matched against the metabolite's primary id)
            if metabolite is None:
                for identifier in row['Alt. ID(s) interactor A'].split('|'):
                    if identifier.split(':')[0] == 'pubchem':
                        pubchem_metabolite = identifier.split(':')[1]

                if pubchem_metabolite is not None:
                    metabolite = session.query(Metabolite).filter(
                        Metabolite.id == pubchem_metabolite).first()

            # if unable to find interactor A in database, create new metabolite
            if metabolite is None:
                # the pubchem id is the preferred key; fall back to the ChEBI
                # id, and skip rows that identify the metabolite by neither
                metabolite_id = (pubchem_metabolite
                                 if pubchem_metabolite is not None
                                 else chebi_metabolite)
                if metabolite_id is None:
                    continue
                metabolite = Metabolite(id=metabolite_id,
                                        pubchem=pubchem_metabolite,
                                        chebi=chebi_metabolite)
                session.add(metabolite)
                session.commit()

            # publication details are the same for every ortholog of this row
            author, date, pmid = None, None, None
            if row['Publication 1st author(s)'] != '-':
                author = row['Publication 1st author(s)'].split(' ')[0]
                date = row['Publication 1st author(s)'].split('(')[1][:-1]
            if 'pubmed:' in row['Publication Identifier(s)']:
                pmid = row['Publication Identifier(s)'].split(
                    'pubmed:')[1][:8]
            reference_fields = dict(
                detection_method=row['Interaction detection method(s)'].split(
                    '(')[1][:-1],
                author_ln=author,
                pmid=pmid,
                pub_date=date,
                interaction_type=row['Interaction type(s)'].split(
                    '(')[1][:-1],
                source_db=row['Source database(s)'].split('(')[1][:-1],
                confidence=row['Confidence value(s)'].split('(')[0])

            for protein, ortholog_id in orthologs:
                interaction = session.query(Interaction).filter(
                    Interaction.interactors.contains(protein),
                    Interaction.interactors.contains(metabolite)).first()

                if interaction is not None:
                    if interaction.ortholog_derived is None:
                        interaction.ortholog_derived = 'cfe'
                    elif 'fe' not in interaction.ortholog_derived:
                        interaction.ortholog_derived += ', cfe'
                    session.commit()
                else:
                    # the strain comes from the protein of the pair (the pair
                    # itself has no strain attribute)
                    interaction = Interaction(
                        strain=protein.strain,
                        interactors=[metabolite, protein],
                        type='p-m',
                        ortholog_derived='fe')
                    # should ortholog interactions be marked as experimental?
                    if is_experimental_psimi(
                            row['Interaction detection method(s)'].split(
                                'MI:')[1][:4]):
                        interaction.is_experimental = 1
                    session.add(interaction)
                    session.commit()

                # keep interactor_a / interactor_b in the same order as the
                # stored interaction's interactors
                if interaction.interactors[0] == metabolite:
                    interactor_a, interactor_b = metabolite.id, ortholog_id
                else:
                    interactor_a, interactor_b = ortholog_id, metabolite.id

                reference = InteractionReference(
                    interaction_id=interaction.id,
                    interactor_a=interactor_a,
                    interactor_b=interactor_b,
                    **reference_fields)

                source = session.query(InteractionSource).filter(
                    InteractionSource.interaction_id == interaction.id,
                    InteractionSource.data_source == 'BindingDB').first()

                if source is None:
                    source = InteractionSource(interaction_id=interaction.id,
                                               data_source='BindingDB')
                    session.add(source)
                session.add(reference)
        session.commit()
Exemple #13
0
def _find_psimi_interactor(field, session):
    """Look up the database interactor named by one PSI-MI TAB interactor field.

    The field is searched for a ``uniprotkb:`` and a ``refseq:`` identifier
    (each terminated by ``|`` or the end of the field). The UniProt id is
    tried first, as an ``Interactor`` primary key and then as
    ``Protein.uniprotkb``; the RefSeq id is tried last as ``Protein.ncbi_acc``.

    Args:
        field: raw text of an interactor column.
        session: database session used for the lookups.

    Returns:
        The first matching object, or None when nothing matched.
    """
    uniprot_id, refseq_id = None, None
    # test for the same token that is split on, so a field that merely
    # mentions 'uniprotkb'/'refseq' without an id cannot raise IndexError
    if 'uniprotkb:' in field:
        uniprot_id = field.split('uniprotkb:')[1].split('|')[0]
    if 'refseq:' in field:
        refseq_id = field.split('refseq:')[1].split('|')[0]

    interactor = None
    if uniprot_id is not None:
        # the id may be the primary key of an interactor ...
        interactor = session.query(Interactor).get(uniprot_id)
        # ... otherwise look for a protein carrying this uniprot id
        if interactor is None:
            interactor = session.query(Protein).filter_by(
                uniprotkb=uniprot_id).first()
    # fall back to the refseq accession if the uniprot id gave nothing
    if interactor is None and refseq_id is not None:
        interactor = session.query(Protein).filter_by(
            ncbi_acc=refseq_id).first()
    return interactor


def parse_psimi(file, strain, source, session):
    """Load protein-protein interactions from a tab-separated PSI-MI file.

    For every row whose two interactors can both be found in the database,
    the matching ``Interaction`` is fetched or created, and the interaction
    source, the references returned by ``get_psimi_ref_list`` and the cross
    references listed in the ``identifier`` column are attached to it.
    Rows with an unknown interactor are skipped. Prints ``source`` and the
    total number of interactions when done.

    Args:
        file: path of the PSI-MI TAB file; column names come from the
            module-level ``cols`` and the first line is skipped.
        strain: strain value stored on newly created interactions.
        source: ``data_source`` value of the interaction source.
        session: database session used for all queries and commits.
    """
    with open(file) as csvfile:
        reader = csv.DictReader(csvfile, delimiter='\t', fieldnames=cols)
        # skip the header line; the default keeps an empty file from raising
        next(reader, None)
        for row in reader:
            # if either interactor is unknown, move on to the next interaction
            interactor_A = _find_psimi_interactor(row['interactor_A'], session)
            if interactor_A is None:
                continue
            interactor_B = _find_psimi_interactor(row['interactor_B'], session)
            if interactor_B is None:
                continue

            homogenous = (interactor_A == interactor_B)
            interaction = session.query(Interaction).filter(
                Interaction.interactors.contains(interactor_A),
                Interaction.interactors.contains(interactor_B),
                Interaction.homogenous == homogenous).first()
            # if no interaction was found with the interactors, create a new one
            if interaction is None:
                interaction = Interaction(
                    strain=strain,
                    type='p-p',
                    homogenous=homogenous,
                    interactors=[interactor_A, interactor_B])
                session.add(interaction)
                # commit now so interaction.id is available for the xrefs below
                session.commit()

            ref_parameter_list = get_psimi_ref_list(row)
            is_experimental = is_experimental_interaction(row)

            # fetch or create the source, then attach it to the interaction
            nsource = session.query(InteractionSource).filter_by(
                data_source=source, is_experimental=is_experimental).first()
            if nsource is None:
                nsource = InteractionSource(data_source=source,
                                            is_experimental=is_experimental)
            if nsource not in interaction.sources:
                interaction.sources.append(nsource)

            # search for each reference and create it if it doesn't exist
            for ref in ref_parameter_list:
                ref_fields = dict(detection_method=ref[0],
                                  author_ln=ref[1],
                                  pub_date=ref[2],
                                  pmid=ref[3],
                                  interaction_type=ref[4],
                                  source_db=ref[5],
                                  confidence=ref[6])
                nref = session.query(InteractionReference).filter_by(
                    interactor_a=None, interactor_b=None,
                    **ref_fields).first()
                if nref is None:
                    # new reference: link it to the interaction and the source
                    nref = InteractionReference(**ref_fields)
                    interaction.references.append(nref)
                    nref.sources.append(nsource)
                else:
                    # existing reference: add the missing links only
                    if interaction not in nref.interactions:
                        nref.interactions.append(interaction)
                    if nsource not in nref.sources:
                        nref.sources.append(nsource)

            # collect all the cross references for the interaction
            for raw_xref in row['identifier'].split('|'):
                xref_field = raw_xref.split(':')
                # entries without a 'db:accession' form (e.g. a bare '-')
                # carry no cross reference
                if len(xref_field) < 2:
                    continue
                xref = session.query(InteractionXref).filter_by(
                    accession=xref_field[1],
                    interaction_id=interaction.id).first()
                if xref is None:
                    session.add(InteractionXref(interaction_id=interaction.id,
                                                accession=xref_field[1],
                                                data_source=xref_field[0]))

        session.commit()
    print(source, session.query(Interaction).count())
Exemple #14
0
def parse_ecoli_ebi_goa_nonintact(session):
    """Import interactions from the E. coli EBI-GOA (non-IntAct) PSICQUIC file.

    Each row names two E. coli proteins by UniProt id. Every pair of
    ``OrthologEcoli`` rows (one per side) that share the same
    ``strain_protein`` yields one interaction between the orthologs'
    proteins: an existing interaction gets 'cfe' recorded in
    ``ortholog_derived``, a missing one is created with
    ``ortholog_derived='fe'``. A reference and, if absent, an
    'EBI-GOA non-IntAct' source are added for each such interaction.
    Prints the total number of interactions when done.

    Args:
        session: database session used for all queries and commits.
    """
    with open('Ecoli/PSICQUIC/EBI-GOA-nonIntAct.txt') as csvfile:
        reader = csv.DictReader(csvfile, delimiter='\t')
        for row in reader:
            uniprot_A, uniprot_B = None, None
            if 'uniprotkb:' in row['#ID(s) interactor A']:
                uniprot_A = row['#ID(s) interactor A'].split('uniprotkb:')[1]
            if 'uniprotkb:' in row['ID(s) interactor B']:
                uniprot_B = row['ID(s) interactor B'].split('uniprotkb:')[1]

            # both interactors need a uniprot id to look up orthologs
            if uniprot_A is None or uniprot_B is None:
                continue

            orthologs_A = session.query(OrthologEcoli).filter(
                OrthologEcoli.ortholog_uniprot == uniprot_A).all()
            orthologs_B = session.query(OrthologEcoli).filter(
                OrthologEcoli.ortholog_uniprot == uniprot_B).all()

            # pair up orthologs that belong to the same strain; each entry
            # keeps the protein together with the E. coli ortholog id
            interactors = []
            for ortholog_A in orthologs_A:
                for ortholog_B in orthologs_B:
                    if ortholog_A.strain_protein == ortholog_B.strain_protein:
                        interactors.append(
                            [[ortholog_A.protein, ortholog_A.ortholog_id],
                             [ortholog_B.protein, ortholog_B.ortholog_id]])
            if not interactors:
                continue

            # publication details are identical for every pair of this row;
            # missing values ('-' author, no pubmed id) are stored as None
            # instead of raising IndexError
            author, date, pmid = None, None, None
            first_author = row['Publication 1st author(s)']
            if first_author != '-':
                author = first_author.split(' ')[0]
                if '(' in first_author:
                    # text between the parentheses, without the closing ')'
                    date = first_author.split('(')[1].split(')')[0]
            if 'pubmed:' in row['Publication Identifier(s)']:
                pmid = row['Publication Identifier(s)'].split(
                    'pubmed:')[1].split('|')[0]

            for interactor_pair in interactors:
                protein_A = interactor_pair[0][0]
                protein_B = interactor_pair[1][0]
                homogenous = (protein_A == protein_B)
                interaction = session.query(Interaction).filter(
                    Interaction.interactors.contains(protein_A),
                    Interaction.interactors.contains(protein_B),
                    Interaction.homogenous == homogenous).first()
                if interaction is not None:
                    if interaction.ortholog_derived is None:
                        interaction.ortholog_derived = 'cfe'
                    elif 'fe' not in interaction.ortholog_derived:
                        interaction.ortholog_derived += ', cfe'
                    session.commit()
                else:
                    # the relationship takes the protein objects themselves
                    # (not the [protein, ortholog_id] entries), and homogenous
                    # must be stored so the lookup above finds this row later
                    interaction = Interaction(
                        strain=protein_A.strain,
                        interactors=[protein_A, protein_B],
                        homogenous=homogenous,
                        type='p-p',
                        ortholog_derived='fe')
                    if is_experimental_psimi(
                            row['Interaction detection method(s)'].split(
                                'MI:')[1][:4]):
                        interaction.is_experimental = 1
                    session.add(interaction)
                    # commit so interaction.id is set for the rows below
                    session.commit()

                # NOTE(review): the reference stores the E. coli ids in file
                # order (A, B); they are not re-aligned with the order of
                # interaction.interactors -- confirm this is intended
                reference = InteractionReference(
                    interaction_id=interaction.id,
                    detection_method=row['Interaction detection method(s)'].
                    split('(')[1][:-1],
                    author_ln=author,
                    pub_date=date,
                    pmid=pmid,
                    interaction_type=row['Interaction type(s)'].split(
                        '(')[1][:-1],
                    source_db=row['Source database(s)'].split('(')[1][:-1],
                    interactor_a_id=row['#ID(s) interactor A'].split(':')[1],
                    interactor_b_id=row['ID(s) interactor B'].split(':')[1])
                session.add(reference)

                # record the data source once per interaction
                source = session.query(InteractionSource).filter(
                    InteractionSource.interaction_id == interaction.id,
                    InteractionSource.data_source ==
                    'EBI-GOA non-IntAct').first()
                if source is None:
                    source = InteractionSource(
                        interaction_id=interaction.id,
                        data_source='EBI-GOA non-IntAct')
                    session.add(source)
        session.commit()
        print(session.query(Interaction).count())
Exemple #15
0
def parse_kegg(org_id, strain, sourcedb, session):
    """Create interactions from the relations of every KEGG pathway of an organism.

    Each pathway of ``org_id`` is fetched as KGML. For every relation that is
    not a maplink, the two entries are resolved to interactors (proteins,
    E. coli orthologs when ``org_id`` is 'eco', or metabolites); all pairs
    that are not metabolite-metabolite, plus protein-intermediate pairs, are
    stored as interactions with a per-pathway 'kegg' reference. Prints
    ``sourcedb`` and the total number of interactions when done.

    Args:
        org_id: KEGG organism code; 'eco' switches on ortholog mapping.
        strain: strain stored on new interactions and used to pick orthologs.
        sourcedb: ``data_source`` of the InteractionSource to attach.
        session: database session used for all queries and commits.
    """
    # the listing is split on 'path:'; the first 8 characters of each
    # non-empty piece are taken as the pathway id
    listing = kegg_list(database='pathway', org=org_id).read()
    pathway_ids = [piece[:8] for piece in listing.split('path:') if piece != '']

    for pathway_id in pathway_ids:
        pathway = read(kegg_get(pathway_id, option='kgml'))
        path_name = pathway._getname()
        # node id (only meaningful inside this pathway) -> KEGG compound id
        node_to_kegg = {compound._getid(): compound._getname()[-6:]
                        for compound in pathway.compounds}

        for relation in pathway.relations:
            # maplink relations are ignored
            if relation.element.attrib['type'] == 'maplink':
                continue
            # entry names hold protein ids (locus) or compound ids (KEGG id)
            entries = [relation._getentry1()._getname(),
                       relation._getentry2()._getname()]
            # skip relations with an undefined interactor on either side
            if 'undefined' in entries:
                continue

            # per side: interactors already in the database, stored as
            # [object, reference id], and KEGG ids of metabolites still missing
            known = [[], []]
            missing = [[], []]
            for side, entry_names in enumerate(entries):
                # an entry may list several space-separated ids
                for entry_id in entry_names.split(' '):
                    if entry_id == '':
                        continue
                    id_parts = entry_id.split(':')
                    prefix, accession = id_parts[0], id_parts[1]
                    is_compound = accession in kegg_compounds
                    # neither a protein of this organism nor a known compound
                    if prefix != org_id and not is_compound:
                        continue

                    if is_compound:
                        found = session.query(Metabolite).filter_by(
                            kegg=accession).first()
                        if found is None:
                            # remember the id; the metabolite is created later
                            missing[side].append(accession)
                        else:
                            known[side].append([found, found.id])
                    elif org_id != 'eco':
                        found = session.query(Interactor).get(accession)
                        if found is not None:
                            # not an ortholog, so there is no reference id
                            known[side].append([found, None])
                    else:
                        # E. coli pathway: map the protein to all its orthologs
                        matches = session.query(OrthologEcoli).filter_by(
                            ortholog_id=accession,
                            strain_protein=strain).all()
                        for ortholog in matches:
                            if ortholog is not None:
                                # keep the E. coli id for the reference
                                known[side].append(
                                    [ortholog.protein, accession])

            interactor_pairs = []
            # pairs of existing interactors, excluding metabolite-metabolite
            for left in known[0]:
                for right in known[1]:
                    if not (left[0].type == 'm' and right[0].type == 'm'):
                        interactor_pairs.append([left, right])

            # pairs of an existing non-metabolite with a missing metabolite:
            # first side 0 against the missing ids of side 1, then the reverse
            for known_side, missing_side in ((0, 1), (1, 0)):
                for partner in known[known_side]:
                    for new_kegg_id in missing[missing_side]:
                        if partner[0].type == 'm':
                            continue
                        metabolite = session.query(Metabolite).filter_by(
                            kegg=new_kegg_id).first()
                        if metabolite is None:
                            metabolite = Metabolite(
                                id=new_kegg_id,
                                kegg=new_kegg_id,
                                pubchem=kegg_compounds[new_kegg_id]['pubchem'],
                                chebi=kegg_compounds[new_kegg_id]['chebi'])
                            session.add(metabolite)
                        interactor_pairs.append(
                            [partner, [metabolite, metabolite.id]])

            # nothing to store for this relation
            if not interactor_pairs:
                continue

            # compound intermediates named by the relation's subtype elements
            intermediates = []
            for subtype in relation.element.iter(tag='subtype'):
                if 'compound' not in subtype.attrib:
                    continue
                node_id = subtype.attrib['compound']
                if node_id is None:
                    continue
                # node ids not listed among this pathway's compounds are skipped
                if int(node_id) not in node_to_kegg:
                    continue
                kegg_id = node_to_kegg[int(node_id)]
                metabolite = session.query(Metabolite).filter_by(
                    kegg=kegg_id).first()
                if metabolite is None:
                    metabolite = Metabolite(
                        id=kegg_id,
                        name=kegg_compounds[kegg_id]['name'],
                        pubchem=kegg_compounds[kegg_id]['pubchem'],
                        chebi=kegg_compounds[kegg_id]['chebi'],
                        kegg=kegg_id)
                    session.add(metabolite)
                intermediates.append([metabolite, metabolite.id])

            # every existing non-metabolite interactor also pairs with
            # every intermediate
            for side_entries in known:
                for partner in side_entries:
                    if partner[0].type != 'm':
                        for intermediate in intermediates:
                            interactor_pairs.append([partner, intermediate])

            # store each pair as an interaction with its source and reference
            for (first, first_ref_id), (second, second_ref_id) in interactor_pairs:
                homogenous = (first == second)
                interaction = session.query(Interaction).filter(
                    Interaction.interactors.contains(first),
                    Interaction.interactors.contains(second),
                    Interaction.homogenous == homogenous).first()

                source = session.query(InteractionSource).filter_by(
                    data_source=sourcedb).first()
                if interaction is None:
                    interaction = Interaction(
                        type=first.type + '-' + second.type,
                        strain=strain,
                        homogenous=homogenous,
                        interactors=[first, second])
                    interaction.sources.append(source)
                    if org_id == 'eco':
                        interaction.ortholog_derived = 'Ecoli'
                    session.add(interaction)
                    session.commit()
                elif source not in interaction.sources:
                    interaction.sources.append(source)

                # for E. coli data, order the reference ids like the stored
                # interaction's interactors so each ortholog id lines up with
                # its Pseudomonas interactor; otherwise both stay None
                interactor_a, interactor_b = None, None
                if org_id == 'eco':
                    if interaction.interactors[0] == first:
                        interactor_a, interactor_b = first_ref_id, second_ref_id
                    else:
                        interactor_a, interactor_b = second_ref_id, first_ref_id

                comment = 'in ' + path_name + ' path'
                reference = session.query(InteractionReference).filter_by(
                    source_db='kegg',
                    comment=comment,
                    interactor_a=interactor_a,
                    interactor_b=interactor_b).first()
                if reference is None:
                    # new reference: attach it to the interaction and source
                    reference = InteractionReference(
                        source_db='kegg',
                        comment=comment,
                        interactor_a=interactor_a,
                        interactor_b=interactor_b)
                    interaction.references.append(reference)
                    reference.sources.append(source)
                else:
                    # existing reference: add only the links still missing
                    if interaction not in reference.interactions:
                        reference.interactions.append(interaction)
                    if source not in reference.sources:
                        reference.sources.append(source)

    session.commit()
    print(sourcedb, session.query(Interaction).count())
Exemple #16
0
def parse_ecocyc(strain, session):
    """Create ortholog-derived interactions from the EcoCyc interaction files.

    For every pathway in ``ecocyc_paths`` whose interaction file exists, each
    row's two participants are resolved to interactors: E. coli proteins map
    to their orthologs in ``strain``, compounds map to metabolites (created
    on demand). Every resulting pair that is not metabolite-metabolite is
    stored as an interaction with 'EcoCyc' as source and one 'ecocyc'
    reference per listed PubMed id. Prints 'ecocyc' and the total number of
    interactions when done.

    Args:
        strain: strain stored on new interactions and used to pick orthologs.
        session: database session used for all queries and commits.
    """

    def resolve(participant):
        """Return (known [interactor, label] entries, ecocyc id still to create)."""
        if participant not in ecocyc_compounds:
            # not a compound, so it is a uniprot id: collect its orthologs
            # together with the ortholog id needed for the reference
            matches = session.query(OrthologEcoli).filter_by(
                ortholog_uniprot=participant,
                strain_protein=strain).all()
            entries = []
            for ortholog in matches:
                if ortholog is not None:
                    entries.append([ortholog.protein, ortholog.ortholog_id])
            return entries, None
        # a compound: searching by ecocyc id alone is enough
        ecocyc_id = ecocyc_compounds[participant]['ecocyc']
        stored = session.query(Metabolite).filter_by(ecocyc=ecocyc_id).first()
        if stored is not None:
            return [[stored, stored.name]], None
        # creation is deferred: it is only needed if the partner is valid
        return [], ecocyc_id

    def pair_with_new_metabolite(partners, ecocyc_id, compound_key):
        """Pair each non-metabolite partner with the (possibly new) metabolite."""
        pairs = []
        for partner in partners:
            # never build a metabolite-metabolite pair
            if partner[0].type == 'm':
                continue
            # re-query each time so the metabolite is created only once
            metabolite = session.query(Metabolite).filter_by(
                ecocyc=ecocyc_id).first()
            if metabolite is None:
                metabolite = Metabolite(
                    id=ecocyc_id,
                    name=compound_key,
                    ecocyc=ecocyc_id,
                    pubchem=ecocyc_compounds[compound_key]['pubchem'],
                    kegg=ecocyc_compounds[compound_key]['kegg'],
                    cas=ecocyc_compounds[compound_key]['cas'],
                    chebi=ecocyc_compounds[compound_key]['chebi'])
                session.add(metabolite)
            # the compound key serves as the metabolite's reference label
            pairs.append([partner, [metabolite, compound_key]])
        return pairs

    for path in ecocyc_paths:
        interaction_file_name = "Data/Ecoli/ecocyc_files/interactions_sif/" + path + "_interactions.txt"
        # a pathway's file may be missing if obtaining it failed
        if not exists(interaction_file_name):
            continue
        with open(interaction_file_name) as handle:
            for interaction_row in csv.DictReader(handle):
                id_A = interaction_row['PARTICIPANT_A']
                id_B = interaction_row['PARTICIPANT_B']

                interactors_A, new_metabolite_A = resolve(id_A)
                interactors_B, new_metabolite_B = resolve(id_B)

                # pairs stay empty when both participants are new metabolites
                # or when a protein has no ortholog in the strain
                if new_metabolite_A is None and new_metabolite_B is None:
                    # everything is known: cross the two sides, keeping pairs
                    # where at least one member is not a metabolite
                    interactor_pairs = []
                    for entry_A in interactors_A:
                        for entry_B in interactors_B:
                            if not (entry_A[0].type == 'm'
                                    and entry_B[0].type == 'm'):
                                interactor_pairs.append([entry_A, entry_B])
                elif new_metabolite_A is not None:
                    interactor_pairs = pair_with_new_metabolite(
                        interactors_B, new_metabolite_A, id_A)
                else:
                    interactor_pairs = pair_with_new_metabolite(
                        interactors_A, new_metabolite_B, id_B)

                for (first, first_label), (second, second_label) in interactor_pairs:
                    homogenous = (first == second)
                    interaction = session.query(Interaction).filter(
                        Interaction.interactors.contains(first),
                        Interaction.interactors.contains(second),
                        Interaction.homogenous == homogenous).first()

                    source = session.query(InteractionSource).filter_by(
                        data_source='EcoCyc').first()

                    if interaction is None:
                        # first sighting: mark it as derived from E. coli
                        interaction = Interaction(
                            type=(first.type + '-' + second.type),
                            strain=strain,
                            homogenous=homogenous,
                            interactors=[first, second],
                            ortholog_derived='Ecoli')
                        interaction.sources.append(source)
                        session.add(interaction)
                        session.commit()
                    elif source not in interaction.sources:
                        interaction.sources.append(source)

                    # order the reference labels like the stored interaction's
                    # interactors, so each label lines up with its interactor
                    if interaction.interactors[0] == first:
                        interactor_a, interactor_b = first_label, second_label
                    else:
                        interactor_a, interactor_b = second_label, first_label

                    comment = (first_label
                               + interaction_row["INTERACTION_TYPE"]
                               + second_label)

                    # one reference per pubmed id listed for the row
                    pmids = interaction_row["INTERACTION_PUBMED_ID"].split(';')
                    for pmid in pmids:
                        reference = session.query(
                            InteractionReference).filter_by(
                                pmid=pmid,
                                source_db='ecocyc',
                                comment=comment,
                                interactor_a=interactor_a,
                                interactor_b=interactor_b).first()
                        if reference is None:
                            # new reference: attach to interaction and source
                            reference = InteractionReference(
                                pmid=pmid,
                                source_db='ecocyc',
                                comment=comment,
                                interactor_a=interactor_a,
                                interactor_b=interactor_b)
                            interaction.references.append(reference)
                            reference.sources.append(source)
                        else:
                            # existing reference: add only the missing links
                            if interaction not in reference.interactions:
                                reference.interactions.append(interaction)
                            if source not in reference.sources:
                                reference.sources.append(source)
    session.commit()
    print('ecocyc', session.query(Interaction).count())
Exemple #17
0
def parse_ecoli_dip(session):
    """Map E. coli DIP interactions onto orthologous proteins and store them.

    Reads the tab-separated file ``Ecoli/DIP.txt``. For every row, the
    refseq/uniprotkb accessions of both interactors are looked up in
    ``OrthologEcoli`` (uniprotkb first, refseq as fallback). Each pair of
    orthologs sharing the same ``strain_protein`` yields a new ``Interaction``
    (tagged 'fe') or updates an existing one (tagged 'cfe'), one
    ``InteractionReference`` per evidence entry of the row, and a single
    'DIP' ``InteractionSource`` per interaction.

    Args:
        session: database session used for all queries, adds and commits.
    """

    def split_column(row, name):
        # MITAB uses '-' for an empty column and '|' between multiple values
        value = row[name]
        return [] if value == '-' else value.split('|')

    def label(term):
        # 'psi-mi:"MI:0018"(two hybrid)' -> 'two hybrid'; None if no '(...)'
        parts = term.split('(')
        return parts[1][:-1] if len(parts) > 1 else None

    with open('Ecoli/DIP.txt') as csvfile:
        reader = csv.DictReader(csvfile, delimiter='\t')
        for row in reader:
            # find the orthologs of both E. coli interactors
            ortholog_sets = []
            for column in ('ID interactor A', 'ID interactor B'):
                accession = {'refseq': '', 'uniprotkb': ''}
                for identifier in row[column].split('|'):
                    fields = identifier.split(':')
                    if fields[0] in accession and len(fields) > 1:
                        accession[fields[0]] = fields[1]

                orthologs = []
                if accession['uniprotkb']:
                    orthologs = session.query(OrthologEcoli).filter(
                        OrthologEcoli.ortholog_uniprot ==
                        accession['uniprotkb']).all()
                # fall back to refseq only if uniprotkb gave no match
                if not orthologs and accession['refseq']:
                    orthologs = session.query(OrthologEcoli).filter(
                        OrthologEcoli.ortholog_refseq ==
                        accession['refseq']).all()
                ortholog_sets.append(orthologs)
            orthologs_A, orthologs_B = ortholog_sets

            # each entry: [[protein, ecoli ortholog id], [protein, ecoli ortholog id]]
            interactors = []
            for ortholog_A in orthologs_A:
                for ortholog_B in orthologs_B:
                    if ortholog_A.strain_protein == ortholog_B.strain_protein:
                        interactors.append(
                            [[ortholog_A.protein, ortholog_A.ortholog_id],
                             [ortholog_B.protein, ortholog_B.ortholog_id]])

            # evidence columns depend only on the row, so parse them once
            method_column = row['Interaction detection method(s)']
            detections = split_column(row, 'Interaction detection method(s)')
            pmids = split_column(row, 'Publication Identifier(s)')
            types = split_column(row, 'Interaction type(s)')
            source_db = label(row['Source database(s)'])
            # The original code reads every second publication identifier,
            # presumably because DIP lists two identifiers per evidence
            # entry -- TODO confirm against the data file.
            evidence_count = max(len(detections), len(types),
                                 (len(pmids) + 1) // 2)

            for interactor_pair in interactors:
                protein_1, protein_2 = (interactor_pair[0][0],
                                        interactor_pair[1][0])
                homogenous = (protein_1 == protein_2)
                interaction = session.query(Interaction).filter(
                    Interaction.interactors.contains(protein_1),
                    Interaction.interactors.contains(protein_2),
                    Interaction.homogenous == homogenous).first()
                if interaction is not None:
                    if interaction.ortholog_derived is None:
                        interaction.ortholog_derived = 'cfe'
                    elif 'fe' not in interaction.ortholog_derived:
                        interaction.ortholog_derived += ', cfe'
                    session.commit()
                else:
                    # homogenous must be stored, otherwise the lookup above
                    # can never match this interaction again
                    interaction = Interaction(
                        strain=protein_1.strain,
                        interactors=[protein_1, protein_2],
                        homogenous=homogenous,
                        type='p-p',
                        ortholog_derived='fe')
                    session.add(interaction)
                    session.commit()

                    # only newly created interactions get an experimental flag
                    if 'MI:' in method_column:
                        if is_experimental_psimi(
                                method_column.split('MI:')[1][:4]):
                            interaction.is_experimental = 1
                        elif interaction.is_experimental is None:
                            interaction.is_experimental = 0

                # order the E. coli ids like the stored interactors
                if interaction.interactors[0] == protein_1:
                    interactor_a = interactor_pair[0][1]
                    interactor_b = interactor_pair[1][1]
                else:
                    interactor_b = interactor_pair[0][1]
                    interactor_a = interactor_pair[1][1]

                for num in range(evidence_count):
                    detection = (label(detections[num])
                                 if num < len(detections) else None)
                    interaction_type = (label(types[num])
                                        if num < len(types) else None)
                    pmid = None
                    if num * 2 < len(pmids) and 'pubmed:' in pmids[num * 2]:
                        pmid = pmids[num * 2].split('pubmed:')[1]
                    reference = InteractionReference(
                        interaction_id=interaction.id,
                        detection_method=detection,
                        pmid=pmid,
                        interaction_type=interaction_type,
                        source_db=source_db,
                        interactor_a=interactor_a,
                        interactor_b=interactor_b)
                    session.add(reference)

                source = session.query(InteractionSource).filter(
                    InteractionSource.interaction_id == interaction.id,
                    InteractionSource.data_source == 'DIP').first()

                if source is None:
                    source = InteractionSource(interaction_id=interaction.id,
                                               data_source='DIP')
                    session.add(source)
        session.commit()
        print(session.query(Interaction).count())
def parse_ortholog_interactions(session):
    """Propagate every stored interaction to the orthologs of its interactors.

    For each existing ``Interaction``, protein interactors (``type == 'p'``)
    are replaced by their ``pseudomonas_orthologs``; other interactors are
    kept as they are. Every combination of the two sides is looked up and
    created if missing. The references and sources of the original
    interaction are then attached to it, with ``interactor_a`` /
    ``interactor_b`` recording the ids of the original interactors.

    Args:
        session: database session used for all queries, adds and commits.
    """
    # snapshot of the interactions present before any new ones are added
    all_interactions = session.query(Interaction).all()
    for interaction in all_interactions:
        # one list per interactor; entries are
        # [ortholog interactor, id of the original interactor]
        candidates = []
        for interactor in interaction.interactors:
            if interactor.type == 'p':
                entries = []
                for ortholog in interactor.pseudomonas_orthologs:
                    if ortholog is None:
                        continue
                    ortholog_interactor = session.query(Interactor).get(
                        ortholog.ortholog_id)
                    # skip orthologs that have no Interactor row
                    if ortholog_interactor is not None:
                        entries.append([ortholog_interactor, interactor.id])
            else:
                # non-protein interactors are carried over unchanged
                entries = [[interactor, interactor.id]]
            candidates.append(entries)

        # pairs can only be built from exactly two interactors
        if len(candidates) != 2:
            continue

        for first in candidates[0]:
            for second in candidates[1]:
                partner_1, origin_1 = first
                partner_2, origin_2 = second

                # compare and query with the Interactor objects themselves,
                # not with the [interactor, origin id] bookkeeping lists
                homogenous = (partner_1 == partner_2)
                new_interaction = session.query(Interaction).filter(
                    Interaction.interactors.contains(partner_1),
                    Interaction.interactors.contains(partner_2),
                    Interaction.homogenous == homogenous).first()
                if new_interaction is None:
                    # new interaction belongs to the other strain and
                    # records the strain it was derived from
                    strain = 'PA14' if interaction.strain == 'PAO1' else 'PAO1'
                    new_interaction = Interaction(
                        strain=strain,
                        type=interaction.type,
                        interactors=[partner_1, partner_2],
                        homogenous=homogenous,
                        ortholog_derived=interaction.strain)
                    session.add(new_interaction)
                    session.commit()

                # if the interaction already existed, keep interactor_a and
                # interactor_b aligned with its first and second interactors
                if new_interaction.interactors[0] == partner_1:
                    interactor_a, interactor_b = origin_1, origin_2
                else:
                    interactor_a, interactor_b = origin_2, origin_1

                # copy every reference of the original interaction, adding
                # the ids of the interactors it was derived from
                for reference in interaction.references:
                    fields = dict(
                        detection_method=reference.detection_method,
                        author_ln=reference.author_ln,
                        pub_date=reference.pub_date,
                        pmid=reference.pmid,
                        interaction_type=reference.interaction_type,
                        source_db=reference.source_db,
                        confidence=reference.confidence,
                        comment=reference.comment,
                        interactor_a=interactor_a,
                        interactor_b=interactor_b)
                    new_ref = session.query(InteractionReference).filter_by(
                        **fields).first()

                    if new_ref is None:
                        new_ref = InteractionReference(**fields)
                        new_interaction.references.append(new_ref)
                        # copy, so both references do not share one list
                        new_ref.sources = list(reference.sources)
                    else:
                        if new_interaction not in new_ref.interactions:
                            new_ref.interactions.append(new_interaction)
                        for source in reference.sources:
                            if (source is not None
                                    and source not in new_ref.sources):
                                new_ref.sources.append(source)

                # carry the original interaction's sources over as well
                for source in interaction.sources:
                    if source not in new_interaction.sources:
                        new_interaction.sources.append(source)

    session.commit()
    print('p_orthologs', session.query(Interaction).count())
Exemple #19
0
def parse_ecoli_mentha(session):
    """Map E. coli mentha interactions onto orthologous proteins and store them.

    Reads the tab-separated file ``Ecoli/PSICQUIC/mentha.txt``. Rows with a
    missing interactor id ('-') are skipped. Both ids are looked up in
    ``OrthologEcoli`` by uniprot accession. Each pair of orthologs sharing
    the same ``strain_protein`` yields a new ``Interaction`` (tagged 'fe') or
    updates an existing one (tagged 'cfe'), plus one ``InteractionReference``
    and a single 'mentha' ``InteractionSource`` per interaction.

    Args:
        session: database session used for all queries, adds and commits.
    """

    def label(column):
        # 'psi-mi:"MI:0018"(two hybrid)' -> 'two hybrid'; first entry only
        parts = column.split('|')[0].split('(')
        return parts[1][:-1] if len(parts) > 1 else None

    def psimi(column):
        # four-digit PSI-MI code after the first 'MI:', or None
        parts = column.split('MI:')
        return parts[1][:4] if len(parts) > 1 else None

    def pubmed_id(column):
        # first 'pubmed:<id>' entry without a trailing '|', or None
        for identifier in column.split('|'):
            if identifier.startswith('pubmed:'):
                return identifier[len('pubmed:'):]
        return None

    with open('Ecoli/PSICQUIC/mentha.txt') as csvfile:
        reader = csv.DictReader(csvfile, delimiter='\t')
        for row in reader:
            id_fields_A = row['#ID(s) interactor A'].split(':')
            id_fields_B = row['ID(s) interactor B'].split(':')
            # skip rows where either id is '-' or has no 'db:accession' form
            if len(id_fields_A) < 2 or len(id_fields_B) < 2:
                continue

            orthologs_A = session.query(OrthologEcoli).filter(
                OrthologEcoli.ortholog_uniprot == id_fields_A[1]).all()
            orthologs_B = session.query(OrthologEcoli).filter(
                OrthologEcoli.ortholog_uniprot == id_fields_B[1]).all()

            # each entry: [[protein, ecoli ortholog id], [protein, ecoli ortholog id]]
            interactors = []
            for ortholog_A in orthologs_A:
                for ortholog_B in orthologs_B:
                    if ortholog_A.strain_protein == ortholog_B.strain_protein:
                        interactors.append(
                            [[ortholog_A.protein, ortholog_A.ortholog_id],
                             [ortholog_B.protein, ortholog_B.ortholog_id]])

            method_column = row['Interaction detection method(s)']
            type_column = row['Interaction type(s)']
            source_column = row['Source database(s)']

            for interactor_pair in interactors:
                protein_1, protein_2 = (interactor_pair[0][0],
                                        interactor_pair[1][0])
                homogenous = (protein_1 == protein_2)
                interaction = session.query(Interaction).filter(
                    Interaction.interactors.contains(protein_1),
                    Interaction.interactors.contains(protein_2),
                    Interaction.homogenous == homogenous).first()
                if interaction is not None:
                    if interaction.ortholog_derived is None:
                        interaction.ortholog_derived = 'cfe'
                    elif 'fe' not in interaction.ortholog_derived:
                        interaction.ortholog_derived += ', cfe'
                    session.commit()
                else:
                    # type joins the two interactor *types* (e.g. 'p-p');
                    # homogenous is stored so the lookup above can match it
                    interaction = Interaction(
                        strain=protein_1.strain,
                        interactors=[protein_1, protein_2],
                        homogenous=homogenous,
                        type=protein_1.type + '-' + protein_2.type,
                        ortholog_derived='fe')
                    # only the first PSI-MI detection code of the row is checked
                    if ('MI:' in method_column
                            and is_experimental_psimi(psimi(method_column))):
                        interaction.is_experimental = 1
                    session.add(interaction)
                    session.commit()

                # order the E. coli ids like the stored interactors
                if interaction.interactors[0] == protein_1:
                    interactor_a = interactor_pair[0][1]
                    interactor_b = interactor_pair[1][1]
                else:
                    interactor_b = interactor_pair[0][1]
                    interactor_a = interactor_pair[1][1]

                # NOTE(review): the sibling parsers pass `confidence=`;
                # `confidence_score` is kept from the original -- confirm
                # against the InteractionReference model.
                reference = InteractionReference(
                    interaction_id=interaction.id,
                    psimi_detection=psimi(method_column),
                    detection_method=label(method_column),
                    pmid=pubmed_id(row['Publication Identifier(s)']),
                    psimi_type=psimi(type_column),
                    interaction_type=label(type_column),
                    psimi_db=psimi(source_column),
                    source_db=label(source_column),
                    confidence_score=row['Confidence value(s)'],
                    interactor_a=interactor_a,
                    interactor_b=interactor_b)
                session.add(reference)

                source = session.query(InteractionSource).filter(
                    InteractionSource.interaction_id == interaction.id,
                    InteractionSource.data_source == 'mentha').first()

                if source is None:
                    source = InteractionSource(interaction_id=interaction.id,
                                               data_source='mentha')
                    session.add(source)

        session.commit()
        print(session.query(Interaction).count())
Exemple #20
0
def parse_ecoli_irefindex(session):
    """Map E. coli iRefIndex interactions onto orthologous proteins and store them.

    Reads the tab-separated file ``Ecoli/PSICQUIC/iRefIndex.txt``. Rows with
    a missing interactor id ('-') are skipped. Each id is looked up in
    ``OrthologEcoli`` by uniprotkb or refseq accession, depending on its
    prefix. Each pair of orthologs sharing the same ``strain_protein`` yields
    a new ``Interaction`` (tagged 'fe') or updates an existing one (tagged
    'cfe'). One ``InteractionReference`` is added per combination of
    publication identifier and detection method, plus a single 'iRefIndex'
    ``InteractionSource`` per interaction.

    Args:
        session: database session used for all queries, adds and commits.
    """

    def label(term):
        # 'psi-mi:"MI:0018"(two hybrid)' -> 'two hybrid'; None if no '(...)'
        parts = term.split('(')
        return parts[1][:-1] if len(parts) > 1 else None

    def psimi(term):
        # four-digit PSI-MI code after the first 'MI:', or None
        parts = term.split('MI:')
        return parts[1][:4] if len(parts) > 1 else None

    def find_orthologs(identifier):
        # identifier looks like 'uniprotkb:<acc>' or 'refseq:<acc>'
        fields = identifier.split(':')
        if len(fields) < 2:
            return []
        if fields[0] == 'uniprotkb':
            return session.query(OrthologEcoli).filter(
                OrthologEcoli.ortholog_uniprot == fields[1]).all()
        if fields[0] == 'refseq':
            return session.query(OrthologEcoli).filter(
                OrthologEcoli.ortholog_refseq == fields[1]).all()
        return []

    with open('Ecoli/PSICQUIC/iRefIndex.txt') as csvfile:
        reader = csv.DictReader(csvfile, delimiter='\t')
        for row in reader:
            if (row['#ID(s) interactor A'] == '-'
                    or row['ID(s) interactor B'] == '-'):
                continue

            orthologs_A = find_orthologs(row['#ID(s) interactor A'])
            if not orthologs_A:
                continue
            orthologs_B = find_orthologs(row['ID(s) interactor B'])
            if not orthologs_B:
                continue

            # each entry: [[protein, ecoli ortholog id], [protein, ecoli ortholog id]]
            interactors = []
            for ortholog_A in orthologs_A:
                for ortholog_B in orthologs_B:
                    if ortholog_A.strain_protein == ortholog_B.strain_protein:
                        interactors.append(
                            [[ortholog_A.protein, ortholog_A.ortholog_id],
                             [ortholog_B.protein, ortholog_B.ortholog_id]])
            if not interactors:
                continue

            # reference fields depend only on the row, so parse them once
            author, date = None, None
            first_author = row['Publication 1st author(s)']
            if first_author != '-':
                author = first_author.split(' ')[0]
                date = label(first_author)

            interaction_type, psimi_type = None, None
            type_column = row['Interaction type(s)']
            if type_column != '-':
                interaction_type = label(type_column)
                psimi_type = psimi(type_column)

            # a single None keeps the loops below running once for rows
            # without publication ids or detection methods
            pmids = [None]
            publication_column = row['Publication Identifier(s)']
            if publication_column != '-':
                pmids = [
                    identifier.split(':')[1]
                    for identifier in publication_column.split('|')
                    if ':' in identifier
                ] or [None]

            detections, psimi_detections = [None], [None]
            method_column = row['Interaction detection method(s)']
            if method_column != '-':
                # detection methods come from the detection column, not from
                # the publication identifiers
                method_terms = method_column.split('|')
                detections = [label(term) for term in method_terms]
                psimi_detections = [psimi(term) for term in method_terms]

            source_column = row['Source database(s)']
            psimi_db = psimi(source_column)
            source_db = label(source_column)

            for interactor_pair in interactors:
                protein_1, protein_2 = (interactor_pair[0][0],
                                        interactor_pair[1][0])
                homogenous = (protein_1 == protein_2)
                interaction = session.query(Interaction).filter(
                    Interaction.interactors.contains(protein_1),
                    Interaction.interactors.contains(protein_2),
                    Interaction.homogenous == homogenous).first()
                if interaction is not None:
                    if interaction.ortholog_derived is None:
                        interaction.ortholog_derived = 'cfe'
                    elif 'fe' not in interaction.ortholog_derived:
                        interaction.ortholog_derived += ', cfe'
                    session.commit()
                else:
                    # type joins the two interactor *types* (e.g. 'p-p');
                    # homogenous is stored so the lookup above can match it
                    interaction = Interaction(
                        strain=protein_1.strain,
                        interactors=[protein_1, protein_2],
                        homogenous=homogenous,
                        type=protein_1.type + '-' + protein_2.type,
                        ortholog_derived='fe')
                    # only the first PSI-MI detection code of the row is checked
                    if ('MI:' in method_column
                            and is_experimental_psimi(psimi(method_column))):
                        interaction.is_experimental = 1
                    session.add(interaction)
                    session.commit()

                # order the E. coli ids like the stored interactors
                if interaction.interactors[0] == protein_1:
                    interactor_a = interactor_pair[0][1]
                    interactor_b = interactor_pair[1][1]
                else:
                    interactor_b = interactor_pair[0][1]
                    interactor_a = interactor_pair[1][1]

                # NOTE(review): other parsers in this file use `pub_date=`;
                # `date=` is kept from the original -- confirm against the
                # InteractionReference model. The confidence column is not
                # parsed here, so confidence stays None as before.
                for pmid in pmids:
                    for detection, psimi_detection in zip(
                            detections, psimi_detections):
                        reference = InteractionReference(
                            interaction_id=interaction.id,
                            psimi_detection=psimi_detection,
                            detection_method=detection,
                            author_ln=author,
                            date=date,
                            pmid=pmid,
                            psimi_type=psimi_type,
                            interaction_type=interaction_type,
                            psimi_db=psimi_db,
                            source_db=source_db,
                            confidence=None,
                            interactor_a=interactor_a,
                            interactor_b=interactor_b)
                        session.add(reference)

                source = session.query(InteractionSource).filter(
                    InteractionSource.interaction_id == interaction.id,
                    InteractionSource.data_source == 'iRefIndex').first()

                if source is None:
                    source = InteractionSource(interaction_id=interaction.id,
                                               data_source='iRefIndex')
                    session.add(source)
        session.commit()
        print(session.query(Interaction).count())