Exemple #1
0
def parse_ecoli_dip(session):
    """Import E. coli DIP interactions as ortholog-derived interactions.

    Reads the tab-separated file ``Ecoli/DIP.txt`` (first line is the header).
    For every row, both interactors are mapped to ``OrthologEcoli`` rows,
    looking them up by UniProtKB accession first and by RefSeq accession as a
    fallback. Every pair of orthologs that share the same ``strain_protein``
    yields an ``Interaction`` between the two mapped proteins, one
    ``InteractionReference`` per reported detection method, and a 'DIP'
    ``InteractionSource`` (created only if missing).

    Rows whose evidence columns contain '-' no longer raise IndexError; the
    missing values are stored as ``None`` on the reference instead.

    Args:
        session: SQLAlchemy session used for all queries, inserts and commits.
    """

    def accessions(id_field):
        """Return the (refseq, uniprotkb) accessions of a '|'-separated field."""
        refseq, uniprotkb = '', ''
        for entry in id_field.split('|'):
            parts = entry.split(':')
            if len(parts) < 2:
                # Not of the form 'db:accession' (e.g. a bare '-'): skip it.
                continue
            if parts[0] == 'refseq':
                refseq = parts[1]
            elif parts[0] == 'uniprotkb':
                uniprotkb = parts[1]
        return refseq, uniprotkb

    def find_orthologs(refseq, uniprotkb):
        """Look orthologs up by UniProtKB accession, falling back to RefSeq."""
        orthologs = []
        if uniprotkb:
            orthologs = session.query(OrthologEcoli).filter(
                OrthologEcoli.ortholog_uniprot == uniprotkb).all()
        if not orthologs and refseq:
            orthologs = session.query(OrthologEcoli).filter(
                OrthologEcoli.ortholog_refseq == refseq).all()
        return orthologs

    with open('Ecoli/DIP.txt') as csvfile:
        reader = csv.DictReader(csvfile, delimiter='\t')
        for row in reader:
            orthologs_A = find_orthologs(*accessions(row['ID interactor A']))
            orthologs_B = find_orthologs(*accessions(row['ID interactor B']))

            # Each entry is [[protein_A, ortholog_id_A],
            #                [protein_B, ortholog_id_B]].
            interactors = [
                [[ortholog_A.protein, ortholog_A.ortholog_id],
                 [ortholog_B.protein, ortholog_B.ortholog_id]]
                for ortholog_A in orthologs_A
                for ortholog_B in orthologs_B
                if ortholog_A.strain_protein == ortholog_B.strain_protein
            ]
            if not interactors:
                continue

            # Row-level evidence; identical for every ortholog pair below.
            detection_field = row['Interaction detection method(s)']
            detections = ([] if detection_field == '-' else
                          detection_field.split('|'))
            pmid_field = row['Publication Identifier(s)']
            pmids = [] if pmid_field == '-' else pmid_field.split('|')
            # Only every second publication entry is read (index num * 2).
            # NOTE(review): presumably the identifiers come in pairs per
            # publication -- confirm against the DIP export.
            if detections:
                reference_count = len(detections)
            elif pmids:
                reference_count = (len(pmids) + 1) // 2
            else:
                # No evidence columns at all: still record one reference so
                # the ortholog ids and the source database are kept.
                reference_count = 1
            source_db = row['Source database(s)'].split('(')[1][:-1]
            psimi_code = None
            if 'MI:' in detection_field:
                psimi_code = detection_field.split('MI:')[1][:4]
            # NOTE(review): 'Interaction type(s)' was parsed by the previous
            # implementation but never stored; confirm whether it should be
            # passed to InteractionReference.

            for interactor_pair in interactors:
                protein_a, ortholog_id_a = interactor_pair[0]
                protein_b, ortholog_id_b = interactor_pair[1]
                is_new = False
                homogenous = (protein_a == protein_b)
                interaction = session.query(Interaction).filter(
                    Interaction.interactors.contains(protein_a),
                    Interaction.interactors.contains(protein_b),
                    Interaction.homogenous == homogenous).first()
                if interaction is not None:
                    if interaction.ortholog_derived is None:
                        interaction.ortholog_derived = 'cfe'
                    elif 'fe' not in interaction.ortholog_derived:
                        interaction.ortholog_derived += ', cfe'
                    session.commit()
                else:
                    is_new = True
                    # Store ``homogenous`` so the lookup above can find this
                    # interaction again on later rows (as the non-ortholog
                    # parsers in this file do).
                    interaction = Interaction(
                        strain=protein_a.strain,
                        interactors=[protein_a, protein_b],
                        type='p-p',
                        homogenous=homogenous,
                        ortholog_derived='fe')
                    session.add(interaction)
                    # Commit now so that interaction.id is populated.
                    session.commit()

                # Report the ortholog ids in the order in which the stored
                # interaction lists its interactors.
                if interaction.interactors[0] == protein_a:
                    interactor_a, interactor_b = ortholog_id_a, ortholog_id_b
                else:
                    interactor_a, interactor_b = ortholog_id_b, ortholog_id_a

                for num in range(reference_count):
                    detection = None
                    if num < len(detections):
                        detection = detections[num].split('(')[1][:-1]
                    pmid = None
                    if num * 2 < len(pmids):
                        pmid = pmids[num * 2].split('pubmed:')[1]
                    reference = InteractionReference(
                        interaction_id=interaction.id,
                        detection_method=detection,
                        pmid=pmid,
                        source_db=source_db,
                        interactor_a=interactor_a,
                        interactor_b=interactor_b)
                    session.add(reference)

                # Only interactions created by this row get an experimental
                # flag, derived from the first PSI-MI code of the row.
                if is_new and psimi_code is not None:
                    if is_experimental_psimi(psimi_code):
                        interaction.is_experimental = 1
                    elif interaction.is_experimental is None:
                        interaction.is_experimental = 0

                source = session.query(InteractionSource).filter(
                    InteractionSource.interaction_id == interaction.id,
                    InteractionSource.data_source == 'DIP').first()

                if source is None:
                    source = InteractionSource(interaction_id=interaction.id,
                                               data_source='DIP')
                    session.add(source)
        session.commit()
        print(session.query(Interaction).count())
Exemple #2
0
def parse_ecoli_mentha(session):
    """Import E. coli mentha interactions as ortholog-derived interactions.

    Reads the tab-separated file ``Ecoli/PSICQUIC/mentha.txt``. Rows in which
    either interactor id is '-' are skipped. Both interactors are mapped to
    ``OrthologEcoli`` rows by UniProtKB accession; every pair of orthologs
    sharing the same ``strain_protein`` yields an ``Interaction`` between the
    mapped proteins, an ``InteractionReference`` and a 'mentha'
    ``InteractionSource`` (created only if missing).

    Args:
        session: SQLAlchemy session used for all queries, inserts and commits.
    """
    with open('Ecoli/PSICQUIC/mentha.txt') as csvfile:
        reader = csv.DictReader(csvfile, delimiter='\t')
        for row in reader:
            id_field_a = row['#ID(s) interactor A']
            id_field_b = row['ID(s) interactor B']
            if id_field_a == '-' or id_field_b == '-':
                continue

            orthologs_A = session.query(OrthologEcoli).filter(
                OrthologEcoli.ortholog_uniprot ==
                id_field_a.split(':')[1]).all()
            orthologs_B = session.query(OrthologEcoli).filter(
                OrthologEcoli.ortholog_uniprot ==
                id_field_b.split(':')[1]).all()

            # Each entry is [[protein_A, ortholog_id_A],
            #                [protein_B, ortholog_id_B]].
            interactors = [
                [[ortholog_A.protein, ortholog_A.ortholog_id],
                 [ortholog_B.protein, ortholog_B.ortholog_id]]
                for ortholog_A in orthologs_A
                for ortholog_B in orthologs_B
                if ortholog_A.strain_protein == ortholog_B.strain_protein
            ]

            for interactor_pair in interactors:
                protein_a = interactor_pair[0][0]
                protein_b = interactor_pair[1][0]
                homogenous = (protein_a == protein_b)
                interaction = session.query(Interaction).filter(
                    Interaction.interactors.contains(protein_a),
                    Interaction.interactors.contains(protein_b),
                    Interaction.homogenous == homogenous).first()
                if interaction is not None:
                    if interaction.ortholog_derived is None:
                        interaction.ortholog_derived = 'cfe'
                    elif 'fe' not in interaction.ortholog_derived:
                        interaction.ortholog_derived += ', cfe'
                    session.commit()
                else:
                    # The type is built from both interactors' ``type``
                    # strings; concatenating the second interactor object
                    # itself raised TypeError for every new interaction.
                    # ``homogenous`` is stored so the lookup above can find
                    # this interaction again on later rows.
                    interaction = Interaction(
                        strain=protein_a.strain,
                        interactors=[protein_a, protein_b],
                        type=(protein_a.type + '-' + protein_b.type),
                        homogenous=homogenous,
                        ortholog_derived='fe')
                    # TODO: ask about marking ecoli ortholog interactions as
                    # experimental; only the first detection method of the
                    # row is inspected here.
                    if 'MI:' in row['Interaction detection method(s)']:
                        if is_experimental_psimi(
                                row['Interaction detection method(s)'].split(
                                    'MI:')[1][:4]):
                            interaction.is_experimental = 1
                    session.add(interaction)
                    # Commit now so that interaction.id is populated.
                    session.commit()

                # NOTE(review): the previous implementation worked out which
                # ortholog id is interactor A / B here but never attached
                # them to the reference; other parsers in this file pass them
                # to InteractionReference. Confirm whether they should be
                # stored for mentha as well.
                reference = InteractionReference(
                    interaction_id=interaction.id,
                    psimi_detection=row['Interaction detection method(s)'].
                    split('MI:')[1][:4],
                    detection_method=row['Interaction detection method(s)'].
                    split('(')[1][:-1],
                    pmid=row['Publication Identifier(s)'].split('pubmed:')[1],
                    psimi_type=row['Interaction type(s)'].split('MI:')[1][:4],
                    interaction_type=row['Interaction type(s)'].split(
                        '(')[1][:-1],
                    psimi_db=row['Source database(s)'].split('MI:')[1][:4],
                    source_db=row['Source database(s)'].split('(')[1][:-1],
                    confidence_score=row['Confidence value(s)'])
                session.add(reference)

                source = session.query(InteractionSource).filter(
                    InteractionSource.interaction_id == interaction.id,
                    InteractionSource.data_source == 'mentha').first()

                if source is None:
                    source = InteractionSource(interaction_id=interaction.id,
                                               data_source='mentha')
                    session.add(source)

        session.commit()
        print(session.query(Interaction).count())
Exemple #3
0
def parse_mpidb(session):
    """Import PAO1 interactions from ``PAO1/PSICQUIC/MPIDB.txt``.

    Only rows whose two taxid columns both start with
    'taxid:208964(pseae)' are used. Each interactor id is resolved to an
    ``Interactor`` (by id) or, failing that, a ``Protein`` (by UniProtKB
    accession); rows where either side cannot be resolved are skipped. The
    matching ``Interaction`` is created if missing, and a reference, the
    cross-references and an 'MPIDB' source are attached to it.

    Args:
        session: SQLAlchemy session used for all queries, inserts and commits.
    """
    pao1_taxid = 'taxid:208964(pseae)'

    def resolve(accession):
        """Return the Interactor/Protein stored for ``accession``, or None."""
        if session.query(Interactor).filter(
                Interactor.id == accession).first() is not None:
            return session.query(Interactor).filter(
                Interactor.id == accession).one()
        if session.query(Protein).filter(
                Protein.uniprotkb == accession).first() is not None:
            return session.query(Protein).filter(
                Protein.uniprotkb == accession).one()
        return None

    with open('PAO1/PSICQUIC/MPIDB.txt') as csvfile:
        for row in csv.DictReader(csvfile, delimiter='\t'):
            taxid_a = row['Taxid interactor A'].split('|')[0]
            taxid_b = row['Taxid interactor B'].split('|')[0]
            if not (taxid_a == pao1_taxid and taxid_b == pao1_taxid):
                continue

            accession_a = row['#ID(s) interactor A'].split(':')[1]
            accession_b = row['ID(s) interactor B'].split(':')[1]

            # Resolve A first, then B, keeping only the ones that exist.
            participants = []
            for accession in (accession_a, accession_b):
                match = resolve(accession)
                if match is not None:
                    participants.append(match)

            if len(participants) != 2:
                continue
            same_partner = (participants[0] == participants[1])

            interaction = session.query(Interaction).filter(
                Interaction.interactors.contains(participants[0]),
                Interaction.interactors.contains(participants[1]),
                Interaction.homogenous == same_partner).first()
            if interaction is None:
                kind = participants[0].type + '-' + participants[1].type
                interaction = Interaction(strain='PAO1',
                                          type=kind,
                                          homogenous=same_partner,
                                          interactors=participants)
                # The flag comes from the first PSI-MI code in the column.
                experimental = is_experimental_psimi(
                    row['Interaction detection method(s)'].split(
                        'MI:')[1][:4])
                interaction.is_experimental = 1 if experimental else 0
                session.add(interaction)
                # Commit now so that interaction.id is populated.
                session.commit()
            elif is_experimental_psimi(
                    row['Interaction detection method(s)'].split(
                        'MI:')[1][:4]):
                # An existing interaction is only ever upgraded to
                # experimental, never downgraded.
                interaction.is_experimental = 1

            session.add(
                InteractionReference(
                    interaction_id=interaction.id,
                    detection_method=row['Interaction detection method(s)']
                    .split('(')[1][:-1],
                    author_ln=row['Publication 1st author(s)'].split(' ')[0],
                    pub_date=row['Publication 1st author(s)'].split(
                        '(')[1][:-1],
                    pmid=row['Publication Identifier(s)'].split(
                        'pubmed:')[1][:8],
                    confidence=row['Confidence value(s)'],
                    interaction_type=row['Interaction type(s)'].split(
                        '(')[1][:-1],
                    source_db=row['Source database(s)']))

            # One cross-reference per 'source:accession' entry, unless this
            # interaction already has that accession.
            for entry in row['Interaction identifier(s)'].split('|'):
                pieces = entry.split(':')
                existing = session.query(InteractionXref).filter(
                    InteractionXref.accession == pieces[1],
                    InteractionXref.interaction_id == interaction.id).first()
                if existing is None:
                    session.add(
                        InteractionXref(interaction_id=interaction.id,
                                        accession=pieces[1],
                                        data_source=pieces[0]))

            known_source = session.query(InteractionSource).filter(
                InteractionSource.interaction_id == interaction.id,
                InteractionSource.data_source == 'MPIDB').first()
            if known_source is None:
                session.add(
                    InteractionSource(interaction_id=interaction.id,
                                      data_source='MPIDB'))
        session.commit()
        print(session.query(Interaction).count())
Exemple #4
0
def parse_ecoli_uniprot(session):
    """Import E. coli UniProt interactions as ortholog-derived interactions.

    Reads the tab-separated file ``Ecoli/PSICQUIC/UniProt.txt``. Interactor B
    must be a UniProtKB id with at least one ``OrthologEcoli`` match, otherwise
    the row is skipped. Interactor A is either a UniProtKB id (mapped to
    orthologs and paired with B's orthologs that share the same
    ``strain_protein``) or a ChEBI id (mapped to a ``Metabolite``, created if
    missing, and paired with every ortholog of B). Each pair yields an
    ``Interaction``, an ``InteractionReference`` and a 'UniProt'
    ``InteractionSource`` (created only if missing).

    Args:
        session: SQLAlchemy session used for all queries, inserts and commits.
    """
    with open('Ecoli/PSICQUIC/UniProt.txt') as csvfile:
        reader = csv.DictReader(csvfile, delimiter='\t')
        for row in reader:
            orthologs_B = []
            id_B = row['ID(s) interactor B'].split(':')
            if id_B[0] == 'uniprotkb':
                orthologs_B = session.query(OrthologEcoli).filter(
                    OrthologEcoli.ortholog_uniprot == id_B[1]).all()

            if not orthologs_B:
                continue

            orthologs_A = []
            metabolite = None
            id_A = row['#ID(s) interactor A'].split(':')
            if id_A[0] == 'uniprotkb':
                orthologs_A = session.query(OrthologEcoli).filter(
                    OrthologEcoli.ortholog_uniprot == id_A[1]).all()
            elif id_A[0] == 'chebi':
                metabolite = session.query(Metabolite).filter(
                    Metabolite.chebi == id_A[1]).first()
                if metabolite is None:
                    metabolite = Metabolite(id=id_A[1], chebi=id_A[1])
                    session.add(metabolite)
                    session.commit()

            # Each entry is [[interactor_A, id_A], [protein_B, ortholog_id_B]].
            interactors = [
                [[ortholog_A.protein, ortholog_A.ortholog_id],
                 [ortholog_B.protein, ortholog_B.ortholog_id]]
                for ortholog_A in orthologs_A
                for ortholog_B in orthologs_B
                if ortholog_A.strain_protein == ortholog_B.strain_protein
            ]

            if metabolite is not None:
                for ortholog_B in orthologs_B:
                    interactors.append(
                        [[metabolite, metabolite.id],
                         [ortholog_B.protein, ortholog_B.ortholog_id]])

            for interactor_pair in interactors:
                first = interactor_pair[0][0]
                second = interactor_pair[1][0]
                homogenous = (first == second)
                interaction = session.query(Interaction).filter(
                    Interaction.interactors.contains(first),
                    Interaction.interactors.contains(second),
                    Interaction.homogenous == homogenous).first()
                if interaction is not None:
                    if interaction.ortholog_derived is None:
                        interaction.ortholog_derived = 'cfe'
                    elif 'fe' not in interaction.ortholog_derived:
                        interaction.ortholog_derived += ', cfe'
                    session.commit()
                else:
                    # The type is built from both interactors' ``type``
                    # strings; concatenating the second interactor object
                    # itself raised TypeError for every new interaction.
                    # ``homogenous`` is stored so the lookup above can find
                    # this interaction again on later rows.
                    # NOTE(review): ``first`` may be a Metabolite here;
                    # confirm that it exposes ``strain``.
                    interaction = Interaction(
                        strain=first.strain,
                        interactors=[first, second],
                        type=(first.type + '-' + second.type),
                        homogenous=homogenous,
                        ortholog_derived='fe')
                    if 'MI:' in row['Interaction detection method(s)']:
                        if is_experimental_psimi(
                                row['Interaction detection method(s)'].split(
                                    'MI:')[1][:4]):
                            interaction.is_experimental = 1
                    session.add(interaction)
                    # Commit now so that interaction.id is populated.
                    session.commit()

                # Report the source ids in the order in which the stored
                # interaction lists its interactors.
                if interaction.interactors[0] == first:
                    interactor_a = interactor_pair[0][1]
                    interactor_b = interactor_pair[1][1]
                else:
                    interactor_b = interactor_pair[0][1]
                    interactor_a = interactor_pair[1][1]

                reference = InteractionReference(
                    interaction_id=interaction.id,
                    psimi_detection=row['Interaction detection method(s)'].
                    split('MI:')[1][:4],
                    detection_method=row['Interaction detection method(s)'].
                    split('(')[1][:-1],
                    author_ln=row['Publication 1st author(s)'].split(' ')[0],
                    pub_date=row['Publication 1st author(s)'].split(
                        '(')[1][:-1],
                    pmid=row['Publication Identifier(s)'].split(
                        'pubmed:')[1].split('|')[0],
                    psimi_type=row['Interaction type(s)'].split('MI:')[1][:4],
                    interaction_type=row['Interaction type(s)'].split(
                        '(')[1][:-1],
                    psimi_db=row['Source database(s)'].split('MI:')[1][:4],
                    source_db=row['Source database(s)'].split('(')[1][:-1],
                    confidence_score=row['Confidence value(s)'],
                    interactor_a_id=interactor_a,
                    interactor_b_id=interactor_b)
                session.add(reference)

                source = session.query(InteractionSource).filter(
                    InteractionSource.interaction_id == interaction.id,
                    InteractionSource.data_source == 'UniProt').first()

                if source is None:
                    source = InteractionSource(interaction_id=interaction.id,
                                               data_source='UniProt')
                    session.add(source)

        session.commit()
        print(session.query(Interaction).count())
Exemple #5
0
def parse_mentha(file, strain, taxid, session):
    """Import mentha interactions for one strain from a tab-separated file.

    Only rows whose two taxid columns both start with ``taxid`` are used. Each
    interactor id is resolved to an ``Interactor`` (by id) or, failing that, a
    ``Protein`` (by UniProtKB accession); rows where either side cannot be
    resolved are skipped. The matching ``Interaction`` is created if missing,
    and a reference, a cross-reference and a 'mentha' source are attached.

    Args:
        file: Path of the tab-separated mentha export (with header line).
        strain: Strain label stored on newly created interactions.
        taxid: Exact value expected as the first '|'-separated entry of both
            taxid columns.
        session: SQLAlchemy session used for all queries, inserts and commits.
    """

    def find_interactor(accession):
        """Return the Interactor/Protein stored for ``accession``, or None."""
        if session.query(Interactor).filter(Interactor.id == accession).first() is not None:
            return session.query(Interactor).filter(Interactor.id == accession).one()
        if session.query(Protein).filter(Protein.uniprotkb == accession).first() is not None:
            return session.query(Protein).filter(Protein.uniprotkb == accession).one()
        return None

    with open(file) as csvfile:
        reader = csv.DictReader(csvfile, delimiter='\t')
        for row in reader:
            if (row['Taxid interactor A'].split('|')[0] != taxid or
                    row['Taxid interactor B'].split('|')[0] != taxid):
                continue

            A_id = row['#ID(s) interactor A'].split(':')[1]
            B_id = row['ID(s) interactor B'].split(':')[1]

            interactors = []
            for accession in (A_id, B_id):
                interactor = find_interactor(accession)
                if interactor is not None:
                    interactors.append(interactor)

            if len(interactors) != 2:
                continue
            homogenous = (interactors[0] == interactors[1])

            interaction = session.query(Interaction).filter(Interaction.interactors.contains(interactors[0]),
                                                            Interaction.interactors.contains(interactors[1]),
                                                            Interaction.homogenous == homogenous).first()
            # The flag is derived from the first PSI-MI code in the column.
            experimental = is_experimental_psimi(row['Interaction detection method(s)'].split('MI:')[1][:4])
            if interaction is None:
                interaction_kind = interactors[0].type + '-' + interactors[1].type
                interaction = Interaction(strain=strain, type=interaction_kind, homogenous=homogenous,
                                          interactors=interactors)
                interaction.is_experimental = 1 if experimental else 0
                session.add(interaction)
                # Commit now so that interaction.id is populated.
                session.commit()
            elif experimental:
                # An existing interaction is only ever upgraded to
                # experimental, never downgraded.
                interaction.is_experimental = 1

            reference = InteractionReference(interaction_id=interaction.id,
                                             detection_method=row['Interaction detection method(s)'].split('(')[1][:-1],
                                             pmid=row['Publication Identifier(s)'].split('pubmed:')[1][:8],
                                             interaction_type=row['Interaction type(s)'].split('(')[1][:-1],
                                             source_db=row['Source database(s)'].split('(')[1][:-1],
                                             confidence_score=row['Confidence value(s)'])
            session.add(reference)

            # The identifier column is treated as a single 'source:accession'
            # entry. NOTE(review): other parsers in this file split this
            # column on '|' first -- confirm mentha never lists several ids.
            xref_field = row['Interaction identifier(s)'].split(':')
            xref = session.query(InteractionXref).filter(InteractionXref.accession == xref_field[1],
                                                         InteractionXref.interaction_id == interaction.id).first()

            if xref is None:
                xref = InteractionXref(interaction_id=interaction.id, accession=xref_field[1],
                                       data_source=xref_field[0])
                session.add(xref)

            source = session.query(InteractionSource).filter(InteractionSource.interaction_id == interaction.id,
                                                             InteractionSource.data_source == 'mentha').first()

            if source is None:
                source = InteractionSource(interaction_id=interaction.id, data_source='mentha')
                session.add(source)
        # Persist everything added since the last per-interaction commit
        # (references, xrefs, sources, flag updates), as the other parsers in
        # this file do.
        session.commit()
        print(session.query(Interaction).count())
Exemple #6
0
def parse_irefindex(file, strain, taxid, session):
    """Import iRefIndex interactions for one strain from a tab-separated file.

    Only rows whose two taxid columns both start with ``taxid`` are used.
    'uniprotkb' ids are resolved to an ``Interactor`` (by id) or a ``Protein``
    (by UniProtKB accession); 'refseq' ids are resolved to a ``Protein`` by
    ``ncbi_acc``. Rows where either side cannot be resolved are skipped. The
    matching ``Interaction`` is created if missing; one reference per
    (publication, detection method) combination, the cross-references and an
    'iRefIndex' source are attached to it. Columns holding '-' are stored as
    ``None``.

    Args:
        file: Path of the tab-separated iRefIndex export (with header line).
        strain: Strain label stored on newly created interactions.
        taxid: Exact value expected as the first '|'-separated entry of both
            taxid columns.
        session: SQLAlchemy session used for all queries, inserts and commits.
    """

    def find_interactor(id_field):
        """Resolve a 'db:accession' field to an Interactor/Protein, or None."""
        parts = id_field.split(':')
        if parts[0] == 'uniprotkb':
            accession = parts[1]
            if session.query(Interactor).filter(Interactor.id == accession).first() is not None:
                return session.query(Interactor).filter(Interactor.id == accession).one()
            if session.query(Protein).filter(Protein.uniprotkb == accession).first() is not None:
                return session.query(Protein).filter(Protein.uniprotkb == accession).one()
        elif parts[0] == 'refseq':
            accession = parts[1]
            if session.query(Protein).filter(Protein.ncbi_acc == accession).first() is not None:
                return session.query(Protein).filter(Protein.ncbi_acc == accession).one()
        return None

    with open(file) as csvfile:
        reader = csv.DictReader(csvfile, delimiter='\t')
        for row in reader:
            if (row['Taxid interactor A'].split('|')[0] != taxid or
                    row['Taxid interactor B'].split('|')[0] != taxid):
                continue

            interactors = []
            for id_field in (row['#ID(s) interactor A'], row['ID(s) interactor B']):
                interactor = find_interactor(id_field)
                if interactor is not None:
                    interactors.append(interactor)

            if len(interactors) != 2:
                continue
            homogenous = (interactors[0] == interactors[1])

            interaction = session.query(Interaction).filter(Interaction.interactors.contains(interactors[0]),
                                                            Interaction.interactors.contains(interactors[1]),
                                                            Interaction.homogenous == homogenous).first()

            detection_field = row['Interaction detection method(s)']
            has_detection = detection_field != '-'
            if interaction is None:
                interaction_kind = interactors[0].type + '-' + interactors[1].type
                interaction = Interaction(strain=strain, type=interaction_kind, homogenous=homogenous,
                                          interactors=interactors)
                if has_detection:
                    if is_experimental_psimi(detection_field.split('MI:')[1][:4]):
                        interaction.is_experimental = 1
                    else:
                        interaction.is_experimental = 0
                # Add and commit right away: without this the interaction is
                # never explicitly persisted and interaction.id is still None
                # when the reference, xref and source rows below are built.
                session.add(interaction)
                session.commit()
            elif not has_detection:
                # Test for '-' BEFORE parsing: splitting '-' on 'MI:' raised
                # IndexError, which made this branch unreachable.
                # NOTE(review): resetting a 0 flag to None follows the
                # original (previously dead) branch -- confirm the intent.
                if interaction.is_experimental == 0:
                    interaction.is_experimental = None
            elif is_experimental_psimi(detection_field.split('MI:')[1][:4]):
                interaction.is_experimental = 1

            # [None] placeholders make the nested loop below emit one
            # reference even when a column is '-'.
            pmids, detections = [None], [None]
            if has_detection:
                detections = [method.split('(')[1][:-1] for method in detection_field.split('|')]

            interaction_type = None
            if row['Interaction type(s)'] != '-':
                interaction_type = row['Interaction type(s)'].split('(')[1][:-1]

            author, date = None, None
            author_field = row['Publication 1st author(s)']
            if author_field != '-':
                # Text before the first '-' is the author (first letter
                # upper-cased); the text after it is stored as the date.
                author_parts = author_field.split('-')
                author = author_parts[0][:1].upper() + author_parts[0][1:]
                date = author_parts[1]

            if row['Publication Identifier(s)'] != '-':
                pmids = [entry.split('pubmed:')[1][:8]
                         for entry in row['Publication Identifier(s)'].split('|')]

            source_db = row['Source database(s)'].split('(')[1][:-1]
            for pmid in pmids:
                for detection in detections:
                    reference = InteractionReference(interaction_id=interaction.id,
                                                     detection_method=detection,
                                                     author_ln=author,
                                                     pub_date=date,
                                                     pmid=pmid,
                                                     interaction_type=interaction_type,
                                                     source_db=source_db,
                                                     confidence_score=row['Confidence value(s)'])
                    session.add(reference)

            # One cross-reference per 'source:accession' entry, unless this
            # interaction already has that accession.
            for xref_entry in row['Interaction identifier(s)'].split('|'):
                xref_field = xref_entry.split(':')
                xref = session.query(InteractionXref).filter(InteractionXref.accession == xref_field[1],
                                                             InteractionXref.interaction_id == interaction.id).first()

                if xref is None:
                    xref = InteractionXref(interaction_id=interaction.id, accession=xref_field[1],
                                           data_source=xref_field[0])
                    session.add(xref)

            source = session.query(InteractionSource).filter(InteractionSource.interaction_id == interaction.id,
                                                             InteractionSource.data_source == 'iRefIndex').first()

            if source is None:
                source = InteractionSource(interaction_id=interaction.id, data_source='iRefIndex')
                session.add(source)
        # Persist everything added since the last per-interaction commit.
        session.commit()
        print(session.query(Interaction).count())
Exemple #7
0
def parse_ecoli_bindingdb(session):
    """Import BindingDB protein-metabolite interactions through E. coli orthologs.

    Reads ``Data/Ecoli/PSICQUIC/BindingDB.txt`` row by row. A row is used only
    when interactor B carries a UniProt accession that matches at least one
    ``OrthologEcoli`` record. Interactor A is resolved to a ``Metabolite``
    (first by ChEBI id, then by PubChem id; a new one is created and committed
    if neither matches). For every matching ortholog the function finds or
    creates a ``'p-m'`` ``Interaction``, adds an ``InteractionReference`` and,
    if not yet present, a ``'BindingDB'`` ``InteractionSource``.

    Args:
        session: database session used for all queries, adds and commits.
    """
    with open('Data/Ecoli/PSICQUIC/BindingDB.txt') as csvfile:
        # PSICQUIC exports are tab-separated; the sibling parsers in this file
        # pass the same delimiter.
        reader = csv.DictReader(csvfile, delimiter='\t')

        # iterate through each interaction
        for row in reader:
            # interactor B must have a UniProt accession
            if 'uniprotkb:' not in row['ID(s) interactor B']:
                continue
            uniprot_protein = row['ID(s) interactor B'].split(
                'uniprotkb:')[1].split('|')[0]

            orthologs = [
                [ortholog.protein, ortholog.ortholog_id]
                for ortholog in session.query(OrthologEcoli).filter(
                    OrthologEcoli.ortholog_uniprot == uniprot_protein).all()
            ]
            if not orthologs:
                continue

            chebi_metabolite, pubchem_metabolite = None, None

            # check if interactor A has ChEBI id
            for identifier in row['#ID(s) interactor A'].split('|'):
                if identifier.split(':')[0] == 'chebi':
                    # NOTE(review): slicing drops the first and last character
                    # of the field after 'chebi:' -- confirm it matches the
                    # quoting used in the input file.
                    chebi_metabolite = identifier.split(':')[1][1:-1]

            metabolite = None

            # if interactor A has ChEBI id, query for matching metabolite
            if chebi_metabolite is not None:
                metabolite = session.query(Metabolite).filter(
                    Metabolite.chebi == chebi_metabolite).first()

            # if unable to identify metabolite based on ChEBI id, try pubchem id
            if metabolite is None:
                for identifier in row['Alt. ID(s) interactor A'].split('|'):
                    if identifier.split(':')[0] == 'pubchem':
                        pubchem_metabolite = identifier.split(':')[1]

                metabolite = session.query(Metabolite).filter(
                    Metabolite.id == pubchem_metabolite).first()

            # if unable to find interactor A in database, create new metabolite
            # NOTE(review): pubchem_metabolite may still be None here, in which
            # case the new Metabolite gets id=None -- confirm this is intended.
            if metabolite is None:
                metabolite = Metabolite(id=pubchem_metabolite,
                                        pubchem=pubchem_metabolite,
                                        chebi=chebi_metabolite)
                session.add(metabolite)
                session.commit()

            # publication details are the same for every ortholog of this row
            author, date, pmid = None, None, None
            if row['Publication 1st author(s)'] != '-':
                author = row['Publication 1st author(s)'].split(' ')[0]
                date = row['Publication 1st author(s)'].split('(')[1][:-1]
            if 'pubmed:' in row['Publication Identifier(s)']:
                pmid = row['Publication Identifier(s)'].split(
                    'pubmed:')[1][:8]

            detection_field = row['Interaction detection method(s)']

            for interactor in orthologs:
                interaction = session.query(Interaction).filter(
                    Interaction.interactors.contains(interactor[0]),
                    Interaction.interactors.contains(metabolite)).first()

                if interaction is not None:
                    if interaction.ortholog_derived is None:
                        interaction.ortholog_derived = 'cfe'
                    elif 'fe' not in interaction.ortholog_derived:
                        interaction.ortholog_derived += ', cfe'
                    session.commit()
                else:
                    # interactor is [protein, ortholog_id]; the strain lives on
                    # the protein object, not on the list.
                    interaction = Interaction(
                        strain=interactor[0].strain,
                        interactors=[metabolite, interactor[0]],
                        type='p-m',
                        ortholog_derived='fe')
                    # should ortholog interactions be marked as experimental?
                    if 'MI:' in detection_field and is_experimental_psimi(
                            detection_field.split('MI:')[1][:4]):
                        interaction.is_experimental = 1
                    session.add(interaction)
                    session.commit()

                # orient the reference's interactor ids the same way as the
                # stored interaction
                if interaction.interactors[0] == metabolite:
                    interactor_a = metabolite.id
                    interactor_b = interactor[1]
                else:
                    interactor_b = metabolite.id
                    interactor_a = interactor[1]

                reference = InteractionReference(
                    interaction_id=interaction.id,
                    detection_method=detection_field.split('(')[1][:-1],
                    author_ln=author,
                    pmid=pmid,
                    pub_date=date,
                    interaction_type=row['Interaction type(s)'].split(
                        '(')[1][:-1],
                    source_db=row['Source database(s)'].split('(')[1][:-1],
                    confidence=row['Confidence value(s)'].split('(')[0],
                    interactor_a=interactor_a,
                    interactor_b=interactor_b)

                source = session.query(InteractionSource).filter(
                    InteractionSource.interaction_id == interaction.id,
                    InteractionSource.data_source == 'BindingDB').first()

                if source is None:
                    source = InteractionSource(interaction_id=interaction.id,
                                               data_source='BindingDB')
                    session.add(source)
                session.add(reference)
        session.commit()
def parse_ecoli_ebi_goa_nonintact(session):
    """Import EBI-GOA non-IntAct protein-protein interactions through E. coli orthologs.

    Reads ``Ecoli/PSICQUIC/EBI-GOA-nonIntAct.txt`` (tab-separated). Rows where
    either interactor lacks a UniProt accession are skipped. Both accessions
    are mapped to ``OrthologEcoli`` records and every pair sharing the same
    ``strain_protein`` yields a ``'p-p'`` ``Interaction`` (found or created),
    an ``InteractionReference`` and, if missing, an ``'EBI-GOA non-IntAct'``
    ``InteractionSource``. Prints the total ``Interaction`` count when done.

    Args:
        session: database session used for all queries, adds and commits.
    """
    with open('Ecoli/PSICQUIC/EBI-GOA-nonIntAct.txt') as csvfile:
        reader = csv.DictReader(csvfile, delimiter='\t')
        for row in reader:
            uniprot_A, uniprot_B = None, None
            # keep only the accession itself; further '|'-separated ids may follow
            if 'uniprotkb:' in row['#ID(s) interactor A']:
                uniprot_A = row['#ID(s) interactor A'].split(
                    'uniprotkb:')[1].split('|')[0]
            if 'uniprotkb:' in row['ID(s) interactor B']:
                uniprot_B = row['ID(s) interactor B'].split(
                    'uniprotkb:')[1].split('|')[0]

            if uniprot_A is None or uniprot_B is None:
                continue

            orthologs_A = session.query(OrthologEcoli).filter(
                OrthologEcoli.ortholog_uniprot == uniprot_A).all()
            orthologs_B = session.query(OrthologEcoli).filter(
                OrthologEcoli.ortholog_uniprot == uniprot_B).all()

            # pair up orthologs that belong to the same strain
            interactors = []
            for ortholog_A in orthologs_A:
                for ortholog_B in orthologs_B:
                    if ortholog_A.strain_protein == ortholog_B.strain_protein:
                        interactors.append(
                            [[ortholog_A.protein, ortholog_A.ortholog_id],
                             [ortholog_B.protein, ortholog_B.ortholog_id]])
            if not interactors:
                continue

            # publication details are identical for every pair of this row;
            # '-' or malformed fields leave the value as None instead of raising
            author, date, pmid = None, None, None
            first_author = row['Publication 1st author(s)']
            if first_author != '-':
                author = first_author.split(' ')[0]
                if '(' in first_author:
                    # drop the closing parenthesis, as the sibling parsers do
                    date = first_author.split('(')[1][:-1]
            if 'pubmed:' in row['Publication Identifier(s)']:
                pmid = row['Publication Identifier(s)'].split(
                    'pubmed:')[1].split('|')[0]

            detection_field = row['Interaction detection method(s)']

            for interactor_pair in interactors:
                protein_A, protein_B = interactor_pair[0][0], interactor_pair[1][0]
                homogenous = (protein_A == protein_B)
                interaction = session.query(Interaction).filter(
                    Interaction.interactors.contains(protein_A),
                    Interaction.interactors.contains(protein_B),
                    Interaction.homogenous == homogenous).first()
                if interaction is not None:
                    if interaction.ortholog_derived is None:
                        interaction.ortholog_derived = 'cfe'
                    elif 'fe' not in interaction.ortholog_derived:
                        interaction.ortholog_derived += ', cfe'
                    session.commit()
                else:
                    # interactors must be the protein objects, not the
                    # [protein, ortholog_id] bookkeeping pairs
                    interaction = Interaction(
                        strain=protein_A.strain,
                        interactors=[protein_A, protein_B],
                        type='p-p',
                        ortholog_derived='fe')
                    if 'MI:' in detection_field and is_experimental_psimi(
                            detection_field.split('MI:')[1][:4]):
                        interaction.is_experimental = 1
                    session.add(interaction)
                    session.commit()

                # NOTE(review): the reference stores the raw source-file ids of
                # interactors A and B; sibling parsers store the ortholog ids
                # oriented to the interaction instead -- confirm which is wanted.
                reference = InteractionReference(
                    interaction_id=interaction.id,
                    detection_method=detection_field.split('(')[1][:-1],
                    author_ln=author,
                    pub_date=date,
                    pmid=pmid,
                    interaction_type=row['Interaction type(s)'].split(
                        '(')[1][:-1],
                    source_db=row['Source database(s)'].split('(')[1][:-1],
                    interactor_a_id=row['#ID(s) interactor A'].split(':')[1],
                    interactor_b_id=row['ID(s) interactor B'].split(':')[1])
                session.add(reference)

                source = session.query(InteractionSource).filter(
                    InteractionSource.interaction_id == interaction.id,
                    InteractionSource.data_source ==
                    'EBI-GOA non-IntAct').first()

                if source is None:
                    source = InteractionSource(
                        interaction_id=interaction.id,
                        data_source='EBI-GOA non-IntAct')
                    session.add(source)
        session.commit()
        print(session.query(Interaction).count())
Exemple #9
0
def parse_ecoli_irefindex(session):
    """Import iRefIndex interactions through E. coli orthologs.

    Reads ``Ecoli/PSICQUIC/iRefIndex.txt`` (tab-separated). Rows with a ``-``
    interactor are skipped. Each interactor id (``uniprotkb`` or ``refseq``)
    is mapped to ``OrthologEcoli`` records; every pair sharing the same
    ``strain_protein`` yields an ``Interaction`` (found or created), one
    ``InteractionReference`` per (pmid, confidence, detection method)
    combination and, if missing, an ``'iRefIndex'`` ``InteractionSource``.
    Prints the total ``Interaction`` count when done.

    Args:
        session: database session used for all queries, adds and commits.
    """
    with open('Ecoli/PSICQUIC/iRefIndex.txt') as csvfile:
        reader = csv.DictReader(csvfile, delimiter='\t')
        for row in reader:
            if (row['#ID(s) interactor A'] == '-'
                    or row['ID(s) interactor B'] == '-'):
                continue

            orthologs_A = []
            id_A = row['#ID(s) interactor A'].split(':')
            if id_A[0] == 'uniprotkb':
                orthologs_A = session.query(OrthologEcoli).filter(
                    OrthologEcoli.ortholog_uniprot == id_A[1]).all()
            elif id_A[0] == 'refseq':
                orthologs_A = session.query(OrthologEcoli).filter(
                    OrthologEcoli.ortholog_refseq == id_A[1]).all()

            if not orthologs_A:
                continue

            orthologs_B = []
            id_B = row['ID(s) interactor B'].split(':')
            if id_B[0] == 'uniprotkb':
                orthologs_B = session.query(OrthologEcoli).filter(
                    OrthologEcoli.ortholog_uniprot == id_B[1]).all()
            elif id_B[0] == 'refseq':
                orthologs_B = session.query(OrthologEcoli).filter(
                    OrthologEcoli.ortholog_refseq == id_B[1]).all()

            # pair up orthologs that belong to the same strain
            interactors = []
            for ortholog_A in orthologs_A:
                for ortholog_B in orthologs_B:
                    if ortholog_A.strain_protein == ortholog_B.strain_protein:
                        interactors.append(
                            [[ortholog_A.protein, ortholog_A.ortholog_id],
                             [ortholog_B.protein, ortholog_B.ortholog_id]])
            if not interactors:
                continue

            # ---- row-level metadata, identical for every ortholog pair ----
            author, date, psimi_type, interaction_type = None, None, None, None
            # NOTE(review): confidences is never filled from the row, so every
            # reference is stored with confidence=None.
            confidences = [None]
            psimi_detections, detections, pmids = [None], [None], [None]

            if row['Publication 1st author(s)'] != '-':
                author = row['Publication 1st author(s)'].split(' ')[0]
                date = row['Publication 1st author(s)'].split('(')[1][:-1]
            if row['Interaction type(s)'] != '-':
                interaction_type = row['Interaction type(s)'].split(
                    '(')[1][:-1]
                if 'MI' in row['Interaction type(s)']:
                    psimi_type = row['Interaction type(s)'].split(
                        'MI:')[1][:4]
            if row['Publication Identifier(s)'] != '-':
                pmids = [
                    publication.split(':')[1]
                    for publication in
                    row['Publication Identifier(s)'].split('|')
                ]

            detection_field = row['Interaction detection method(s)']
            if detection_field != '-':
                detections, psimi_detections = [], []
                # parse the detection-method column itself (not the
                # publication column)
                for method in detection_field.split('|'):
                    detections.append(
                        method.split('(')[1][:-1] if '(' in method else None)
                    psimi_detections.append(
                        method.split('MI:')[1][:4] if 'MI:' in method else None)

            source_field = row['Source database(s)']
            # split on 'MI:' so the 4-digit code does not start with the colon
            psimi_db = (source_field.split('MI:')[1][:4]
                        if 'MI:' in source_field else None)
            source_db = source_field.split('(')[1][:-1]

            for interactor_pair in interactors:
                protein_A, protein_B = interactor_pair[0][0], interactor_pair[1][0]
                homogenous = (protein_A == protein_B)
                interaction = session.query(Interaction).filter(
                    Interaction.interactors.contains(protein_A),
                    Interaction.interactors.contains(protein_B),
                    Interaction.homogenous == homogenous).first()
                if interaction is not None:
                    if interaction.ortholog_derived is None:
                        interaction.ortholog_derived = 'cfe'
                    elif 'fe' not in interaction.ortholog_derived:
                        interaction.ortholog_derived += ', cfe'
                    session.commit()
                else:
                    interaction = Interaction(
                        strain=protein_A.strain,
                        interactors=[protein_A, protein_B],
                        # join the two interactor types, e.g. '<typeA>-<typeB>'
                        type=protein_A.type + '-' + protein_B.type,
                        ortholog_derived='fe')
                    # TODO: only the first listed method's MI code is checked
                    if 'MI:' in detection_field and is_experimental_psimi(
                            detection_field.split('MI:')[1][:4]):
                        interaction.is_experimental = 1
                    session.add(interaction)
                    session.commit()

                # orient the reference's interactor ids the same way as the
                # stored interaction
                if interaction.interactors[0] == protein_A:
                    interactor_a = interactor_pair[0][1]
                    interactor_b = interactor_pair[1][1]
                else:
                    interactor_b = interactor_pair[0][1]
                    interactor_a = interactor_pair[1][1]

                for pmid in pmids:
                    for confidence in confidences:
                        for detection, psimi_detection in zip(
                                detections, psimi_detections):
                            # NOTE(review): other parsers in this file pass the
                            # publication date as pub_date= -- confirm the
                            # column name on InteractionReference.
                            reference = InteractionReference(
                                interaction_id=interaction.id,
                                psimi_detection=psimi_detection,
                                detection_method=detection,
                                author_ln=author,
                                date=date,
                                pmid=pmid,
                                psimi_type=psimi_type,
                                interaction_type=interaction_type,
                                psimi_db=psimi_db,
                                source_db=source_db,
                                confidence=confidence,
                                interactor_a=interactor_a,
                                interactor_b=interactor_b)
                            session.add(reference)

                source = session.query(InteractionSource).filter(
                    InteractionSource.interaction_id == interaction.id,
                    InteractionSource.data_source == 'iRefIndex').first()

                if source is None:
                    source = InteractionSource(interaction_id=interaction.id,
                                               data_source='iRefIndex')
                    session.add(source)
        session.commit()
        print(session.query(Interaction).count())