Esempio n. 1
0
def parse_pseudomonas(session):
    """Register the PAO1 and PA14 KEGG sources, then parse both strains.

    The KEGG compound cache is filled first if it is still empty. Two
    InteractionSource rows ('KEGG(PAO1)' and 'KEGG(PA14)') are added and
    committed before ``parse_kegg`` is called once per strain.

    :param session: database session used to add and commit the sources and
        handed on to ``parse_kegg``
    """
    # fetch the KEGG compounds only when they have not been obtained yet
    if not kegg_compounds:
        get_kegg_compounds()

    # is_experimental is 2 because there is no way to determine whether the
    # detection method was experimental or not
    pao1_source = InteractionSource(data_source='KEGG(PAO1)', is_experimental=2)
    pa14_source = InteractionSource(data_source='KEGG(PA14)', is_experimental=2)
    session.add(pao1_source)
    session.add(pa14_source)
    session.commit()

    # parse the PAO1 and PA14 KEGG interactions, in that order
    kegg_runs = (
        ('pae', 'PAO1', 'KEGG(PAO1)'),
        ('pau', 'PA14', 'KEGG(PA14)'),
    )
    for kegg_code, strain, source_name in kegg_runs:
        parse_kegg(kegg_code, strain, source_name, session)
Esempio n. 2
0
def parse(session):
    """Import the Zhang predicted protein-protein interactions for PAO1.

    Reads ``Data/PAO1/Zhang.csv``. For every row with a ``Confidence`` of at
    least 0.9 whose ``Protein1`` and ``Protein2`` both resolve to an existing
    Interactor, an Interaction for the pair is looked up (or created) and
    linked to the 'Zhang' source and to a matching InteractionReference.
    Prints the total number of interactions when finished.

    :param session: database session used for all queries, adds and commits
    """
    with open('Data/PAO1/Zhang.csv') as csvfile:
        rows = csv.DictReader(csvfile)

        zhang = InteractionSource(data_source='Zhang', is_experimental=0)
        session.add(zhang)
        session.commit()

        for row in rows:
            # predictions below the confidence cut-off are ignored
            if float(row['Confidence']) < 0.9:
                continue

            first = session.query(Interactor).get(row['Protein1'])
            if first is None:
                continue

            second = session.query(Interactor).get(row['Protein2'])
            if second is None:
                continue

            same_interactor = (first == second)
            interaction = session.query(Interaction).filter(
                Interaction.interactors.contains(first),
                Interaction.interactors.contains(second),
                Interaction.homogenous == same_interactor).first()
            if interaction is not None:
                # existing interaction: only make sure the source is attached
                if zhang not in interaction.sources:
                    interaction.sources.append(zhang)
            else:
                interaction = Interaction(
                    strain='PAO1',
                    homogenous=same_interactor,
                    type='p-p',
                    interactors=[first, second])
                interaction.sources.append(zhang)
                session.add(interaction)
                session.commit()

            # fields that identify a reference; shared by the lookup and by
            # the constructor of a new reference
            reference_key = {
                'detection_method': 'computational prediction',
                'pmid': '22848443',
                'interaction_type': 'predicted',
                'confidence': row['Confidence'],
                'comment': row['Comment'],
            }
            reference = session.query(InteractionReference).filter_by(
                **reference_key).first()
            if reference is not None:
                if reference not in interaction.references:
                    interaction.references.append(reference)
                if zhang not in reference.sources:
                    reference.sources.append(zhang)
            else:
                reference = InteractionReference(
                    author_ln='Zhang',
                    pub_date='2012',
                    **reference_key)
                interaction.references.append(reference)
                reference.sources.append(zhang)
    session.commit()
    print('zhang', session.query(Interaction).count())
Esempio n. 3
0
def parse(session):
    """Prepare the EcoCyc data, register the EcoCyc source and parse both strains.

    Calls the EcoCyc path/compound loaders and the metabolite update helper,
    adds and commits an 'EcoCyc' InteractionSource, then runs ``parse_ecocyc``
    for PAO1 followed by PA14.

    :param session: database session passed to the helpers and used to add
        and commit the source
    """
    # load the EcoCyc paths from the EcoCyc file into ecocyc_paths
    get_ecocyc_paths()
    # load the compounds from the EcoCyc interactor files into ecocyc_compounds
    get_ecocyc_compounds(session)
    update_metabolite_info_ecocyc(session)

    # no references exist for any of these interactions, hence is_experimental = 2
    ecocyc_source = InteractionSource(data_source='EcoCyc', is_experimental=2)
    session.add(ecocyc_source)
    session.commit()

    # PAO1 and PA14 are parsed separately
    for strain in ('PAO1', 'PA14'):
        parse_ecocyc(strain, session)
Esempio n. 4
0
def parse_ecoli(session):
    """Register the E. coli KEGG source and parse it against PAO1 and PA14.

    Fills the KEGG compound cache if it is empty, refreshes metabolite info,
    adds and commits a 'KEGG(Ecoli)' InteractionSource, then calls
    ``parse_kegg`` for the 'eco' data once per Pseudomonas strain.

    :param session: database session passed to the helpers and used to add
        and commit the source
    """
    # fetch the KEGG compounds only when they have not been obtained yet
    if not kegg_compounds:
        get_kegg_compounds()

    # existing metabolites may be missing ids, so update their info first
    update_metabolite_info_kegg(session)

    # is_experimental is 2 because there is no way to determine whether the
    # detection method was experimental or not
    ecoli_source = InteractionSource(data_source='KEGG(Ecoli)', is_experimental=2)
    session.add(ecoli_source)
    session.commit()

    # the PAO1 and PA14 ortholog mappings are done separately
    for strain in ('PAO1', 'PA14'):
        parse_kegg('eco', strain, 'KEGG(Ecoli)', session)
Esempio n. 5
0
def parse(session):
    """Import the XLinkDB cross-linking interactions for PAO1.

    Reads the tab-separated ``Data/PAO1/xlinkdb.txt``. A single
    InteractionReference and a single 'XLinkDB' InteractionSource are created
    up front and attached to every interaction found. Rows whose ``proA`` or
    ``proB`` cannot be resolved are skipped. Prints the total number of
    interactions when finished.

    :param session: database session used for all queries, adds and commits
    """

    def resolve(identifier):
        # try the identifier as an Interactor primary key first, then fall
        # back to a Protein with that uniprotkb value
        found = session.query(Interactor).get(identifier)
        if found is None:
            found = session.query(Protein).filter_by(
                uniprotkb=identifier).first()
        return found

    with open('Data/PAO1/xlinkdb.txt') as csvfile:
        rows = csv.DictReader(csvfile, delimiter='\t')

        xlink_reference = InteractionReference(
            detection_method='chemical cross-linking mass spectrometry',
            interaction_type='physical association',
            author_ln='Navari',
            pub_date='2015',
            pmid='25800553',
            source_db='xlinkdb')
        xlink_source = InteractionSource(data_source='XLinkDB', is_experimental=1)
        xlink_source.references.append(xlink_reference)
        session.add(xlink_source)
        session.add(xlink_reference)
        session.commit()

        for row in rows:
            first = resolve(row['proA'])
            if first is None:
                continue

            second = resolve(row['proB'])
            if second is None:
                continue

            same_interactor = (first == second)
            interaction = session.query(Interaction).filter(
                Interaction.interactors.contains(first),
                Interaction.interactors.contains(second),
                Interaction.homogenous == same_interactor).first()
            if interaction is not None:
                # existing interaction: attach reference/source if missing
                if xlink_reference not in interaction.references:
                    interaction.references.append(xlink_reference)
                if xlink_source not in interaction.sources:
                    interaction.sources.append(xlink_source)
                continue

            interaction = Interaction(
                strain='PAO1',
                homogenous=same_interactor,
                type='p-p',
                interactors=[first, second])
            interaction.references.append(xlink_reference)
            interaction.sources.append(xlink_source)
            session.add(interaction)
            session.commit()

    session.commit()
    print('xlinkdb', session.query(Interaction).count())
Esempio n. 6
0
def parse(session):
    """Import the GeoffWinsor interactions for PAO1.

    ``Data/PAO1/GeoffWinsor.csv`` is read two rows at a time: the first row of
    each pair names interactor A and the second names interactor B (both via
    ``locus_tag``). The reference fields (``experimental_type``, ``pmid``) are
    taken from the second row of the pair. Pairs with an unknown interactor
    are skipped. Prints the total number of interactions when finished.

    :param session: database session used for all queries, adds and commits
    """
    with open('Data/PAO1/GeoffWinsor.csv') as csvfile:
        reader = csv.DictReader(csvfile)

        source = InteractionSource(data_source='Geoff', is_experimental=1)
        session.add(source)
        session.commit()

        for row_a in reader:
            # Always consume the partner row BEFORE deciding to skip, so that
            # a missing interactor A cannot shift the pairing of later rows.
            row_b = next(reader, None)
            if row_b is None:
                # trailing row without a partner: nothing left to pair up
                break

            interactor_A = session.query(Interactor).get(row_a['locus_tag'])
            if interactor_A is None:
                continue
            interactor_B = session.query(Interactor).get(row_b['locus_tag'])
            if interactor_B is None:
                continue

            homogenous = (interactor_A == interactor_B)
            interaction = session.query(Interaction).filter(
                Interaction.interactors.contains(interactor_A),
                Interaction.interactors.contains(interactor_B),
                Interaction.homogenous == homogenous).first()
            if interaction is None:
                interaction = Interaction(
                    strain='PAO1',
                    homogenous=homogenous,
                    type='p-p',
                    interactors=[interactor_A, interactor_B])
                interaction.sources.append(source)
                session.add(interaction)
                session.commit()
            elif source not in interaction.sources:
                interaction.sources.append(source)

            # the reference is described by the second row of the pair
            reference = session.query(InteractionReference).filter_by(
                detection_method=row_b['experimental_type'],
                pmid=row_b['pmid']).first()

            if reference is None:
                reference = InteractionReference(
                    detection_method=row_b['experimental_type'],
                    pmid=row_b['pmid'])
                interaction.references.append(reference)
                reference.sources.append(source)
            else:
                if interaction not in reference.interactions:
                    reference.interactions.append(interaction)
                if source not in reference.sources:
                    reference.sources.append(source)

    session.commit()
    print('geoff', session.query(Interaction).count())
Esempio n. 7
0
def parse(session):
    """Import RegulonDB (E. coli) regulatory interactions via ortholog mapping.

    Reads ``Data/Ecoli/RegulonDB.csv``. For each row the E. coli regulator
    (``TF name``, first letter lower-cased) and ``Regulated gene`` are looked
    up in OrthologEcoli; every regulator/target ortholog combination whose
    ``strain_protein`` values match yields a Pseudomonas interactor pair. Each
    pair gets an Interaction (created if missing) linked to the
    'RegulonDB(Ecoli)' source, plus one InteractionReference per evidence
    entry in the row. Prints the total number of interactions when finished.

    :param session: database session used for all queries, adds and commits
    """
    with open('Data/Ecoli/RegulonDB.csv') as csvfile:
        reader = csv.DictReader(csvfile)
        # all interactions from this file share one source, so create it once
        # Note: since no references are available, is_experimental is set to 2
        source = InteractionSource(data_source='RegulonDB(Ecoli)',
                                   is_experimental=2)
        session.add(source)
        session.commit()

        for row in reader:
            tf_name = row['TF name']
            # an empty name cannot match an ortholog (and cannot be indexed)
            if not tf_name:
                continue

            orthologs_A = session.query(OrthologEcoli).filter_by(
                ortholog_name=tf_name[0].lower() + tf_name[1:]).all()
            # .all() returns an empty list (never None) when nothing matches
            if not orthologs_A:
                continue
            orthologs_B = session.query(OrthologEcoli).filter_by(
                ortholog_name=row['Regulated gene']).all()
            if not orthologs_B:
                continue

            # pair up the Pseudomonas proteins of the orthologs; the ortholog
            # ids are kept for building the interaction reference later
            interactors = []
            for ortholog_A in orthologs_A:
                for ortholog_B in orthologs_B:
                    # only pair interactors whose strains match
                    if ortholog_A.strain_protein == ortholog_B.strain_protein:
                        interactors.append(
                            [[ortholog_A.protein, ortholog_A.ortholog_id],
                             [ortholog_B.protein, ortholog_B.ortholog_id]])

            effect = row['Regulatory effect']
            interaction_type = 'TF/sigma-binding site (' + effect + 'regulation)'

            for (protein_a, ortholog_id_a), (protein_b, ortholog_id_b) in interactors:
                homogenous = (protein_a == protein_b)
                interaction = session.query(Interaction).filter(
                    Interaction.interactors.contains(protein_a),
                    Interaction.interactors.contains(protein_b),
                    Interaction.homogenous == homogenous).first()

                if interaction is None:
                    # new interaction: mark it as derived from Ecoli orthologs.
                    # homogenous is stored because the lookup above filters on
                    # it; without it the row could never be found again.
                    interaction = Interaction(
                        strain=protein_a.strain,
                        homogenous=homogenous,
                        interactors=[protein_a, protein_b],
                        type='p-p',
                        ortholog_derived='Ecoli')
                    interaction.sources.append(source)
                    session.add(interaction)
                    session.commit()
                elif source not in interaction.sources:
                    interaction.sources.append(source)

                # An existing interaction may list the interactors in the
                # opposite order; line the ortholog ids up with the stored
                # order so interactor_a/b of the reference match
                # interaction.interactors[0]/[1].
                if interaction.interactors[0] == protein_a:
                    interactor_a, interactor_b = ortholog_id_a, ortholog_id_b
                else:
                    interactor_a, interactor_b = ortholog_id_b, ortholog_id_a

                comment = (ortholog_id_a + ' regulates(' + effect + ') ' +
                           ortholog_id_b)
                # one reference per evidence entry; the first and last
                # character of the Evidence field are stripped before splitting
                for evidence in row['Evidence'][1:-1].split(', '):
                    reference = session.query(InteractionReference).filter_by(
                        detection_method=evidence,
                        interaction_type=interaction_type,
                        source_db='regulondb',
                        confidence=row['Evidence type'],
                        comment=comment,
                        interactor_a=interactor_a,
                        interactor_b=interactor_b).first()
                    if reference is None:
                        reference = InteractionReference(
                            detection_method=evidence,
                            interaction_type=interaction_type,
                            comment=comment,
                            source_db='regulondb',
                            confidence=row['Evidence type'],
                            interactor_a=interactor_a,
                            interactor_b=interactor_b)
                        interaction.references.append(reference)
                        reference.sources.append(source)
                    else:
                        # reference already exists: attach the interaction and
                        # the source only if they are not linked yet
                        if interaction not in reference.interactions:
                            interaction.references.append(reference)
                        if source not in reference.sources:
                            reference.sources.append(source)

    session.commit()
    print('regulondb', session.query(Interaction).count())
Esempio n. 8
0
def parse_ecoli_uniprot(session):
    """Import E. coli UniProt PSICQUIC interactions via ortholog mapping.

    Reads the tab-separated ``Ecoli/PSICQUIC/UniProt.txt``. Interactor B must
    be a ``uniprotkb`` id with at least one OrthologEcoli match; interactor A
    is either a ``uniprotkb`` id (mapped through OrthologEcoli) or a ``chebi``
    id (mapped to a Metabolite, created if missing). Every resulting pair gets
    an Interaction (created if missing), a new InteractionReference built from
    the row, and a 'UniProt' InteractionSource if none exists yet. Prints the
    total number of interactions when finished.

    :param session: database session used for all queries, adds and commits
    """
    with open('Ecoli/PSICQUIC/UniProt.txt') as csvfile:
        reader = csv.DictReader(csvfile, delimiter='\t')
        for row in reader:
            interactors = []
            orthologs_B = []
            id_B = row['ID(s) interactor B'].split(':')
            if id_B[0] == 'uniprotkb':
                orthologs_B = session.query(OrthologEcoli).filter(
                    OrthologEcoli.ortholog_uniprot == id_B[1]).all()

            # without an ortholog for B no pair can be built
            if not orthologs_B:
                continue

            orthologs_A = []
            metabolite = None
            id_A = row['#ID(s) interactor A'].split(':')
            if id_A[0] == 'uniprotkb':
                orthologs_A = session.query(OrthologEcoli).filter(
                    OrthologEcoli.ortholog_uniprot == id_A[1]).all()
            elif id_A[0] == 'chebi':
                metabolite = session.query(Metabolite).filter(
                    Metabolite.chebi == id_A[1]).first()
                if metabolite is None:
                    metabolite = Metabolite(id=id_A[1], chebi=id_A[1])
                    session.add(metabolite)
                    session.commit()

            # protein-protein pairs: only combine orthologs of the same strain
            for ortholog_A in orthologs_A:
                for ortholog_B in orthologs_B:
                    if ortholog_A.strain_protein == ortholog_B.strain_protein:
                        interactors.append(
                            [[ortholog_A.protein, ortholog_A.ortholog_id],
                             [ortholog_B.protein, ortholog_B.ortholog_id]])

            # metabolite-protein pairs: the metabolite pairs with every B ortholog
            if metabolite is not None:
                for ortholog_B in orthologs_B:
                    interactors.append(
                        [[metabolite, metabolite.id],
                         [ortholog_B.protein, ortholog_B.ortholog_id]])

            detection_field = row['Interaction detection method(s)']
            for (entity_a, ortholog_id_a), (entity_b, ortholog_id_b) in interactors:
                homogenous = (entity_a == entity_b)
                interaction = session.query(Interaction).filter(
                    Interaction.interactors.contains(entity_a),
                    Interaction.interactors.contains(entity_b),
                    Interaction.homogenous == homogenous).first()
                if interaction is not None:
                    if interaction.ortholog_derived is None:
                        interaction.ortholog_derived = 'cfe'
                    elif 'fe' not in interaction.ortholog_derived:
                        interaction.ortholog_derived += ', cfe'
                    session.commit()
                else:
                    interaction = Interaction(
                        strain=entity_a.strain,
                        interactors=[entity_a, entity_b],
                        # both sides contribute their .type; concatenating the
                        # second interactor object itself raised a TypeError
                        type=entity_a.type + '-' + entity_b.type,
                        ortholog_derived='fe')
                    if 'MI:' in detection_field:
                        if is_experimental_psimi(
                                detection_field.split('MI:')[1][:4]):
                            interaction.is_experimental = 1
                    session.add(interaction)
                    session.commit()

                # An existing interaction may list the interactors in the
                # opposite order; line the ortholog ids up with the stored order.
                if interaction.interactors[0] == entity_a:
                    interactor_a, interactor_b = ortholog_id_a, ortholog_id_b
                else:
                    interactor_a, interactor_b = ortholog_id_b, ortholog_id_a

                # NOTE(review): the splits below assume every field contains
                # 'MI:' / '(' / 'pubmed:'; a row lacking them raises IndexError.
                author_field = row['Publication 1st author(s)']
                type_field = row['Interaction type(s)']
                db_field = row['Source database(s)']
                reference = InteractionReference(
                    interaction_id=interaction.id,
                    psimi_detection=detection_field.split('MI:')[1][:4],
                    detection_method=detection_field.split('(')[1][:-1],
                    author_ln=author_field.split(' ')[0],
                    pub_date=author_field.split('(')[1][:-1],
                    pmid=row['Publication Identifier(s)'].split(
                        'pubmed:')[1].split('|')[0],
                    psimi_type=type_field.split('MI:')[1][:4],
                    interaction_type=type_field.split('(')[1][:-1],
                    psimi_db=db_field.split('MI:')[1][:4],
                    source_db=db_field.split('(')[1][:-1],
                    confidence_score=row['Confidence value(s)'],
                    interactor_a_id=interactor_a,
                    interactor_b_id=interactor_b)
                session.add(reference)

                source = session.query(InteractionSource).filter(
                    InteractionSource.interaction_id == interaction.id,
                    InteractionSource.data_source == 'UniProt').first()

                if source is None:
                    source = InteractionSource(interaction_id=interaction.id,
                                               data_source='UniProt')
                    session.add(source)

        session.commit()
        print(session.query(Interaction).count())
Esempio n. 9
0
def parse(session):
    """Import the Galan-Vasquez regulatory network for PAO1 and PA14.

    Creates one InteractionSource per strain, then reads
    ``Data/PAO1_PA14/regulatory_network.csv``. A row may list several strains;
    for each PAO1/PA14 strain the ``Regulator`` and ``Target`` are resolved,
    the Interaction is looked up (or created), the strain's source is attached,
    and one new InteractionReference is added per evidence entry (or a single
    one with no detection method when the row lists no evidence). Prints the
    total number of interactions when finished.

    :param session: database session used for all queries, adds and commits
    """
    # the sources are shared by every interaction of a strain, so create them
    # up front; is_experimental is 2 because it cannot be confirmed whether
    # the detection method was experimental or not
    pao1_source = InteractionSource(data_source='Galan-Vasquez(PAO1)',
                                    is_experimental=2)
    pa14_source = InteractionSource(data_source='Galan-Vasquez(PA14)',
                                    is_experimental=2)
    session.add(pao1_source)
    session.add(pa14_source)
    session.commit()
    source_by_strain = {'PAO1': pao1_source, 'PA14': pa14_source}

    def resolve(identifier, strain):
        # search by protein name within the strain first; the identifier may
        # instead be a gene locus, so fall back to a lookup by id
        found = session.query(Protein).filter_by(
            name=identifier, strain=strain).first()
        if found is None:
            found = session.query(Interactor).get(identifier)
        return found

    with open('Data/PAO1_PA14/regulatory_network.csv') as csvfile:
        for row in csv.DictReader(csvfile):
            # a row describing an interaction may name more than one strain
            for strain in row['Strain'].split(','):
                # only PAO1 and PA14 interactions are of interest
                if strain not in source_by_strain:
                    continue

                regulator = resolve(row['Regulator'], strain)
                if regulator is None:
                    continue

                target = resolve(row['Target'], strain)
                if target is None:
                    continue

                same_interactor = (regulator == target)
                interaction = session.query(Interaction).filter(
                    Interaction.interactors.contains(regulator),
                    Interaction.interactors.contains(target),
                    Interaction.homogenous == same_interactor).first()
                # create the interaction when this pair has none yet
                if interaction is None:
                    interaction = Interaction(
                        strain=strain,
                        type='p-p',
                        homogenous=same_interactor,
                        interactors=[regulator, target])
                    session.add(interaction)
                    session.commit()

                # the source used for interaction and references depends on
                # the strain of the interaction
                strain_source = source_by_strain[strain]
                if strain_source not in interaction.sources:
                    interaction.sources.append(strain_source)

                # source db and detections are optional columns in the file
                source_db = row['source_db'] if row['source_db'] != '' else None
                if row['evidence'] != '':
                    detections = row['evidence'].split(', ')
                else:
                    detections = [None]

                regulation_type = ('TF/sigma-binding site (' + row['mode'] +
                                   'regulation)')
                regulation_comment = (regulator.id + ' regulates(' +
                                      row['mode'] + ') ' + target.id)
                # one new reference per detection, linked to both the
                # interaction and the strain's source
                for detection in detections:
                    reference = InteractionReference(
                        detection_method=detection,
                        pmid=row['pmid'],
                        interaction_type=regulation_type,
                        source_db=source_db,
                        comment=regulation_comment)
                    interaction.references.append(reference)
                    reference.sources.append(strain_source)

    session.commit()
    print('regnet', session.query(Interaction).count())
Esempio n. 10
0
def parse_mpidb(session):
    """Import the MPIDB PSICQUIC interactions for PAO1.

    Reads the tab-separated ``PAO1/PSICQUIC/MPIDB.txt``. Only rows where both
    interactors carry the taxid ``taxid:208964(pseae)`` are used. Each
    interactor id is resolved as an Interactor id or, failing that, a Protein
    uniprotkb value; rows with an unresolved interactor are skipped. For every
    remaining row the Interaction is looked up (or created), a new
    InteractionReference is added, and the row's interaction identifiers and
    an 'MPIDB' InteractionSource are stored if not present yet. Prints the
    total number of interactions when finished.

    :param session: database session used for all queries, adds and commits
    """

    def find_interactor(identifier):
        # one query per candidate table instead of a first()/one() pair each
        found = session.query(Interactor).filter(
            Interactor.id == identifier).first()
        if found is None:
            found = session.query(Protein).filter(
                Protein.uniprotkb == identifier).first()
        return found

    pseae_taxid = 'taxid:208964(pseae)'

    with open('PAO1/PSICQUIC/MPIDB.txt') as csvfile:
        reader = csv.DictReader(csvfile, delimiter='\t')
        for row in reader:
            if (row['Taxid interactor A'].split('|')[0] != pseae_taxid
                    or row['Taxid interactor B'].split('|')[0] != pseae_taxid):
                continue

            A_id = row['#ID(s) interactor A'].split(':')[1]
            B_id = row['ID(s) interactor B'].split(':')[1]

            interactor_a = find_interactor(A_id)
            if interactor_a is None:
                continue
            interactor_b = find_interactor(B_id)
            if interactor_b is None:
                continue
            interactors = [interactor_a, interactor_b]
            homogenous = (interactor_a == interactor_b)

            detection_field = row['Interaction detection method(s)']
            experimental = is_experimental_psimi(
                detection_field.split('MI:')[1][:4])

            interaction = session.query(Interaction).filter(
                Interaction.interactors.contains(interactor_a),
                Interaction.interactors.contains(interactor_b),
                Interaction.homogenous == homogenous).first()
            if interaction is None:
                interaction = Interaction(
                    strain='PAO1',
                    type=interactor_a.type + '-' + interactor_b.type,
                    homogenous=homogenous,
                    interactors=interactors)
                interaction.is_experimental = 1 if experimental else 0
                session.add(interaction)
                session.commit()
            elif experimental:
                # an existing interaction is only ever upgraded to experimental
                interaction.is_experimental = 1

            author_field = row['Publication 1st author(s)']
            reference = InteractionReference(
                interaction_id=interaction.id,
                detection_method=detection_field.split('(')[1][:-1],
                author_ln=author_field.split(' ')[0],
                pub_date=author_field.split('(')[1][:-1],
                # take everything up to the next '|' rather than a fixed 8
                # characters, so shorter PMIDs do not pick up trailing text
                pmid=row['Publication Identifier(s)'].split(
                    'pubmed:')[1].split('|')[0],
                confidence=row['Confidence value(s)'],
                interaction_type=row['Interaction type(s)'].split('(')[1][:-1],
                source_db=row['Source database(s)'])
            session.add(reference)

            for identifier in row['Interaction identifier(s)'].split('|'):
                xref_field = identifier.split(':')
                # entries without a 'db:accession' shape carry no accession
                if len(xref_field) < 2:
                    continue
                xref = session.query(InteractionXref).filter(
                    InteractionXref.accession == xref_field[1],
                    InteractionXref.interaction_id == interaction.id).first()

                if xref is None:
                    xref = InteractionXref(interaction_id=interaction.id,
                                           accession=xref_field[1],
                                           data_source=xref_field[0])
                    session.add(xref)

            source = session.query(InteractionSource).filter(
                InteractionSource.interaction_id == interaction.id,
                InteractionSource.data_source == 'MPIDB').first()

            if source is None:
                source = InteractionSource(interaction_id=interaction.id,
                                           data_source='MPIDB')
                session.add(source)
        session.commit()
        print(session.query(Interaction).count())
Esempio n. 11
0
def _psimi_ecoli_orthologs(session, interactor_field):
    """Find the OrthologEcoli rows matching one PSI-MI interactor column.

    Args:
        session: database session used to query OrthologEcoli.
        interactor_field: raw text of the row's interactor column.

    Returns:
        A ``(has_protein_id, orthologs)`` tuple. ``has_protein_id`` is True
        when the column carries a uniprotkb or refseq id; ``orthologs`` is the
        (possibly empty) list of OrthologEcoli rows found for those ids.
    """
    uniprot, refseq = None, None
    if 'uniprotkb' in interactor_field:
        uniprot = interactor_field.split('uniprotkb:')[1].split('|')[0]
    if 'refseq' in interactor_field:
        refseq = interactor_field.split('refseq:')[1].split('|')[0]

    orthologs = []
    if uniprot is not None:
        orthologs = session.query(OrthologEcoli).filter_by(
            ortholog_uniprot=uniprot).all()
    # Query.all() returns an empty list (never None) when nothing matches, so
    # the refseq fallback has to test for emptiness rather than for None
    if not orthologs and refseq is not None:
        orthologs = session.query(OrthologEcoli).filter_by(
            ortholog_refseq=refseq).all()

    return (uniprot is not None) or (refseq is not None), orthologs


def _psimi_metabolite_ids(interactor_field, alt_id_field):
    """Extract ``(chebi, pubchem)`` ids from an interactor column and its altID column.

    Either value is None when the corresponding id is not present. The ChEBI
    id is taken from the interactor column first and from the altID column
    only as a fallback.
    """
    chebi, pubchem = None, None
    if 'chebi' in interactor_field:
        # the trailing character (closing quote of the CHEBI value) is dropped
        chebi = interactor_field.split('CHEBI:')[1].split('|')[0][:-1]
    if 'pubchem' in alt_id_field:
        pubchem = alt_id_field.split('pubchem:')[1].split('|')[0]
    if chebi is None and 'chebi' in alt_id_field:
        chebi = alt_id_field.split('CHEBI:')[1].split('|')[0][:-1]
    return chebi, pubchem


def parse_psimi(session, file, source):
    """Load E. coli PSI-MI TAB interactions as ortholog-derived interactions.

    Every row of ``file`` is mapped through OrthologEcoli to the orthologous
    proteins. An interactor that carries no uniprotkb/refseq id is treated as
    a candidate metabolite and identified through its ChEBI/PubChem ids. For
    each resulting interactor pair the matching Interaction is looked up (or
    created with ``ortholog_derived='Ecoli'``) and linked to its
    InteractionSource and InteractionReference rows.

    Args:
        session: database session used for all queries and commits.
        file: path of the tab-delimited PSI-MI file; columns are named by the
            module-level ``cols``.
        source: value stored in / matched against ``InteractionSource.data_source``.
    """
    with open(file) as csvfile:
        reader = csv.DictReader(csvfile, fieldnames=cols, delimiter='\t')

        # iterate through each interaction
        for row in reader:
            # a protein interactor (it has a uniprot/refseq id) for which no
            # ortholog exists cannot be mapped, so the whole row is skipped
            has_id_A, orthologs_A = _psimi_ecoli_orthologs(
                session, row['interactor_A'])
            if has_id_A and not orthologs_A:
                continue

            has_id_B, orthologs_B = _psimi_ecoli_orthologs(
                session, row['interactor_B'])
            if has_id_B and not orthologs_B:
                continue

            # neither interactor is a protein, nothing to import
            if not has_id_A and not has_id_B:
                continue

            # if one side is not a protein it may be a metabolite: collect its
            # ids and remember the orthologs of the protein on the other side
            chebi, pubchem = None, None
            protein_orthologs = None
            if not has_id_A:
                chebi, pubchem = _psimi_metabolite_ids(row['interactor_A'],
                                                       row['altID_A'])
                # no metabolite id either, so interactor A is unidentified
                if chebi is None and pubchem is None:
                    continue
                protein_orthologs = orthologs_B
            elif not has_id_B:
                chebi, pubchem = _psimi_metabolite_ids(row['interactor_B'],
                                                       row['altID_B'])
                if chebi is None and pubchem is None:
                    continue
                protein_orthologs = orthologs_A

            # find the metabolite, or create it; at this point the protein
            # side is known to have at least one ortholog, so a new metabolite
            # always gets an interaction partner
            metabolite = None
            if chebi is not None or pubchem is not None:
                metabolite_id = None
                if chebi is not None:
                    metabolite_id = chebi
                    metabolite = session.query(Metabolite).filter_by(
                        chebi=chebi).first()
                # not found through chebi: retry with the pubchem id.
                # NOTE(review): this also makes the pubchem id the id of a new
                # metabolite whenever one is present, although the original
                # comment said chebi was preferred -- confirm which is intended
                if metabolite is None and pubchem is not None:
                    metabolite_id = pubchem
                    metabolite = session.query(Metabolite).filter_by(
                        pubchem=pubchem).first()
                if metabolite is None:
                    metabolite = Metabolite(id=metabolite_id,
                                            chebi=chebi,
                                            pubchem=pubchem)
                    session.add(metabolite)
                else:
                    # fill in whichever id the existing metabolite is missing
                    if metabolite.pubchem is None:
                        metabolite.pubchem = pubchem
                    if metabolite.chebi is None:
                        metabolite.chebi = chebi

            # each pair is [[interactor, its id], [interactor, its id]]
            interactors = []
            if metabolite is None:
                # p-p interaction: pair up orthologs whose protein strains match
                for ortholog_A in orthologs_A:
                    for ortholog_B in orthologs_B:
                        if ortholog_A.strain_protein == ortholog_B.strain_protein:
                            interactors.append(
                                [[ortholog_A.protein, ortholog_A.ortholog_id],
                                 [ortholog_B.protein, ortholog_B.ortholog_id]])
            else:
                # metabolite interaction: pair the metabolite with every ortholog
                for ortholog in protein_orthologs:
                    interactors.append(
                        [[metabolite, metabolite.id],
                         [ortholog.protein, ortholog.ortholog_id]])

            # for each interactor pair, create interaction if it doesnt exist, otherwise update attributes
            for (first, first_id), (second, second_id) in interactors:
                homogenous = (first == second)
                interaction = session.query(Interaction).filter(
                    Interaction.interactors.contains(first),
                    Interaction.interactors.contains(second),
                    Interaction.homogenous == homogenous).first()
                if interaction is None:
                    # one of the interactors may be a metabolite, so take the
                    # strain from whichever interactor is the protein
                    strain = first.strain if first.type == 'p' else second.strain
                    # homogenous is stored explicitly because the lookup above
                    # filters on it
                    interaction = Interaction(
                        strain=strain,
                        interactors=[first, second],
                        type=(first.type + '-' + second.type),
                        homogenous=homogenous,
                        ortholog_derived='Ecoli')
                    session.add(interaction)
                    session.commit()

                ref_parameter_list = get_psimi_ref_list(row)

                # if the interaction already existed its interactors may be
                # stored in the opposite order; keep interactor_a/interactor_b
                # aligned with the stored first/second interactor
                if interaction.interactors[0] == first:
                    interactor_a, interactor_b = first_id, second_id
                else:
                    interactor_a, interactor_b = second_id, first_id

                is_experimental = is_experimental_interaction(row)

                # reuse the source if it exists, otherwise create it, and make
                # sure it is attached to the interaction
                nsource = session.query(InteractionSource).filter_by(
                    data_source=source,
                    is_experimental=is_experimental).first()
                if nsource is None:
                    nsource = InteractionSource(
                        data_source=source, is_experimental=is_experimental)
                    interaction.sources.append(nsource)
                elif nsource not in interaction.sources:
                    interaction.sources.append(nsource)

                # go through each reference in the ref_parameter list, search for it, and if it doesnt exist create it
                for ref in ref_parameter_list:
                    nref = session.query(InteractionReference).filter_by(
                        detection_method=ref[0],
                        author_ln=ref[1],
                        pub_date=ref[2],
                        pmid=ref[3],
                        interaction_type=ref[4],
                        source_db=ref[5],
                        confidence=ref[6],
                        interactor_a=interactor_a,
                        interactor_b=interactor_b).first()
                    if nref is None:
                        # new reference: attach it to the interaction and source
                        nref = InteractionReference(detection_method=ref[0],
                                                    author_ln=ref[1],
                                                    pub_date=ref[2],
                                                    pmid=ref[3],
                                                    interaction_type=ref[4],
                                                    source_db=ref[5],
                                                    confidence=ref[6],
                                                    interactor_a=interactor_a,
                                                    interactor_b=interactor_b)
                        interaction.references.append(nref)
                        nref.sources.append(nsource)
                    else:
                        # existing reference: link interaction and source if missing
                        if interaction not in nref.interactions:
                            nref.interactions.append(interaction)
                        if nsource not in nref.sources:
                            nref.sources.append(nsource)

    session.commit()
    print(source, session.query(Interaction).count())
Esempio n. 12
0
def parse_ecoli_bindingdb(session):
    """Load E. coli BindingDB protein-metabolite interactions via orthologs.

    For every row whose interactor B has a UniProt id with at least one
    OrthologEcoli match, interactor A is resolved to a Metabolite (by ChEBI
    id, then by PubChem id, creating it if unknown). For each orthologous
    protein the Interaction is updated or created, and an InteractionReference
    plus a 'BindingDB' InteractionSource are recorded.

    Args:
        session: database session used for all queries and commits.
    """
    # NOTE(review): no delimiter is passed here although the sibling PSICQUIC
    # loader reads its file with delimiter='\t' -- confirm this file is
    # really comma-separated
    with open('Data/Ecoli/PSICQUIC/BindingDB.txt') as csvfile:
        reader = csv.DictReader(csvfile)

        # iterate through each interaction
        for row in reader:
            # interactor B must be a protein with a uniprot id
            if 'uniprotkb' not in row['ID(s) interactor B']:
                continue
            uniprot_protein = row['ID(s) interactor B'].split(
                'uniprotkb:')[1].split('|')[0]

            # [protein, ortholog id] for every ortholog of the E. coli protein
            orthologs = [
                [ecoli_ortholog.protein, ecoli_ortholog.ortholog_id]
                for ecoli_ortholog in session.query(OrthologEcoli).filter(
                    OrthologEcoli.ortholog_uniprot == uniprot_protein).all()
            ]
            if not orthologs:
                continue

            chebi_metabolite, pubchem_metabolite = None, None

            # check if interactor A has ChEBI id
            for identifier in row['#ID(s) interactor A'].split('|'):
                if identifier.split(':')[0] == 'chebi':
                    # drops the first and last character of the value
                    chebi_metabolite = identifier.split(':')[1][1:-1]

            metabolite = None

            # if interactor A has ChEBI id, query for matching metabolite
            if chebi_metabolite is not None:
                metabolite = session.query(Metabolite).filter(
                    Metabolite.chebi == chebi_metabolite).first()

            # if unable to identify metabolite based on ChEBI id, try using pubchem id
            if metabolite is None:
                for identifier in row['Alt. ID(s) interactor A'].split('|'):
                    if identifier.split(':')[0] == 'pubchem':
                        pubchem_metabolite = identifier.split(':')[1]

                metabolite = session.query(Metabolite).filter(
                    Metabolite.id == pubchem_metabolite).first()

            # if unable to find interactor A in database, create new metabolite
            # NOTE(review): pubchem_metabolite can still be None here, which
            # would create a metabolite with id=None -- confirm whether such
            # rows should be skipped or keyed by the ChEBI id instead
            if metabolite is None:
                metabolite = Metabolite(id=pubchem_metabolite,
                                        pubchem=pubchem_metabolite,
                                        chebi=chebi_metabolite)
                session.add(metabolite)
                session.commit()

            for protein, ortholog_id in orthologs:
                interaction = session.query(Interaction).filter(
                    Interaction.interactors.contains(protein),
                    Interaction.interactors.contains(metabolite)).first()

                if interaction is not None:
                    # flag the existing interaction as confirmed by this source
                    if interaction.ortholog_derived is None:
                        interaction.ortholog_derived = 'cfe'
                    elif 'fe' not in interaction.ortholog_derived:
                        interaction.ortholog_derived += ', cfe'
                    session.commit()
                else:
                    # the strain comes from the protein itself; each entry of
                    # `orthologs` is a [protein, ortholog_id] list, which has
                    # no `strain` attribute of its own
                    interaction = Interaction(
                        strain=protein.strain,
                        interactors=[metabolite, protein],
                        type='p-m',
                        ortholog_derived='fe')
                    # should ortholog interactions be marked as experimental?
                    if is_experimental_psimi(
                            row['Interaction detection method(s)'].split(
                                'MI:')[1][:4]):
                        interaction.is_experimental = 1
                    session.add(interaction)
                    session.commit()

                # keep interactor_a/interactor_b aligned with the order in
                # which the interaction stores its interactors
                if interaction.interactors[0] == metabolite:
                    interactor_a, interactor_b = metabolite.id, ortholog_id
                else:
                    interactor_a, interactor_b = ortholog_id, metabolite.id

                author, date, pmid = None, None, None

                # '-' marks a missing publication author field
                if row['Publication 1st author(s)'] != '-':
                    author = row['Publication 1st author(s)'].split(' ')[0]
                    date = row['Publication 1st author(s)'].split('(')[1][:-1]
                if 'pubmed:' in row['Publication Identifier(s)']:
                    pmid = row['Publication Identifier(s)'].split(
                        'pubmed:')[1][:8]

                reference = InteractionReference(
                    interaction_id=interaction.id,
                    detection_method=row['Interaction detection method(s)'].
                    split('(')[1][:-1],
                    author_ln=author,
                    pmid=pmid,
                    pub_date=date,
                    interaction_type=row['Interaction type(s)'].split(
                        '(')[1][:-1],
                    source_db=row['Source database(s)'].split('(')[1][:-1],
                    confidence=row['Confidence value(s)'].split('(')[0],
                    interactor_a=interactor_a,
                    interactor_b=interactor_b)

                # record BindingDB as a source of the interaction only once
                source = session.query(InteractionSource).filter(
                    InteractionSource.interaction_id == interaction.id,
                    InteractionSource.data_source == 'BindingDB').first()

                if source is None:
                    source = InteractionSource(interaction_id=interaction.id,
                                               data_source='BindingDB')
                    session.add(source)
                session.add(reference)
        session.commit()
Esempio n. 13
0
def _psimi_find_interactor(session, interactor_field):
    """Resolve one PSI-MI interactor column to a stored interactor.

    The uniprotkb id is tried first, as an Interactor primary key and then as
    ``Protein.uniprotkb``; the refseq id (``Protein.ncbi_acc``) is the
    fallback.

    Args:
        session: database session used for the lookups.
        interactor_field: raw text of the row's interactor column.

    Returns:
        The matching Interactor/Protein, or None if nothing matched.
    """
    uniprot, refseq, interactor = None, None, None
    if 'uniprotkb' in interactor_field:
        uniprot = interactor_field.split('uniprotkb:')[1].split('|')[0]
    if 'refseq' in interactor_field:
        refseq = interactor_field.split('refseq:')[1].split('|')[0]

    if uniprot is not None:
        # check if there is a protein-complex with this uniprot id
        interactor = session.query(Interactor).get(uniprot)
        # if no protein complex, check for protein matching the uniprot id
        if interactor is None:
            interactor = session.query(Protein).filter_by(
                uniprotkb=uniprot).first()
    if interactor is None and refseq is not None:
        interactor = session.query(Protein).filter_by(
            ncbi_acc=refseq).first()
    return interactor


def parse_psimi(file, strain, source, session):
    """Load protein-protein interactions from a PSI-MI TAB file.

    Rows whose two interactors can both be resolved are stored as 'p-p'
    Interaction rows (created if missing) and linked to their
    InteractionSource, InteractionReference and InteractionXref rows.

    Args:
        file: path of the tab-delimited PSI-MI file; columns are named by the
            module-level ``cols`` and the first line is skipped.
        strain: strain assigned to newly created interactions.
        source: value stored in / matched against ``InteractionSource.data_source``.
        session: database session used for all queries and commits.
    """
    with open(file) as csvfile:
        reader = csv.DictReader(csvfile, delimiter='\t', fieldnames=cols)
        # fieldnames are supplied explicitly, so the first line has to be
        # skipped by hand
        next(reader)
        for row in reader:
            # both interactors must be known, otherwise move on to the next row
            interactor_A = _psimi_find_interactor(session, row['interactor_A'])
            if interactor_A is None:
                continue

            interactor_B = _psimi_find_interactor(session, row['interactor_B'])
            if interactor_B is None:
                continue

            homogenous = (interactor_A == interactor_B)
            interaction = session.query(Interaction).filter(
                Interaction.interactors.contains(interactor_A),
                Interaction.interactors.contains(interactor_B),
                Interaction.homogenous == homogenous).first()
            # if no interaction was found with the interactors, create a new interaction
            if interaction is None:
                interaction = Interaction(
                    strain=strain,
                    type='p-p',
                    homogenous=homogenous,
                    interactors=[interactor_A, interactor_B])
                session.add(interaction)
                session.commit()

            ref_parameter_list = get_psimi_ref_list(row)

            is_experimental = is_experimental_interaction(row)

            # reuse the source if it exists, otherwise create it, and make
            # sure it is attached to the interaction
            nsource = session.query(InteractionSource).filter_by(
                data_source=source, is_experimental=is_experimental).first()
            if nsource is None:
                nsource = InteractionSource(data_source=source,
                                            is_experimental=is_experimental)
                interaction.sources.append(nsource)
            elif nsource not in interaction.sources:
                interaction.sources.append(nsource)

            # go through each reference in the ref_parameter list, search for it, and if it doesnt exist create it
            for ref in ref_parameter_list:
                nref = session.query(InteractionReference).filter_by(
                    detection_method=ref[0],
                    author_ln=ref[1],
                    pub_date=ref[2],
                    pmid=ref[3],
                    interaction_type=ref[4],
                    source_db=ref[5],
                    confidence=ref[6],
                    interactor_a=None,
                    interactor_b=None).first()
                if nref is None:
                    # new reference: attach it to the interaction and source
                    nref = InteractionReference(detection_method=ref[0],
                                                author_ln=ref[1],
                                                pub_date=ref[2],
                                                pmid=ref[3],
                                                interaction_type=ref[4],
                                                source_db=ref[5],
                                                confidence=ref[6])
                    interaction.references.append(nref)
                    nref.sources.append(nsource)
                else:
                    # existing reference: link interaction and source if missing
                    if interaction not in nref.interactions:
                        nref.interactions.append(interaction)
                    if nsource not in nref.sources:
                        nref.sources.append(nsource)

            # collect all the cross references for the interaction
            for xref_entry in row['identifier'].split('|'):
                xref_field = xref_entry.split(':')
                # skip entries that are not of the form 'db:accession', e.g.
                # the '-' placeholder PSI-MI TAB uses for an empty column;
                # indexing them would raise IndexError
                if len(xref_field) < 2:
                    continue
                # check if the cross reference exists for this interaction, if it doesnt create it
                existing_xref = session.query(InteractionXref).filter_by(
                    accession=xref_field[1],
                    interaction_id=interaction.id).first()

                if existing_xref is None:
                    session.add(
                        InteractionXref(interaction_id=interaction.id,
                                        accession=xref_field[1],
                                        data_source=xref_field[0]))

        session.commit()
    print(source, session.query(Interaction).count())
Esempio n. 14
0
def parse_ecoli_ebi_goa_nonintact(session):
    """Load E. coli EBI-GOA (non-IntAct) protein-protein interactions via orthologs.

    Both interactors of a row must carry a UniProt id. Every pair of their
    OrthologEcoli matches that share the same ``strain_protein`` becomes a
    'p-p' Interaction (updated if it already exists, created otherwise), with
    an InteractionReference and an 'EBI-GOA non-IntAct' InteractionSource.

    Args:
        session: database session used for all queries and commits.
    """
    with open('Ecoli/PSICQUIC/EBI-GOA-nonIntAct.txt') as csvfile:
        reader = csv.DictReader(csvfile, delimiter='\t')
        for row in reader:
            uniprot_A, uniprot_B = None, None
            # keep only the id itself if further '|'-separated ids follow
            if 'uniprotkb:' in row['#ID(s) interactor A']:
                uniprot_A = row['#ID(s) interactor A'].split(
                    'uniprotkb:')[1].split('|')[0]
            if 'uniprotkb:' in row['ID(s) interactor B']:
                uniprot_B = row['ID(s) interactor B'].split(
                    'uniprotkb:')[1].split('|')[0]

            if uniprot_A is None or uniprot_B is None:
                continue

            orthologs_A = session.query(OrthologEcoli).filter(
                OrthologEcoli.ortholog_uniprot == uniprot_A).all()
            orthologs_B = session.query(OrthologEcoli).filter(
                OrthologEcoli.ortholog_uniprot == uniprot_B).all()

            # each pair is [[protein, ortholog id], [protein, ortholog id]];
            # only orthologs whose protein strains match are paired
            interactors = [
                [[ortholog_A.protein, ortholog_A.ortholog_id],
                 [ortholog_B.protein, ortholog_B.ortholog_id]]
                for ortholog_A in orthologs_A
                for ortholog_B in orthologs_B
                if ortholog_A.strain_protein == ortholog_B.strain_protein
            ]

            for (protein_a, _), (protein_b, _) in interactors:
                homogenous = (protein_a == protein_b)
                interaction = session.query(Interaction).filter(
                    Interaction.interactors.contains(protein_a),
                    Interaction.interactors.contains(protein_b),
                    Interaction.homogenous == homogenous).first()
                if interaction is not None:
                    # flag the existing interaction as confirmed by this source
                    if interaction.ortholog_derived is None:
                        interaction.ortholog_derived = 'cfe'
                    elif 'fe' not in interaction.ortholog_derived:
                        interaction.ortholog_derived += ', cfe'
                    session.commit()
                else:
                    # pass the two protein objects (not the [protein, id]
                    # lists) as interactors, and store homogenous because the
                    # lookup above filters on it
                    interaction = Interaction(
                        strain=protein_a.strain,
                        interactors=[protein_a, protein_b],
                        homogenous=homogenous,
                        type='p-p',
                        ortholog_derived='fe')
                    if is_experimental_psimi(
                            row['Interaction detection method(s)'].split(
                                'MI:')[1][:4]):
                        interaction.is_experimental = 1
                    session.add(interaction)
                    session.commit()

                author, date, pmid = None, None, None

                # '-' marks a missing publication author field
                if row['Publication 1st author(s)'] != '-':
                    author = row['Publication 1st author(s)'].split(' ')[0]
                    # [:-1] drops the closing parenthesis after the date
                    date = row['Publication 1st author(s)'].split('(')[1][:-1]
                if 'pubmed:' in row['Publication Identifier(s)']:
                    # keep only the pubmed id if further identifiers follow
                    pmid = row['Publication Identifier(s)'].split(
                        'pubmed:')[1].split('|')[0]

                reference = InteractionReference(
                    interaction_id=interaction.id,
                    detection_method=row['Interaction detection method(s)'].
                    split('(')[1][:-1],
                    author_ln=author,
                    pub_date=date,
                    pmid=pmid,
                    interaction_type=row['Interaction type(s)'].split(
                        '(')[1][:-1],
                    source_db=row['Source database(s)'].split('(')[1][:-1],
                    interactor_a_id=row['#ID(s) interactor A'].split(':')[1],
                    interactor_b_id=row['ID(s) interactor B'].split(':')[1])
                session.add(reference)

                # record EBI-GOA non-IntAct as a source of the interaction only once
                source = session.query(InteractionSource).filter(
                    InteractionSource.interaction_id == interaction.id,
                    InteractionSource.data_source ==
                    'EBI-GOA non-IntAct').first()

                if source is None:
                    source = InteractionSource(
                        interaction_id=interaction.id,
                        data_source='EBI-GOA non-IntAct')
                    session.add(source)
        session.commit()
        print(session.query(Interaction).count())
Esempio n. 15
0
def _irefindex_find_interactor(session, id_fields):
    """Look up the database interactor for one split iRefIndex identifier.

    id_fields is an identifier column split on ':' (database prefix first,
    accession second). 'uniprotkb' accessions are matched against
    Interactor.id and then Protein.uniprotkb; 'refseq' accessions are matched
    against Protein.ncbi_acc. Returns None when the prefix is not handled or
    nothing matches.
    """
    if len(id_fields) < 2:
        return None
    database, accession = id_fields[0], id_fields[1]
    if database == 'uniprotkb':
        # one_or_none() needs a single query per lookup: it returns None for
        # no match and raises if several rows match.
        found = session.query(Interactor).filter(
            Interactor.id == accession).one_or_none()
        if found is None:
            found = session.query(Protein).filter(
                Protein.uniprotkb == accession).one_or_none()
        return found
    if database == 'refseq':
        return session.query(Protein).filter(
            Protein.ncbi_acc == accession).one_or_none()
    return None


def parse_irefindex(file, strain, taxid, session):
    """Load the interactions listed in an iRefIndex export into the database.

    A row is used only when both taxid columns start with `taxid` and both
    interactor identifiers resolve to known interactors. For each such row the
    matching Interaction is reused or created, and InteractionReference,
    InteractionXref and InteractionSource rows are added to the session.

    Args:
        file: path of the tab-delimited iRefIndex file (first line = header).
        strain: strain label stored on newly created interactions.
        taxid: value both taxid columns must have before their first '|'.
        session: object exposing query/add/commit (presumably a SQLAlchemy
            Session).

    New interactions are committed immediately so that their id exists before
    the rows that reference it are built; everything else is only added to the
    session and is persisted by a later commit (here or in the caller). The
    total number of interactions is printed at the end.
    """
    with open(file) as csvfile:
        reader = csv.DictReader(csvfile, delimiter='\t')
        for row in reader:
            if (row['Taxid interactor A'].split('|')[0] != taxid
                    or row['Taxid interactor B'].split('|')[0] != taxid):
                continue

            interactors = []
            for column in ('#ID(s) interactor A', 'ID(s) interactor B'):
                interactor = _irefindex_find_interactor(
                    session, row[column].split(':'))
                if interactor is not None:
                    interactors.append(interactor)
            if len(interactors) != 2:
                continue

            homogenous = (interactors[0] == interactors[1])
            interaction = session.query(Interaction).filter(
                Interaction.interactors.contains(interactors[0]),
                Interaction.interactors.contains(interactors[1]),
                Interaction.homogenous == homogenous).first()

            detection_field = row['Interaction detection method(s)']
            has_detection = detection_field != '-'

            if interaction is None:
                interaction = Interaction(
                    strain=strain,
                    type=interactors[0].type + '-' + interactors[1].type,
                    homogenous=homogenous,
                    interactors=interactors)
                if has_detection:
                    if is_experimental_psimi(
                            detection_field.split('MI:')[1][:4]):
                        interaction.is_experimental = 1
                    else:
                        interaction.is_experimental = 0
                # Persist now: interaction.id is read below and would still be
                # None for an object that was never added and flushed.
                session.add(interaction)
                session.commit()
            elif has_detection:
                # Only parse the MI code when the column actually holds one;
                # '-' has no 'MI:' part to index into.
                if is_experimental_psimi(detection_field.split('MI:')[1][:4]):
                    interaction.is_experimental = 1
            elif interaction.is_experimental == 0:
                # No detection method in this row: a previous 0 is reset to
                # None.
                interaction.is_experimental = None

            author, pub_date, interaction_type = None, None, None
            # [None] placeholders keep one reference per row even when the
            # corresponding column is '-'.
            pmids, detections = [None], [None]
            if has_detection:
                detections = [method.split('(')[1][:-1]
                              for method in detection_field.split('|')]
            if row['Interaction type(s)'] != '-':
                interaction_type = row['Interaction type(s)'].split('(')[1][:-1]
            first_author = row['Publication 1st author(s)']
            if first_author != '-':
                author_parts = first_author.split('-')
                author = author_parts[0][0].upper() + author_parts[0][1:]
                pub_date = author_parts[1]
            if row['Publication Identifier(s)'] != '-':
                pmids = [entry.split('pubmed:')[1][:8]
                         for entry in row['Publication Identifier(s)'].split('|')]

            source_db = row['Source database(s)'].split('(')[1][:-1]
            confidence = row['Confidence value(s)']
            # One reference per (pmid, detection method) combination.
            for pmid in pmids:
                for detection in detections:
                    reference = InteractionReference(
                        interaction_id=interaction.id,
                        detection_method=detection,
                        author_ln=author,
                        pub_date=pub_date,
                        pmid=pmid,
                        interaction_type=interaction_type,
                        source_db=source_db,
                        confidence_score=confidence)
                    session.add(reference)

            for identifier in row['Interaction identifier(s)'].split('|'):
                xref_field = identifier.split(':')
                xref = session.query(InteractionXref).filter(
                    InteractionXref.accession == xref_field[1],
                    InteractionXref.interaction_id == interaction.id).first()
                if xref is None:
                    xref = InteractionXref(interaction_id=interaction.id,
                                           accession=xref_field[1],
                                           data_source=xref_field[0])
                    session.add(xref)

            source = session.query(InteractionSource).filter(
                InteractionSource.interaction_id == interaction.id,
                InteractionSource.data_source == 'iRefIndex').first()
            if source is None:
                source = InteractionSource(interaction_id=interaction.id,
                                           data_source='iRefIndex')
                session.add(source)
        print(session.query(Interaction).count())
Esempio n. 16
0
def _dip_accessions(id_field):
    """Return (refseq, uniprotkb) accessions from a '|'-separated id column.

    Each entry is split on ':'; entries with another prefix are ignored and a
    missing accession is returned as ''. When a prefix occurs several times
    the last occurrence wins.
    """
    refseq, uniprotkb = '', ''
    for entry in id_field.split('|'):
        fields = entry.split(':')
        if fields[0] == 'refseq':
            refseq = fields[1]
        elif fields[0] == 'uniprotkb':
            uniprotkb = fields[1]
    return refseq, uniprotkb


def _dip_orthologs(session, uniprotkb, refseq):
    """Return OrthologEcoli rows by UniProt accession, else by RefSeq."""
    orthologs = []
    if uniprotkb != '':
        orthologs = session.query(OrthologEcoli).filter(
            OrthologEcoli.ortholog_uniprot == uniprotkb).all()
    if len(orthologs) == 0 and refseq != '':
        orthologs = session.query(OrthologEcoli).filter(
            OrthologEcoli.ortholog_refseq == refseq).all()
    return orthologs


def _dip_column(row, name):
    """Return the '|'-separated entries of a column, or [] when it is '-'."""
    value = row[name]
    if value == '-':
        return []
    return value.split('|')


def parse_ecoli_dip(session):
    """Import ortholog-derived interactions from 'Ecoli/DIP.txt'.

    For every row, the two interactors are mapped to OrthologEcoli rows
    (UniProt accession first, RefSeq as fallback). Each pair of orthologs
    with the same strain_protein yields an interaction between the
    corresponding proteins: an existing Interaction gets 'cfe' recorded in
    ortholog_derived, a missing one is created with ortholog_derived='fe'.
    InteractionReference rows and a 'DIP' InteractionSource are then added.

    Args:
        session: object exposing query/add/commit (presumably a SQLAlchemy
            Session).

    Commits after every interaction update/creation and once at the end, then
    prints the total number of interactions.
    """
    with open('Ecoli/DIP.txt') as csvfile:
        reader = csv.DictReader(csvfile, delimiter='\t')
        for row in reader:
            refseq_A, uniprotkb_A = _dip_accessions(row['ID interactor A'])
            refseq_B, uniprotkb_B = _dip_accessions(row['ID interactor B'])
            orthologs_A = _dip_orthologs(session, uniprotkb_A, refseq_A)
            orthologs_B = _dip_orthologs(session, uniprotkb_B, refseq_B)

            # Each element: [[protein, ortholog_id], [protein, ortholog_id]].
            interactor_pairs = []
            for ortholog_A in orthologs_A:
                for ortholog_B in orthologs_B:
                    if ortholog_A.strain_protein == ortholog_B.strain_protein:
                        interactor_pairs.append(
                            [[ortholog_A.protein, ortholog_A.ortholog_id],
                             [ortholog_B.protein, ortholog_B.ortholog_id]])
            if not interactor_pairs:
                continue

            # The evidence columns belong to the row, not to a pair.
            detection_field = row['Interaction detection method(s)']
            detections = _dip_column(row, 'Interaction detection method(s)')
            pmids = _dip_column(row, 'Publication Identifier(s)')
            types = _dip_column(row, 'Interaction type(s)')

            # Number of references to create: one per detection method; when
            # that column is '-', fall back to the interaction types and then
            # to the publication ids (read with a stride of two, see below).
            if detections:
                evidence_count = len(detections)
            elif types:
                evidence_count = len(types)
            else:
                evidence_count = (len(pmids) + 1) // 2

            for interactor_pair in interactor_pairs:
                protein_a, protein_b = interactor_pair[0][0], interactor_pair[1][0]
                homogenous = (protein_a == protein_b)
                interaction = session.query(Interaction).filter(
                    Interaction.interactors.contains(protein_a),
                    Interaction.interactors.contains(protein_b),
                    Interaction.homogenous == homogenous).first()
                is_new = interaction is None
                if not is_new:
                    if interaction.ortholog_derived is None:
                        interaction.ortholog_derived = 'cfe'
                    elif 'fe' not in interaction.ortholog_derived:
                        interaction.ortholog_derived += ', cfe'
                    session.commit()
                else:
                    interaction = Interaction(
                        strain=protein_a.strain,
                        interactors=[protein_a, protein_b],
                        type='p-p',
                        # Stored explicitly because the lookup above filters
                        # on this column.
                        homogenous=homogenous,
                        ortholog_derived='fe')
                    session.add(interaction)
                    session.commit()

                # Order the ortholog ids like the interaction's interactors.
                if interaction.interactors[0] == protein_a:
                    interactor_a = interactor_pair[0][1]
                    interactor_b = interactor_pair[1][1]
                else:
                    interactor_b = interactor_pair[0][1]
                    interactor_a = interactor_pair[1][1]

                for num in range(evidence_count):
                    detection, interaction_type, pmid = None, None, None
                    if num < len(detections):
                        detection = detections[num].split('(')[1][:-1]
                    if num < len(types):
                        interaction_type = types[num].split('(')[1][:-1]
                    # Only every second publication id is used; presumably
                    # DIP lists two ids per publication -- TODO confirm.
                    if num * 2 < len(pmids):
                        pmid = pmids[num * 2].split('pubmed:')[1]
                    reference = InteractionReference(
                        interaction_id=interaction.id,
                        detection_method=detection,
                        pmid=pmid,
                        interaction_type=interaction_type,
                        source_db=row['Source database(s)'].split('(')[1][:-1],
                        interactor_a=interactor_a,
                        interactor_b=interactor_b)
                    session.add(reference)

                # Only interactions created here get their experimental flag
                # set, based on the first MI code of the detection column.
                if is_new and 'MI:' in detection_field:
                    if is_experimental_psimi(
                            detection_field.split('MI:')[1][:4]):
                        interaction.is_experimental = 1
                    elif interaction.is_experimental is None:
                        interaction.is_experimental = 0

                source = session.query(InteractionSource).filter(
                    InteractionSource.interaction_id == interaction.id,
                    InteractionSource.data_source == 'DIP').first()
                if source is None:
                    source = InteractionSource(interaction_id=interaction.id,
                                               data_source='DIP')
                    session.add(source)
        session.commit()
        print(session.query(Interaction).count())
Esempio n. 17
0
def parse_mentha(file, strain, taxid, session):
    """Load the interactions of a mentha export into the database.

    Rows whose two taxid columns do not both start with `taxid`, or whose
    interactors cannot both be resolved (Interactor.id first, then
    Protein.uniprotkb), are skipped. For the remaining rows the matching
    Interaction is reused or created (and committed), and an
    InteractionReference plus, when missing, an InteractionXref and a
    'mentha' InteractionSource are added to the session.

    Args:
        file: path of the tab-delimited mentha file (first line = header).
        strain: strain label stored on newly created interactions.
        taxid: value both taxid columns must have before their first '|'.
        session: object exposing query/add/commit.

    Prints the total number of interactions once the file has been read.
    """
    detection_col = 'Interaction detection method(s)'
    with open(file) as csvfile:
        for row in csv.DictReader(csvfile, delimiter='\t'):
            taxid_a = row['Taxid interactor A'].split('|')[0]
            taxid_b = row['Taxid interactor B'].split('|')[0]
            if not (taxid_a == taxid and taxid_b == taxid):
                continue

            accessions = (row['#ID(s) interactor A'].split(':')[1],
                          row['ID(s) interactor B'].split(':')[1])

            matched = []
            for accession in accessions:
                # Try the interactor id first, then the protein's UniProt
                # accession; give up on this accession if neither matches.
                candidates = session.query(Interactor).filter(
                    Interactor.id == accession)
                if candidates.first() is None:
                    candidates = session.query(Protein).filter(
                        Protein.uniprotkb == accession)
                    if candidates.first() is None:
                        continue
                matched.append(candidates.one())
            if len(matched) != 2:
                continue

            same_entity = (matched[0] == matched[1])
            interaction = session.query(Interaction).filter(
                Interaction.interactors.contains(matched[0]),
                Interaction.interactors.contains(matched[1]),
                Interaction.homogenous == same_entity).first()

            if interaction is not None:
                # Existing interactions are only ever upgraded to experimental.
                if is_experimental_psimi(row[detection_col].split('MI:')[1][:4]):
                    interaction.is_experimental = 1
            else:
                pair_type = matched[0].type + '-' + matched[1].type
                interaction = Interaction(strain=strain,
                                          type=pair_type,
                                          homogenous=same_entity,
                                          interactors=matched)
                interaction.is_experimental = (
                    1 if is_experimental_psimi(
                        row[detection_col].split('MI:')[1][:4]) else 0)
                session.add(interaction)
                session.commit()

            reference_fields = {
                'interaction_id': interaction.id,
                'detection_method': row[detection_col].split('(')[1][:-1],
                'pmid': row['Publication Identifier(s)'].split('pubmed:')[1][:8],
                'interaction_type': row['Interaction type(s)'].split('(')[1][:-1],
                'source_db': row['Source database(s)'].split('(')[1][:-1],
                'confidence_score': row['Confidence value(s)'],
            }
            session.add(InteractionReference(**reference_fields))

            # The identifier column is "<data source>:<accession>".
            xref_parts = row['Interaction identifier(s)'].split(':')
            known_xref = session.query(InteractionXref).filter(
                InteractionXref.accession == xref_parts[1],
                InteractionXref.interaction_id == interaction.id).first()
            if known_xref is None:
                session.add(InteractionXref(interaction_id=interaction.id,
                                            accession=xref_parts[1],
                                            data_source=xref_parts[0]))

            known_source = session.query(InteractionSource).filter(
                InteractionSource.interaction_id == interaction.id,
                InteractionSource.data_source == 'mentha').first()
            if known_source is None:
                session.add(InteractionSource(interaction_id=interaction.id,
                                              data_source='mentha'))
        print(session.query(Interaction).count())
Esempio n. 18
0
def _ecoli_mentha_psimi(field):
    """Return the 4 characters after the first 'MI:' in field, or None."""
    if 'MI:' not in field:
        return None
    return field.split('MI:')[1][:4]


def _ecoli_mentha_label(field):
    """Return the segment after the first '(' without its last character.

    The segment ends at the next '(' or at the end of the field. Returns None
    when the field contains no '('.
    """
    if '(' not in field:
        return None
    return field.split('(')[1][:-1]


def parse_ecoli_mentha(session):
    """Import ortholog-derived interactions from 'Ecoli/PSICQUIC/mentha.txt'.

    For every row with both interactor ids present, the ids are mapped to
    OrthologEcoli rows by UniProt accession. Each pair of orthologs with the
    same strain_protein yields an interaction between the corresponding
    proteins: an existing Interaction gets 'cfe' recorded in ortholog_derived,
    a missing one is created with ortholog_derived='fe'. An
    InteractionReference and, when missing, a 'mentha' InteractionSource are
    then added.

    Args:
        session: object exposing query/add/commit (presumably a SQLAlchemy
            Session).

    Commits after every interaction update/creation and once at the end, then
    prints the total number of interactions.
    """
    with open('Ecoli/PSICQUIC/mentha.txt') as csvfile:
        reader = csv.DictReader(csvfile, delimiter='\t')
        for row in reader:
            if (row['#ID(s) interactor A'] == '-'
                    or row['ID(s) interactor B'] == '-'):
                continue
            orthologs_A = session.query(OrthologEcoli).filter(
                OrthologEcoli.ortholog_uniprot ==
                row['#ID(s) interactor A'].split(':')[1]).all()
            orthologs_B = session.query(OrthologEcoli).filter(
                OrthologEcoli.ortholog_uniprot ==
                row['ID(s) interactor B'].split(':')[1]).all()

            # Each element: [[protein, ortholog_id], [protein, ortholog_id]].
            interactor_pairs = []
            for ortholog_A in orthologs_A:
                for ortholog_B in orthologs_B:
                    if ortholog_A.strain_protein == ortholog_B.strain_protein:
                        interactor_pairs.append(
                            [[ortholog_A.protein, ortholog_A.ortholog_id],
                             [ortholog_B.protein, ortholog_B.ortholog_id]])
            if not interactor_pairs:
                continue

            detection_field = row['Interaction detection method(s)']
            type_field = row['Interaction type(s)']
            source_field = row['Source database(s)']
            pmid_field = row['Publication Identifier(s)']
            # Only the first MI code of the column is looked at.
            # TODO: iterate through all detection methods.
            psimi_detection = _ecoli_mentha_psimi(detection_field)
            pmid = None
            if 'pubmed:' in pmid_field:
                pmid = pmid_field.split('pubmed:')[1]

            for interactor_pair in interactor_pairs:
                protein_a, protein_b = interactor_pair[0][0], interactor_pair[1][0]
                homogenous = (protein_a == protein_b)
                interaction = session.query(Interaction).filter(
                    Interaction.interactors.contains(protein_a),
                    Interaction.interactors.contains(protein_b),
                    Interaction.homogenous == homogenous).first()
                if interaction is not None:
                    if interaction.ortholog_derived is None:
                        interaction.ortholog_derived = 'cfe'
                    elif 'fe' not in interaction.ortholog_derived:
                        interaction.ortholog_derived += ', cfe'
                    session.commit()
                else:
                    interaction = Interaction(
                        strain=protein_a.strain,
                        interactors=[protein_a, protein_b],
                        # Both sides contribute their .type string.
                        type=protein_a.type + '-' + protein_b.type,
                        # Stored explicitly because the lookup above filters
                        # on this column.
                        homogenous=homogenous,
                        ortholog_derived='fe')
                    # TODO: ask about marking ecoli ortholog interactions as
                    # experimental.
                    if (psimi_detection is not None
                            and is_experimental_psimi(psimi_detection)):
                        interaction.is_experimental = 1
                    session.add(interaction)
                    session.commit()

                # NOTE(review): the ortholog ids (interactor_pair[i][1]) are
                # not stored on the reference here, unlike in the sibling
                # E. coli parsers -- confirm whether that is intended.
                reference = InteractionReference(
                    interaction_id=interaction.id,
                    psimi_detection=psimi_detection,
                    detection_method=_ecoli_mentha_label(detection_field),
                    pmid=pmid,
                    psimi_type=_ecoli_mentha_psimi(type_field),
                    interaction_type=_ecoli_mentha_label(type_field),
                    psimi_db=_ecoli_mentha_psimi(source_field),
                    source_db=_ecoli_mentha_label(source_field),
                    confidence_score=row['Confidence value(s)'])
                session.add(reference)

                source = session.query(InteractionSource).filter(
                    InteractionSource.interaction_id == interaction.id,
                    InteractionSource.data_source == 'mentha').first()
                if source is None:
                    source = InteractionSource(interaction_id=interaction.id,
                                               data_source='mentha')
                    session.add(source)

        session.commit()
        print(session.query(Interaction).count())
Esempio n. 19
0
def _ecoli_irefindex_psimi(field):
    """Return the 4 characters after the first 'MI:' in field, or None."""
    if 'MI:' not in field:
        return None
    return field.split('MI:')[1][:4]


def _ecoli_irefindex_orthologs(session, id_field):
    """Return the OrthologEcoli rows matching one '<prefix>:<accession>' id.

    'uniprotkb' ids are matched on ortholog_uniprot and 'refseq' ids on
    ortholog_refseq; any other prefix yields an empty list.
    """
    fields = id_field.split(':')
    if fields[0] == 'uniprotkb':
        return session.query(OrthologEcoli).filter(
            OrthologEcoli.ortholog_uniprot == fields[1]).all()
    if fields[0] == 'refseq':
        return session.query(OrthologEcoli).filter(
            OrthologEcoli.ortholog_refseq == fields[1]).all()
    return []


def parse_ecoli_irefindex(session):
    """Import ortholog-derived interactions from 'Ecoli/PSICQUIC/iRefIndex.txt'.

    For every row with both interactor ids present, the ids are mapped to
    OrthologEcoli rows. Each pair of orthologs with the same strain_protein
    yields an interaction between the corresponding proteins: an existing
    Interaction gets 'cfe' recorded in ortholog_derived, a missing one is
    created with ortholog_derived='fe'. One InteractionReference per
    (publication id, detection method) combination and, when missing, an
    'iRefIndex' InteractionSource are then added.

    Args:
        session: object exposing query/add/commit (presumably a SQLAlchemy
            Session).

    Commits after every interaction update/creation and once at the end, then
    prints the total number of interactions.
    """
    with open('Ecoli/PSICQUIC/iRefIndex.txt') as csvfile:
        reader = csv.DictReader(csvfile, delimiter='\t')
        for row in reader:
            if (row['#ID(s) interactor A'] == '-'
                    or row['ID(s) interactor B'] == '-'):
                continue

            orthologs_A = _ecoli_irefindex_orthologs(
                session, row['#ID(s) interactor A'])
            if len(orthologs_A) == 0:
                continue
            orthologs_B = _ecoli_irefindex_orthologs(
                session, row['ID(s) interactor B'])

            # Each element: [[protein, ortholog_id], [protein, ortholog_id]].
            interactor_pairs = []
            for ortholog_A in orthologs_A:
                for ortholog_B in orthologs_B:
                    if ortholog_A.strain_protein == ortholog_B.strain_protein:
                        interactor_pairs.append(
                            [[ortholog_A.protein, ortholog_A.ortholog_id],
                             [ortholog_B.protein, ortholog_B.ortholog_id]])
            if not interactor_pairs:
                continue

            # The evidence columns belong to the row, so parse them once.
            detection_field = row['Interaction detection method(s)']
            type_field = row['Interaction type(s)']
            source_field = row['Source database(s)']

            author, date, psimi_type, interaction_type = None, None, None, None
            # [None] placeholders keep one reference per row even when the
            # corresponding column is '-'.
            psimi_detections, detections, pmids = [None], [None], [None]
            if row['Publication 1st author(s)'] != '-':
                author = row['Publication 1st author(s)'].split(' ')[0]
                date = row['Publication 1st author(s)'].split('(')[1][:-1]
            if type_field != '-':
                interaction_type = type_field.split('(')[1][:-1]
                psimi_type = _ecoli_irefindex_psimi(type_field)
            if row['Publication Identifier(s)'] != '-':
                pmids = [entry.split(':')[1]
                         for entry in row['Publication Identifier(s)'].split('|')]
            if detection_field != '-':
                # Detection methods come from the detection-method column.
                detections, psimi_detections = [], []
                for method in detection_field.split('|'):
                    detections.append(method.split('(')[1][:-1])
                    psimi_detections.append(_ecoli_irefindex_psimi(method))

            psimi_db = _ecoli_irefindex_psimi(source_field)
            source_db = source_field.split('(')[1][:-1]

            for interactor_pair in interactor_pairs:
                protein_a, protein_b = interactor_pair[0][0], interactor_pair[1][0]
                homogenous = (protein_a == protein_b)
                interaction = session.query(Interaction).filter(
                    Interaction.interactors.contains(protein_a),
                    Interaction.interactors.contains(protein_b),
                    Interaction.homogenous == homogenous).first()
                if interaction is not None:
                    if interaction.ortholog_derived is None:
                        interaction.ortholog_derived = 'cfe'
                    elif 'fe' not in interaction.ortholog_derived:
                        interaction.ortholog_derived += ', cfe'
                    session.commit()
                else:
                    interaction = Interaction(
                        strain=protein_a.strain,
                        interactors=[protein_a, protein_b],
                        # Both sides contribute their .type string.
                        type=protein_a.type + '-' + protein_b.type,
                        # Stored explicitly because the lookup above filters
                        # on this column.
                        homogenous=homogenous,
                        ortholog_derived='fe')
                    # Only the first MI code of the column is looked at.
                    # TODO: iterate through all detection methods.
                    first_code = _ecoli_irefindex_psimi(detection_field)
                    if (first_code is not None
                            and is_experimental_psimi(first_code)):
                        interaction.is_experimental = 1
                    session.add(interaction)
                    session.commit()

                # Order the ortholog ids like the interaction's interactors.
                if interaction.interactors[0] == protein_a:
                    interactor_a = interactor_pair[0][1]
                    interactor_b = interactor_pair[1][1]
                else:
                    interactor_b = interactor_pair[0][1]
                    interactor_a = interactor_pair[1][1]

                for pmid in pmids:
                    for detection, psimi_detection in zip(detections,
                                                          psimi_detections):
                        # NOTE(review): the sibling parsers use the keyword
                        # names pub_date / confidence_score; the date= and
                        # confidence= names are kept as found -- verify
                        # against the InteractionReference model.
                        reference = InteractionReference(
                            interaction_id=interaction.id,
                            psimi_detection=psimi_detection,
                            detection_method=detection,
                            author_ln=author,
                            date=date,
                            pmid=pmid,
                            psimi_type=psimi_type,
                            interaction_type=interaction_type,
                            psimi_db=psimi_db,
                            source_db=source_db,
                            # No confidence value is read from the row.
                            confidence=None,
                            interactor_a=interactor_a,
                            interactor_b=interactor_b)
                        session.add(reference)

                source = session.query(InteractionSource).filter(
                    InteractionSource.interaction_id == interaction.id,
                    InteractionSource.data_source == 'iRefIndex').first()
                if source is None:
                    source = InteractionSource(interaction_id=interaction.id,
                                               data_source='iRefIndex')
                    session.add(source)
        session.commit()
        print(session.query(Interaction).count())