def parse_ecoli_mentha(session):
    """Import ortholog-derived interactions from the E. coli mentha export.

    Reads ``Ecoli/PSICQUIC/mentha.txt`` (tab-separated, first row is the
    header). For every row both interactor accessions are looked up in
    ``OrthologEcoli.ortholog_uniprot``; each pair of orthologs that share the
    same ``strain_protein`` becomes a candidate interaction between the
    orthologs' ``protein`` objects.

    For each candidate an existing ``Interaction`` gets ``ortholog_derived``
    tagged with ``'cfe'``; otherwise a new one is created with
    ``ortholog_derived='fe'``. An ``InteractionReference`` is added for every
    candidate and an ``InteractionSource`` with ``data_source='mentha'`` is
    added when missing. The total interaction count is printed at the end.

    Args:
        session: database session used for all queries, adds and commits.
    """
    with open('Ecoli/PSICQUIC/mentha.txt') as csvfile:
        for row in csv.DictReader(csvfile, delimiter='\t'):
            id_a = row['#ID(s) interactor A']
            id_b = row['ID(s) interactor B']
            if id_a == '-' or id_b == '-':
                continue

            orthologs_a = session.query(OrthologEcoli).filter(
                OrthologEcoli.ortholog_uniprot == id_a.split(':')[1]).all()
            orthologs_b = session.query(OrthologEcoli).filter(
                OrthologEcoli.ortholog_uniprot == id_b.split(':')[1]).all()
            protein_pairs = [
                (ortholog_a.protein, ortholog_b.protein)
                for ortholog_a in orthologs_a
                for ortholog_b in orthologs_b
                if ortholog_a.strain_protein == ortholog_b.strain_protein
            ]
            if not protein_pairs:
                continue

            # The detection column may lack a PSI-MI term; parse it once per
            # row and fall back to None instead of raising IndexError.
            detection = row['Interaction detection method(s)']
            psimi_detection = None
            if 'MI:' in detection:
                psimi_detection = detection.split('MI:')[1][:4]
            detection_method = None
            if '(' in detection:
                detection_method = detection.split('(')[1][:-1]

            for protein_a, protein_b in protein_pairs:
                homogenous = (protein_a == protein_b)
                interaction = session.query(Interaction).filter(
                    Interaction.interactors.contains(protein_a),
                    Interaction.interactors.contains(protein_b),
                    Interaction.homogenous == homogenous).first()

                if interaction is not None:
                    if interaction.ortholog_derived is None:
                        interaction.ortholog_derived = 'cfe'
                    elif 'fe' not in interaction.ortholog_derived:
                        interaction.ortholog_derived += ', cfe'
                    session.commit()
                else:
                    interaction = Interaction(
                        strain=protein_a.strain,
                        interactors=[protein_a, protein_b],
                        # Both sides contribute their ``type`` string.
                        type=protein_a.type + '-' + protein_b.type,
                        # Stored so the lookup above can find this row again.
                        homogenous=homogenous,
                        ortholog_derived='fe')
                    # TODO: ask about marking ecoli ortholog interactions as
                    # experimental.
                    # TODO: iterate through all methods, not only the first.
                    if (psimi_detection is not None
                            and is_experimental_psimi(psimi_detection)):
                        interaction.is_experimental = 1
                    session.add(interaction)
                    session.commit()

                # NOTE(review): the previous version computed the ortholog
                # ids of interactor A/B here but never stored them;
                # parse_ecoli_uniprot passes them as interactor_a_id /
                # interactor_b_id. Confirm whether this parser should too.
                reference = InteractionReference(
                    interaction_id=interaction.id,
                    psimi_detection=psimi_detection,
                    detection_method=detection_method,
                    # Keep only the first identifier after 'pubmed:'.
                    pmid=row['Publication Identifier(s)'].split(
                        'pubmed:')[1].split('|')[0],
                    psimi_type=row['Interaction type(s)'].split('MI:')[1][:4],
                    interaction_type=row['Interaction type(s)'].split(
                        '(')[1][:-1],
                    psimi_db=row['Source database(s)'].split('MI:')[1][:4],
                    source_db=row['Source database(s)'].split('(')[1][:-1],
                    confidence_score=row['Confidence value(s)'])
                session.add(reference)

                source = session.query(InteractionSource).filter(
                    InteractionSource.interaction_id == interaction.id,
                    InteractionSource.data_source == 'mentha').first()
                if source is None:
                    session.add(InteractionSource(
                        interaction_id=interaction.id, data_source='mentha'))

    session.commit()
    print(session.query(Interaction).count())
def parse_ecoli_uniprot(session):
    """Import ortholog-derived interactions from the E. coli UniProt export.

    Reads ``Ecoli/PSICQUIC/UniProt.txt`` (tab-separated, first row is the
    header). Interactor B must be a ``uniprotkb`` accession with at least one
    ``OrthologEcoli`` match, otherwise the row is skipped. Interactor A is
    either a ``uniprotkb`` accession (resolved through ``OrthologEcoli``) or
    a ``chebi`` id (resolved to a ``Metabolite``, which is created and
    committed when it does not exist yet).

    Protein pairs are kept when both orthologs share ``strain_protein``; a
    metabolite is paired with every ortholog of interactor B. For each pair
    an existing ``Interaction`` gets ``ortholog_derived`` tagged with
    ``'cfe'``, otherwise a new one is created with ``ortholog_derived='fe'``.
    An ``InteractionReference`` is added per pair and an
    ``InteractionSource`` with ``data_source='UniProt'`` when missing.

    Args:
        session: database session used for all queries, adds and commits.
    """
    with open('Ecoli/PSICQUIC/UniProt.txt') as csvfile:
        for row in csv.DictReader(csvfile, delimiter='\t'):
            # Split only on the first ':' of the first '|' entry so that ids
            # which themselves contain ':' (e.g. chebi:"CHEBI:n") survive.
            db_b, _, acc_b = row['ID(s) interactor B'].split(
                '|')[0].partition(':')
            orthologs_b = []
            if db_b == 'uniprotkb':
                orthologs_b = session.query(OrthologEcoli).filter(
                    OrthologEcoli.ortholog_uniprot == acc_b).all()
            if not orthologs_b:
                continue

            db_a, _, acc_a = row['#ID(s) interactor A'].split(
                '|')[0].partition(':')
            acc_a = acc_a.strip('"')
            orthologs_a = []
            metabolite = None
            if db_a == 'uniprotkb':
                orthologs_a = session.query(OrthologEcoli).filter(
                    OrthologEcoli.ortholog_uniprot == acc_a).all()
            elif db_a == 'chebi':
                metabolite = session.query(Metabolite).filter(
                    Metabolite.chebi == acc_a).first()
                if metabolite is None:
                    metabolite = Metabolite(id=acc_a, chebi=acc_a)
                    session.add(metabolite)
                    session.commit()

            # Each entry is ((interactor, id), (interactor, id)).
            pairs = [
                ((ortholog_a.protein, ortholog_a.ortholog_id),
                 (ortholog_b.protein, ortholog_b.ortholog_id))
                for ortholog_a in orthologs_a
                for ortholog_b in orthologs_b
                if ortholog_a.strain_protein == ortholog_b.strain_protein
            ]
            if metabolite is not None:
                pairs.extend(
                    ((metabolite, metabolite.id),
                     (ortholog_b.protein, ortholog_b.ortholog_id))
                    for ortholog_b in orthologs_b)
            if not pairs:
                continue

            # Guard the PSI-MI lookup: the column may not carry an MI term.
            detection = row['Interaction detection method(s)']
            psimi_detection = None
            if 'MI:' in detection:
                psimi_detection = detection.split('MI:')[1][:4]
            detection_method = None
            if '(' in detection:
                detection_method = detection.split('(')[1][:-1]

            for (first, first_id), (second, second_id) in pairs:
                homogenous = (first == second)
                interaction = session.query(Interaction).filter(
                    Interaction.interactors.contains(first),
                    Interaction.interactors.contains(second),
                    Interaction.homogenous == homogenous).first()

                if interaction is not None:
                    if interaction.ortholog_derived is None:
                        interaction.ortholog_derived = 'cfe'
                    elif 'fe' not in interaction.ortholog_derived:
                        interaction.ortholog_derived += ', cfe'
                    session.commit()
                else:
                    # A metabolite is only ever the first interactor and is
                    # created above without a strain, so read the strain
                    # from the protein side in that case.
                    strain_owner = second if first is metabolite else first
                    interaction = Interaction(
                        strain=strain_owner.strain,
                        interactors=[first, second],
                        type=first.type + '-' + second.type,
                        # Stored so the lookup above can find this row again.
                        homogenous=homogenous,
                        ortholog_derived='fe')
                    if (psimi_detection is not None
                            and is_experimental_psimi(psimi_detection)):
                        interaction.is_experimental = 1
                    session.add(interaction)
                    session.commit()

                # Report the ids in the order the interaction stores its
                # interactors.
                if interaction.interactors[0] == first:
                    interactor_a, interactor_b = first_id, second_id
                else:
                    interactor_a, interactor_b = second_id, first_id

                reference = InteractionReference(
                    interaction_id=interaction.id,
                    psimi_detection=psimi_detection,
                    detection_method=detection_method,
                    author_ln=row['Publication 1st author(s)'].split(' ')[0],
                    pub_date=row['Publication 1st author(s)'].split(
                        '(')[1][:-1],
                    pmid=row['Publication Identifier(s)'].split(
                        'pubmed:')[1].split('|')[0],
                    psimi_type=row['Interaction type(s)'].split('MI:')[1][:4],
                    interaction_type=row['Interaction type(s)'].split(
                        '(')[1][:-1],
                    psimi_db=row['Source database(s)'].split('MI:')[1][:4],
                    source_db=row['Source database(s)'].split('(')[1][:-1],
                    confidence_score=row['Confidence value(s)'],
                    interactor_a_id=interactor_a,
                    interactor_b_id=interactor_b)
                session.add(reference)

                source = session.query(InteractionSource).filter(
                    InteractionSource.interaction_id == interaction.id,
                    InteractionSource.data_source == 'UniProt').first()
                if source is None:
                    session.add(InteractionSource(
                        interaction_id=interaction.id, data_source='UniProt'))

    session.commit()
    print(session.query(Interaction).count())
def parse_ecoli_dip(session):
    """Import ortholog-derived interactions from the E. coli DIP export.

    Reads ``Ecoli/DIP.txt`` (tab-separated, first row is the header). Each
    interactor column is a ``|``-separated list of ``db:accession`` entries;
    orthologs are looked up by the ``uniprotkb`` accession first and by the
    ``refseq`` accession when that yields nothing. Pairs of orthologs that
    share ``strain_protein`` become candidate interactions.

    For each candidate an existing ``Interaction`` gets ``ortholog_derived``
    tagged with ``'cfe'``, otherwise a new ``'p-p'`` interaction is created
    with ``ortholog_derived='fe'``. One ``InteractionReference`` is added per
    detection method listed in the row, and an ``InteractionSource`` with
    ``data_source='DIP'`` is added when missing.

    Args:
        session: database session used for all queries, adds and commits.
    """

    def find_orthologs(id_field):
        """Resolve one '|'-separated id column to OrthologEcoli rows."""
        refseq, uniprotkb = '', ''
        for entry in id_field.split('|'):
            fields = entry.split(':')
            if fields[0] == 'refseq':
                refseq = fields[1]
            elif fields[0] == 'uniprotkb':
                uniprotkb = fields[1]
        matches = []
        if uniprotkb:
            matches = session.query(OrthologEcoli).filter(
                OrthologEcoli.ortholog_uniprot == uniprotkb).all()
        if not matches and refseq:
            matches = session.query(OrthologEcoli).filter(
                OrthologEcoli.ortholog_refseq == refseq).all()
        return matches

    def label(entry):
        """Return the parenthesised label of an entry, or None."""
        return entry.split('(')[1][:-1] if '(' in entry else None

    def split_column(value):
        """Split a '|'-separated column; '-' means no entries."""
        return [] if value == '-' else value.split('|')

    with open('Ecoli/DIP.txt') as csvfile:
        for row in csv.DictReader(csvfile, delimiter='\t'):
            orthologs_a = find_orthologs(row['ID interactor A'])
            orthologs_b = find_orthologs(row['ID interactor B'])
            pairs = [
                ((ortholog_a.protein, ortholog_a.ortholog_id),
                 (ortholog_b.protein, ortholog_b.ortholog_id))
                for ortholog_a in orthologs_a
                for ortholog_b in orthologs_b
                if ortholog_a.strain_protein == ortholog_b.strain_protein
            ]
            if not pairs:
                continue

            detection_field = row['Interaction detection method(s)']
            detections = split_column(detection_field)
            pmids = split_column(row['Publication Identifier(s)'])
            types = split_column(row['Interaction type(s)'])
            psimi_detection = None
            if 'MI:' in detection_field:
                psimi_detection = detection_field.split('MI:')[1][:4]

            # One reference per detection method, as before. When no method
            # is listed, fall back to the other columns instead of raising
            # IndexError on an empty list.
            record_count = len(detections) or max(
                len(types), (len(pmids) + 1) // 2)

            for (first, first_id), (second, second_id) in pairs:
                homogenous = (first == second)
                interaction = session.query(Interaction).filter(
                    Interaction.interactors.contains(first),
                    Interaction.interactors.contains(second),
                    Interaction.homogenous == homogenous).first()

                is_new = interaction is None
                if is_new:
                    interaction = Interaction(
                        strain=first.strain,
                        interactors=[first, second],
                        type='p-p',
                        # Stored so the lookup above can find this row again.
                        homogenous=homogenous,
                        ortholog_derived='fe')
                    session.add(interaction)
                    session.commit()
                else:
                    if interaction.ortholog_derived is None:
                        interaction.ortholog_derived = 'cfe'
                    elif 'fe' not in interaction.ortholog_derived:
                        interaction.ortholog_derived += ', cfe'
                    session.commit()

                # Report the ids in the order the interaction stores its
                # interactors.
                if interaction.interactors[0] == first:
                    interactor_a, interactor_b = first_id, second_id
                else:
                    interactor_a, interactor_b = second_id, first_id

                for num in range(record_count):
                    detection = None
                    if num < len(detections):
                        detection = label(detections[num])
                    interaction_type = None
                    if num < len(types):
                        interaction_type = label(types[num])
                    # Publication identifiers are read at every second
                    # position, as before ("there are more than one pmid
                    # sometimes"); presumably DIP lists two identifiers per
                    # record - TODO confirm against the data file.
                    pmid = None
                    if num * 2 < len(pmids) and 'pubmed:' in pmids[num * 2]:
                        pmid = pmids[num * 2].split('pubmed:')[1]
                    # NOTE(review): this parser passes interactor_a /
                    # interactor_b while parse_ecoli_uniprot passes
                    # interactor_a_id / interactor_b_id; confirm the names
                    # against InteractionReference.
                    reference = InteractionReference(
                        interaction_id=interaction.id,
                        detection_method=detection,
                        pmid=pmid,
                        # Previously parsed but never stored.
                        interaction_type=interaction_type,
                        source_db=row['Source database(s)'].split(
                            '(')[1][:-1],
                        interactor_a=interactor_a,
                        interactor_b=interactor_b)
                    session.add(reference)

                # Leave the flag untouched when the row has no PSI-MI
                # detection term to classify.
                if psimi_detection is not None:
                    experimental = is_experimental_psimi(psimi_detection)
                    if is_new and interaction.is_experimental is None:
                        interaction.is_experimental = 1 if experimental else 0
                    elif experimental:
                        interaction.is_experimental = 1

                source = session.query(InteractionSource).filter(
                    InteractionSource.interaction_id == interaction.id,
                    InteractionSource.data_source == 'DIP').first()
                if source is None:
                    session.add(InteractionSource(
                        interaction_id=interaction.id, data_source='DIP'))

    session.commit()
    print(session.query(Interaction).count())
def parse_mpidb(session):
    """Import PAO1 interactions from the MPIDB PSICQUIC export.

    Reads ``PAO1/PSICQUIC/MPIDB.txt`` (tab-separated, first row is the
    header). Rows are kept only when the first taxid entry of both
    interactors is ``taxid:208964(pseae)`` and both accessions resolve to an
    ``Interactor`` (by ``id``) or a ``Protein`` (by ``uniprotkb``).

    A missing ``Interaction`` is created with ``strain='PAO1'``; an existing
    one is upgraded to ``is_experimental = 1`` when the row's detection
    method is experimental. Every kept row adds an ``InteractionReference``,
    one ``InteractionXref`` per new interaction identifier and, when missing,
    an ``InteractionSource`` with ``data_source='MPIDB'``.

    Args:
        session: database session used for all queries, adds and commits.
    """
    pao1_taxid = 'taxid:208964(pseae)'

    def find_interactor(accession):
        """Return the Interactor/Protein for an accession, or None.

        Uses the first match directly instead of re-running the query with
        ``one()``, which halves the lookups and no longer raises when an
        accession matches more than one row.
        """
        match = session.query(Interactor).filter(
            Interactor.id == accession).first()
        if match is None:
            match = session.query(Protein).filter(
                Protein.uniprotkb == accession).first()
        return match

    with open('PAO1/PSICQUIC/MPIDB.txt') as csvfile:
        for row in csv.DictReader(csvfile, delimiter='\t'):
            if (row['Taxid interactor A'].split('|')[0] != pao1_taxid
                    or row['Taxid interactor B'].split('|')[0] != pao1_taxid):
                continue

            interactors = [
                find_interactor(row['#ID(s) interactor A'].split(':')[1]),
                find_interactor(row['ID(s) interactor B'].split(':')[1]),
            ]
            if any(interactor is None for interactor in interactors):
                continue

            detection = row['Interaction detection method(s)']
            experimental = is_experimental_psimi(
                detection.split('MI:')[1][:4])

            homogenous = (interactors[0] == interactors[1])
            interaction = session.query(Interaction).filter(
                Interaction.interactors.contains(interactors[0]),
                Interaction.interactors.contains(interactors[1]),
                Interaction.homogenous == homogenous).first()
            if interaction is None:
                interaction = Interaction(
                    strain='PAO1',
                    type=interactors[0].type + '-' + interactors[1].type,
                    homogenous=homogenous,
                    interactors=interactors)
                interaction.is_experimental = 1 if experimental else 0
                session.add(interaction)
                session.commit()
            elif experimental:
                interaction.is_experimental = 1

            source_field = row['Source database(s)']
            reference = InteractionReference(
                interaction_id=interaction.id,
                detection_method=detection.split('(')[1][:-1],
                author_ln=row['Publication 1st author(s)'].split(' ')[0],
                pub_date=row['Publication 1st author(s)'].split('(')[1][:-1],
                # Cut at the next '|' first so ids shorter than 8 characters
                # do not drag the separator along.
                pmid=row['Publication Identifier(s)'].split(
                    'pubmed:')[1].split('|')[0][:8],
                # NOTE(review): was ``confidence=``; renamed to match the
                # ``confidence_score=`` keyword used by parse_mentha and
                # parse_irefindex - confirm against InteractionReference.
                confidence_score=row['Confidence value(s)'],
                interaction_type=row['Interaction type(s)'].split(
                    '(')[1][:-1],
                # Store the parenthesised label like the other parsers do;
                # keep the raw value when there is no label.
                source_db=(source_field.split('(')[1][:-1]
                           if '(' in source_field else source_field))
            session.add(reference)

            for entry in row['Interaction identifier(s)'].split('|'):
                xref_field = entry.split(':')
                if len(xref_field) < 2:
                    # e.g. '-' : nothing to cross-reference.
                    continue
                existing = session.query(InteractionXref).filter(
                    InteractionXref.accession == xref_field[1],
                    InteractionXref.interaction_id == interaction.id).first()
                if existing is None:
                    session.add(InteractionXref(
                        interaction_id=interaction.id,
                        accession=xref_field[1],
                        data_source=xref_field[0]))

            source = session.query(InteractionSource).filter(
                InteractionSource.interaction_id == interaction.id,
                InteractionSource.data_source == 'MPIDB').first()
            if source is None:
                session.add(InteractionSource(
                    interaction_id=interaction.id, data_source='MPIDB'))

    session.commit()
    print(session.query(Interaction).count())
def parse_mentha(file, strain, taxid, session):
    """Import interactions for one strain from a mentha PSICQUIC export.

    Rows are kept only when the first taxid entry of both interactors equals
    ``taxid`` and both accessions resolve to an ``Interactor`` (by ``id``) or
    a ``Protein`` (by ``uniprotkb``). A missing ``Interaction`` is created
    for ``strain``; an existing one is upgraded to ``is_experimental = 1``
    when the row's detection method is experimental. Every kept row adds an
    ``InteractionReference``, an ``InteractionXref`` per new interaction
    identifier and, when missing, an ``InteractionSource`` with
    ``data_source='mentha'``.

    Args:
        file: path of the tab-separated export (first row is the header).
        strain: strain name stored on newly created interactions.
        taxid: value the first ``Taxid interactor`` entry must equal.
        session: database session used for all queries, adds and commits.
    """

    def find_interactor(accession):
        """Return the Interactor/Protein for an accession, or None.

        Uses the first match directly instead of re-running the query with
        ``one()``, which halves the lookups and no longer raises when an
        accession matches more than one row.
        """
        match = session.query(Interactor).filter(
            Interactor.id == accession).first()
        if match is None:
            match = session.query(Protein).filter(
                Protein.uniprotkb == accession).first()
        return match

    with open(file) as csvfile:
        for row in csv.DictReader(csvfile, delimiter='\t'):
            if (row['Taxid interactor A'].split('|')[0] != taxid
                    or row['Taxid interactor B'].split('|')[0] != taxid):
                continue

            interactors = [
                find_interactor(row['#ID(s) interactor A'].split(':')[1]),
                find_interactor(row['ID(s) interactor B'].split(':')[1]),
            ]
            if any(interactor is None for interactor in interactors):
                continue

            detection = row['Interaction detection method(s)']
            experimental = is_experimental_psimi(
                detection.split('MI:')[1][:4])

            homogenous = (interactors[0] == interactors[1])
            interaction = session.query(Interaction).filter(
                Interaction.interactors.contains(interactors[0]),
                Interaction.interactors.contains(interactors[1]),
                Interaction.homogenous == homogenous).first()
            if interaction is None:
                interaction = Interaction(
                    strain=strain,
                    type=interactors[0].type + '-' + interactors[1].type,
                    homogenous=homogenous,
                    interactors=interactors)
                interaction.is_experimental = 1 if experimental else 0
                session.add(interaction)
                session.commit()
            elif experimental:
                interaction.is_experimental = 1

            reference = InteractionReference(
                interaction_id=interaction.id,
                detection_method=detection.split('(')[1][:-1],
                # Cut at the next '|' first so ids shorter than 8 characters
                # do not drag the separator along.
                pmid=row['Publication Identifier(s)'].split(
                    'pubmed:')[1].split('|')[0][:8],
                interaction_type=row['Interaction type(s)'].split(
                    '(')[1][:-1],
                source_db=row['Source database(s)'].split('(')[1][:-1],
                confidence_score=row['Confidence value(s)'])
            session.add(reference)

            # Handle every '|'-separated identifier, as parse_mpidb and
            # parse_irefindex do, instead of splitting the whole column.
            for entry in row['Interaction identifier(s)'].split('|'):
                xref_field = entry.split(':')
                if len(xref_field) < 2:
                    # e.g. '-' : nothing to cross-reference.
                    continue
                existing = session.query(InteractionXref).filter(
                    InteractionXref.accession == xref_field[1],
                    InteractionXref.interaction_id == interaction.id).first()
                if existing is None:
                    session.add(InteractionXref(
                        interaction_id=interaction.id,
                        accession=xref_field[1],
                        data_source=xref_field[0]))

            source = session.query(InteractionSource).filter(
                InteractionSource.interaction_id == interaction.id,
                InteractionSource.data_source == 'mentha').first()
            if source is None:
                session.add(InteractionSource(
                    interaction_id=interaction.id, data_source='mentha'))

    # Previously missing: without it everything added after the last newly
    # created interaction stayed pending in the session.
    session.commit()
    print(session.query(Interaction).count())
def parse_irefindex(file, strain, taxid, session):
    """Import interactions for one strain from an iRefIndex export.

    Rows are kept only when the first taxid entry of both interactors equals
    ``taxid`` and both ids resolve: ``uniprotkb`` ids to an ``Interactor``
    (by ``id``) or ``Protein`` (by ``uniprotkb``), ``refseq`` ids to a
    ``Protein`` (by ``ncbi_acc``). A missing ``Interaction`` is created for
    ``strain``. One ``InteractionReference`` is added per combination of
    pubmed id and detection method, plus an ``InteractionXref`` per new
    interaction identifier and, when missing, an ``InteractionSource`` with
    ``data_source='iRefIndex'``.

    Args:
        file: path of the tab-separated export (first row is the header).
        strain: strain name stored on newly created interactions.
        taxid: value the first ``Taxid interactor`` entry must equal.
        session: database session used for all queries, adds and commits.
    """

    def find_interactor(id_field):
        """Resolve a '<db>:<accession>' id to a stored interactor or None."""
        fields = id_field.split(':')
        if fields[0] == 'uniprotkb':
            match = session.query(Interactor).filter(
                Interactor.id == fields[1]).first()
            if match is None:
                match = session.query(Protein).filter(
                    Protein.uniprotkb == fields[1]).first()
            return match
        if fields[0] == 'refseq':
            return session.query(Protein).filter(
                Protein.ncbi_acc == fields[1]).first()
        return None

    with open(file) as csvfile:
        for row in csv.DictReader(csvfile, delimiter='\t'):
            if (row['Taxid interactor A'].split('|')[0] != taxid
                    or row['Taxid interactor B'].split('|')[0] != taxid):
                continue

            interactors = [
                find_interactor(row['#ID(s) interactor A']),
                find_interactor(row['ID(s) interactor B']),
            ]
            if any(interactor is None for interactor in interactors):
                continue

            detection_field = row['Interaction detection method(s)']
            has_detection = detection_field != '-'
            # Only classify when a PSI-MI term is present; the previous code
            # indexed the split result before checking for '-'.
            experimental = ('MI:' in detection_field
                            and is_experimental_psimi(
                                detection_field.split('MI:')[1][:4]))

            homogenous = (interactors[0] == interactors[1])
            interaction = session.query(Interaction).filter(
                Interaction.interactors.contains(interactors[0]),
                Interaction.interactors.contains(interactors[1]),
                Interaction.homogenous == homogenous).first()
            if interaction is None:
                interaction = Interaction(
                    strain=strain,
                    type=interactors[0].type + '-' + interactors[1].type,
                    homogenous=homogenous,
                    interactors=interactors)
                if has_detection:
                    interaction.is_experimental = 1 if experimental else 0
                # Persist before use so ``interaction.id`` is populated for
                # the reference, xref and source rows below.
                session.add(interaction)
                session.commit()
            elif experimental:
                interaction.is_experimental = 1
            elif not has_detection and interaction.is_experimental == 0:
                # Kept from the original logic: a row without a detection
                # method resets a 0 flag to None.
                interaction.is_experimental = None

            detections = [None]
            if has_detection:
                detections = [
                    method.split('(')[1][:-1] if '(' in method else None
                    for method in detection_field.split('|')
                ]

            type_field = row['Interaction type(s)']
            interaction_type = None
            if type_field != '-' and '(' in type_field:
                interaction_type = type_field.split('(')[1][:-1]

            # The author column is read as '<author>-<year>-...'.
            author, date = None, None
            author_field = row['Publication 1st author(s)']
            if author_field != '-':
                author_parts = author_field.split('-')
                author = author_parts[0][:1].upper() + author_parts[0][1:]
                if len(author_parts) > 1:
                    date = author_parts[1]

            pmids = [
                entry.split('pubmed:')[1][:8]
                for entry in row['Publication Identifier(s)'].split('|')
                if 'pubmed:' in entry
            ] or [None]

            for pmid in pmids:
                for detection in detections:
                    session.add(InteractionReference(
                        interaction_id=interaction.id,
                        detection_method=detection,
                        author_ln=author,
                        pub_date=date,
                        pmid=pmid,
                        interaction_type=interaction_type,
                        source_db=row['Source database(s)'].split(
                            '(')[1][:-1],
                        confidence_score=row['Confidence value(s)']))

            for entry in row['Interaction identifier(s)'].split('|'):
                xref_field = entry.split(':')
                if len(xref_field) < 2:
                    # e.g. '-' : nothing to cross-reference.
                    continue
                existing = session.query(InteractionXref).filter(
                    InteractionXref.accession == xref_field[1],
                    InteractionXref.interaction_id == interaction.id).first()
                if existing is None:
                    session.add(InteractionXref(
                        interaction_id=interaction.id,
                        accession=xref_field[1],
                        data_source=xref_field[0]))

            source = session.query(InteractionSource).filter(
                InteractionSource.interaction_id == interaction.id,
                InteractionSource.data_source == 'iRefIndex').first()
            if source is None:
                session.add(InteractionSource(
                    interaction_id=interaction.id, data_source='iRefIndex'))

    # Previously missing: nothing added by this parser was ever committed.
    session.commit()
    print(session.query(Interaction).count())
def parse_ecoli_ebi_goa_nonintact(session):
    """Import ortholog-derived interactions from the EBI-GOA non-IntAct file.

    Reads ``Ecoli/PSICQUIC/EBI-GOA-nonIntAct.txt`` (tab-separated, first row
    is the header). Rows where either interactor lacks a ``uniprotkb:`` id
    are skipped. Both accessions are looked up in
    ``OrthologEcoli.ortholog_uniprot``; pairs of orthologs that share
    ``strain_protein`` become candidate interactions.

    For each candidate an existing ``Interaction`` gets ``ortholog_derived``
    tagged with ``'cfe'``, otherwise a new ``'p-p'`` interaction is created
    with ``ortholog_derived='fe'``. An ``InteractionReference`` is added per
    candidate and an ``InteractionSource`` with
    ``data_source='EBI-GOA non-IntAct'`` when missing.

    Args:
        session: database session used for all queries, adds and commits.
    """

    def uniprot_accession(id_field):
        """Return the accession after 'uniprotkb:' or None when absent."""
        if 'uniprotkb:' not in id_field:
            return None
        # Stop at the next '|' so further ids are not part of the accession.
        return id_field.split('uniprotkb:')[1].split('|')[0]

    with open('Ecoli/PSICQUIC/EBI-GOA-nonIntAct.txt') as csvfile:
        for row in csv.DictReader(csvfile, delimiter='\t'):
            uniprot_a = uniprot_accession(row['#ID(s) interactor A'])
            uniprot_b = uniprot_accession(row['ID(s) interactor B'])
            if uniprot_a is None or uniprot_b is None:
                continue

            orthologs_a = session.query(OrthologEcoli).filter(
                OrthologEcoli.ortholog_uniprot == uniprot_a).all()
            orthologs_b = session.query(OrthologEcoli).filter(
                OrthologEcoli.ortholog_uniprot == uniprot_b).all()
            protein_pairs = [
                (ortholog_a.protein, ortholog_b.protein)
                for ortholog_a in orthologs_a
                for ortholog_b in orthologs_b
                if ortholog_a.strain_protein == ortholog_b.strain_protein
            ]
            if not protein_pairs:
                continue

            detection = row['Interaction detection method(s)']
            experimental = ('MI:' in detection
                            and is_experimental_psimi(
                                detection.split('MI:')[1][:4]))

            for protein_a, protein_b in protein_pairs:
                homogenous = (protein_a == protein_b)
                interaction = session.query(Interaction).filter(
                    Interaction.interactors.contains(protein_a),
                    Interaction.interactors.contains(protein_b),
                    Interaction.homogenous == homogenous).first()

                if interaction is not None:
                    if interaction.ortholog_derived is None:
                        interaction.ortholog_derived = 'cfe'
                    elif 'fe' not in interaction.ortholog_derived:
                        interaction.ortholog_derived += ', cfe'
                    session.commit()
                else:
                    interaction = Interaction(
                        strain=protein_a.strain,
                        # Pass the two proteins; the previous code passed the
                        # nested [protein, ortholog_id] lists here.
                        interactors=[protein_a, protein_b],
                        type='p-p',
                        # Stored so the lookup above can find this row again.
                        homogenous=homogenous,
                        ortholog_derived='fe')
                    if experimental:
                        interaction.is_experimental = 1
                    session.add(interaction)
                    session.commit()

                # NOTE(review): the previous version also computed the
                # ortholog ids in interaction order but never used them; the
                # reference stores the accessions from the row instead.
                # parse_ecoli_uniprot stores the ortholog ids - confirm which
                # is intended.
                reference = InteractionReference(
                    interaction_id=interaction.id,
                    detection_method=detection.split('(')[1][:-1],
                    author_ln=row['Publication 1st author(s)'].split(' ')[0],
                    # Drop the closing ')' like the other parsers do.
                    pub_date=row['Publication 1st author(s)'].split(
                        '(')[1][:-1],
                    pmid=row['Publication Identifier(s)'].split(
                        'pubmed:')[1].split('|')[0],
                    interaction_type=row['Interaction type(s)'].split(
                        '(')[1][:-1],
                    source_db=row['Source database(s)'].split('(')[1][:-1],
                    interactor_a_id=row['#ID(s) interactor A'].split(':')[1],
                    interactor_b_id=row['ID(s) interactor B'].split(':')[1])
                session.add(reference)

                source = session.query(InteractionSource).filter(
                    InteractionSource.interaction_id == interaction.id,
                    InteractionSource.data_source ==
                    'EBI-GOA non-IntAct').first()
                if source is None:
                    session.add(InteractionSource(
                        interaction_id=interaction.id,
                        data_source='EBI-GOA non-IntAct'))

    session.commit()
    print(session.query(Interaction).count())
def parse_ecoli_irefindex(session):
    """Import ortholog-derived interactions from the E. coli iRefIndex file.

    Reads ``Ecoli/PSICQUIC/iRefIndex.txt`` (tab-separated, first row is the
    header). Each interactor id is resolved through ``OrthologEcoli`` by
    ``ortholog_uniprot`` (``uniprotkb`` ids) or ``ortholog_refseq``
    (``refseq`` ids). Pairs of orthologs that share ``strain_protein`` become
    candidate interactions.

    For each candidate an existing ``Interaction`` gets ``ortholog_derived``
    tagged with ``'cfe'``, otherwise a new one is created with
    ``ortholog_derived='fe'``. One ``InteractionReference`` is added per
    combination of publication id and detection method, and an
    ``InteractionSource`` with ``data_source='iRefIndex'`` when missing.

    Args:
        session: database session used for all queries, adds and commits.
    """

    def find_orthologs(id_field):
        """Resolve a '<db>:<accession>' id to OrthologEcoli rows."""
        fields = id_field.split(':')
        if fields[0] == 'uniprotkb':
            return session.query(OrthologEcoli).filter(
                OrthologEcoli.ortholog_uniprot == fields[1]).all()
        if fields[0] == 'refseq':
            return session.query(OrthologEcoli).filter(
                OrthologEcoli.ortholog_refseq == fields[1]).all()
        return []

    with open('Ecoli/PSICQUIC/iRefIndex.txt') as csvfile:
        for row in csv.DictReader(csvfile, delimiter='\t'):
            id_a = row['#ID(s) interactor A']
            id_b = row['ID(s) interactor B']
            if id_a == '-' or id_b == '-':
                continue

            orthologs_a = find_orthologs(id_a)
            if not orthologs_a:
                continue
            orthologs_b = find_orthologs(id_b)
            pairs = [
                ((ortholog_a.protein, ortholog_a.ortholog_id),
                 (ortholog_b.protein, ortholog_b.ortholog_id))
                for ortholog_a in orthologs_a
                for ortholog_b in orthologs_b
                if ortholog_a.strain_protein == ortholog_b.strain_protein
            ]
            if not pairs:
                continue

            # Detection methods come from the detection column; the previous
            # code iterated the publication column here by mistake.
            detection_field = row['Interaction detection method(s)']
            methods = [(None, None)]
            if detection_field != '-':
                methods = [
                    (method.split('(')[1][:-1] if '(' in method else None,
                     method.split('MI:')[1][:4] if 'MI:' in method else None)
                    for method in detection_field.split('|')
                ]
            psimi_first = None
            if 'MI:' in detection_field:
                psimi_first = detection_field.split('MI:')[1][:4]

            author, pub_date = None, None
            author_field = row['Publication 1st author(s)']
            if author_field != '-':
                if '(' in author_field:
                    author = author_field.split(' ')[0]
                    pub_date = author_field.split('(')[1][:-1]
                else:
                    # NOTE(review): parse_irefindex reads this column as
                    # '<author>-<year>-...'; accept that layout too instead
                    # of raising IndexError.
                    author_parts = author_field.split('-')
                    author = (author_parts[0][:1].upper()
                              + author_parts[0][1:])
                    if len(author_parts) > 1:
                        pub_date = author_parts[1]

            type_field = row['Interaction type(s)']
            interaction_type, psimi_type = None, None
            if type_field != '-':
                if '(' in type_field:
                    interaction_type = type_field.split('(')[1][:-1]
                if 'MI:' in type_field:
                    psimi_type = type_field.split('MI:')[1][:4]

            pmids = [None]
            if row['Publication Identifier(s)'] != '-':
                pmids = [
                    entry.split(':')[1]
                    for entry in row['Publication Identifier(s)'].split('|')
                    if ':' in entry
                ] or [None]

            source_field = row['Source database(s)']
            psimi_db = None
            if 'MI:' in source_field:
                # Split on 'MI:' (not 'MI') so the id excludes the colon.
                psimi_db = source_field.split('MI:')[1][:4]
            source_db = None
            if '(' in source_field:
                source_db = source_field.split('(')[1][:-1]

            # Previously a list that was never filled, so the confidence was
            # always stored as None; store the column like parse_irefindex.
            confidence = row['Confidence value(s)']
            if confidence == '-':
                confidence = None

            for (first, first_id), (second, second_id) in pairs:
                homogenous = (first == second)
                interaction = session.query(Interaction).filter(
                    Interaction.interactors.contains(first),
                    Interaction.interactors.contains(second),
                    Interaction.homogenous == homogenous).first()

                if interaction is not None:
                    if interaction.ortholog_derived is None:
                        interaction.ortholog_derived = 'cfe'
                    elif 'fe' not in interaction.ortholog_derived:
                        interaction.ortholog_derived += ', cfe'
                    session.commit()
                else:
                    interaction = Interaction(
                        strain=first.strain,
                        interactors=[first, second],
                        # Both sides contribute their ``type`` string.
                        type=first.type + '-' + second.type,
                        # Stored so the lookup above can find this row again.
                        homogenous=homogenous,
                        ortholog_derived='fe')
                    # TODO: iterate through all methods, not only the first.
                    if (psimi_first is not None
                            and is_experimental_psimi(psimi_first)):
                        interaction.is_experimental = 1
                    session.add(interaction)
                    session.commit()

                # Report the ids in the order the interaction stores its
                # interactors.
                if interaction.interactors[0] == first:
                    interactor_a, interactor_b = first_id, second_id
                else:
                    interactor_a, interactor_b = second_id, first_id

                for pmid in pmids:
                    for detection, psimi_detection in methods:
                        # NOTE(review): keyword names follow the majority of
                        # the sibling parsers (pub_date, confidence_score);
                        # interactor_a / interactor_b are kept as written
                        # here - confirm all against InteractionReference.
                        session.add(InteractionReference(
                            interaction_id=interaction.id,
                            psimi_detection=psimi_detection,
                            detection_method=detection,
                            author_ln=author,
                            pub_date=pub_date,
                            # Previously looped over but never stored.
                            pmid=pmid,
                            psimi_type=psimi_type,
                            interaction_type=interaction_type,
                            psimi_db=psimi_db,
                            source_db=source_db,
                            confidence_score=confidence,
                            interactor_a=interactor_a,
                            interactor_b=interactor_b))

                source = session.query(InteractionSource).filter(
                    InteractionSource.interaction_id == interaction.id,
                    InteractionSource.data_source == 'iRefIndex').first()
                if source is None:
                    session.add(InteractionSource(
                        interaction_id=interaction.id,
                        data_source='iRefIndex'))

    session.commit()
    print(session.query(Interaction).count())