def add_paper(pmid, nex_session=None):
    if nex_session is None:
        nex_session = get_session()

    records = get_pubmed_record(str(pmid))
    rec_file = StringIO(records[0])
    record = Medline.read(rec_file)

    source_id = get_source_id(nex_session, 'NCBI')

    ## insert into DBENTITY/REFERENCEDBENTITY/REFERENCEDOCUMENT
    [reference_id, authors, doi_url, pmc_url] = insert_referencedbentity(
        nex_session, pmid, source_id, record)

    insert_authors(nex_session, reference_id, authors, source_id)
    insert_pubtypes(nex_session, pmid, reference_id, record.get('PT', []), source_id)
    insert_urls(nex_session, pmid, reference_id, doi_url, pmc_url, source_id)
    insert_relations(nex_session, pmid, reference_id, record)

    return reference_id
def dump_data():
    nex_session = get_session()

    dbentity_id_to_sgdid = dict([(x.dbentity_id, x.sgdid) for x in nex_session.query(Dbentity).filter_by(subclass='LOCUS').all()])

    fw = open(datafile, "w")

    for x in nex_session.query(Locusdbentity).all():
        if x.dbentity_id not in dbentity_id_to_sgdid:
            continue
        gene_name = x.gene_name
        if gene_name is None:
            gene_name = ""
        headline = x.headline
        if headline is None:
            headline = ""
        fw.write(x.systematic_name + "\t" + gene_name + "\t" + dbentity_id_to_sgdid[x.dbentity_id] + "\t" + headline + "\n")

    fw.close()
    nex_session.close()
def get_new_pmids(summary_file):
    nex_session = get_session()

    pmid_to_reference_id = dict([(x.pmid, x.dbentity_id) for x in nex_session.query(Referencedbentity).all()])

    f = open(summary_file)
    new_pmids = []
    for line in f:
        pieces = line.strip().split("\t")
        if len(pieces) < 3:
            print(line)
            continue
        pmids = pieces[2].strip().replace(" ", "").split("|")
        for pmid in pmids:
            if int(pmid) not in pmid_to_reference_id and pmid not in new_pmids:
                new_pmids.append(pmid)
    f.close()

    for pmid in new_pmids:
        print("NEW PMID:", pmid)
def load_ontology(ontology_file):
    nex_session = get_session()

    source_to_id = dict([(x.display_name, x.source_id) for x in nex_session.query(Source).all()])
    obiid_to_obi = dict([(x.obiid, x) for x in nex_session.query(Obi).all()])
    term_to_ro_id = dict([(x.display_name, x.ro_id) for x in nex_session.query(Ro).all()])

    obi_id_to_parent = {}
    for x in nex_session.query(ObiRelation).all():
        obi_id_to_parent.setdefault(x.child_id, []).append(x.parent_id)

    ####################################
    fw = open(log_file, "w")

    data = read_owl(ontology_file, ontology)

    [update_log, to_delete_list] = load_new_data(nex_session, data,
                                                 source_to_id,
                                                 obiid_to_obi,
                                                 term_to_ro_id['is a'],
                                                 obi_id_to_parent,
                                                 fw)

    write_summary_and_send_email(fw, update_log, to_delete_list)

    nex_session.close()
    fw.close()
def dump_data(goid):
    nex_session = get_session()

    go_id_to_go = dict([(x.go_id, (x.goid, x.display_name)) for x in nex_session.query(Go).all()])

    goObj = nex_session.query(Go).filter_by(goid=goid).one_or_none()
    if goObj is None:
        print("The goid:", goid, " is not in the database.")
        return
    go_id = goObj.go_id

    parent_to_children = {}
    for x in nex_session.query(GoRelation).all():
        parent_to_children.setdefault(x.parent_id, []).append(x.child_id)

    output_children(go_id, parent_to_children, go_id_to_go)

    nex_session.close()
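# The real output_children() lives elsewhere in this script; the function
# below is only a minimal sketch of how a traversal over the
# parent_to_children map built above could look (hypothetical name and
# output format, not the script's actual implementation).
def output_children_sketch(go_id, parent_to_children, go_id_to_go, depth=0):
    # depth-first walk; GO is a DAG, so a term reachable through several
    # parents will be printed once per path
    for child_id in parent_to_children.get(go_id, []):
        (goid, display_name) = go_id_to_go[child_id]
        print("    " * depth + goid + "\t" + display_name)
        output_children_sketch(child_id, parent_to_children, go_id_to_go, depth + 1)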
def load_data():
    nex_session = get_session()

    load_dna_data(nex_session)
    load_protein_data(nex_session)
    load_intergenic_data(nex_session)
def load_data():
    nex_session = get_session()

    f = open(data_file)
    i = 0
    for line in f:
        if line.startswith('sequence'):
            continue
        pieces = line.strip().split('\t')
        seqID = pieces[0]
        locus_id = int(pieces[1])
        aligned_seq = pieces[2]
        print(seqID, locus_id, aligned_seq)
        x = Proteinsequencealignment(locus_id=locus_id,
                                     display_name=seqID,
                                     aligned_sequence=aligned_seq,
                                     created_by=CREATED_BY)
        nex_session.add(x)
        i = i + 1
        if i > 500:
            # dry-run mode: swap rollback() for the commented-out commit()
            # to persist the rows in batches of 500
            # nex_session.commit()
            nex_session.rollback()
            i = 0
    f.close()

    # nex_session.commit()
    nex_session.rollback()
def update_reference_data():
    nex_session = get_session()

    log.info("Updating DBENTITY.display_name...")

    ## update display_name in DBENTITY table
    dbentity_id_to_citation = dict([(x.dbentity_id, (x.citation, x.pmid)) for x in nex_session.query(Referencedbentity).all()])

    all_refs = nex_session.query(Dbentity).filter_by(subclass='REFERENCE').all()
    for x in all_refs:
        if x.dbentity_id not in dbentity_id_to_citation:
            log.info("The dbentity_id=" + str(x.dbentity_id) + " is not in the referencedbentity table.\n")
            continue
        (citation, pmid) = dbentity_id_to_citation.get(x.dbentity_id)
        display_name = citation.split(')')[0] + ')'
        if display_name == x.display_name:
            continue
        display_name_old = x.display_name
        x.display_name = display_name
        nex_session.add(x)
        nex_session.commit()
        log.info("PMID:" + str(pmid) + " display_name is changed from " + display_name_old + " to " + display_name)

    log.info("Done")
def load_ontology(ontology_file):
    nex_session = get_session()

    source_to_id = dict([(x.display_name, x.source_id) for x in nex_session.query(Source).all()])
    psimodid_to_psimod = dict([(x.psimodid, x) for x in nex_session.query(Psimod).all()])
    term_to_ro_id = dict([(x.display_name, x.ro_id) for x in nex_session.query(Ro).all()])

    psimod_id_to_parent = {}
    for x in nex_session.query(PsimodRelation).all():
        psimod_id_to_parent.setdefault(x.child_id, []).append(x.parent_id)

    ####################################
    fw = open(log_file, "w")

    data = read_obo(ontology_file)

    [update_log, to_delete_list] = load_new_data(nex_session, data,
                                                 source_to_id,
                                                 psimodid_to_psimod,
                                                 term_to_ro_id['is a'],
                                                 psimod_id_to_parent,
                                                 fw)

    write_summary_and_send_email(fw, update_log, to_delete_list)

    nex_session.close()
    fw.close()
def update_reference_data(log_file):
    nex_session = get_session()

    pmid_to_reference_id = dict([(x.pmid, x.dbentity_id) for x in nex_session.query(Referencedbentity).all()])
    reference_id_author_to_x = dict([((x.reference_id, x.display_name), x) for x in nex_session.query(Referenceauthor).all()])

    fw = open(log_file, "w")
    fw.write(str(datetime.now()) + "\n")
    fw.write("Getting PMID list...\n")

    log.info("Getting data from the database...")

    pmid_all = []
    for x in nex_session.query(Referencedbentity).all():
        if x.pmid:
            pmid_all.append(x.pmid)

    pmids = []
    for pmid in pmid_all:
        if pmid is None or pmid in [26842620, 27823544, 11483584]:
            continue
        pmids.append(str(pmid))
        if len(pmids) >= MAX:
            records = get_pubmed_record_from_xml(','.join(pmids))
            update_orcid(nex_session, fw, records, pmid_to_reference_id, reference_id_author_to_x)
            pmids = []

    if len(pmids) > 0:
        records = get_pubmed_record_from_xml(','.join(pmids))
        update_orcid(nex_session, fw, records, pmid_to_reference_id, reference_id_author_to_x)

    fw.close()
    nex_session.close()
def load_data():
    nex_session = get_session()

    log.info("Getting data from database...")

    source = nex_session.query(Source).filter_by(display_name='SGD').one_or_none()
    source_id = source.source_id

    locus_allele_to_id = dict([((x.locus_id, x.allele_id), x.locus_allele_id) for x in nex_session.query(LocusAllele).all()])
    locus_allele_reference_to_id = dict([((x.locus_allele_id, x.reference_id), x.locusallele_reference_id) for x in nex_session.query(LocusalleleReference).all()])

    count = 0
    loaded = {}
    ref_loaded = {}
    allPhenos = nex_session.query(Phenotypeannotation).all()
    for x in allPhenos:
        if x.allele_id is None:
            continue
        locus_allele_id = locus_allele_to_id.get((x.dbentity_id, x.allele_id))
        if locus_allele_id is None:
            locus_allele_id = loaded.get((x.dbentity_id, x.allele_id))
        if locus_allele_id is None:
            log.info("adding locus_allele: " + str(x.dbentity_id) + " and " + str(x.allele_id))
            locus_allele_id = insert_locus_allele(nex_session, x.dbentity_id, x.allele_id, source_id, x.date_created, x.created_by)
            loaded[(x.dbentity_id, x.allele_id)] = locus_allele_id
        if (locus_allele_id, x.reference_id) not in locus_allele_reference_to_id and (locus_allele_id, x.reference_id) not in ref_loaded:
            log.info("adding locusallele_reference: " + str(locus_allele_id) + " and " + str(x.reference_id))
            insert_locusallele_reference(nex_session, locus_allele_id, x.reference_id, source_id, x.date_created, x.created_by)
            ref_loaded[(locus_allele_id, x.reference_id)] = 1
        count = count + 1
        if count >= 300:
            log.info("committing data...")
            nex_session.commit()
            count = 0

    nex_session.commit()
    nex_session.close()
    log.info("Done!")
def copy_files():
    nex_session = get_session()

    copy_gff(nex_session)
    copy_gaf(nex_session)
    copy_gpad(nex_session)
    copy_gpi(nex_session)
    copy_noctua_gpad(nex_session)
def load_summaries(summary_file):
    nex_session = get_session()

    sgdid_to_locus_id = dict([(x.sgdid, x.dbentity_id) for x in nex_session.query(Dbentity).filter_by(subclass='LOCUS').all()])
    locus_id_to_summary = dict([(x.locus_id, x) for x in nex_session.query(Locussummary).filter_by(summary_type=summary_type).all()])

    sgd = nex_session.query(Source).filter_by(format_name='SGD').one_or_none()
    source_id = sgd.source_id

    uniprot_to_sgdid_list = read_gpi_file()

    f = open(summary_file)
    fw = open(log_file, "w")

    for line in f:
        pieces = line.strip().split("\t")
        if pieces[0] == 'Group':
            continue
        if len(pieces) < 8:
            print("BAD LINE:", line)
            continue
        curatorName = pieces[1].strip().replace(" [Expired account]", "")
        curator = curator_id.get(curatorName)
        if curator is None:
            print("The curator name:", pieces[1], " is not in the mapping file.")
            continue
        date_created = pieces[6].strip()
        summary_text = pieces[7].strip()
        sgdid_list = uniprot_to_sgdid_list.get(pieces[3].strip())
        if sgdid_list is None:
            print("The uniprot ID:", pieces[3], " is not found in the GPI file.")
            continue
        for sgdid in sgdid_list:
            locus_id = sgdid_to_locus_id.get(sgdid)
            if locus_id is None:
                print("The sgdid:", sgdid, " is not in the database.")
                continue
            x = locus_id_to_summary.get(locus_id)
            if x is None:
                insert_locussummary(nex_session, fw, locus_id, summary_text, source_id, curator, date_created)
            else:
                update_summary(nex_session, fw, locus_id, summary_text, source_id, curator, date_created, x)

    f.close()
    fw.close()
def load_domains():
    nex_session = get_session()

    fw = open(log_file, "w")
    read_data_and_update_database(nex_session, fw)

    nex_session.close()
    fw.close()
def standardize_name(infile, logfile):
    nex_session = get_session()

    name_to_locus = dict([(x.systematic_name, (x.dbentity_id, x.gene_name, x.name_description)) for x in nex_session.query(Locusdbentity).all()])
    id_to_reference = dict([(x.dbentity_id, (x.citation, x.pmid)) for x in nex_session.query(Referencedbentity).all()])

    locus_id_to_reference_list = {}
    for x in nex_session.query(LocusReferences).filter(LocusReferences.reference_class.in_(['gene_name', 'name_description'])).all():
        (citation, pmid) = id_to_reference[x.reference_id]
        locus_id_to_reference_list.setdefault(x.locus_id, []).append((x.reference_id, citation, pmid, x.reference_class))

    fw = open(logfile, "w")
    f = open(infile)

    unique_papers = []
    for line in f:
        pieces = line.strip().split("\t")
        if pieces[0] == 'ORF':
            continue
        (locus_id, gene_name, name_desc) = name_to_locus[pieces[0]]
        print(locus_id, gene_name, name_desc)
        reference_list = locus_id_to_reference_list.get(locus_id)
        print(pieces[0], pieces[1], pieces[2], pieces[4], reference_list)
        if reference_list is None:
            print("NO REF for", pieces[0], locus_id, gene_name)
            continue
        for reference_row in reference_list:
            (reference_id, citation, pmid, reference_class) = reference_row
            if (reference_id, citation) in unique_papers:
                continue
            print(reference_id, citation)
            unique_papers.append((reference_id, citation))

    fw.close()
    f.close()
def get_relation_to_ro_id(relation_type, nex_session=None):
    from src.models import Ro

    # cache the RO display_name -> ro_id mapping at module level so the
    # table is only queried once per process
    global relation_to_ro_id
    if relation_to_ro_id is None:
        if nex_session is None:
            nex_session = get_session()
        relation_to_ro_id = {}
        for relation in nex_session.query(Ro).all():
            relation_to_ro_id[relation.display_name] = relation.ro_id

    return None if relation_type not in relation_to_ro_id else relation_to_ro_id[relation_type]
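# Usage sketch for the helper above (illustrative only): relation_to_ro_id
# must be initialized to None at module level; the first call then pays for
# one Ro query and every later call is a plain dict lookup. 'is a' is the
# relation name the ontology loaders in this repo pass in.
#
#   relation_to_ro_id = None                          # module-level cache
#   ...
#   is_a_ro_id = get_relation_to_ro_id('is a')        # queries the RO table once
#   part_of_ro_id = get_relation_to_ro_id('part of')  # served from the cache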
def load_data():
    nex_session = get_session()

    all_links = get_links()
    for link in all_links:
        (display_name, link_url, index_key) = link
        insert_tools(nex_session, display_name, link_url, index_key)

    nex_session.commit()
    nex_session.close()
def dump_data():
    nex_session = get_session()

    allele_id_to_name = dict([(x.allele_id, x.display_name) for x in nex_session.query(Allele).all()])

    allPheno = nex_session.query(Phenotypeannotation).all()
    for x in allPheno:
        if x.allele_id:
            print(str(x.annotation_id) + "\t" + allele_id_to_name[x.allele_id])

    nex_session.close()
def delete_obsolete_annotations(key_to_annotation, hasGoodAnnot, go_id_to_aspect,
                                annotation_update_log, source_to_id,
                                dbentity_id_with_new_pmid, dbentity_id_with_uniprot, fw):
    nex_session = get_session()

    evidence_to_eco_id = dict([(x.display_name, x.eco_id) for x in nex_session.query(EcoAlias).all()])
    src_id = source_to_id['SGD']
    to_be_deleted = key_to_annotation.values()

    try:
        ## add check to see if there are any valid htp annotations..
        for x in nex_session.query(Goannotation).filter_by(source_id=src_id).filter_by(annotation_type='high-throughput').all():
            hasGoodAnnot[(x.dbentity_id, go_id_to_aspect[x.go_id])] = 1

        ## delete the old ones -
        for x in to_be_deleted:
            ## don't delete the annotations for the features with a pmid not in db yet
            ## (so keep the old annotations for now)
            if dbentity_id_with_new_pmid.get(x.dbentity_id) is not None:
                continue
            ## don't delete PAINT annotations (they are not in GPAD files yet)
            if x.source_id == source_to_id['GO_Central']:
                continue
            aspect = go_id_to_aspect[x.go_id]
            if x.eco_id == evidence_to_eco_id['ND'] and hasGoodAnnot.get((x.dbentity_id, aspect)) is None:
                ## still keep the ND annotation if there is no good annotation available yet
                continue
            elif dbentity_id_with_uniprot.get(x.dbentity_id):
                ## don't want to delete the annotations that are not in GPAD file yet
                delete_extensions_evidences(nex_session, x.annotation_id)
                nex_session.delete(x)
                nex_session.commit()
                fw.write("DELETE GOANNOTATION: row=" + str(x) + "\n")
                key = (x.annotation_type, 'annotation_deleted')
                annotation_update_log[key] = annotation_update_log[key] + 1
    finally:
        nex_session.close()
def change_name(infile, logfile):
    nex_session = get_session()

    name_to_locus_id = dict([(x.systematic_name, x.dbentity_id) for x in nex_session.query(Locusdbentity).all()])
    pmid_to_reference_id = dict([(x.pmid, x.dbentity_id) for x in nex_session.query(Referencedbentity).all()])

    sgd = nex_session.query(Source).filter_by(display_name='SGD').one_or_none()
    source_id = sgd.source_id

    fw = open(logfile, "w")
    f = open(infile)

    for line in f:
        if line.startswith('ORF'):
            continue
        pieces = line.strip().split("\t")
        orf_name = pieces[0]
        alias_name = pieces[1]
        alias_type = pieces[2]
        pmid = int(pieces[3])
        date_created = pieces[4]
        created_by = pieces[5]
        locus_id = name_to_locus_id.get(orf_name)
        if locus_id is None:
            print("The ORF name:", orf_name, " is not in the database.")
            continue
        reference_id = pmid_to_reference_id.get(pmid)
        if reference_id is None:
            print("The PMID:", pmid, " is not in the database.")
            continue
        alias_id = insert_locus_alias(nex_session, fw, locus_id, alias_name, alias_type, source_id, date_created, created_by)
        insert_locusalias_reference(nex_session, fw, alias_id, reference_id, source_id, date_created, created_by)
        note_id = insert_locusnote(nex_session, fw, locus_id, alias_name, source_id, created_by, date_created)
        insert_locusnote_reference(nex_session, fw, note_id, reference_id, source_id, created_by, date_created)

    # nex_session.rollback()
    nex_session.commit()

    fw.close()
    f.close()
def add_papers(pmid_file, created_by):
    nex_session = get_session()

    f = open(pmid_file)
    for line in f:
        pmid = int(line.strip())
        print("adding paper for", pmid)
        add_paper(pmid, created_by)
    f.close()

    nex_session.commit()
def load_data(infile, logfile):
    nex_session = get_session()

    name_to_locus_id = dict([(x.systematic_name, x.dbentity_id) for x in nex_session.query(Locusdbentity).all()])
    citation_to_reference_id = dict([(x.citation, x.dbentity_id) for x in nex_session.query(Referencedbentity).all()])

    sgd = nex_session.query(Source).filter_by(display_name='SGD').one_or_none()
    source_id = sgd.source_id

    fw = open(logfile, "w")
    f = open(infile)

    for line in f:
        pieces = line.strip().split("\t")
        if pieces[0] == 'feature_name':
            continue
        locus_id = name_to_locus_id[pieces[0]]
        display_name = pieces[1]
        colleague_id = None
        if pieces[2]:
            colleague_id = int(pieces[2])
        reference_id = None
        if pieces[3]:
            reference_id = citation_to_reference_id.get(pieces[3].replace('"', ''))
        if reference_id is None:
            print("No citation provided or the citation is not in the database:", pieces[3])
            continue
        reservation_date = reformat_date(pieces[4])
        expiration_date = reformat_date(pieces[5])
        name_description = pieces[6].replace('"', '')
        created_by = pieces[7]
        insert_reservedname(nex_session, fw, locus_id, display_name, reference_id, colleague_id, source_id, reservation_date, expiration_date, name_description, created_by)
        if name_description:
            insert_locus_reference(nex_session, fw, locus_id, reference_id, source_id, reservation_date, created_by)
        note_id = insert_locusnote(nex_session, fw, locus_id, display_name, source_id, reservation_date, created_by)
        insert_locusnote_reference(nex_session, fw, note_id, reference_id, source_id, reservation_date, created_by)

    # nex_session.rollback()
    nex_session.commit()

    fw.close()
    f.close()
def load_ontology(ontology_file):
    nex_session = get_session()

    log.info(str(datetime.now()))
    log.info("Getting data from database...")

    source_to_id = dict([(x.display_name, x.source_id) for x in nex_session.query(Source).all()])
    chebiid_to_chebi = dict([(x.chebiid, x) for x in nex_session.query(Chebi).all()])

    chebi_id_to_alias = {}
    for x in nex_session.query(ChebiAlia).all():
        if x.alias_type in ['PharmGKB ID', 'YeastPathway ID']:
            continue
        chebi_id_to_alias.setdefault(x.chebi_id, []).append((x.display_name, x.alias_type))

    ####################################
    fw = open(log_file, "w")

    log.info("Reading data from ontology file...")

    is_3_star_term = {}
    data = read_owl(ontology_file, ontology, is_3_star_term)

    log.info("Updating chebi ontology data in the database...")

    [update_log, to_delete_list, term_name_changed] = load_new_data(nex_session, data,
                                                                    source_to_id,
                                                                    chebiid_to_chebi,
                                                                    chebi_id_to_alias,
                                                                    is_3_star_term,
                                                                    fw)

    log.info("Writing loading summary...")

    write_summary_and_send_email(fw, update_log, to_delete_list, term_name_changed)

    nex_session.close()
    fw.close()

    log.info(str(datetime.now()))
    log.info("Done!\n\n")
def load_ontology(ontology_file):
    nex_session = get_session()

    source_to_id = dict([(x.display_name, x.source_id) for x in nex_session.query(Source).all()])
    taxid_to_taxonomy = dict([(x.taxid, x) for x in nex_session.query(Taxonomy).all()])
    term_to_ro_id = dict([(x.display_name, x.ro_id) for x in nex_session.query(Ro).all()])

    taxonomy_id_to_alias = {}
    for x in nex_session.query(TaxonomyAlia).all():
        taxonomy_id_to_alias.setdefault(x.taxonomy_id, []).append((x.display_name, x.alias_type))

    taxonomy_id_to_parent = {}
    for x in nex_session.query(TaxonomyRelation).all():
        taxonomy_id_to_parent.setdefault(x.child_id, []).append(x.parent_id)

    ####################################
    fw = open(log_file, "w")

    ## total 1037 in the filtered set
    [filtered_set, id_to_rank] = children_for_taxonomy_ancestor(ontology_file, ancestor)

    data = read_owl(ontology_file, ontology)

    [update_log, to_delete_list] = load_new_data(nex_session, data,
                                                 source_to_id,
                                                 taxid_to_taxonomy,
                                                 term_to_ro_id['is a'],
                                                 taxonomy_id_to_alias,
                                                 taxonomy_id_to_parent,
                                                 filtered_set,
                                                 id_to_rank,
                                                 fw)

    write_summary_and_send_email(fw, update_log, to_delete_list)

    nex_session.close()
    fw.close()
def update_data(infile):
    nex_session = get_session()

    fw = open(log_file, "w")

    uniprot_to_locus_id = dict([(x.display_name, x.locus_id) for x in nex_session.query(LocusAlias).filter_by(alias_type="UniProtKB ID").all()])
    edam_to_id = dict([(x.format_name, x.edam_id) for x in nex_session.query(Edam).all()])

    src = nex_session.query(Source).filter_by(display_name=SOURCE).one_or_none()
    source_id = src.source_id

    log.info(str(datetime.now()))
    log.info("Getting data from the database...")

    locus_id_to_ec_list_DB = {}
    for x in nex_session.query(LocusAlias).filter_by(alias_type=ALIAS_TYPE).all():
        locus_id_to_ec_list_DB.setdefault(x.locus_id, []).append(x.display_name)

    log.info(str(datetime.now()))
    log.info("Reading data from enzyme.dat file and updating database...")

    locus_id_to_ec_list = read_enzyme_file(uniprot_to_locus_id, infile)

    for locus_id in locus_id_to_ec_list:
        if locus_id in locus_id_to_ec_list_DB:
            update_ec_list(nex_session, fw, locus_id, source_id,
                           locus_id_to_ec_list[locus_id],
                           locus_id_to_ec_list_DB[locus_id])
            del locus_id_to_ec_list_DB[locus_id]
        else:
            add_ec_list(nex_session, fw, locus_id, source_id, locus_id_to_ec_list[locus_id])

    delete_old_ec_list(nex_session, fw, locus_id_to_ec_list_DB)

    # nex_session.rollback()
    nex_session.commit()

    fw.close()

    log.info(str(datetime.now()))
    log.info("Done!")
def load_data():
    nex_session = get_session()

    sgd = nex_session.query(Source).filter_by(format_name='SGD').one_or_none()
    source_id = sgd.source_id

    f = open(data_file)
    for line in f:
        if line.startswith('dbentity_id'):
            continue
        pieces = line.strip().split(" ")
        locus_id = int(pieces[0])
        protein_name = " ".join(pieces[4:])
        insert_into_database(nex_session, source_id, locus_id, protein_name)
    f.close()
def load_data():
    nex_session = get_session()

    name_to_locus_id = dict([(x.systematic_name, x.dbentity_id) for x in nex_session.query(Locusdbentity).all()])

    sgd = nex_session.query(Source).filter_by(format_name='SGD').one_or_none()
    source_id = sgd.source_id

    f = open(data_file)
    for line in f:
        pieces = line.strip().split(" ")
        name = pieces[0]
        locus_id = name_to_locus_id.get(name)
        if locus_id is None:
            # skip rows whose systematic name is not in the database rather
            # than inserting a row with a null locus_id
            print("The systematic name:", name, " is not in the database.")
            continue
        product_name = " ".join(pieces[1:])
        insert_into_database(nex_session, source_id, locus_id, product_name)
    f.close()
def load_ontology(ontology_file):
    nex_session = get_session()

    source_to_id = dict([(x.display_name, x.source_id) for x in nex_session.query(Source).all()])
    soid_to_so = dict([(x.soid, x) for x in nex_session.query(So).all()])
    term_to_ro_id = dict([(x.display_name, x.ro_id) for x in nex_session.query(Ro).all()])

    so_id_to_alias = {}
    for x in nex_session.query(SoAlia).all():
        so_id_to_alias.setdefault(x.so_id, []).append((x.display_name, x.alias_type))

    so_id_to_parent = {}
    for x in nex_session.query(SoRelation).all():
        so_id_to_parent.setdefault(x.child_id, []).append(x.parent_id)

    ####################################
    fw = open(log_file, "w")

    data = read_owl(ontology_file, ontology)

    [update_log, to_delete_list] = load_new_data(nex_session, data,
                                                 source_to_id,
                                                 soid_to_so,
                                                 term_to_ro_id['is a'],
                                                 so_id_to_alias,
                                                 so_id_to_parent,
                                                 fw)

    write_summary_and_send_email(fw, update_log, to_delete_list)

    nex_session.close()
    fw.close()
def load_data():
    nex_session = get_session()

    log.info(str(datetime.now()))
    log.info("Getting data from database...")

    allele_to_dbentity_id = dict([(x.display_name.upper(), x.dbentity_id) for x in nex_session.query(Dbentity).filter_by(subclass='ALLELE').all()])

    source = nex_session.query(Source).filter_by(display_name='SGD').one_or_none()
    source_id = source.source_id

    so = nex_session.query(So).filter_by(display_name='structural variant').one_or_none()
    so_id = so.so_id

    count = 0
    allAllele = nex_session.query(Allele).all()
    for x in allAllele:
        if x.display_name.upper() in allele_to_dbentity_id:
            continue
        log.info("adding alleledbentity: " + x.display_name + "...")
        insert_alleledbentity(nex_session, x.format_name, x.display_name,
                              x.description, source_id, so_id,
                              x.date_created, x.created_by)
        count = count + 1
        if count >= 300:
            # nex_session.rollback()
            nex_session.commit()
            count = 0

    # nex_session.rollback()
    nex_session.commit()
    nex_session.close()

    log.info("Done!")
    log.info(str(datetime.now()))
def load_ontology(ontology_file):
    nex_session = get_session()

    source_to_id = dict([(x.display_name, x.source_id) for x in nex_session.query(Source).all()])
    roid_to_ro = dict([(x.roid, x) for x in nex_session.query(Ro).all()])

    fw = open(log_file, "w")

    data = read_owl(ontology_file, ontology)

    [update_log, to_delete_list] = load_new_data(nex_session, data,
                                                 source_to_id[src],
                                                 roid_to_ro,
                                                 fw)

    write_summary_and_send_email(fw, update_log, to_delete_list)

    nex_session.close()
    fw.close()