Example #1
def add_paper(pmid, nex_session=None):
     
    if nex_session is None:
        nex_session = get_session()

    records = get_pubmed_record(str(pmid))
    # print records[0]

    rec_file = StringIO(records[0])
    record = Medline.read(rec_file)
    # print record

    source_id = get_source_id(nex_session, 'NCBI')

    ## insert into DBENTITY/REFERENCEDBENTITY/REFERENCEDOCUMENT
    [reference_id, authors, doi_url, pmc_url] = insert_referencedbentity(nex_session, 
                                                                         pmid, 
                                                                         source_id, 
                                                                         record)
    
    # print reference_id, authors, doi_url, pmc_url

    insert_authors(nex_session, reference_id, authors, source_id)

    insert_pubtypes(nex_session, pmid, reference_id, record.get('PT', []), source_id)
    
    insert_urls(nex_session, pmid, reference_id, doi_url, pmc_url, source_id)

    insert_relations(nex_session, pmid, reference_id, record)

    return reference_id
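
Every example in this collection calls a shared get_session() helper that is never shown. A minimal sketch of what it presumably looks like, assuming SQLAlchemy and a NEX_DB_URL environment variable (both the variable name and the exact setup are assumptions, not from the original code):

import os
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

def get_session():
    # Hypothetical sketch: build a session bound to the NEX database.
    # NEX_DB_URL is an assumed environment variable, not from the original.
    engine = create_engine(os.environ['NEX_DB_URL'])
    return sessionmaker(bind=engine)()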
Example #2
def dump_data():

    nex_session = get_session()

    dbentity_id_to_sgdid = dict([
        (x.dbentity_id, x.sgdid)
        for x in nex_session.query(Dbentity).filter_by(subclass='LOCUS').all()
    ])

    fw = open(datafile, "w")

    for x in nex_session.query(Locusdbentity).all():
        if x.dbentity_id not in dbentity_id_to_sgdid:
            continue
        # if "_" in x.systematic_name and x.headline is None:
        #    continue
        gene_name = x.gene_name
        if gene_name is None:
            gene_name = ""
        headline = x.headline
        if headline is None:
            headline = ""
        fw.write(x.systematic_name + "\t" + gene_name + "\t" +
                 dbentity_id_to_sgdid[x.dbentity_id] + "\t" + headline + "\n")

    fw.close()

    nex_session.close()
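
The manual tab-joining and None checks above can be collapsed with the standard csv module; a near-equivalent sketch (rows would be the (systematic_name, gene_name, sgdid, headline) tuples the loop assembles):

import csv

def dump_rows(fw, rows):
    # Tab-separated output, one record per line, as in the original loop
    writer = csv.writer(fw, delimiter='\t', lineterminator='\n')
    for systematic_name, gene_name, sgdid, headline in rows:
        # `or ''` replaces the explicit None-to-empty-string checks
        writer.writerow([systematic_name, gene_name or '', sgdid, headline or ''])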
Example #3
def get_new_pmids(summary_file):

    nex_session = get_session()

    pmid_to_reference_id = dict([
        (x.pmid, x.dbentity_id)
        for x in nex_session.query(Referencedbentity).all()
    ])

    f = open(summary_file)

    new_pmids = []

    for line in f:

        pieces = line.strip().split("\t")

        if len(pieces) < 3:
            print(line)
            continue

        pmids = pieces[2].strip().replace(" ", "").split("|")

        for pmid in pmids:
            if int(pmid) not in pmid_to_reference_id:
                if pmid not in new_pmids:
                    new_pmids.append(pmid)

    f.close()

    for pmid in new_pmids:
        print "NEW PMID: ", pmid
Example #4
def load_ontology(ontology_file):

    nex_session = get_session()

    source_to_id = dict([(x.display_name, x.source_id)
                         for x in nex_session.query(Source).all()])
    obiid_to_obi = dict([(x.obiid, x) for x in nex_session.query(Obi).all()])
    term_to_ro_id = dict([(x.display_name, x.ro_id)
                          for x in nex_session.query(Ro).all()])

    obi_id_to_parent = {}
    for x in nex_session.query(ObiRelation).all():
        parents = []
        if x.child_id in obi_id_to_parent:
            parents = obi_id_to_parent[x.child_id]
        parents.append(x.parent_id)
        obi_id_to_parent[x.child_id] = parents

    ####################################
    fw = open(log_file, "w")

    is_sgd_term = {}
    data = read_owl(ontology_file, ontology)

    [update_log,
     to_delete_list] = load_new_data(nex_session, data, source_to_id,
                                     obiid_to_obi, term_to_ro_id['is a'],
                                     obi_id_to_parent, fw)

    write_summary_and_send_email(fw, update_log, to_delete_list)

    nex_session.close()

    fw.close()
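
The child-to-parents map above is built with an accumulate-and-reassign loop that recurs in several of these loaders. A behavior-equivalent sketch using collections.defaultdict:

from collections import defaultdict

def build_parent_map(relations):
    # relations: rows exposing child_id and parent_id attributes,
    # e.g. nex_session.query(ObiRelation).all()
    id_to_parents = defaultdict(list)
    for x in relations:
        id_to_parents[x.child_id].append(x.parent_id)
    return dict(id_to_parents)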
Example #5
def dump_data(goid):

    nex_session = get_session()

    go_id_to_go = dict([(x.go_id, (x.goid, x.display_name))
                        for x in nex_session.query(Go).all()])

    goObj = nex_session.query(Go).filter_by(goid=goid).one_or_none()

    if goObj is None:
        print("The goid:", goid, " is not in the database.")
        return

    go_id = goObj.go_id

    parent_to_children = {}
    for x in nex_session.query(GoRelation).all():
        children = []
        if x.parent_id in parent_to_children:
            children = parent_to_children[x.parent_id]
        children.append(x.child_id)
        parent_to_children[x.parent_id] = children

    output_children(go_id, parent_to_children, go_id_to_go)

    nex_session.close()
Example #6
def load_data():

    nex_session = get_session()

    load_dna_data(nex_session)
    load_protein_data(nex_session)
    load_intergenic_data(nex_session)
Example #7
def load_data():

    nex_session = get_session()

    f = open(data_file)

    i = 0

    for line in f:
        if line.startswith('sequence'):
            continue
        pieces = line.strip().split('\t')
        seqID = pieces[0]
        locus_id = int(pieces[1])
        aligned_seq = pieces[2]

        print(seqID, locus_id, aligned_seq)

        x = Proteinsequencealignment(locus_id=locus_id,
                                     display_name=seqID,
                                     aligned_sequence=aligned_seq,
                                     created_by=CREATED_BY)
        nex_session.add(x)

        i = i + 1
        if i > 500:
            # nex_session.commit()
            nex_session.rollback()
            i = 0

    # nex_session.commit()
    nex_session.rollback()
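
The commented-out commit() calls paired with live rollback() calls suggest this loader was checked in while running in a dry-run mode: every batch of inserts is exercised and then discarded. One way to make that toggle explicit (COMMIT is a hypothetical flag, not in the original):

COMMIT = False  # hypothetical flag: True persists each batch, False keeps the dry run

def flush_batch(nex_session):
    # Commit for a real load; roll back to test the inserts without saving them.
    if COMMIT:
        nex_session.commit()
    else:
        nex_session.rollback()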
Example #8
def update_reference_data():

    nex_session = get_session()

    log.info("Updating DBENTITY.display_name...")

    ## update display_name in DBENTITY table
    dbentity_id_to_citation = dict([
        (x.dbentity_id, (x.citation, x.pmid))
        for x in nex_session.query(Referencedbentity).all()
    ])

    all_refs = nex_session.query(Dbentity).filter_by(
        subclass='REFERENCE').all()

    for x in all_refs:
        if x.dbentity_id not in dbentity_id_to_citation:
            log.info("The dbentity_id=" + str(x.dbentity_id) +
                     " is not in the referencedbentity table.\n")
            continue
        (citation, pmid) = dbentity_id_to_citation.get(x.dbentity_id)
        display_name = citation.split(')')[0] + ')'
        if display_name == x.display_name:
            continue
        display_name_old = x.display_name
        x.display_name = display_name
        nex_session.add(x)
        nex_session.commit()
        log.info("PMID:" + str(pmid) + " display_name is changed from " +
                 display_name_old + " to " + display_name)

    log.info("Done")
Example #9
def load_ontology(ontology_file):

    nex_session = get_session()

    source_to_id = dict([(x.display_name, x.source_id) for x in nex_session.query(Source).all()])
    psimodid_to_psimod = dict([(x.psimodid, x) for x in nex_session.query(Psimod).all()])
    term_to_ro_id = dict([(x.display_name, x.ro_id) for x in nex_session.query(Ro).all()])
    
    psimod_id_to_parent = {}
    for x in nex_session.query(PsimodRelation).all():
        parents = []
        if x.child_id in psimod_id_to_parent:
            parents = psimod_id_to_parent[x.child_id]
        parents.append(x.parent_id)
        psimod_id_to_parent[x.child_id] = parents


    ####################################
    fw = open(log_file, "w")
    
    data = read_obo(ontology_file)
    
    [update_log, to_delete_list] = load_new_data(nex_session, data, 
                                                 source_to_id, 
                                                 psimodid_to_psimod, 
                                                 term_to_ro_id['is a'],
                                                 psimod_id_to_parent,
                                                 fw)
    
    write_summary_and_send_email(fw, update_log, to_delete_list)
    
    nex_session.close()

    fw.close()
Example #10
def update_reference_data(log_file):
 
    nex_session = get_session()

    pmid_to_reference_id = dict([(x.pmid, x.dbentity_id) for x in nex_session.query(Referencedbentity).all()])
    reference_id_author_to_x = dict([((x.reference_id, x.display_name), x) for x in nex_session.query(Referenceauthor).all()])

    fw = open(log_file,"w")

    fw.write(str(datetime.now()) + "\n")
    fw.write("Getting PMID list...\n")

    log.info("Getting data from the database...")

    pmid_all = []
    for x in nex_session.query(Referencedbentity).all():
        if x.pmid:
            pmid_all.append(x.pmid)
            
    ###########################
    # nex_session.close()
    # nex_session = get_session()
    ###########################
    i = 0
    j = 0
    pmids = []
    for pmid in pmid_all:

        if pmid is None or pmid in [26842620, 27823544, 11483584]:
            continue

        i = i + 1
        j = j + 1
        
        # if j >= MAX_4_CONNECTION:
        #    ###########################
        #    nex_session.close()
        #    nex_session = get_session()
        #    ###########################
        #    log.info("Reference updated: " + str(i))
        #    j = 0

        # print "PMID: ", pmid
            
        pmids.append(str(pmid))
        if len(pmids) >= MAX:
            records = get_pubmed_record_from_xml(','.join(pmids))
            update_orcid(nex_session, fw, records, pmid_to_reference_id, 
                         reference_id_author_to_x )
            pmids = []
    
    if len(pmids) > 0:
        records = get_pubmed_record_from_xml(','.join(pmids))
        update_orcid(nex_session, fw, records, pmid_to_reference_id,
                     reference_id_author_to_x)

    fw.close()
    nex_session.close()
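
The loop above accumulates PMIDs and flushes a comma-joined batch whenever it reaches MAX, with one final flush for the remainder. The same batching can be factored into a small generator; a sketch using only the original's MAX constant (valid_pmids is a hypothetical name for the already-filtered PMID list):

def chunks(items, size):
    # Yield consecutive slices of at most `size` items.
    for i in range(0, len(items), size):
        yield items[i:i + size]

# for batch in chunks(valid_pmids, MAX):
#     records = get_pubmed_record_from_xml(','.join(batch))
#     update_orcid(nex_session, fw, records, pmid_to_reference_id,
#                  reference_id_author_to_x)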
Example #11
def load_data():

    nex_session = get_session()

    log.info("Getting data from database...")

    source = nex_session.query(Source).filter_by(
        display_name='SGD').one_or_none()
    source_id = source.source_id
    locus_allele_to_id = dict([((x.locus_id, x.allele_id), x.locus_allele_id)
                               for x in nex_session.query(LocusAllele).all()])
    locus_allele_reference_to_id = dict([
        ((x.locus_allele_id, x.reference_id), x.locusallele_reference_id)
        for x in nex_session.query(LocusalleleReference).all()
    ])

    count = 0

    loaded = {}
    ref_loaded = {}
    allPhenos = nex_session.query(Phenotypeannotation).all()
    for x in allPhenos:
        if x.allele_id is None:
            continue
        locus_allele_id = locus_allele_to_id.get((x.dbentity_id, x.allele_id))
        if locus_allele_id is None:
            locus_allele_id = loaded.get((x.dbentity_id, x.allele_id))
        if locus_allele_id is None:
            log.info("adding locus_allele: " + str(x.dbentity_id) + " and " +
                     str(x.allele_id))
            locus_allele_id = insert_locus_allele(nex_session, x.dbentity_id,
                                                  x.allele_id, source_id,
                                                  x.date_created, x.created_by)
            loaded[(x.dbentity_id, x.allele_id)] = locus_allele_id

        if (locus_allele_id,
                x.reference_id) not in locus_allele_reference_to_id and (
                    locus_allele_id, x.reference_id) not in ref_loaded:
            log.info("adding locusallele_reference: " + str(locus_allele_id) +
                     " and " + str(x.reference_id))
            insert_locusallele_reference(nex_session, locus_allele_id,
                                         x.reference_id, source_id,
                                         x.date_created, x.created_by)
            ref_loaded[(locus_allele_id, x.reference_id)] = 1

        count = count + 1

        if count >= 300:
            log.info("commiting data...")
            # nex_session.rollback()
            nex_session.commit()
            count = 0

    # nex_session.rollback()
    nex_session.commit()

    nex_session.close()
    log.info("Done!")
Example #12
def copy_files():

    nex_session = get_session()

    copy_gff(nex_session)
    copy_gaf(nex_session)
    copy_gpad(nex_session)
    copy_gpi(nex_session)
    copy_noctua_gpad(nex_session)
Example #13
def load_summaries(summary_file):
    
    nex_session = get_session()

    sgdid_to_locus_id = dict([(x.sgdid, x.dbentity_id) for x in nex_session.query(Dbentity).filter_by(subclass='LOCUS').all()])    
    locus_id_to_summary = dict([(x.locus_id, x) for x in nex_session.query(Locussummary).filter_by(summary_type=summary_type).all()])

    sgd = nex_session.query(Source).filter_by(format_name='SGD').one_or_none()
    source_id = sgd.source_id

    uniprot_to_sgdid_list = read_gpi_file()

    f = open(summary_file)
    fw = open(log_file, "w")
    
    for line in f:

        pieces = line.strip().split("\t")
        if pieces[0] == 'Group':
            continue
        
        if len(pieces) < 8:
            print("BAD LINE:", line)
            continue
        
        curatorName = pieces[1].strip().replace(" [Expired account]", "")
        curator = curator_id.get(curatorName)
        if curator is None:
            print("The curator name:", pieces[1], " is not in the mapping file.")
            continue

        date_created = pieces[6].strip()
        summary_text = pieces[7].strip()

        sgdid_list = uniprot_to_sgdid_list.get(pieces[3].strip())
        if sgdid_list is None:
            print("The uniprot ID:", pieces[3], " is not found in the GPI file.")
            continue

    
        for sgdid in sgdid_list:
            locus_id = sgdid_to_locus_id.get(sgdid)
            if locus_id is None:
                print("The sgdid:", sgdid, " is not in the database.")
                continue

            x = locus_id_to_summary.get(locus_id)
            if x is None:
                insert_locussummary(nex_session, fw, locus_id, summary_text, 
                                    source_id, curator, date_created)
            else:
                update_summary(nex_session, fw, locus_id, summary_text, source_id, 
                               curator, date_created, x)
            
    f.close()
    fw.close()
Example #14
def load_domains():

    nex_session = get_session()

    fw = open(log_file, "w")
    
    read_data_and_update_database(nex_session, fw)

    nex_session.close()

    fw.close()
Example #15
def standardize_name(infile, logfile):

    nex_session = get_session()

    name_to_locus = dict([(x.systematic_name, (x.dbentity_id, x.gene_name,
                                               x.name_description))
                          for x in nex_session.query(Locusdbentity).all()])
    id_to_reference = dict([
        (x.dbentity_id, (x.citation, x.pmid))
        for x in nex_session.query(Referencedbentity).all()
    ])

    locus_id_to_reference_list = {}
    for x in nex_session.query(LocusReferences).filter(
            LocusReferences.reference_class.in_(
                ['gene_name', 'name_description'])).all():
        (citation, pmid) = id_to_reference[x.reference_id]
        reference_list = []
        if x.locus_id in locus_id_to_reference_list:
            reference_list = locus_id_to_reference_list[x.locus_id]
        reference_list.append(
            (x.reference_id, citation, pmid, x.reference_class))
        locus_id_to_reference_list[x.locus_id] = reference_list

    # log.info("Fixing...\n")

    fw = open(logfile, "w")
    f = open(infile)

    unique_papers = []

    for line in f:
        pieces = line.strip().split("\t")
        if pieces[0] == 'ORF':
            continue
        (locus_id, gene_name, name_desc) = name_to_locus[pieces[0]]
        print(locus_id, gene_name, name_desc)

        reference_list = locus_id_to_reference_list.get(locus_id)
        print(pieces[0], pieces[1], pieces[2], pieces[4], reference_list)

        if reference_list is None:
            print "NO REF for ", pieces[0], locus_id, gene_name
            continue

        for reference_row in reference_list:
            (reference_id, citation, pmid, reference_class) = reference_row
            if (reference_id, citation) in unique_papers:
                continue
            print(reference_id, citation)
            unique_papers.append((reference_id, citation))

    fw.close()
    f.close()
Example #16
def get_relation_to_ro_id(relation_type, nex_session=None):
    from src.models import Ro
    global relation_to_ro_id
    if relation_to_ro_id is None:
        if nex_session is None:
            nex_session = get_session()
        relation_to_ro_id = {}
        for relation in nex_session.query(Ro).all():
            relation_to_ro_id[relation.display_name] = relation.ro_id
    return relation_to_ro_id.get(relation_type)
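
The global statement in Example #16 implies that the defining module initializes relation_to_ro_id to None at import time, so the Ro table is queried once and cached for all later lookups. A usage sketch under that assumption:

relation_to_ro_id = None  # module-level cache the function's `global` statement expects

# ro_id = get_relation_to_ro_id('is a')    # first call populates the cache from Ro
# same = get_relation_to_ro_id('is a')     # later calls never touch the database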
Example #17
def load_data():

    nex_session = get_session()

    all_links = get_links()

    for link in all_links:
        (display_name, link_url, index_key) = link
        insert_tools(nex_session, display_name, link_url, index_key)

    nex_session.commit()

    nex_session.close()
Example #18
def dump_data():

    nex_session = get_session()

    allele_id_to_name = dict([(x.allele_id, x.display_name)
                              for x in nex_session.query(Allele).all()])

    allPheno = nex_session.query(Phenotypeannotation).all()
    for x in allPheno:
        if x.allele_id:
            print(str(x.annotation_id) + "\t" + allele_id_to_name[x.allele_id])

    nex_session.close()
Example #19
def delete_obsolete_annotations(key_to_annotation, hasGoodAnnot,
                                go_id_to_aspect, annotation_update_log,
                                source_to_id, dbentity_id_with_new_pmid,
                                dbentity_id_with_uniprot, fw):

    nex_session = get_session()

    evidence_to_eco_id = dict([(x.display_name, x.eco_id)
                               for x in nex_session.query(EcoAlias).all()])

    src_id = source_to_id['SGD']

    to_be_deleted = key_to_annotation.values()

    try:

        ## add check to see if there are any valid htp annotations..

        for x in nex_session.query(Goannotation).filter_by(
                source_id=src_id).filter_by(
                    annotation_type='high-throughput').all():
            hasGoodAnnot[(x.dbentity_id, go_id_to_aspect[x.go_id])] = 1

        ## delete the old ones -

        for x in to_be_deleted:

            ## don't delete the annotations for the features with a pmid not in db yet
            ## (so keep the old annotations for now)
            if dbentity_id_with_new_pmid.get(x.dbentity_id) is not None:
                continue

            ## don't delete PAINT annotations (they are not in GPAD files yet)
            if x.source_id == source_to_id['GO_Central']:
                continue

            aspect = go_id_to_aspect[x.go_id]
            if x.eco_id == evidence_to_eco_id['ND'] and hasGoodAnnot.get(
                (x.dbentity_id, aspect)) is None:
                ## still keep the ND annotation if there is no good annotation available yet
                continue
            elif dbentity_id_with_uniprot.get(x.dbentity_id):
                ## don't want to delete the annotations that are not in GPAD file yet
                delete_extensions_evidences(nex_session, x.annotation_id)
                nex_session.delete(x)
                nex_session.commit()
                fw.write("DELETE GOANNOTATION: row=" + str(x) + "\n")
                key = (x.annotation_type, 'annotation_deleted')
                annotation_update_log[key] = annotation_update_log[key] + 1
    finally:
        nex_session.close()
Example #20
def change_name(infile, logfile):

    nex_session = get_session()

    name_to_locus_id = dict([(x.systematic_name, x.dbentity_id) for x in nex_session.query(Locusdbentity).all()])
    pmid_to_reference_id = dict([(x.pmid, x.dbentity_id) for x in nex_session.query(Referencedbentity).all()])
    sgd = nex_session.query(Source).filter_by(display_name='SGD').one_or_none()
    source_id = sgd.source_id

    fw = open(logfile, "w")    
    f = open(infile)

    for line in f:
        if line.startswith('ORF'):
            continue
        pieces = line.strip().split("\t")
        orf_name = pieces[0]
        alias_name = pieces[1]
        alias_type = pieces[2]
        pmid = int(pieces[3])
        date_created = pieces[4]
        created_by = pieces[5]

        locus_id = name_to_locus_id.get(orf_name)       
        if locus_id is None:
            print("The ORF name:", orf_name, " is not in the database.")
            continue

        reference_id = pmid_to_reference_id.get(pmid)
        if reference_id is None:
            print("The PMID:", pmid, " is not in the database.")
            continue

        alias_id = insert_locus_alias(nex_session, fw, locus_id, alias_name, 
                                      alias_type, source_id, date_created, created_by)

        insert_locusalias_reference(nex_session, fw, alias_id, reference_id,
                                    source_id, date_created, created_by)

        note_id = insert_locusnote(nex_session, fw, locus_id, alias_name, source_id,
                                   created_by, date_created)

        insert_locusnote_reference(nex_session, fw, note_id, reference_id,
                                   source_id, created_by, date_created)
        
    # nex_session.rollback()
    nex_session.commit()

    fw.close()
    f.close()
Example #21
def add_papers(pmid_file, created_by):
 
    nex_session = get_session()

    f = open(pmid_file)

    for line in f:
        pmid = int(line.strip())
        print "adding paper for ", pmid
        add_paper(pmid, created_by)
        
    f.close()
    
    nex_session.commit()
Example #22
def load_data(infile, logfile):

    nex_session = get_session()

    name_to_locus_id = dict([(x.systematic_name, x.dbentity_id) for x in nex_session.query(Locusdbentity).all()])
    citation_to_reference_id = dict([(x.citation, x.dbentity_id) for x in nex_session.query(Referencedbentity).all()])
    sgd = nex_session.query(Source).filter_by(display_name='SGD').one_or_none()
    source_id = sgd.source_id
    
    fw = open(logfile, "w")    
    f = open(infile)

    for line in f:
        pieces = line.strip().split("\t")
        if pieces[0] == 'feature_name':
            continue
        locus_id = name_to_locus_id[pieces[0]]
        display_name = pieces[1]

        colleague_id = None
        if pieces[2]:
            colleague_id = int(pieces[2])
        reference_id = None
        if pieces[3]:
            reference_id = citation_to_reference_id.get(pieces[3].replace('"', ''))
        if reference_id is None:
            print "No citation provided or the citation is not in the database:", pieces[3]
            continue
        reservation_date = reformat_date(pieces[4])
        expiration_date = reformat_date(pieces[5])
        name_description = pieces[6].replace('"', '')
        created_by = pieces[7]

        insert_reservedname(nex_session, fw, locus_id, display_name, reference_id, colleague_id, 
                            source_id, reservation_date, expiration_date, name_description, created_by)

        if name_description:
            insert_locus_reference(nex_session, fw, locus_id, reference_id, source_id, reservation_date, created_by)

        note_id = insert_locusnote(nex_session, fw, locus_id, display_name, source_id, reservation_date, created_by)

        insert_locusnote_reference(nex_session, fw, note_id, reference_id, source_id, reservation_date, created_by)

    # nex_session.rollback()
    nex_session.commit()

    fw.close()
    f.close()
Example #23
def load_ontology(ontology_file):

    nex_session = get_session()

    log.info(str(datetime.now()))
    log.info("Getting data from database...")

    source_to_id = dict([(x.display_name, x.source_id)
                         for x in nex_session.query(Source).all()])
    chebiid_to_chebi = dict([(x.chebiid, x)
                             for x in nex_session.query(Chebi).all()])

    chebi_id_to_alias = {}
    for x in nex_session.query(ChebiAlia).all():
        if x.alias_type in ['PharmGKB ID', 'YeastPathway ID']:
            continue
        aliases = []
        if x.chebi_id in chebi_id_to_alias:
            aliases = chebi_id_to_alias[x.chebi_id]
        aliases.append((x.display_name, x.alias_type))
        chebi_id_to_alias[x.chebi_id] = aliases

    ####################################
    fw = open(log_file, "w")

    log.info("Reading data from ontology file...")

    is_3_star_term = {}
    data = read_owl(ontology_file, ontology, is_3_star_term)

    log.info("Updating chebi ontology data in the database...")

    [update_log, to_delete_list,
     term_name_changed] = load_new_data(nex_session, data, source_to_id,
                                        chebiid_to_chebi, chebi_id_to_alias,
                                        is_3_star_term, fw)

    log.info("Writing loading summary...")

    write_summary_and_send_email(fw, update_log, to_delete_list,
                                 term_name_changed)

    nex_session.close()

    fw.close()

    log.info(str(datetime.now()))
    log.info("Done!\n\n")
Example #24
def load_ontology(ontology_file):

    nex_session = get_session()

    source_to_id = dict([(x.display_name, x.source_id)
                         for x in nex_session.query(Source).all()])
    taxid_to_taxonomy = dict([(x.taxid, x)
                              for x in nex_session.query(Taxonomy).all()])
    term_to_ro_id = dict([(x.display_name, x.ro_id)
                          for x in nex_session.query(Ro).all()])

    taxonomy_id_to_alias = {}
    for x in nex_session.query(TaxonomyAlia).all():
        aliases = []
        if x.taxonomy_id in taxonomy_id_to_alias:
            aliases = taxonomy_id_to_alias[x.taxonomy_id]
        aliases.append((x.display_name, x.alias_type))
        taxonomy_id_to_alias[x.taxonomy_id] = aliases

    taxonomy_id_to_parent = {}
    for x in nex_session.query(TaxonomyRelation).all():
        parents = []
        if x.child_id in taxonomy_id_to_parent:
            parents = taxonomy_id_to_parent[x.child_id]
        parents.append(x.parent_id)
        taxonomy_id_to_parent[x.child_id] = parents

    ####################################
    fw = open(log_file, "w")

    [filtered_set,
     id_to_rank] = children_for_taxonomy_ancestor(ontology_file, ancestor)

    ## total 1037 in the filtered set
    ## print "COUNT=", len(filtered_set)

    data = read_owl(ontology_file, ontology)

    [update_log, to_delete_list
     ] = load_new_data(nex_session, data, source_to_id, taxid_to_taxonomy,
                       term_to_ro_id['is a'], taxonomy_id_to_alias,
                       taxonomy_id_to_parent, filtered_set, id_to_rank, fw)

    write_summary_and_send_email(fw, update_log, to_delete_list)

    nex_session.close()

    fw.close()
Example #25
def update_data(infile):

    nex_session = get_session()

    fw = open(log_file,"w")

    uniprot_to_locus_id = dict([(x.display_name, x.locus_id) for x in nex_session.query(LocusAlias).filter_by(alias_type="UniProtKB ID").all()])
    edam_to_id = dict([(x.format_name, x.edam_id) for x in nex_session.query(Edam).all()])
    src = nex_session.query(Source).filter_by(display_name=SOURCE).one_or_none()
    source_id = src.source_id
    
    log.info(str(datetime.now()))
    log.info("Getting data from the database...")

    locus_id_to_ec_list_DB = {}
    for x in nex_session.query(LocusAlias).filter_by(alias_type=ALIAS_TYPE).all():
        ec_list = []
        if x.locus_id in locus_id_to_ec_list_DB:
            ec_list = locus_id_to_ec_list_DB[x.locus_id]
        ec_list.append(x.display_name)
        locus_id_to_ec_list_DB[x.locus_id] = ec_list

    log.info(str(datetime.now()))
    log.info("Reading data from enzyme.dat file and updating database...")

    locus_id_to_ec_list = read_enzyme_file(uniprot_to_locus_id, infile)

    for locus_id in locus_id_to_ec_list:
        if locus_id in locus_id_to_ec_list_DB:
            update_ec_list(nex_session, fw, locus_id, source_id, 
                           locus_id_to_ec_list[locus_id], 
                           locus_id_to_ec_list_DB[locus_id])
            del locus_id_to_ec_list_DB[locus_id]
        else:
            add_ec_list(nex_session, fw, locus_id, source_id, 
                        locus_id_to_ec_list[locus_id])
        
    delete_old_ec_list(nex_session, fw, locus_id_to_ec_list_DB)
    
    # nex_session.rollback()

    nex_session.commit()

    fw.close()

    log.info(str(datetime.now()))
    log.info("Done!")
Example #26
def load_data():

    nex_session = get_session()

    sgd = nex_session.query(Source).filter_by(format_name='SGD').one_or_none()
    source_id = sgd.source_id

    f = open(data_file)
    for line in f:
        if line.startswith('dbentity_id'):
            continue
        pieces = line.strip().split(" ")
        locus_id = int(pieces[0])
        protein_name = " ".join(pieces[4:])

        insert_into_database(nex_session, source_id, locus_id, protein_name)

    f.close()
Example #27
def load_data():

    nex_session = get_session()

    name_to_locus_id = dict([(x.systematic_name, x.dbentity_id)
                             for x in nex_session.query(Locusdbentity).all()])
    sgd = nex_session.query(Source).filter_by(format_name='SGD').one_or_none()
    source_id = sgd.source_id

    f = open(data_file)
    for line in f:
        pieces = line.strip().split(" ")
        name = pieces[0]
        locus_id = name_to_locus_id.get(name)
        product_name = " ".join(pieces[1:])
        insert_into_database(nex_session, source_id, locus_id, product_name)

    f.close()
Example #28
def load_ontology(ontology_file):

    nex_session = get_session()

    source_to_id = dict([(x.display_name, x.source_id) for x in nex_session.query(Source).all()])
    soid_to_so = dict([(x.soid, x) for x in nex_session.query(So).all()])
    term_to_ro_id = dict([(x.display_name, x.ro_id) for x in nex_session.query(Ro).all()])
    
    so_id_to_alias = {}
    for x in nex_session.query(SoAlia).all():
        aliases = []
        if x.so_id in so_id_to_alias:
            aliases = so_id_to_alias[x.so_id]
        aliases.append((x.display_name, x.alias_type))
        so_id_to_alias[x.so_id] = aliases

    so_id_to_parent = {}
    for x in nex_session.query(SoRelation).all():
        parents = []
        if x.child_id in so_id_to_parent:
            parents = so_id_to_parent[x.child_id]
        parents.append(x.parent_id)
        so_id_to_parent[x.child_id] = parents


    ####################################
    fw = open(log_file, "w")
    
    is_sgd_term = {}
    data = read_owl(ontology_file, ontology)
    
    [update_log, to_delete_list] = load_new_data(nex_session, data, 
                                                 source_to_id, 
                                                 soid_to_so, 
                                                 term_to_ro_id['is a'],
                                                 so_id_to_alias,
                                                 so_id_to_parent,
                                                 fw)
    
    write_summary_and_send_email(fw, update_log, to_delete_list)
    
    nex_session.close()

    fw.close()
Example #29
def load_data():

    nex_session = get_session()

    log.info(str(datetime.now()))
    log.info("Getting data from database...")

    allele_to_dbentity_id = dict([
        (x.display_name.upper(), x.dbentity_id)
        for x in nex_session.query(Dbentity).filter_by(
            subclass='ALLELE').all()
    ])

    source = nex_session.query(Source).filter_by(
        display_name='SGD').one_or_none()
    source_id = source.source_id
    so = nex_session.query(So).filter_by(
        display_name='structural variant').one_or_none()
    so_id = so.so_id

    count = 0

    allAllele = nex_session.query(Allele).all()
    for x in allAllele:
        if x.display_name.upper() in allele_to_dbentity_id:
            continue
        log.info("adding alleledbentiy: " + x.display_name + "...")
        insert_alleledbentity(nex_session, x.format_name, x.display_name,
                              x.description, source_id, so_id, x.date_created,
                              x.created_by)
        count = count + 1
        if count >= 300:
            # nex_session.rollback()
            nex_session.commit()
            count = 0

    # nex_session.rollback()
    nex_session.commit()

    nex_session.close()
    log.info("Done!")
    log.info(str(datetime.now()))
Example #30
def load_ontology(ontology_file):

    nex_session = get_session()

    source_to_id = dict([(x.display_name, x.source_id)
                         for x in nex_session.query(Source).all()])
    roid_to_ro = dict([(x.roid, x) for x in nex_session.query(Ro).all()])

    fw = open(log_file, "w")

    data = read_owl(ontology_file, ontology)

    [update_log,
     to_delete_list] = load_new_data(nex_session, data, source_to_id[src],
                                     roid_to_ro, fw)

    write_summary_and_send_email(fw, update_log, to_delete_list)

    nex_session.close()

    fw.close()