Esempio n. 1
0
    def bioconcept_alias_starter():
        """Yield alias records for GO terms and phenotype observables.

        Three passes are made over the BUD database: GO synonyms, phenotype
        CV-term synonyms and phenotype CV-term dbxrefs (the latter two
        restricted to ``CVTerm.cv_no == 6``). Every alias whose bioconcept is
        present in the NEX database is yielded as a dict. When the bioconcept
        is missing, a message is printed and ``None`` is yielded instead.

        Both sessions are closed when the generator finishes, raises, or is
        closed before exhaustion.
        """
        bud_session = bud_session_maker()
        nex_session = nex_session_maker()

        try:
            key_to_bioconcept = {x.unique_key(): x for x in nex_session.query(Bioconcept).all()}
            key_to_source = {x.unique_key(): x for x in nex_session.query(Source).all()}

            def phenotype_key_for(cvterm):
                # A CV term named 'observable' is looked up under the format name 'ypo'.
                observable = cvterm.name.lower()
                if observable == 'observable':
                    observable = 'ypo'
                return (create_format_name(observable), 'OBSERVABLE')

            # GO aliases
            for old_goterm in make_db_starter(bud_session.query(Go).options(joinedload('go_gosynonyms')), 1000)():
                go_key = (get_go_format_name(old_goterm.go_go_id), 'GO')

                if go_key not in key_to_bioconcept:
                    print('Go term not found: ' + str(go_key))
                    yield None
                    continue

                for go_gosynonym in old_goterm.go_gosynonyms:
                    synonym = go_gosynonym.gosynonym
                    yield {'display_name': synonym.name,
                           'source': key_to_source['SGD'],
                           'bioconcept_id': key_to_bioconcept[go_key].id,
                           'date_created': synonym.date_created,
                           'created_by': synonym.created_by}

            # Phenotype aliases taken from CV-term synonyms
            for cvtermsynonym in bud_session.query(CVTermSynonym).join(CVTerm).filter(CVTerm.cv_no == 6).all():
                phenotype_key = phenotype_key_for(cvtermsynonym.cvterm)

                if phenotype_key not in key_to_bioconcept:
                    print('Phenotype not found: ' + str(phenotype_key))
                    yield None
                    continue

                yield {'display_name': cvtermsynonym.synonym,
                       'source': key_to_source['SGD'],
                       'bioconcept_id': key_to_bioconcept[phenotype_key].id,
                       'date_created': cvtermsynonym.date_created,
                       'created_by': cvtermsynonym.created_by}

            # Phenotype aliases taken from CV-term dbxrefs; these also carry a category
            for cvterm_dbxref in bud_session.query(CVTermDbxref).join(CVTerm).filter(CVTerm.cv_no == 6).options(joinedload('dbxref')).all():
                phenotype_key = phenotype_key_for(cvterm_dbxref.cvterm)

                if phenotype_key not in key_to_bioconcept:
                    print('Phenotype not found: ' + str(phenotype_key))
                    yield None
                    continue

                dbxref = cvterm_dbxref.dbxref
                yield {'display_name': dbxref.dbxref_id,
                       'source': key_to_source['SGD'],
                       'category': dbxref.dbxref_type,
                       'bioconcept_id': key_to_bioconcept[phenotype_key].id,
                       'date_created': dbxref.date_created,
                       'created_by': dbxref.created_by}
        finally:
            # Runs on normal exhaustion, on error and on generator.close().
            bud_session.close()
            nex_session.close()
Esempio n. 2
0
def create_phenotype_format_name(observable, qualifier):
    """Build the format name of a phenotype from its observable and qualifier.

    Without a qualifier the name is derived from the lower-cased observable
    alone. With a qualifier it is derived from
    ``<qualifier>_<observable>`` (both lower-cased), where a missing
    observable is replaced by ``'.'``.

    NOTE(review): ``observable=None`` together with ``qualifier=None`` raises
    AttributeError, exactly as before -- confirm whether callers can hit it.
    """
    if qualifier is None:
        return create_format_name(observable.lower())

    # qualifier is known to be set here, so only the observable needs a fallback.
    if observable is None:
        observable = '.'
    return create_format_name(qualifier.lower() + '_' + observable.lower())
Esempio n. 3
0
    def phenotype_starter():
        """Yield phenotype records built from BUD phenotypes.

        First pass: one record per BUD ``Phenotype`` whose observable exists
        in NEX. Second pass: for phenotype features whose observable is in
        ``chemical_phenotypes``, the generic observable is specialised with
        the experiment's chemical names and a record is yielded when that
        specialised observable exists in NEX.

        ``None`` is yielded (and the feature skipped) when a feature has no
        experiment or its experiment lists no chemicals. Both sessions are
        closed when the generator finishes, raises, or is closed early.
        """
        bud_session = bud_session_maker()
        nex_session = nex_session_maker()

        # old observable -> (text replaced by the chemical name, description prefix)
        chemical_templates = {
            'resistance to chemicals': ('chemicals', 'The level of resistance to exposure to '),
            'chemical compound accumulation': ('chemical compound', 'The production and/or storage of '),
            'chemical compound excretion': ('chemical compound', 'The excretion from the cell of '),
        }

        try:
            key_to_source = {x.unique_key(): x for x in nex_session.query(Source).all()}
            key_to_observable = {x.unique_key(): x for x in nex_session.query(Observable).all()}

            for bud_obj in bud_session.query(Phenotype).all():
                observable_key = (create_format_name(bud_obj.observable).lower(), 'OBSERVABLE')
                if observable_key in key_to_observable:
                    yield {'source': key_to_source['SGD'],
                           'observable': key_to_observable[observable_key],
                           'qualifier': bud_obj.qualifier,
                           'date_created': bud_obj.date_created,
                           'created_by': bud_obj.created_by}

            for bud_obj in make_db_starter(bud_session.query(PhenotypeFeature).join(PhenotypeFeature.phenotype).filter(Phenotype.observable.in_(chemical_phenotypes)), 1000)():
                # Without the 'continue' the code below would dereference a
                # missing experiment (AttributeError) or build a name from no chemicals.
                if bud_obj.experiment is None:
                    yield None
                    continue

                chemicals = bud_obj.experiment.chemicals
                if len(chemicals) == 0:
                    yield None
                    continue

                # Only the first element of each chemical entry is used as its name.
                chemical = ' and '.join([x[0] for x in chemicals])

                old_observable = bud_obj.phenotype.observable
                template = chemical_templates.get(old_observable)
                if template is None:
                    # Observable has no chemical-specific form: nothing to emit.
                    continue

                placeholder, description_prefix = template
                new_observable = old_observable.replace(placeholder, chemical)
                description = description_prefix + chemical + '.'

                observable_key = (create_format_name(new_observable).lower(), 'OBSERVABLE')
                if observable_key in key_to_observable:
                    yield {'source': key_to_source['SGD'],
                           'observable': key_to_observable[observable_key],
                           'qualifier': bud_obj.phenotype.qualifier,
                           'description': description,
                           'date_created': bud_obj.date_created,
                           'created_by': bud_obj.created_by}
        finally:
            # Runs on normal exhaustion, on error and on generator.close().
            bud_session.close()
            nex_session.close()
Esempio n. 4
0
    def author_reference_starter():
        """Yield author-reference link records copied from the BUD database.

        A record is yielded for each BUD author-reference row whose author
        (looked up by the format name of the author's name) and reference
        (looked up by id) both exist in NEX; otherwise a message is printed
        and the row is skipped. Every record uses the 'PubMed' source.

        Both sessions are closed when the generator finishes, raises, or is
        closed early.
        """
        bud_session = bud_session_maker()
        nex_session = nex_session_maker()

        try:
            id_to_reference = {x.id: x for x in nex_session.query(Reference).all()}
            key_to_author = {x.unique_key(): x for x in nex_session.query(Author).all()}
            key_to_source = {x.unique_key(): x for x in nex_session.query(Source)}

            for old_author_reference in bud_session.query(OldAuthorReference).all():
                old_author = old_author_reference.author
                author_key = create_format_name(old_author.name)
                reference_id = old_author_reference.reference_id

                if author_key not in key_to_author or reference_id not in id_to_reference:
                    print('Author or reference not found: ' + str(author_key) + ' ' + str(reference_id))
                    continue

                # Creation metadata comes from the author row, not the link row.
                yield {'id': old_author_reference.id,
                       'source': key_to_source['PubMed'],
                       'author': key_to_author[author_key],
                       'reference': id_to_reference[reference_id],
                       'order': old_author_reference.order,
                       'author_type': old_author_reference.type,
                       'date_created': old_author.date_created,
                       'created_by': old_author.created_by}
        finally:
            # Runs on normal exhaustion, on error and on generator.close().
            bud_session.close()
            nex_session.close()
Esempio n. 5
0
 def __init__(self, obj_json):
     """Initialise from ``obj_json`` and derive ``format_name`` and ``link``.

     An object whose display name is 'observable' is renamed to
     'Yeast Phenotype Ontology' and given the fixed format name 'ypo' with
     the ontology overview link. Any other object gets a format name built
     from its lower-cased display name and an '/observable/...' link.
     """
     # Presumably copies the obj_json fields onto self -- verify in the mixin.
     UpdateByJsonMixin.__init__(self, obj_json)

     if self.display_name != 'observable':
         self.format_name = create_format_name(self.display_name.lower())
         self.link = ''.join(['/observable/', self.format_name, '/overview'])
         return

     self.display_name = 'Yeast Phenotype Ontology'
     self.format_name = 'ypo'
     self.link = '/ontology/phenotype/ypo/overview'
Esempio n. 6
0
    def reftype_starter():
        """Yield one reference-type record per BUD ``RefType`` row.

        The record's source is the NEX source whose key equals the format
        name of the old row's source, or ``None`` when no such source exists.

        Both sessions are closed when the generator finishes, raises, or is
        closed early.
        """
        bud_session = bud_session_maker()
        nex_session = nex_session_maker()

        try:
            key_to_source = {x.unique_key(): x for x in nex_session.query(Source)}

            for old_reftype in bud_session.query(RefType).all():
                source_key = create_format_name(old_reftype.source)
                yield {'id': old_reftype.id,
                       'display_name': old_reftype.name,
                       # An unknown source is tolerated here and stored as None.
                       'source': key_to_source.get(source_key),
                       'date_created': old_reftype.date_created,
                       'created_by': old_reftype.created_by}
        finally:
            # Runs on normal exhaustion, on error and on generator.close().
            bud_session.close()
            nex_session.close()
Esempio n. 7
0
 def __init__(self, obj_json):
     """Initialise from ``obj_json`` and derive ``format_name`` and ``link``.

     ``obj_json`` is subscripted with "display_name", so that entry must be
     present. The link has the form ``/tag/<format_name>/overview``.
     """
     # Presumably copies the obj_json fields onto self -- verify in the mixin.
     UpdateByJsonMixin.__init__(self, obj_json)
     display_name = obj_json["display_name"]
     self.format_name = create_format_name(display_name)
     self.link = "".join(["/tag/", self.format_name, "/overview"])
Esempio n. 8
0
 def __init__(self, obj_json):
     """Initialise from ``obj_json`` and derive ``format_name`` and ``link``.

     The format name is built from the "display_name" entry with every '.'
     removed. The link has the form ``/strain/<format_name>/overview``.
     """
     # Presumably copies the obj_json fields onto self -- verify in the mixin.
     UpdateByJsonMixin.__init__(self, obj_json)
     dotted_name = create_format_name(obj_json.get("display_name"))
     self.format_name = dotted_name.replace(".", "")
     self.link = "".join(["/strain/", self.format_name, "/overview"])
Esempio n. 9
0
    def reference_starter():
        """Yield reference records converted from the BUD ``Reference`` table.

        References are processed in descending id order. Each record carries
        the citation-derived display name, the matching NEX journal and book
        (or ``None`` when no match exists), and the DOI and PubMed Central id
        extracted from the reference's URLs.

        A reference whose source is unknown to NEX is reported, ``None`` is
        yielded, and the reference is skipped. NOTE(review): previously such
        a reference was additionally yielded with ``source=None`` -- confirm
        that skipping is the intended outcome.

        Both sessions are closed when the generator finishes, raises, or is
        closed early.
        """
        bud_session = bud_session_maker()
        nex_session = nex_session_maker()

        pmc_prefix = 'http://www.ncbi.nlm.nih.gov/pmc/articles/'

        try:
            key_to_journal = {x.unique_key(): x for x in nex_session.query(Journal).all()}
            key_to_book = {x.unique_key(): x for x in nex_session.query(Book).all()}
            key_to_source = {x.unique_key(): x for x in nex_session.query(Source).all()}

            # One pass over the reference URLs fills both lookups.
            reference_id_to_doi = {}
            reference_id_to_pmcid = {}
            for ref_url in bud_session.query(Ref_URL).options(joinedload('url')).all():
                url = ref_url.url
                if url.url_type == 'DOI full text':
                    # Drops the first 18 characters, the length of
                    # 'http://dx.doi.org/' -- presumably that prefix; verify against the data.
                    reference_id_to_doi[ref_url.reference_id] = url.url[18:]
                elif url.url_type == 'PMC full text':
                    # Strip the article prefix and the final character
                    # (presumably a trailing slash -- verify against the data).
                    reference_id_to_pmcid[ref_url.reference_id] = url.url.replace(pmc_prefix, '')[:-1]

            for old_reference in bud_session.query(Reference).order_by(Reference.id.desc()).options(joinedload('book'), joinedload('journal')).all():
                citation = create_citation(old_reference.citation)
                display_name = create_display_name(citation)

                new_journal = None
                old_journal = old_reference.journal
                if old_journal is not None:
                    abbreviation = old_journal.abbreviation
                    # The journal with this ISSN is keyed under a fixed abbreviation.
                    if old_journal.issn == '0948-5023':
                        abbreviation = 'J Mol Model (Online)'
                    new_journal = key_to_journal.get((old_journal.full_name, abbreviation))

                new_book = None
                old_book = old_reference.book
                if old_book is not None:
                    new_book = key_to_book.get((old_book.title, old_book.volume_title))

                year = None if old_reference.year is None else int(old_reference.year)

                source_key = create_format_name(old_reference.source)
                if source_key not in key_to_source:
                    print('Source not found: ' + source_key)
                    yield None
                    continue
                source = key_to_source[source_key]

                yield {'id': old_reference.id,
                       'display_name': display_name,
                       'sgdid': old_reference.dbxref_id,
                       'source': source,
                       'ref_status': old_reference.status,
                       'pubmed_id': old_reference.pubmed_id,
                       'fulltext_status': old_reference.pdf_status,
                       'citation': citation,
                       'year': year,
                       'date_published': old_reference.date_published,
                       'date_revised': old_reference.date_revised,
                       'issue': old_reference.issue,
                       'page': old_reference.page,
                       'volume': old_reference.volume,
                       'title': old_reference.title,
                       'journal': new_journal,
                       'book': new_book,
                       'doi': reference_id_to_doi.get(old_reference.id),
                       'pubmed_central_id': reference_id_to_pmcid.get(old_reference.id),
                       'date_created': old_reference.date_created,
                       'created_by': old_reference.created_by}
        finally:
            # Runs on normal exhaustion, on error and on generator.close().
            bud_session.close()
            nex_session.close()
Esempio n. 10
0
 def __init__(self, obj_json):
     """Initialise from ``obj_json`` and derive ``format_name`` from the display name."""
     # Presumably copies the obj_json fields (including display_name) onto
     # self -- verify in the mixin.
     UpdateByJsonMixin.__init__(self, obj_json)
     display_name = self.display_name
     self.format_name = create_format_name(display_name)
Esempio n. 11
0
 def __init__(self, obj_json):
     """Initialise from ``obj_json`` and derive ``format_name`` and ``link``.

     The link has the form ``/author/<format_name>/overview``.
     """
     # Presumably copies the obj_json fields (including display_name) onto
     # self -- verify in the mixin.
     UpdateByJsonMixin.__init__(self, obj_json)
     display_name = self.display_name
     self.format_name = create_format_name(display_name)
     self.link = ''.join(['/author/', self.format_name, '/overview'])
Esempio n. 12
0
    def bioentity_url_starter():
        """Yield URL records attached to bioentities.

        Three groups of records are produced:

        1. feature URLs (``FeatUrl``), one record per web display of the URL;
        2. dbxref URLs (``DbxrefFeat``), one record per web display of each
           URL of the dbxref;
        3. a fixed set of SGD tool links for every NEX ``Locus``.

        In groups 1 and 2 the ``_SUBSTITUTE_THIS_`` placeholder of the URL is
        replaced according to the URL type. ``None`` is yielded when the
        bioentity is unknown to NEX, and also when the URL type is not
        supported (after printing a message). An unsupported URL produces no
        record, because its link would still contain the placeholder.

        Both sessions are closed when the generator finishes, raises, or is
        closed early.
        """
        bud_session = bud_session_maker()
        nex_session = nex_session_maker()

        def fill_in_link(old_url, bioentity, database_id):
            # Return the url with its placeholder substituted, or None when
            # the url type is not one this converter knows how to fill in.
            url_type = old_url.url_type
            if url_type == 'query by SGDID':
                value = bioentity.sgdid
            elif url_type in ('query by SGD ORF name with anchor', 'query by SGD ORF name'):
                value = bioentity.format_name
            elif url_type == 'query by ID assigned by database':
                value = database_id
            else:
                return None
            return old_url.url.replace('_SUBSTITUTE_THIS_', str(value))

        try:
            key_to_source = {x.unique_key(): x for x in nex_session.query(Source).all()}
            id_to_bioentity = {x.id: x for x in nex_session.query(Bioentity).all()}

            def url_records(old_url, bioentity_id, database_id):
                # One entry (record or None) per web display of old_url.
                for old_webdisplay in old_url.displays:
                    bioentity = id_to_bioentity.get(bioentity_id)
                    if bioentity is None:
                        yield None
                        continue

                    # database_id=None means: use the bioentity's own format name.
                    link = fill_in_link(old_url, bioentity,
                                        bioentity.format_name if database_id is None else database_id)
                    if link is None:
                        print("Can't handle this url. " + str(old_url.url_type))
                        yield None
                        continue

                    yield {'display_name': old_webdisplay.label_name,
                           'link': link,
                           'source': key_to_source[create_format_name(old_url.source)],
                           'category': category_mapping.get(old_webdisplay.label_location),
                           'bioentity_id': bioentity_id,
                           'date_created': old_url.date_created,
                           'created_by': old_url.created_by}

            # Feature urls: database-assigned ids resolve to the bioentity's format name.
            for bud_obj in make_db_starter(bud_session.query(FeatUrl).options(joinedload('url')), 1000)():
                for record in url_records(bud_obj.url, bud_obj.feature_id, None):
                    yield record

            # Dbxref urls: database-assigned ids resolve to the dbxref's own id.
            for bud_obj in make_db_starter(bud_session.query(DbxrefFeat).options(joinedload('dbxref'), joinedload('dbxref.dbxref_urls')), 1000)():
                dbxref_id = bud_obj.dbxref.dbxref_id
                for old_url in bud_obj.dbxref.urls:
                    for record in url_records(old_url, bud_obj.feature_id, dbxref_id):
                        yield record

            # Fixed tool links generated for every locus, in this order.
            for locus in nex_session.query(Locus).all():
                locus_links = (
                    ('SPELL', 'http://spell.yeastgenome.org/search/show_results?search_string=' + locus.format_name, 'LOCUS_EXPRESSION'),
                    ('Gene/Sequence Resources', '/cgi-bin/seqTools?back=1&seqname=' + locus.format_name, 'LOCUS_SEQUENCE'),
                    ('ORF Map', '/cgi-bin/ORFMAP/ORFmap?dbid=' + locus.sgdid, 'LOCUS_SEQUENCE'),
                    ('GBrowse', 'http://browse.yeastgenome.org/fgb2/gbrowse/scgenome/?name=' + locus.format_name, 'LOCUS_SEQUENCE'),
                    ('BLASTN', '/cgi-bin/blast-sgd.pl?name=' + locus.format_name, 'LOCUS_SEQUENCE_SECTION'),
                    ('BLASTP', '/cgi-bin/blast-sgd.pl?name=' + locus.format_name + '&suffix=prot', 'LOCUS_SEQUENCE_SECTION'),
                    ('Variant Viewer', '/variant-viewer#/' + locus.sgdid, 'LOCUS_SEQUENCE_OTHER_STRAINS'),
                    ('Yeast Phenotype Ontology', '/ontology/phenotype/ypo/overview', 'LOCUS_PHENOTYPE_ONTOLOGY'),
                )
                for display_name, link, category in locus_links:
                    yield {'display_name': display_name,
                           'link': link,
                           'source': key_to_source['SGD'],
                           'category': category,
                           'bioentity_id': locus.id}
        finally:
            # Runs on normal exhaustion, on error and on generator.close().
            bud_session.close()
            nex_session.close()
Esempio n. 13
0
 def __init__(self, obj_json):
     """Initialise from ``obj_json`` and derive ``format_name``.

     ``format_name`` is ``None`` when ``obj_json`` has no (or a ``None``)
     'display_name'; otherwise it is built from that display name.
     """
     # Presumably copies the obj_json fields onto self -- verify in the mixin.
     UpdateByJsonMixin.__init__(self, obj_json)
     display_name = obj_json.get('display_name')
     if display_name is None:
         self.format_name = None
     else:
         self.format_name = create_format_name(display_name)
Esempio n. 14
0
 def __init__(self, obj_json):
     """Initialise from ``obj_json`` and derive ``format_name`` and ``link``.

     Without a 'display_name' both attributes are ``None``. Otherwise the
     format name is built from the display name and cut to its first 95
     characters, and the link is ``/chemical/<format_name>/overview``.
     """
     # Presumably copies the obj_json fields onto self -- verify in the mixin.
     UpdateByJsonMixin.__init__(self, obj_json)

     display_name = obj_json.get('display_name')
     if display_name is None:
         self.format_name = None
     else:
         self.format_name = create_format_name(display_name)[:95]

     if self.format_name is None:
         self.link = None
     else:
         self.link = ''.join(['/chemical/', self.format_name, '/overview'])
Esempio n. 15
0
    def bioconcept_relation_starter():
        """Yield parent/child relation records between bioconcepts.

        Produced in this order:

        1. GO relations from first-generation ``GoPath`` rows;
        2. 'GO_SLIM' relations linking a child to every ancestor (via stored
           'is a' relations) that belongs to the 'Yeast GO-Slim' set;
        3. phenotype relations from ``CVTermRel`` rows;
        4. 'is a' relations from each generic chemical observable to its
           chemical-specific observable;
        5. 'PHENOTYPE_SLIM' relations linking each NEX phenotype to the
           first observable in its parent chain whose format name is in the
           phenotype slim set.

        ``None`` is yielded where a required bioconcept cannot be found, or
        where a phenotype feature has no experiment or no chemicals.
        Both sessions are closed when the generator finishes, raises, or is
        closed early.
        """
        bud_session = bud_session_maker()
        nex_session = nex_session_maker()

        try:
            key_to_bioconcept = {x.unique_key(): x for x in nex_session.query(Bioconcept).all()}
            key_to_source = {x.unique_key(): x for x in nex_session.query(Source).all()}

            # GO relations
            for gopath in make_db_starter(bud_session.query(GoPath).filter(GoPath.generation == 1).options(joinedload('child'), joinedload('ancestor')), 1000)():
                parent_key = (get_go_format_name(gopath.ancestor.go_go_id), 'GO')
                child_key = (get_go_format_name(gopath.child.go_go_id), 'GO')

                if parent_key in key_to_bioconcept and child_key in key_to_bioconcept:
                    yield {'source': key_to_source['SGD'],
                           'relation_type': gopath.relationship_type,
                           'parent_id': key_to_bioconcept[parent_key].id,
                           'child_id': key_to_bioconcept[child_key].id}
                else:
                    print('Could not find go. Parent: ' + str(parent_key) + ' Child: ' + str(child_key))
                    yield None

            # Collect the ids of the GO-slim terms. Three specific GO ids
            # (presumably the ontology roots -- confirm) are never treated as
            # slim terms and are reported by the same message as missing terms.
            excluded_slim_ids = ('GO:0008150', 'GO:0003674', 'GO:0005575')
            old_gosets = bud_session.query(GoSet).filter(GoSet.name == 'Yeast GO-Slim').options(joinedload('go')).all()
            slim_ids = set()
            for old_goset in old_gosets:
                go_key = (get_go_format_name(old_goset.go.go_go_id), 'GO')
                if go_key[0] not in excluded_slim_ids and go_key in key_to_bioconcept:
                    slim_ids.add(key_to_bioconcept[go_key].id)
                else:
                    print('GO term not found: ' + str(go_key))

            # Go Slim
            go_child_id_to_parent_ids = {}
            for go_relation in nex_session.query(Bioconceptrelation).filter(Bioconceptrelation.relation_type == 'is a'):
                go_child_id_to_parent_ids.setdefault(go_relation.child_id, []).append(go_relation.parent_id)

            for child_id in go_child_id_to_parent_ids:
                # Walk upwards level by level. Every ancestor is expanded,
                # slim or not, so slim terms behind a non-slim parent are
                # still reached; 'visited' keeps each ancestor to one visit.
                visited = set()
                frontier = set(go_child_id_to_parent_ids[child_id])
                while frontier:
                    visited.update(frontier)
                    next_frontier = set()
                    for parent_id in frontier:
                        if parent_id in slim_ids:
                            yield {'source': key_to_source['SGD'],
                                   'parent_id': parent_id,
                                   'child_id': child_id,
                                   'relation_type': 'GO_SLIM'}
                        next_frontier.update(go_child_id_to_parent_ids.get(parent_id, ()))
                    frontier = next_frontier - visited

            # Phenotype relations
            for cvtermrel in bud_session.query(CVTermRel).options(joinedload('child'), joinedload('parent')).all():
                parent_key = (create_format_name(cvtermrel.parent.name.lower()), 'OBSERVABLE')
                child_key = (create_format_name(cvtermrel.child.name.lower()), 'OBSERVABLE')

                # A parent named 'observable' is looked up under the format name 'ypo'.
                if parent_key == ('observable', 'OBSERVABLE'):
                    parent_key = ('ypo', 'OBSERVABLE')

                if parent_key in key_to_bioconcept and child_key in key_to_bioconcept:
                    yield {'source': key_to_source['SGD'],
                           'relation_type': cvtermrel.relationship_type,
                           'parent_id': key_to_bioconcept[parent_key].id,
                           'child_id': key_to_bioconcept[child_key].id,
                           'date_created': cvtermrel.date_created,
                           'created_by': cvtermrel.created_by}

            # Generic chemical observable -> chemical-specific observable
            for old_phenotype in make_db_starter(bud_session.query(OldPhenotype).filter(OldPhenotype.observable.in_(chemical_phenotypes)).options(
                                            joinedload('phenotype_features'), joinedload('phenotype_features.experiment')), 1000)():
                old_observable = old_phenotype.observable
                for phenotype_feature in old_phenotype.phenotype_features:
                    experiment = phenotype_feature.experiment
                    # Without an experiment or chemicals there is no specific
                    # observable to link to (and experiment.chemicals would fail on None).
                    if experiment is None or len(experiment.chemicals) == 0:
                        yield None
                        continue

                    # Only the first element of each chemical entry is used as its name.
                    chemical = ' and '.join([x[0] for x in experiment.chemicals])
                    if old_observable == 'resistance to chemicals':
                        new_observable = old_observable.replace('chemicals', chemical)
                    else:
                        new_observable = old_observable.replace('chemical compound', chemical)

                    parent_key = (create_format_name(old_observable.lower()), 'OBSERVABLE')
                    child_key = (create_format_name(new_observable.lower()), 'OBSERVABLE')

                    if parent_key in key_to_bioconcept and child_key in key_to_bioconcept:
                        yield {'source': key_to_source['SGD'],
                               'relation_type': 'is a',
                               'parent_id': key_to_bioconcept[parent_key].id,
                               'child_id': key_to_bioconcept[child_key].id}
                    else:
                        print('Could not find phenotype. Parent: ' + str(parent_key) + ' Child: ' + str(child_key))
                        yield None

            # Phenotype Slim
            phenotype_slim = {'cell_death', 'chromosome-plasmid_maintenance', 'intracellular_transport', 'mitotic_cell_cycle',
                              'prion_state', 'stress_resistance', 'budding', 'filamentous_growth', 'lifespan', 'sexual_cycle',
                              'viable', 'inviable', 'competitive_fitness', 'viability', 'haploinsufficient', 'haploproficient',
                              'metabolism_and_growth', 'cellular_morphology', 'culture_appearance', 'ypo'}
            for phenotype in nex_session.query(Phenotype).all():
                # Climb the first-parent chain until a slim observable is found or the chain ends.
                ancestor = phenotype.observable
                while ancestor is not None and ancestor.format_name not in phenotype_slim:
                    if len(ancestor.parents) > 0:
                        ancestor = ancestor.parents[0].parent
                    else:
                        ancestor = None

                if ancestor is not None:
                    yield {'source': key_to_source['SGD'],
                           'parent_id': ancestor.id,
                           'child_id': phenotype.id,
                           'relation_type': 'PHENOTYPE_SLIM'}
        finally:
            # Runs on normal exhaustion, on error and on generator.close().
            bud_session.close()
            nex_session.close()
Esempio n. 16
0
 def __init__(self, obj_json):
     """Initialise from ``obj_json`` and derive ``format_name`` and ``link``.

     The format name is built from the 'display_name' entry with every '.'
     removed. The link has the form ``/strain/<format_name>/overview``.
     """
     # Presumably copies the obj_json fields onto self -- verify in the mixin.
     UpdateByJsonMixin.__init__(self, obj_json)
     dotted_name = create_format_name(obj_json.get('display_name'))
     self.format_name = dotted_name.replace('.', '')
     self.link = ''.join(['/strain/', self.format_name, '/overview'])
Esempio n. 17
0
 def __init__(self, obj_json):
     """Initialise from ``obj_json`` and derive ``format_name`` and ``link``.

     ``obj_json`` is subscripted with 'display_name', so that entry must be
     present. The link has the form ``/tag/<format_name>/overview``.
     """
     # Presumably copies the obj_json fields onto self -- verify in the mixin.
     UpdateByJsonMixin.__init__(self, obj_json)
     display_name = obj_json['display_name']
     self.format_name = create_format_name(display_name)
     self.link = ''.join(['/tag/', self.format_name, '/overview'])
Esempio n. 18
0
 def __init__(self, obj_json):
     """Initialise from ``obj_json`` and derive ``display_name`` and ``format_name``.

     The display name is the title. The format name is built from the title
     alone, or from ``<title>_<volume_title>`` when a volume title is set.
     """
     # Presumably copies the obj_json fields (title, volume_title) onto
     # self -- verify in the mixin.
     UpdateByJsonMixin.__init__(self, obj_json)
     self.display_name = self.title
     # The suffix is computed separately: a conditional expression binds
     # looser than '+', so writing it inline without parentheses dropped the
     # title whenever a volume title was present.
     volume_suffix = '' if self.volume_title is None else '_' + self.volume_title
     self.format_name = create_format_name(self.title + volume_suffix)
Esempio n. 19
0
 def __init__(self, obj_json):
     """Initialise from ``obj_json`` and derive ``format_name`` and ``category``.

     ``category`` is assigned only when the 'eco_id' entry of ``obj_json``
     is a key of ``eco_id_to_category``.
     """
     # Presumably copies the obj_json fields onto self -- verify in the mixin.
     UpdateByJsonMixin.__init__(self, obj_json)
     self.format_name = create_format_name(obj_json.get('display_name'))

     eco_id = obj_json.get('eco_id')
     if eco_id in eco_id_to_category:
         self.category = eco_id_to_category[eco_id]
Esempio n. 20
0
 def __init__(self, obj_json):
     """Initialise from ``obj_json`` and derive ``display_name`` and ``format_name``.

     The display name is the title, falling back to ``med_abbr`` when there
     is no title. The format name is built from the first 99 characters of
     the display name, or, when ``med_abbr`` is set, from the first 50
     characters of the display name, an underscore and the first 49
     characters of ``med_abbr``.
     """
     # Presumably copies the obj_json fields (title, med_abbr) onto self --
     # verify in the mixin.
     UpdateByJsonMixin.__init__(self, obj_json)

     if self.title is None:
         self.display_name = self.med_abbr
     else:
         self.display_name = self.title

     if self.med_abbr is None:
         raw_name = self.display_name[:99]
     else:
         raw_name = self.display_name[:50] + '_' + self.med_abbr[:49]
     self.format_name = create_format_name(raw_name)