def test_colleague_model_search_result_dict_with_urls(self):
    source = factory.SourceFactory()
    colleague = factory.ColleagueFactory()
    instances = DBSession.query(Colleague).all()
    self.assertEqual(1, len(instances))
    self.assertEqual(colleague, instances[0])

    colleague_url_1 = factory.ColleagueUrlFactory(colleague_id=colleague.colleague_id)
    colleague_url_2 = factory.ColleagueUrlFactory(colleague_id=colleague.colleague_id, url_type="Lab")

    instances = DBSession.query(Colleague).all()
    self.assertEqual(1, len(instances))
    self.assertEqual(colleague, instances[0])

    self.assertEqual(colleague.to_search_results_dict(), {
        'format_name': colleague.format_name,
        'first_name': colleague.first_name,
        'last_name': colleague.last_name,
        'organization': colleague.institution,
        'work_phone': colleague.work_phone,
        'fax': colleague.fax,
        'email': colleague.email,
        'webpages': {
            'lab_url': colleague_url_2.obj_url,
            'research_summary_url': colleague_url_1.obj_url
        }
    })
def index_colleagues():
    colleagues = DBSession.query(Colleague).all()
    print("Indexing " + str(len(colleagues)) + " colleagues")
    bulk_data = []
    for c in colleagues:
        description_fields = []
        for field in [c.institution, c.country]:
            if field:
                description_fields.append(field)
        description = ", ".join(description_fields)

        position = "Lab Member"
        if c.is_pi == 1:
            position = "Head of Lab"

        locus = set()
        locus_ids = DBSession.query(ColleagueLocus.locus_id).filter(
            ColleagueLocus.colleague_id == c.colleague_id).all()
        if len(locus_ids) > 0:
            ids_query = [k[0] for k in locus_ids]
            locus_names = (
                DBSession.query(Locusdbentity.gene_name, Locusdbentity.systematic_name)
                .filter(Locusdbentity.dbentity_id.in_(ids_query))
                .all()
            )
            for l in locus_names:
                if l[0]:
                    locus.add(l[0])
                if l[1]:
                    locus.add(l[1])

        obj = {
            "name": c.last_name + ", " + c.first_name,
            "category": "colleague",
            "href": "/colleague/" + c.format_name + "/overview",
            "description": description,
            "first_name": c.first_name,
            "last_name": c.last_name,
            "institution": c.institution,
            "position": position,
            "country": c.country,
            "state": c.state,
            "colleague_loci": sorted(list(locus)),
        }
        c._include_keywords_to_dict(obj)  # adds 'keywords' to obj

        bulk_data.append({"index": {"_index": INDEX_NAME, "_type": DOC_TYPE, "_id": c.format_name}})
        bulk_data.append(obj)
        if len(bulk_data) == 1000:
            es.bulk(index=INDEX_NAME, body=bulk_data, refresh=True)
            bulk_data = []

    if len(bulk_data) > 0:
        es.bulk(index=INDEX_NAME, body=bulk_data, refresh=True)
def test_dbuser_model(self):
    instances = DBSession.query(Dbuser).all()
    self.assertEqual(0, len(instances))
    dbuser = factory.DbuserFactory()
    instances = DBSession.query(Dbuser).all()
    self.assertEqual(1, len(instances))
    self.assertEqual(dbuser, instances[0])

def test_source_model(self):
    instances = DBSession.query(Source).all()
    self.assertEqual(0, len(instances))
    source = factory.SourceFactory()
    instances = DBSession.query(Source).all()
    self.assertEqual(1, len(instances))
    self.assertEqual(source, instances[0])
def index_phenotypes():
    phenotypes = DBSession.query(Phenotype).all()
    bulk_data = []
    print("Indexing " + str(len(phenotypes)) + " phenotypes")
    for phenotype in phenotypes:
        annotations = DBSession.query(Phenotypeannotation).filter_by(
            phenotype_id=phenotype.phenotype_id).all()
        references = set([])
        loci = set([])
        chemicals = set([])
        mutant = set([])
        for annotation in annotations:
            references.add(annotation.reference.display_name)
            loci.add(annotation.dbentity.display_name)
            mutant.add(annotation.mutant.display_name)
            annotation_conds = (
                DBSession.query(PhenotypeannotationCond)
                .filter_by(annotation_id=annotation.annotation_id, condition_class="chemical")
                .all()
            )
            for annotation_cond in annotation_conds:
                chemicals.add(annotation_cond.condition_name)

        qualifier = None
        if phenotype.qualifier:
            qualifier = phenotype.qualifier.display_name

        obj = {
            "name": phenotype.display_name,
            "href": phenotype.obj_url,
            "description": phenotype.description,
            "observable": phenotype.observable.display_name,
            "qualifier": qualifier,
            "references": list(references),
            "phenotype_loci": list(loci),
            "number_annotations": len(loci),
            "chemical": list(chemicals),
            "mutant_type": list(mutant),
            "category": "phenotype",
            "keys": [],
        }
        bulk_data.append({"index": {"_index": INDEX_NAME, "_type": DOC_TYPE, "_id": phenotype.format_name}})
        bulk_data.append(obj)
        if len(bulk_data) == 500:
            es.bulk(index=INDEX_NAME, body=bulk_data, refresh=True)
            bulk_data = []

    if len(bulk_data) > 0:
        es.bulk(index=INDEX_NAME, body=bulk_data, refresh=True)
def test_obi_model(self):
    source = factory.SourceFactory()
    instances = DBSession.query(Obi).all()
    self.assertEqual(0, len(instances))
    obi = factory.ObiFactory()
    instances = DBSession.query(Obi).all()
    self.assertEqual(1, len(instances))
    self.assertEqual(obi, instances[0])

def test_keywords_model(self):
    source = factory.SourceFactory()
    instances = DBSession.query(Keyword).all()
    self.assertEqual(0, len(instances))
    keyword = factory.KeywordFactory()
    instances = DBSession.query(Keyword).all()
    self.assertEqual(1, len(instances))
    self.assertEqual(keyword, instances[0])

def test_edam_model(self):
    source = factory.SourceFactory()
    instances = DBSession.query(Edam).all()
    self.assertEqual(0, len(instances))
    edam = factory.EdamFactory()
    instances = DBSession.query(Edam).all()
    self.assertEqual(1, len(instances))
    self.assertEqual(edam, instances[0])

def test_taxonomy_model(self):
    source = factory.SourceFactory()
    instances = DBSession.query(Taxonomy).all()
    self.assertEqual(0, len(instances))
    taxonomy = factory.TaxonomyFactory()
    instances = DBSession.query(Taxonomy).all()
    self.assertEqual(1, len(instances))
    self.assertEqual(taxonomy, instances[0])

def test_reporter_model(self):
    source = factory.SourceFactory()
    instances = DBSession.query(Reporter).all()
    self.assertEqual(0, len(instances))
    reporter = factory.ReporterFactory()
    instances = DBSession.query(Reporter).all()
    self.assertEqual(1, len(instances))
    self.assertEqual(reporter, instances[0])

def test_filepath_model(self):
    source = factory.SourceFactory()
    instances = DBSession.query(Filepath).all()
    self.assertEqual(0, len(instances))
    filepath = factory.FilepathFactory()
    instances = DBSession.query(Filepath).all()
    self.assertEqual(1, len(instances))
    self.assertEqual(filepath, instances[0])

def test_locusdbentity_model(self):
    source = factory.SourceFactory()
    instances = DBSession.query(Locusdbentity).all()
    self.assertEqual(0, len(instances))
    locus = factory.LocusdbentityFactory()
    instances = DBSession.query(Locusdbentity).all()
    self.assertEqual(1, len(instances))
    self.assertEqual(locus, instances[0])

def test_apo_model(self):
    source = factory.SourceFactory()
    instances = DBSession.query(Apo).all()
    self.assertEqual(0, len(instances))
    apo = factory.ApoFactory()
    instances = DBSession.query(Apo).all()
    self.assertEqual(1, len(instances))
    self.assertEqual(apo, instances[0])

def test_allele_model(self):
    source = factory.SourceFactory()
    instances = DBSession.query(Allele).all()
    self.assertEqual(0, len(instances))
    allele = factory.AlleleFactory()
    instances = DBSession.query(Allele).all()
    self.assertEqual(1, len(instances))
    self.assertEqual(allele, instances[0])

def test_book_model(self):
    source = factory.SourceFactory()
    instances = DBSession.query(Book).all()
    self.assertEqual(0, len(instances))
    book = factory.BookFactory()
    instances = DBSession.query(Book).all()
    self.assertEqual(1, len(instances))
    self.assertEqual(book, instances[0])

def test_journal_model(self):
    source = factory.SourceFactory()
    instances = DBSession.query(Journal).all()
    self.assertEqual(0, len(instances))
    journal = factory.JournalFactory()
    instances = DBSession.query(Journal).all()
    self.assertEqual(1, len(instances))
    self.assertEqual(journal, instances[0])

def test_chebiurl_model(self):
    source = factory.SourceFactory()
    chebi = factory.ChebiFactory()
    instances = DBSession.query(ChebiUrl).all()
    self.assertEqual(0, len(instances))
    chebiurl = factory.ChebiUrlFactory()
    instances = DBSession.query(ChebiUrl).all()
    self.assertEqual(1, len(instances))
    self.assertEqual(chebiurl, instances[0])

def test_colleague_model(self):
    instances = DBSession.query(Colleague).all()
    self.assertEqual(0, len(instances))
    source = factory.SourceFactory()
    colleague = factory.ColleagueFactory()
    instances = DBSession.query(Colleague).all()
    self.assertEqual(1, len(instances))
    self.assertEqual(colleague, instances[0])
    self.assertEqual(colleague.source, source)

def test_phenotype_model(self):
    source = factory.SourceFactory()
    apo = factory.ApoFactory()
    instances = DBSession.query(Phenotype).all()
    self.assertEqual(0, len(instances))
    pheno = factory.PhenotypeFactory()
    instances = DBSession.query(Phenotype).all()
    self.assertEqual(1, len(instances))
    self.assertEqual(pheno, instances[0])
def index_go_terms():
    go_id_blacklist = load_go_id_blacklist("scripts/go_id_blacklist.lst")
    gos = DBSession.query(Go).all()
    print("Indexing " + str(len(gos) - len(go_id_blacklist)) + " GO terms")
    bulk_data = []
    for go in gos:
        if go.goid in go_id_blacklist:
            continue

        synonyms = DBSession.query(GoAlias.display_name).filter_by(go_id=go.go_id).all()
        references = set([])
        go_loci = set([])
        annotations = DBSession.query(Goannotation).filter_by(go_id=go.go_id).all()
        for annotation in annotations:
            if annotation.go_qualifier != "NOT":
                go_loci.add(annotation.dbentity.display_name)
            references.add(annotation.reference.display_name)

        numerical_id = go.goid.split(":")[1]
        key_values = [go.goid, "GO:" + str(int(numerical_id)), numerical_id, str(int(numerical_id))]
        keys = set([])
        for k in key_values:
            if k is not None:
                keys.add(k.lower())

        obj = {
            "name": go.display_name,
            "href": go.obj_url,
            "description": go.description,
            "synonyms": [s[0] for s in synonyms],
            "go_id": go.goid,
            "go_loci": sorted(list(go_loci)),
            "number_annotations": len(annotations),
            "references": list(references),
            "category": go.go_namespace.replace(" ", "_"),
            "keys": list(keys),  # serialize the set so the ES bulk payload is JSON-safe
        }
        bulk_data.append({"index": {"_index": INDEX_NAME, "_type": DOC_TYPE, "_id": go.goid}})
        bulk_data.append(obj)
        if len(bulk_data) == 800:
            es.bulk(index=INDEX_NAME, body=bulk_data, refresh=True)
            bulk_data = []

    if len(bulk_data) > 0:
        es.bulk(index=INDEX_NAME, body=bulk_data, refresh=True)
def test_colleague_keywords_model(self):
    source = factory.SourceFactory()
    colleague = factory.ColleagueFactory()
    keyword = factory.KeywordFactory()
    instances = DBSession.query(ColleagueKeyword).all()
    self.assertEqual(0, len(instances))
    colleague_keyword = factory.ColleagueKeywordFactory()
    instances = DBSession.query(ColleagueKeyword).all()
    self.assertEqual(1, len(instances))
    self.assertEqual(colleague_keyword, instances[0])
def test_colleague_association_model(self):
    source = factory.SourceFactory()
    colleague = factory.ColleagueFactory()
    # a second colleague is needed as the other side of the association;
    # use a distinct name instead of rebinding `colleague`
    associate = factory.ColleagueFactory(colleague_id=113699)
    instances = DBSession.query(ColleagueAssociation).all()
    self.assertEqual(0, len(instances))
    association = factory.ColleagueAssociationFactory()
    instances = DBSession.query(ColleagueAssociation).all()
    self.assertEqual(1, len(instances))
    self.assertEqual(association, instances[0])
def test_filekeyword_model(self):
    source = factory.SourceFactory()
    filedbentity = factory.FiledbentityFactory()
    filepath = factory.FilepathFactory()
    edam = factory.EdamFactory()
    keyword = factory.KeywordFactory()
    instances = DBSession.query(FileKeyword).all()
    self.assertEqual(0, len(instances))
    fkeyword = factory.FileKeywordFactory()
    instances = DBSession.query(FileKeyword).all()
    self.assertEqual(1, len(instances))
    self.assertEqual(fkeyword, instances[0])

def test_reference_document_model(self):
    source = factory.SourceFactory()
    journal = factory.JournalFactory()
    book = factory.BookFactory()
    refdbentity = factory.ReferencedbentityFactory()
    instances = DBSession.query(ReferenceDocument).all()
    self.assertEqual(0, len(instances))
    refdoc = factory.ReferenceDocumentFactory()
    instances = DBSession.query(ReferenceDocument).all()
    self.assertEqual(1, len(instances))
    self.assertEqual(refdoc, instances[0])

def test_colleague_model_search_results_doesnt_send_email_if_required(self):
    source = factory.SourceFactory()
    colleague = factory.ColleagueFactory(display_email=0)
    instances = DBSession.query(Colleague).all()
    self.assertEqual(1, len(instances))
    self.assertEqual(colleague, instances[0])
    self.assertNotIn('email', colleague.to_search_results_dict())

def test_colleague_model_info_dict_doesnt_send_email_if_required(self):
    source = factory.SourceFactory()
    colleague = factory.ColleagueFactory(display_email=0)
    instances = DBSession.query(Colleague).all()
    colleague_url_1 = factory.ColleagueUrlFactory(colleague_id=colleague.colleague_id)
    colleague_url_2 = factory.ColleagueUrlFactory(colleague_id=colleague.colleague_id, url_type="Lab")
    self.assertEqual(colleague.to_info_dict(), {
        'orcid': colleague.orcid,
        'first_name': colleague.first_name,
        'last_name': colleague.last_name,
        'position': colleague.job_title,
        'profession': colleague.profession,
        'organization': colleague.institution,
        'address': [colleague.address1],
        'city': colleague.city,
        'state': colleague.state,
        'country': colleague.country,
        'postal_code': colleague.postal_code,
        'work_phone': colleague.work_phone,
        'fax': colleague.fax,
        'webpages': {
            'lab_url': colleague_url_2.obj_url,
            'research_summary_url': colleague_url_1.obj_url
        },
        'research_interests': colleague.research_interest,
        'last_update': str(colleague.date_last_modified)
    })
def index_observables():
    observables = DBSession.query(Apo).filter_by(apo_namespace="observable").all()
    print("Indexing " + str(len(observables)) + " observables")
    bulk_data = []
    for observable in observables:
        obj = {
            "name": observable.display_name,
            "href": observable.obj_url,
            "description": observable.description,
            "category": "observable",
            "keys": [],
        }
        bulk_data.append(
            {"index": {"_index": INDEX_NAME, "_type": DOC_TYPE, "_id": "observable_" + str(observable.apo_id)}}
        )
        bulk_data.append(obj)
        if len(bulk_data) == 300:
            es.bulk(index=INDEX_NAME, body=bulk_data, refresh=True)
            bulk_data = []

    if len(bulk_data) > 0:
        es.bulk(index=INDEX_NAME, body=bulk_data, refresh=True)
def test_keyword_model_to_dict(self):
    source = factory.SourceFactory()
    instances = DBSession.query(Keyword).all()
    self.assertEqual(0, len(instances))
    keyword = factory.KeywordFactory()
    self.assertEqual(keyword.to_dict(), {'id': keyword.keyword_id, 'name': keyword.display_name})

def test_edam_model_to_dict(self):
    source = factory.SourceFactory()
    instances = DBSession.query(Edam).all()
    self.assertEqual(0, len(instances))
    edam = factory.EdamFactory()
    self.assertEqual(edam.to_dict(), {'id': edam.edam_id, 'name': edam.format_name})

def test_colleague_model_should_include_urls_in_dict(self):
    source = factory.SourceFactory()
    colleague = factory.ColleagueFactory()
    instances = DBSession.query(Colleague).all()
    colleague_url_1 = factory.ColleagueUrlFactory(colleague_id=colleague.colleague_id)
    colleague_url_2 = factory.ColleagueUrlFactory(colleague_id=colleague.colleague_id, url_type="Lab")
    colleague_dict = {}
    colleague._include_urls_to_dict(colleague_dict)
    self.assertEqual(colleague_dict, {
        'webpages': {
            'lab_url': colleague_url_2.obj_url,
            'research_summary_url': colleague_url_1.obj_url
        }
    })
def index_complex_names():
    complexes = DBSession.query(Complexdbentity).all()
    print("Indexing " + str(len(complexes)) + " complex names")
    bulk_data = []
    for c in complexes:
        synonyms = DBSession.query(ComplexAlias.display_name).filter_by(
            complex_id=c.dbentity_id).all()

        references = set([])
        refs = DBSession.query(ComplexReference).filter_by(
            complex_id=c.dbentity_id).all()
        for ref in refs:
            references.add(ref.reference.display_name)

        complex_loci = set([])
        annotations = DBSession.query(Complexbindingannotation).filter_by(
            complex_id=c.dbentity_id).all()
        for a in annotations:
            interactor = a.interactor
            if interactor.locus_id is not None:
                complex_loci.add(interactor.locus.display_name)

        key_values = [c.intact_id, c.complex_accession, c.sgdid]
        keys = set([])
        for k in key_values:
            if k is not None:
                keys.add(k.lower())

        obj = {
            "name": c.display_name,
            "complex_name": c.display_name,
            "href": "/complex/" + c.complex_accession,
            "description": c.description + "; " + c.properties,
            "category": "complex",
            "synonyms": [s[0] for s in synonyms],
            "systematic_name": c.systematic_name,
            "intact_id": c.intact_id,
            "complex_accession": c.complex_accession,
            "complex_loci": sorted(list(complex_loci)),
            "references": list(references),
            "keys": list(keys)
        }
        bulk_data.append({"index": {"_index": INDEX_NAME, "_id": str(uuid.uuid4())}})
        bulk_data.append(obj)
        if len(bulk_data) == 800:
            es.bulk(index=INDEX_NAME, body=bulk_data, refresh=True)
            bulk_data = []

    if len(bulk_data) > 0:
        es.bulk(index=INDEX_NAME, body=bulk_data, refresh=True)
def index_downloads():
    bulk_data = []
    dbentity_file_obj = IndexESHelper.get_file_dbentity_keyword()
    files = DBSession.query(Filedbentity).filter(
        Filedbentity.is_public == True, Filedbentity.s3_url != None).all()
    print("Indexing " + str(len(files)) + " download files")
    for x in files:
        try:
            keyword = []
            status = ""
            temp = dbentity_file_obj.get(x.dbentity_id)
            if temp:
                keyword = temp
            if x.dbentity_status in ("Active", "Archived"):
                status = x.dbentity_status
            obj = {
                "name": x.display_name,
                "raw_display_name": x.display_name,
                "filename": " ".join(x.display_name.split("_")),
                "file_name_format": " ".join(x.display_name.split("_")),
                "href": x.s3_url,
                "category": "download",
                "description": x.description,
                "keyword": keyword,
                "format": str(x.format.display_name),
                "status": str(status),
                "file_size": str(IndexESHelper.convertBytes(x.file_size)) if x.file_size is not None else x.file_size,
                "year": str(x.year),
                "readme_url": x.readme_file.s3_url if x.readme_file else None,
                "topic": x.topic.display_name,
                "data": x.data.display_name,
                "path_id": x.get_path_id()
            }
            bulk_data.append({"index": {"_index": INDEX_NAME, "_id": str(uuid.uuid4())}})
            bulk_data.append(obj)
            if len(bulk_data) == 50:
                es.bulk(index=INDEX_NAME, body=bulk_data, refresh=True)
                bulk_data = []
        except Exception as e:
            logging.error(str(e))  # Exception.message no longer exists in Python 3

    if len(bulk_data) > 0:
        es.bulk(index=INDEX_NAME, body=bulk_data, refresh=True)
def index_go_terms():
    go_id_blacklist = load_go_id_blacklist("scripts/search/go_id_blacklist.lst")
    gos = DBSession.query(Go).all()
    print("Indexing " + str(len(gos) - len(go_id_blacklist)) + " GO terms")
    bulk_data = []
    for go in gos:
        if go.goid in go_id_blacklist:
            continue

        synonyms = DBSession.query(GoAlias.display_name).filter_by(go_id=go.go_id).all()
        references = set([])
        gene_ontology_loci = set([])
        annotations = DBSession.query(Goannotation).filter_by(go_id=go.go_id).all()
        for annotation in annotations:
            if annotation.go_qualifier != "NOT":
                gene_ontology_loci.add(annotation.dbentity.display_name)
            references.add(annotation.reference.display_name)

        numerical_id = go.goid.split(":")[1]
        key_values = [
            go.goid, "GO:" + str(int(numerical_id)), numerical_id, str(int(numerical_id))
        ]
        keys = set([])
        for k in key_values:
            if k is not None:
                keys.add(k.lower())

        obj = {
            "name": go.display_name,
            "go_name": go.display_name,
            "href": go.obj_url,
            "description": go.description,
            "synonyms": [s[0] for s in synonyms],
            "go_id": go.goid,
            "gene_ontology_loci": sorted(list(gene_ontology_loci)),
            "number_annotations": len(annotations),
            "references": list(references),
            "category": go.go_namespace.replace(" ", "_"),
            "keys": list(keys)
        }
        bulk_data.append({"index": {"_index": INDEX_NAME, "_id": str(uuid.uuid4())}})
        bulk_data.append(obj)
        if len(bulk_data) == 800:
            es.bulk(index=INDEX_NAME, body=bulk_data, refresh=True)
            bulk_data = []

    if len(bulk_data) > 0:
        es.bulk(index=INDEX_NAME, body=bulk_data, refresh=True)
def index_references():
    _ref_loci = IndexESHelper.get_dbentity_locus_note()
    _references = DBSession.query(Referencedbentity).all()
    _abstracts = IndexESHelper.get_ref_abstracts()
    _authors = IndexESHelper.get_ref_authors()
    _aliases = IndexESHelper.get_ref_aliases()
    bulk_data = []
    print("Indexing " + str(len(_references)) + " references")
    for reference in _references:
        reference_loci = []
        if len(_ref_loci) > 0:
            temp_loci = _ref_loci.get(reference.dbentity_id)
            if temp_loci is not None:
                reference_loci = list(set(
                    [x.display_name for x in IndexESHelper.flattern_list(temp_loci)]
                ))

        abstract = _abstracts.get(reference.dbentity_id)
        if abstract is not None:
            abstract = abstract[0]

        sec_sgdids = _aliases.get(reference.dbentity_id)
        sec_sgdid = None
        authors = _authors.get(reference.dbentity_id)
        if sec_sgdids is not None:
            sec_sgdid = sec_sgdids[0]
        if authors is None:
            authors = []

        journal = reference.journal
        if journal:
            journal = journal.display_name

        key_values = [
            reference.pmcid, reference.pmid,
            "pmid: " + str(reference.pmid),
            "pmid:" + str(reference.pmid),
            "pmid " + str(reference.pmid),
            reference.sgdid
        ]
        keys = set([])
        for k in key_values:
            if k is not None:
                keys.add(str(k).lower())

        obj = {
            "name": reference.citation,
            "reference_name": reference.citation,
            "href": reference.obj_url,
            "description": abstract,
            "author": authors,
            "journal": journal,
            "year": str(reference.year),
            "reference_loci": reference_loci,
            "secondary_sgdid": sec_sgdid,
            "category": "reference",
            "keys": list(keys)
        }
        bulk_data.append({"index": {"_index": INDEX_NAME, "_id": str(uuid.uuid4())}})
        bulk_data.append(obj)
        if len(bulk_data) == 1000:
            es.bulk(index=INDEX_NAME, body=bulk_data, refresh=True)
            bulk_data = []

    if len(bulk_data) > 0:
        es.bulk(index=INDEX_NAME, body=bulk_data, refresh=True)
def load_tsv_filedbentities():
    engine = create_engine(NEX2_URI, pool_recycle=3600)
    DBSession.configure(bind=engine)

    # open ssh connection to download server
    client = paramiko.SSHClient()
    client.load_system_host_keys()
    username = input('Username for legacy download server: ')
    password = getpass.getpass('Password for %s@%s: ' % (username, HOSTNAME))
    client.connect(HOSTNAME, 22, username, password, gss_auth=False, gss_kex=False)
    sftp_client = client.open_sftp()

    f = open(INPUT_FILE_NAME)
    i = 0
    for line in f:
        val = line.split("\t")
        if val[0] == 'bun path':
            continue
        if len(val) > 0:
            i = i + 1
            ### added by Shuai
            if len(val) < 14:
                print(val)
                return
            ###
            raw_date = val[13]
            if len(raw_date):
                raw_date = datetime.strptime(val[13], '%Y-%m-%d')
            else:
                raw_date = None
            raw_status = val[4].strip()
            if raw_status == 'Archive':
                raw_status = 'Archived'
            bun_path = val[0].strip()
            new_path = val[1].strip()
            if new_path.startswith("datasets/"):
                new_path = "/datasets"
            if bun_path[0] != '/':
                bun_path = bun_path.replace('genome-sequences/', '/genome-sequences/')
            if new_path[0] != '/':
                new_path = new_path.replace('genome-sequences/', '/genome-sequences/')
            readme_file = val[18]
            obj = {
                'bun_path': bun_path,
                'new_path': new_path,
                'display_name': val[3].strip(),
                'status': raw_status,
                'source': val[5].strip(),
                'topic_edam_id': val[7].upper().replace('TOPIC', 'EDAM').strip(),
                'data_edam_id': val[9].upper().replace('DATA', 'EDAM').strip(),
                'format_edam_id': val[11].upper().replace('FORMAT', 'EDAM').strip(),
                'file_extension': val[12].strip(),
                'file_date': raw_date,
                'is_public': (val[15] == '1'),
                'is_in_spell': (val[16] == '1'),
                'is_in_browser': (val[17] == '1'),
                'readme_name': readme_file,
                'description': val[19].replace('"', ''),  # already str in Python 3; no .decode() needed
                'pmids': val[20],
                'keywords': val[21].replace('"', '')
            }
            create_and_upload_file(obj, i, sftp_client)
    client.close()
def index_genes():
    # Indexing just the S288C genes
    # dbentity: 1364643 (id) -> straindbentity -> 274901 (taxonomy_id)
    # list of dbentities comes from table DNASequenceAnnotation with taxonomy_id 274901
    # feature_type comes from DNASequenceAnnotation as well
    gene_ids_so = DBSession.query(
        Dnasequenceannotation.dbentity_id, Dnasequenceannotation.so_id).filter(
            Dnasequenceannotation.taxonomy_id == 274901).all()

    dbentity_ids_to_so = {}
    dbentity_ids = set([])
    so_ids = set([])
    for gis in gene_ids_so:
        dbentity_ids.add(gis[0])
        so_ids.add(gis[1])
        dbentity_ids_to_so[gis[0]] = gis[1]

    # add some non S288C genes
    not_s288c = DBSession.query(Locusdbentity.dbentity_id).filter(
        Locusdbentity.not_in_s288c == True).all()
    for id in not_s288c:
        dbentity_ids.add(id[0])
        # assume non S288C features to be ORFs
        dbentity_ids_to_so[id[0]] = 263757

    all_genes = DBSession.query(Locusdbentity).filter(
        Locusdbentity.dbentity_id.in_(list(dbentity_ids))).all()

    # make list of merged/deleted genes so they don't redirect when they show up as an alias
    merged_deleted_r = DBSession.query(Locusdbentity.format_name).filter(
        Locusdbentity.dbentity_status.in_(["Merged", "Deleted"])).all()
    merged_deleted = [d[0] for d in merged_deleted_r]

    feature_types_db = DBSession.query(So.so_id, So.display_name).filter(
        So.so_id.in_(list(so_ids))).all()
    feature_types = {}
    for ft in feature_types_db:
        feature_types[ft[0]] = ft[1]

    tc_numbers_db = DBSession.query(LocusAlias).filter_by(alias_type="TC number").all()
    tc_numbers = {}
    for tc in tc_numbers_db:
        if tc.locus_id in tc_numbers:
            tc_numbers[tc.locus_id].append(tc.display_name)
        else:
            tc_numbers[tc.locus_id] = [tc.display_name]

    ec_numbers_db = DBSession.query(LocusAlias).filter_by(alias_type="EC number").all()
    ec_numbers = {}
    for ec in ec_numbers_db:
        if ec.locus_id in ec_numbers:
            ec_numbers[ec.locus_id].append(ec.display_name)
        else:
            ec_numbers[ec.locus_id] = [ec.display_name]

    secondary_db = DBSession.query(LocusAlias).filter_by(alias_type="SGDID Secondary").all()
    secondary_sgdids = {}
    for sid in secondary_db:
        if sid.locus_id in secondary_sgdids:
            secondary_sgdids[sid.locus_id].append(sid.display_name)
        else:
            secondary_sgdids[sid.locus_id] = [sid.display_name]

    bulk_data = []
    print("Indexing " + str(len(all_genes)) + " genes")

    # batch-load per-gene data with the newer helper methods instead of
    # issuing one query per gene inside the loop
    _summary = IndexESHelper.get_locus_dbentity_summary()
    _protein = IndexESHelper.get_locus_dbentity_alias(["NCBI protein name"])
    _phenos = IndexESHelper.get_locus_phenotypeannotation()
    _goids = IndexESHelper.get_locus_go_annotation()
    _aliases_raw = IndexESHelper.get_locus_dbentity_alias(
        ["Uniform", "Non-uniform", "Retired name", "UniProtKB ID"])

    not_mapped_genes = IndexESHelper.get_not_mapped_genes()
    is_quick_flag = True
    for gene in all_genes:
        if gene.gene_name:
            _name = gene.gene_name
            if gene.systematic_name and gene.gene_name != gene.systematic_name:
                _name += " / " + gene.systematic_name
        else:
            _name = gene.systematic_name
        _systematic_name = gene.systematic_name

        summary = []
        if _summary is not None:
            summary = _summary.get(gene.dbentity_id)

        protein = _protein.get(gene.dbentity_id)
        if protein is not None:
            protein = protein[0].display_name

        # TEMP don't index due to schema change
        # sequence_history = DBSession.query(Locusnoteannotation.note).filter_by(dbentity_id=gene.dbentity_id, note_type="Sequence").all()
        # gene_history = DBSession.query(Locusnoteannotation.note).filter_by(dbentity_id=gene.dbentity_id, note_type="Locus").all()

        phenotype_ids = []
        if _phenos is not None:
            temp = _phenos.get(gene.dbentity_id)
            if temp is not None:
                phenotype_ids = [x.phenotype_id for x in temp]
        if len(phenotype_ids) > 0:
            phenotypes = DBSession.query(Phenotype.display_name).filter(
                Phenotype.phenotype_id.in_(phenotype_ids)).all()
        else:
            phenotypes = []

        go_ids = _goids.get(gene.dbentity_id)
        if go_ids is not None:
            go_ids = [x.go_id for x in go_ids]
        else:
            go_ids = []
        go_annotations = {
            "cellular component": set([]),
            "molecular function": set([]),
            "biological process": set([])
        }
        if len(go_ids) > 0:
            go = DBSession.query(Go.display_name, Go.go_namespace).filter(
                Go.go_id.in_(go_ids)).all()
            for g in go:
                go_annotations[g[1]].add(g[0] + " (direct)")

        go_slim_ids = DBSession.query(Goslimannotation.goslim_id).filter(
            Goslimannotation.dbentity_id == gene.dbentity_id).all()
        if len(go_slim_ids) > 0:
            go_slim_ids = [g[0] for g in go_slim_ids]
            go_slim = DBSession.query(Goslim.go_id, Goslim.display_name).filter(
                Goslim.goslim_id.in_(go_slim_ids)).all()
            go_ids = [g[0] for g in go_slim]
            go = DBSession.query(Go.go_id, Go.go_namespace).filter(
                Go.go_id.in_(go_ids)).all()
            for g in go:
                for gs in go_slim:
                    if gs[0] == g[0]:
                        go_annotations[g[1]].add(gs[1])

        # add "quick direct" keys such as aliases, SGDID, UniProt ID and format aliases
        aliases_raw = _aliases_raw.get(gene.dbentity_id)
        alias_quick_direct_keys = []
        aliases = []
        if aliases_raw is not None:
            for alias_item in aliases_raw:
                name = alias_item.display_name
                if name not in merged_deleted:
                    alias_quick_direct_keys.append(name)
                    if alias_item.alias_type != "UniProtKB ID":
                        aliases.append(name)

        # make everything in keys lowercase to ignore case
        keys = []
        _keys = [gene.gene_name, gene.systematic_name, gene.sgdid] + alias_quick_direct_keys
        # Add SGD:<gene SGDID> to the list of keywords for quick search
        _keys.append("SGD:{}".format(gene.sgdid))
        # If this gene has a reservedname associated with it, add that reservedname
        # to the keywords used for the quick search of this gene
        reservedname = DBSession.query(Reservedname).filter_by(
            locus_id=gene.dbentity_id).one_or_none()
        if reservedname:
            _keys.append(reservedname.display_name)
        for k in _keys:
            if k:
                keys.append(k.lower())

        obj = {
            "name": _name,
            "locus_name": _name,
            "sys_name": _systematic_name,
            "href": gene.obj_url,
            "description": gene.description,
            "category": "locus",
            "feature_type": feature_types[dbentity_ids_to_so[gene.dbentity_id]],
            "name_description": gene.name_description,
            "summary": summary,
            "locus_summary": summary,
            "phenotypes": [p[0] for p in phenotypes],
            "aliases": aliases,
            "cellular_component": list(
                go_annotations["cellular component"] - set([
                    "cellular component", "cellular component (direct)",
                    "cellular_component", "cellular_component (direct)"
                ])),
            "biological_process": list(
                go_annotations["biological process"] - set([
                    "biological process (direct)", "biological process",
                    "biological_process (direct)", "biological_process"
                ])),
            "molecular_function": list(
                go_annotations["molecular function"] - set([
                    "molecular function (direct)", "molecular function",
                    "molecular_function (direct)", "molecular_function"
                ])),
            "ec_number": ec_numbers.get(gene.dbentity_id),
            "protein": protein,
            "tc_number": tc_numbers.get(gene.dbentity_id),
            "secondary_sgdid": secondary_sgdids.get(gene.dbentity_id),
            "status": gene.dbentity_status,
            # TEMP don't index due to schema change
            # "sequence_history": [s[0] for s in sequence_history],
            # "gene_history": [g[0] for g in gene_history],
            "bioentity_id": gene.dbentity_id,
            "keys": list(keys),
            "is_quick_flag": str(is_quick_flag)
        }
        bulk_data.append({"index": {"_index": INDEX_NAME, "_id": str(uuid.uuid4())}})
        bulk_data.append(obj)
        if len(bulk_data) == 1000:
            es.bulk(index=INDEX_NAME, body=bulk_data, refresh=True)
            bulk_data = []

    if len(bulk_data) > 0:
        es.bulk(index=INDEX_NAME, body=bulk_data, refresh=True)
def update_database_load_file_to_s3(nex_session, data_file, gzip_file, source_to_id, edam_to_id):
    local_file = open(gzip_file, mode='rb')

    import hashlib
    # hash the file contents (not the file name) so an identical upload is skipped
    gff_md5sum = hashlib.md5(local_file.read()).hexdigest()
    local_file.seek(0)  # rewind so upload_file() can read the contents again
    row = nex_session.query(Filedbentity).filter_by(md5sum=gff_md5sum).one_or_none()
    if row is not None:
        return

    gzip_file = gzip_file.replace("scripts/dumping/ncbi/data/", "")

    nex_session.query(Dbentity).filter(
        Dbentity.display_name.like('RNAcentral.%.json.gz')).filter(
            Dbentity.dbentity_status == 'Active').update(
                {"dbentity_status": 'Archived'}, synchronize_session='fetch')
    nex_session.commit()

    data_id = edam_to_id.get('EDAM:3495')    # data:3495   RNA sequence
    topic_id = edam_to_id.get('EDAM:0099')   # topic:0099  RNA
    format_id = edam_to_id.get('EDAM:3464')  # format:3464 JSON format

    from sqlalchemy import create_engine
    from src.models import DBSession
    engine = create_engine(os.environ['NEX2_URI'], pool_recycle=3600)
    DBSession.configure(bind=engine)

    upload_file(CREATED_BY, local_file,
                filename=gzip_file,
                file_extension='gz',
                description='JSON file for yeast RNA genes',
                display_name=gzip_file,
                data_id=data_id,
                format_id=format_id,
                topic_id=topic_id,
                status='Active',
                readme_file_id=None,
                is_public='1',
                is_in_spell='0',
                is_in_browser='0',
                file_date=datetime.now(),
                source_id=source_to_id['SGD'],
                md5sum=gff_md5sum)

    rnaFile = nex_session.query(Dbentity).filter_by(
        display_name=gzip_file, dbentity_status='Active').one_or_none()
    if rnaFile is None:
        log.info("The " + gzip_file + " is not in the database.")
        return
    file_id = rnaFile.dbentity_id

    path = nex_session.query(Path).filter_by(
        path="/reports/chromosomal-features").one_or_none()
    if path is None:
        log.info("The path: /reports/chromosomal-features is not in the database.")
        return
    path_id = path.path_id

    x = FilePath(file_id=file_id,
                 path_id=path_id,
                 source_id=source_to_id['SGD'],
                 created_by=CREATED_BY)
    nex_session.add(x)
    nex_session.commit()

    log.info("Done uploading " + data_file)
def update_database_load_file_to_s3(nex_session, gaf_file, is_public, source_to_id, edam_to_id, datestamp):
    # gene_association.sgd.20171204.gaf.gz
    # gene_association.sgd-yeastmine.20171204.gaf.gz
    # datestamp = str(datetime.now()).split(" ")[0].replace("-", "")
    gzip_file = gaf_file + "." + datestamp + ".gaf.gz"

    import gzip
    import shutil
    with open(gaf_file, 'rb') as f_in, gzip.open(gzip_file, 'wb') as f_out:
        shutil.copyfileobj(f_in, f_out)

    local_file = open(gzip_file, mode='rb')

    ### upload a current GAF file to S3 with a static URL for Go Community ###
    if is_public == '1':
        upload_gaf_to_s3(local_file, "latest/gene_association.sgd.gaf.gz")
        local_file.seek(0)  # rewind in case the S3 helper consumed the handle
    ##########################################################################

    import hashlib
    # hash the GAF file contents (not the file name) so unchanged files are skipped
    with open(gaf_file, 'rb') as f:
        gaf_md5sum = hashlib.md5(f.read()).hexdigest()
    row = nex_session.query(Filedbentity).filter_by(md5sum=gaf_md5sum).one_or_none()
    if row is not None:
        return

    gzip_file = gzip_file.replace("scripts/dumping/curation/data/", "")

    # nex_session.query(Dbentity).filter_by(display_name=gzip_file, dbentity_status='Active').update({"dbentity_status": 'Archived'})
    if is_public == '1':
        nex_session.query(Dbentity).filter(
            Dbentity.display_name.like('gene_association.sgd%')).filter(
                Dbentity.dbentity_status == 'Active').update(
                    {"dbentity_status": 'Archived'}, synchronize_session='fetch')
        nex_session.commit()

    data_id = edam_to_id.get('EDAM:2048')    # data:2048   Report
    topic_id = edam_to_id.get('EDAM:0085')   # topic:0085  Functional genomics
    format_id = edam_to_id.get('EDAM:3475')  # format:3475 TSV

    if "yeastmine" not in gaf_file:
        from sqlalchemy import create_engine
        from src.models import DBSession
        engine = create_engine(os.environ['NEX2_URI'], pool_recycle=3600)
        DBSession.configure(bind=engine)

        readme = nex_session.query(Dbentity).filter_by(
            display_name="gene_association.README", dbentity_status='Active').one_or_none()
        if readme is None:
            log.info("gene_association.README is not in the database.")
            return
        readme_file_id = readme.dbentity_id

        # path.path = /reports/function
        upload_file(CREATED_BY, local_file,
                    filename=gzip_file,
                    file_extension='gz',
                    description='All GO annotations for yeast genes (protein and RNA) in GAF file format',
                    display_name=gzip_file,
                    data_id=data_id,
                    format_id=format_id,
                    topic_id=topic_id,
                    status='Active',
                    readme_file_id=readme_file_id,
                    is_public=is_public,
                    is_in_spell='0',
                    is_in_browser='0',
                    file_date=datetime.now(),
                    source_id=source_to_id['SGD'],
                    md5sum=gaf_md5sum)

        gaf = nex_session.query(Dbentity).filter_by(
            display_name=gzip_file, dbentity_status='Active').one_or_none()
        if gaf is None:
            log.info("The " + gzip_file + " is not in the database.")
            return
        file_id = gaf.dbentity_id

        path = nex_session.query(Path).filter_by(path="/reports/function").one_or_none()
        if path is None:
            log.info("The path /reports/function is not in the database.")
            return
        path_id = path.path_id

        x = FilePath(file_id=file_id,
                     path_id=path_id,
                     source_id=source_to_id['SGD'],
                     created_by=CREATED_BY)
        nex_session.add(x)
        nex_session.commit()

    log.info("Done uploading " + gaf_file)
def load_csv_filedbentities():
    engine = create_engine(NEX2_URI, pool_recycle=3600)
    DBSession.configure(bind=engine)

    # open ssh connection to download server
    client = paramiko.SSHClient()
    client.load_system_host_keys()
    username = input('Username for legacy download server: ')  # raw_input is Python 2 only
    password = getpass.getpass('Password for %s@%s: ' % (username, HOSTNAME))
    client.connect(HOSTNAME, 22, username, password, gss_auth=False, gss_kex=False)
    sftp_client = client.open_sftp()

    o = open(INPUT_FILE_NAME, newline='')  # 'rU' mode is deprecated in Python 3
    reader = csv.reader(o)
    for i, val in enumerate(reader):
        if i > 0:
            if val[0] == '':
                logging.info('Found a blank value, DONE!')
                return
            ### added by Shuai
            if len(val) < 14:
                print(val)
                return
            ###
            raw_date = val[13]
            if len(raw_date):
                temp = format_csv_date_string(val[13])
                if temp is not None:
                    raw_date = datetime.strptime(temp, '%Y-%m-%d')
                else:
                    raw_date = datetime.strptime(val[13], '%Y-%m-%d')
            else:
                raw_date = None
            raw_status = val[4].strip()
            if raw_status == 'Archive':
                raw_status = 'Archived'
            bun_path = val[0].strip()
            new_path = val[1].strip()
            if bun_path[0] != '/':
                bun_path = bun_path.replace('genome-sequences/', '/genome-sequences/')
            if new_path[0] != '/':
                new_path = new_path.replace('genome-sequences/', '/genome-sequences/')
            obj = {
                'bun_path': bun_path,
                'new_path': new_path,
                'display_name': val[3].strip(),
                'status': raw_status,
                'source': val[5].strip(),
                'topic_edam_id': val[7].upper().replace('TOPIC', 'EDAM').strip(),
                'data_edam_id': val[9].upper().replace('DATA', 'EDAM').strip(),
                'format_edam_id': val[11].upper().replace('FORMAT', 'EDAM').strip(),
                'file_extension': val[12].strip(),
                'file_date': raw_date,
                'is_public': (val[15] == '1'),
                'is_in_spell': (val[16] == '1'),
                'is_in_browser': (val[17] == '1'),
                'readme_name': val[18],
                'description': val[19],  # already str in Python 3; no .decode() needed
                'pmids': val[20],
                'keywords': val[21]
            }
            create_and_upload_file(obj, i, sftp_client)
    client.close()
def query_objects(self, context):
    service = self.parent.selected_object(context)
    return DBSession.query(Payment).filter(
        Payment.taxation_service_id == service.id).all()

def query_objects(self, context):
    club = self.parent.selected_object(context)
    return DBSession.query(Dog).filter(
        Dog.dog_training_club_id == club.id).all()
def update_database_load_file_to_s3(nex_session, gzip_file, source_to_id, edam_to_id):
    local_file = open(gzip_file, mode='rb')

    import hashlib
    file_md5sum = hashlib.md5(local_file.read()).hexdigest()
    local_file.seek(0)  # rewind so upload_file() can read the contents again
    row = nex_session.query(Filedbentity).filter_by(md5sum=file_md5sum).one_or_none()
    if row is not None:
        return

    if "tbl" in gzip_file:
        nex_session.query(Dbentity).filter(
            Dbentity.display_name.like('ncbi_tbl_files.%.tar.gz')).filter(
                Dbentity.dbentity_status == 'Active').update(
                    {"dbentity_status": 'Archived'}, synchronize_session='fetch')
    elif "sqn" in gzip_file:
        nex_session.query(Dbentity).filter(
            Dbentity.display_name.like('ncbi_sqn_files.%.tar.gz')).filter(
                Dbentity.dbentity_status == 'Active').update(
                    {"dbentity_status": 'Archived'}, synchronize_session='fetch')
    else:
        nex_session.query(Dbentity).filter(
            Dbentity.display_name.like('ncbi_gbf_files.%.tar.gz')).filter(
                Dbentity.dbentity_status == 'Active').update(
                    {"dbentity_status": 'Archived'}, synchronize_session='fetch')
    nex_session.commit()

    data_id = edam_to_id.get('EDAM:3671')    # data:3671   Text
    topic_id = edam_to_id.get('EDAM:0085')   # topic:0085  Functional genomics
    format_id = edam_to_id.get('EDAM:3507')  # format:3507 Document format

    if "tbl" in gzip_file:
        from sqlalchemy import create_engine
        from src.models import DBSession
        engine = create_engine(os.environ['NEX2_URI'], pool_recycle=3600)
        DBSession.configure(bind=engine)

    # readme = nex_session.query(Dbentity).filter_by(display_name="ncbi_tab_files.README", dbentity_status='Active').one_or_none()
    # if readme is None:
    #     log.info("ncbi_tbl_files.README is not in the database.")
    #     return
    # readme_file_id = readme.dbentity_id
    readme_file_id = None

    # path.path = /reports/function
    upload_file(CREATED_BY, local_file,
                filename=gzip_file,
                file_extension='gz',
                description='All yeast features in tbl file format',
                display_name=gzip_file,
                data_id=data_id,
                format_id=format_id,
                topic_id=topic_id,
                status='Active',
                readme_file_id=readme_file_id,
                is_public='1',
                is_in_spell='0',
                is_in_browser='0',
                file_date=datetime.now(),
                source_id=source_to_id['SGD'],
                md5sum=file_md5sum)

    file = nex_session.query(Dbentity).filter_by(
        display_name=gzip_file, dbentity_status='Active').one_or_none()
    if file is None:
        log.info("The " + gzip_file + " is not in the database.")
        return
    file_id = file.dbentity_id

    path = nex_session.query(Path).filter_by(path="/reports/function").one_or_none()
    if path is None:
        log.info("The path /reports/function is not in the database.")
        return
    path_id = path.path_id

    x = FilePath(file_id=file_id,
                 path_id=path_id,
                 source_id=source_to_id['SGD'],
                 created_by=CREATED_BY)
    nex_session.add(x)
    nex_session.commit()
def get_readme_file(cls, id):
    # filter_by() takes keyword arguments; use filter() for a column expression,
    # and return the result so callers actually get the rows
    return DBSession.query(Filedbentity).filter(Filedbentity.dbentity_id == id).all()
def query_objects(self, context):
    return DBSession.query(TaxationService).all()
def update_database_load_file_to_s3(nex_session, gff_file, gzip_file, source_to_id, edam_to_id):
    local_file = open(gzip_file, mode='rb')

    ### upload a current GFF file to S3 with a static URL for Go Community ###
    upload_gff_to_s3(local_file, "latest/saccharomyces_cerevisiae.gff.gz")
    ##########################################################################

    import hashlib
    # hash the file contents (not the file name) so unchanged files are skipped
    local_file.seek(0)
    gff_md5sum = hashlib.md5(local_file.read()).hexdigest()
    local_file.seek(0)  # rewind again so upload_file() can read the contents
    row = nex_session.query(Filedbentity).filter_by(md5sum=gff_md5sum).one_or_none()
    if row is not None:
        return

    gzip_file = gzip_file.replace("scripts/dumping/curation/data/", "")

    nex_session.query(Dbentity).filter(
        Dbentity.display_name.like('saccharomyces_cerevisiae.%.gff.gz')).filter(
            Dbentity.dbentity_status == 'Active').update(
                {"dbentity_status": 'Archived'}, synchronize_session='fetch')
    nex_session.commit()

    data_id = edam_to_id.get('EDAM:3671')    # data:3671   Text
    topic_id = edam_to_id.get('EDAM:3068')   # topic:3068  Literature and language
    format_id = edam_to_id.get('EDAM:3507')  # format:3507 Document format

    from sqlalchemy import create_engine
    from src.models import DBSession
    engine = create_engine(os.environ['NEX2_URI'], pool_recycle=3600)
    DBSession.configure(bind=engine)

    readme = nex_session.query(Dbentity).filter_by(
        display_name="saccharomyces_cerevisiae_gff.README",
        dbentity_status='Active').one_or_none()
    if readme is None:
        log.info("saccharomyces_cerevisiae_gff.README is not in the database.")
        return
    readme_file_id = readme.dbentity_id

    # path.path = /reports/chromosomal-features
    upload_file(CREATED_BY, local_file,
                filename=gzip_file,
                file_extension='gz',
                description='GFF file for yeast genes (protein and RNA)',
                display_name=gzip_file,
                data_id=data_id,
                format_id=format_id,
                topic_id=topic_id,
                status='Active',
                readme_file_id=readme_file_id,
                is_public='1',
                is_in_spell='0',
                is_in_browser='0',
                file_date=datetime.now(),
                source_id=source_to_id['SGD'],
                md5sum=gff_md5sum)

    gff = nex_session.query(Dbentity).filter_by(
        display_name=gzip_file, dbentity_status='Active').one_or_none()
    if gff is None:
        log.info("The " + gzip_file + " is not in the database.")
        return
    file_id = gff.dbentity_id

    path = nex_session.query(Path).filter_by(
        path="/reports/chromosomal-features").one_or_none()
    if path is None:
        log.info("The path: /reports/chromosomal-features is not in the database.")
        return
    path_id = path.path_id

    x = FilePath(file_id=file_id,
                 path_id=path_id,
                 source_id=source_to_id['SGD'],
                 created_by=CREATED_BY)
    nex_session.add(x)
    nex_session.commit()

    log.info("Done uploading " + gff_file)
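# Several of the upload scripts above need an MD5 of the file *contents* to
# decide whether an identical file is already registered in Filedbentity.
# A small helper along these lines — hypothetical, not part of the codebase —
# would centralize that step and avoid reading large files into memory at once:
import hashlib

def md5_of_file(path, chunk_size=8192):
    """Return the hex MD5 digest of a file's contents, read in chunks."""
    digest = hashlib.md5()
    with open(path, 'rb') as fh:
        for chunk in iter(lambda: fh.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()

# e.g. gff_md5sum = md5_of_file(gzip_file), with no seek() bookkeeping needed
# on the handle that upload_file() later consumes.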
def load_csv_filedbentities():
    engine = create_engine(NEX2_URI, pool_recycle=3600)
    DBSession.configure(bind=engine)

    o = open(INPUT_FILE_NAME, newline='')  # 'rU' mode is deprecated in Python 3
    reader = csv.reader(o)
    for i, val in enumerate(reader):
        if i > 0:
            ### added by Shuai
            if len(val) == 0:
                continue
            if val[0] == '':
                logging.info('Found a blank value, DONE!')
                return
            ### added by Shuai
            if len(val) < 14:
                print(val)
                return
            ###
            raw_date = val[13]
            if len(raw_date):
                temp = format_csv_date_string(val[13])
                if temp is not None:
                    raw_date = datetime.strptime(temp, '%Y-%m-%d')
                else:
                    raw_date = datetime.strptime(val[13], '%Y-%m-%d')
            else:
                raw_date = None
            raw_status = val[4].strip()
            if raw_status == 'Archive':
                raw_status = 'Archived'
            bun_path = val[0].strip()
            new_path = val[1].strip()
            if bun_path[0] != '/':
                bun_path = bun_path.replace('genome-sequences/', '/genome-sequences/')
            if new_path[0] != '/':
                new_path = new_path.replace('genome-sequences/', '/genome-sequences/')
            readme_file = val[18]
            obj = {
                'bun_path': bun_path,
                'new_path': new_path,
                'display_name': val[3].strip(),
                'status': raw_status,
                'source': val[5].strip(),
                'topic_edam_id': val[7].upper().replace('TOPIC', 'EDAM').strip(),
                'data_edam_id': val[9].upper().replace('DATA', 'EDAM').strip(),
                'format_edam_id': val[11].upper().replace('FORMAT', 'EDAM').strip(),
                'file_extension': val[12].strip(),
                'file_date': raw_date,
                'is_public': (val[15] == '1'),
                'is_in_spell': (val[16] == '1'),
                'is_in_browser': (val[17] == '1'),
                'readme_name': readme_file,
                'description': val[19].replace('"', ''),  # already str in Python 3; no .decode() needed
                'pmids': val[20],
                'keywords': val[21].replace('"', '')
            }
            create_and_upload_file(obj, i)
from sqlalchemy import create_engine, and_
from elasticsearch import Elasticsearch
# from mapping import mapping
from es7_mapping import mapping
from src.models import DBSession, Base  # assumed: the models module provides the session and declarative base used below
import os
import requests
from threading import Thread
import json
import collections
from index_es_helpers import IndexESHelper
import concurrent.futures
import uuid
import logging

engine = create_engine(os.environ["NEX2_URI"], pool_recycle=3600)
DBSession.configure(bind=engine)
Base.metadata.bind = engine

INDEX_NAME = os.environ.get("ES_INDEX_NAME", "searchable_items_aws")
DOC_TYPE = "searchable_item"
ES_URI = os.environ["WRITE_ES_URI"]
es = Elasticsearch(ES_URI, retry_on_timeout=True)


def delete_mapping():
    print("Deleting mapping...")
    response = requests.delete(ES_URI + INDEX_NAME + "/")
    if response.status_code != 200:
        print("ERROR: " + str(response.json()))
    else:
        print("SUCCESS")
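# All of the index_* functions in this module share the same Elasticsearch bulk
# protocol: append an action line ({"index": {...}}) followed by the document
# itself, then flush every N pairs. A minimal sketch of that pattern as a
# reusable helper — hypothetical, not part of this module — could look like:
def bulk_flush(bulk_data, batch_size):
    """Flush accumulated action/document pairs once the batch is full.

    Returns the (possibly emptied) buffer so callers can keep appending.
    """
    if len(bulk_data) >= batch_size:
        es.bulk(index=INDEX_NAME, body=bulk_data, refresh=True)
        return []
    return bulk_data

# usage inside an indexing loop:
#   bulk_data.append({"index": {"_index": INDEX_NAME, "_id": str(uuid.uuid4())}})
#   bulk_data.append(obj)
#   bulk_data = bulk_flush(bulk_data, 1000)
# with one final es.bulk() call after the loop for any remainder.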
def insert_author_response(request):
    try:
        sgd = DBSession.query(Source).filter_by(display_name='Direct submission').one_or_none()
        source_id = sgd.source_id
        created_by = 'OTTO'

        email = request.params.get('email')
        if email == '':
            return HTTPBadRequest(body=json.dumps({'error': "Please enter your email address."}), content_type='text/json')
        is_email_valid = validate_email(email, verify=False)
        if not is_email_valid:
            msg = email + ' is not a valid email.'
            return HTTPBadRequest(body=json.dumps({'error': msg}), content_type='text/json')

        pmid = request.params.get('pmid')
        pmid = pmid.replace('PMID:', '').replace('Pubmed ID:', '').strip()
        if pmid == '':
            return HTTPBadRequest(body=json.dumps({'error': "Please enter Pubmed ID for your paper."}), content_type='text/json')
        if pmid.isdigit():
            pmid = int(pmid)
        else:
            return HTTPBadRequest(body=json.dumps({'error': "Please enter a number for Pubmed ID."}), content_type='text/json')

        x = DBSession.query(Authorresponse).filter_by(author_email=email, pmid=int(pmid)).one_or_none()
        if x is not None:
            return HTTPBadRequest(body=json.dumps({'error': "You have already submitted info for PMID:" + str(pmid) + "."}), content_type='text/json')

        has_novel_research = '0'
        if request.params.get('has_novel_research'):
            has_novel_research = '1'
        has_large_scale_data = '0'
        if request.params.get('has_large_scale_data'):
            has_large_scale_data = '1'

        research_results = request.params.get('research_result')
        dataset_description = request.params.get('dataset_desc')
        gene_list = request.params.get('genes')
        other_description = request.params.get('other_desc')

        x = Authorresponse(source_id=source_id,
                           pmid=pmid,
                           author_email=email,
                           has_novel_research=has_novel_research,
                           has_large_scale_data=has_large_scale_data,
                           has_fast_track_tag='0',
                           curator_checked_datasets='0',
                           curator_checked_genelist='0',
                           no_action_required='0',
                           research_results=research_results,
                           gene_list=gene_list,
                           dataset_description=dataset_description,
                           other_description=other_description,
                           created_by=created_by)
        DBSession.add(x)
        transaction.commit()
        return {'curation_id': 0}
    except Exception as e:
        transaction.abort()
        return HTTPBadRequest(body=json.dumps({'error': "ERROR: " + str(e)}), content_type='text/json')
def set_invaild_token(cls, token):
    # mark the matching token row as invalid, if one exists
    row = DBSession().query(cls).filter(cls.token == token).first()
    if row:
        row.status = 0
        row.save()