def records_by_author (context, author):
    """Find queued records whose author metadata value starts with 'author'."""
    cursor = context.get_cursor()
    queued_records = []
    source_catalog = context.get_source_catalog()
    complete_mapping = source_catalog.get_complete_mapping()
    term_list = complete_mapping['author']
    term_clause = ' OR '.join(['queued_record_metadata.term_id=%s' % term.uid
        for term in term_list])
    try:
        select_clause = """
            SELECT queued_records.uid
            FROM queued_records, queued_record_metadata, studies
            WHERE queued_records.uid = queued_record_metadata.queued_record_id
            AND queued_records.uid = studies.record_id
            AND queued_records.status = 2
            AND studies.article_type >= 2
            AND studies.article_type < 8
            AND (%s)
            """ % term_clause
        cursor.execute(select_clause + """
            AND value LIKE %s
            """, str(author) + '%')
        rows = cursor.fetchall()
        for row in rows:
            queued_record = QueuedRecord(context, row[0])
            queued_records.append(queued_record)
    except Exception, e:
        context.logger.error('Records by author: %s', e)
    return queued_records
def records_by_year (context, year, term_map={}):
    """Find queued records whose publication date begins with 'year'."""
    cursor = context.get_cursor()
    queued_records = []
    year_terms = term_map['pubdate']
    year_clause = ' OR '.join(['queued_record_metadata.term_id=%s' % term.uid
        for term in year_terms])
    try:
        select_clause = """
            SELECT queued_records.uid
            FROM queued_records, queued_record_metadata, studies
            WHERE queued_records.uid = queued_record_metadata.queued_record_id
            AND queued_records.uid = studies.record_id
            AND queued_records.status = 2
            AND studies.article_type >= 2
            AND studies.article_type < 8
            AND (%s)
            """ % year_clause
        cursor.execute(select_clause + """
            AND SUBSTRING(queued_record_metadata.value, 1, 4) LIKE %s
            """, str(year) + '%')
        rows = cursor.fetchall()
        for row in rows:
            queued_record = QueuedRecord(context, row[0])
            queued_records.append(queued_record)
    except Exception, e:
        context.logger.error('Records by year: %s', e)
    return queued_records
def records_by_journal (context, issn, term_map={}):
    """Find queued records published in the journal with the given ISSN."""
    journal_title = ''
    queued_records = []
    issn_terms = term_map['issn']
    issn_clause = ' OR '.join(['queued_record_metadata.term_id=%s' % term.uid
        for term in issn_terms])
    cursor = context.get_cursor()
    cursor.execute("""
        SELECT journal_title
        FROM medline_journals
        WHERE issn = %s
        """, issn)
    try:
        rows = cursor.fetchall()
        if len(rows) != 1:
            raise Exception('Journal %s not found' % issn)
        journal_title = rows[0][0]
        select_clause = """
            SELECT queued_records.uid
            FROM queued_records, queued_record_metadata, studies
            WHERE queued_record_metadata.queued_record_id = queued_records.uid
            AND queued_records.uid = studies.record_id
            AND queued_records.status = 2
            AND studies.article_type >= 2
            AND studies.article_type < 8
            AND (%s)
            """ % issn_clause
        cursor.execute(select_clause + """
            AND queued_record_metadata.value = %s
            """, issn)
        rows = cursor.fetchall()
        for row in rows:
            queued_record = QueuedRecord(context, row[0])
            queued_records.append(queued_record)
    except Exception, e:
        context.logger.error('Records by journal: %s', e)
    return journal_title, queued_records
def records_by_methodology (context, methodology_id):
    """Find queued records whose studies use the given methodology type."""
    queued_records = []
    cursor = context.get_cursor()
    try:
        cursor.execute("""
            SELECT queued_records.uid
            FROM queued_records, studies, methodologies
            WHERE queued_records.uid = studies.record_id
            AND studies.uid = methodologies.study_id
            AND queued_records.status = 2
            AND studies.article_type >= 2
            AND studies.article_type < 8
            AND methodologies.study_type_id = %s
            """, methodology_id)
        rows = cursor.fetchall()
        for row in rows:
            queued_record = QueuedRecord(context, row[0])
            queued_records.append(queued_record)
    except Exception, e:
        context.logger.error('Records by methodology: %s', e)
    return queued_records
def records_by_concept (context, concept, concept_id):
    """Find queued records whose studies are linked to the given concept."""
    cursor = context.get_cursor()
    queued_records = []
    table_name = concept_tables[concept]
    try:
        select_clause = """
            SELECT queued_records.uid
            FROM queued_records, studies, %s
            WHERE %s.study_id = studies.uid
            AND studies.record_id = queued_records.uid
            AND %s.concept_id = %s
            AND queued_records.status = 2
            AND studies.article_type >= 2
            AND studies.article_type < 8
            """ % (table_name, table_name, table_name, concept_id)
        cursor.execute(select_clause)
        rows = cursor.fetchall()
        for row in rows:
            queued_record = QueuedRecord(context, row[0])
            queued_records.append(queued_record)
    except Exception, e:
        context.logger.error('Records by concept: %s', e)
    return queued_records
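# --- Illustrative usage sketch (not part of the original module) ---
# A minimal example of how the lookup helpers above might be driven from a
# one-off script. The Context() construction and the use of the source
# catalog's complete mapping as term_map are assumptions borrowed from the
# other scripts in this section.
if __name__ == '__main__':
    import canary.context

    context = canary.context.Context()
    mapping = context.get_source_catalog().get_complete_mapping()

    # Curated records whose author metadata value starts with 'Burger'
    for rec in records_by_author(context, 'Burger'):
        print rec.uid

    # Curated records whose publication date starts with '2003'
    for rec in records_by_year(context, '2003', term_map=mapping):
        print rec.uid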
    sys.exit(0)

query_str = options.boolean.join(
    [' "%s" [%s] ' % (term, options.field) for term in args])
#print query_str.strip()
search_index = SearchIndex(context)
hit_list = []
hits, searcher = search_index.search(query_str)
for i, doc in hits:
    hit_list.append(doc.get('uid'))
searcher.close()

output = []
for id in hit_list:
    rec = QueuedRecord(context, int(id))
    if options.locations:
        study = Study(context, rec.study_id)
        for loc in study.locations:
            out = []
            out.extend((id, loc.uid, loc.study_id, loc.feature_id))
            feature = Feature(uid=loc.feature_id)
            feature.load(context)
            out.extend((feature.latitude, feature.longitude, feature.name,
                feature.feature_type, feature.country_code))
            output.append('\t'.join([str(v) for v in out]))
    else:
        mm = rec.get_mapped_metadata(ctm)
        if mm['author']:
            first_author = mm['author'][0]
        else:
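# Worked example for the query_str built above (illustrative values, not from
# the original script): with args = ['lead', 'mercury'],
# options.field = 'exposure', and options.boolean = 'AND', the list
# comprehension yields [' "lead" [exposure] ', ' "mercury" [exposure] '],
# so query_str becomes:
#     ' "lead" [exposure] AND "mercury" [exposure] '
# i.e. the same field-tagged syntax as the canned queries used elsewhere in
# this section.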
# for query by ArcGIS.
#
# $Id$

import canary.context
from canary.gazeteer import Feature
from canary.loader import QueuedRecord
from canary.study import Study


if __name__ == '__main__':

    out_file = open('/tmp/export-location-info.txt', 'w')
    all = {}
    context = canary.context.Context()
    for i in range(4000):
        try:
            rec = QueuedRecord(context, i)
            if not rec \
                or not rec.status == rec.STATUS_CURATED:
                raise ValueError
            study = Study(context, rec.study_id)
            if not study.status == study.STATUS_TYPES['curated'] \
                or not (study.article_type >= study.ARTICLE_TYPES['traditional'] \
                and study.article_type <= study.ARTICLE_TYPES['curated']):
                raise ValueError
            for loc in study.locations:
                out = []
                out.extend((loc.uid, loc.study_id, loc.feature_id))
                feature = Feature(uid=loc.feature_id)
search_index = SearchIndex(context)
searches = (
    'Lead [exposure]',
    'Hantavirus [exposure]',
    'peromyscus [species]',
    'Michigan [location]',
    'DDT [exposure]',
    '2003 [year]',
    'cats and dogs',
    '"Burger J" [author]',
    'cross-sectional [methodology]',
    'case-control [methodology] and cattle [species]',
    'disease-model [methodology]',
    '"age factors" [risk_factors]',
    'Sarin [exposure]',
    'Arsenic [exposure]',
    '"Bacillus anthracis" [exposure]',
    '"West Nile Virus" [exposure]',
    '"Water Pollutants, Chemical" [exposure]',
    )

for t in searches:
    hits, searcher = search_index.search(t)
    result_set = []
    for i, doc in hits:
        uid = doc.get(str('uid'))
        record = QueuedRecord(context, uid)
        if record.status == record.STATUS_CURATED:
            result_set.append(record)
    searcher.close()
    makeplot(context, t, result_set)
pubmed_source = source_catalog.get_source(13)
pubmed_search = PubmedSearch()

if options.canary_id:
    rows = [[options.canary_id, ], ]
else:
    # get all active queuedrecord ids
    cursor.execute("""
        SELECT uid
        FROM queued_records
        """)
    rows = cursor.fetchall()

parser = Parser(pubmed_source)
for row in rows:
    qr = QueuedRecord(context, row[0])
    print 'Fetching pubmed data for ui %s' % qr.unique_identifier
    pm_data = pubmed_search.fetch(qr.unique_identifier)
    fetched_records = parser.parse(mapped_terms=complete_mapping,
        is_email=False, data=pm_data)
    if len(fetched_records) != 1:
        print 'Fetch for %s (%s) found %s records, ignoring' % (
            qr.unique_identifier, qr.uid, len(fetched_records))
    else:
        print 'Orig metadata:', qr.metadata
        fetched_rec = fetched_records[0]
        print 'Fetched metadata:', fetched_rec.metadata
        fetched_rec_metadata = fetched_rec.get_mapped_metadata(complete_mapping)
        print 'Fetched metadata, mapped:', fetched_rec_metadata
        if options.update: