Example #1
0
def records_by_author (context, author):
    cursor = context.get_cursor()
    queued_records = []
    source_catalog = context.get_source_catalog()
    complete_mapping = source_catalog.get_complete_mapping()
    term_list = complete_mapping['author']
    term_clause = ' OR '.join(['queued_record_metadata.term_id=%s' % term.uid for term in term_list])
    
    try:
        select_clause = """
            SELECT queued_records.uid
            FROM queued_records, queued_record_metadata, studies
            WHERE queued_records.uid = queued_record_metadata.queued_record_id
            AND queued_records.uid = studies.record_id
            AND queued_records.status = 2
            AND studies.article_type >= 2
            AND studies.article_type < 8
            AND (%s)
            """ % term_clause
        cursor.execute(select_clause + """
            AND value LIKE %s
            """, str(author) + '%'
            )
        rows = cursor.fetchall()
        for row in rows:
            queued_record = QueuedRecord(context, row[0])
            queued_records.append(queued_record)
    except Exception, e:
        context.logger.error('Records by author: %s', e)
Example #2
0
def records_by_year (context, year, term_map={}):
    cursor = context.get_cursor()
    queued_records = []
    year_terms = term_map['pubdate']
    year_clause = ' OR '.join(['queued_record_metadata.term_id=%s' % term.uid for term in year_terms])
    
    try:
        select_clause = """
            SELECT queued_records.uid
            FROM queued_records, queued_record_metadata, studies
            WHERE queued_records.uid = queued_record_metadata.queued_record_id
            AND queued_records.uid = studies.record_id
            AND queued_records.status = 2
            AND studies.article_type >= 2
            AND studies.article_type < 8
            AND (%s)
            """ % year_clause
        cursor.execute(select_clause + """
            AND SUBSTRING(queued_record_metadata.value, 1, 4) LIKE %s
            """, str(year) + '%'
            )
        rows = cursor.fetchall()
        for row in rows:
            queued_record = QueuedRecord(context, row[0])
            queued_records.append(queued_record)
    except Exception, e:
        context.logger.error('Records by year: %s', e)
Example #3
0
def records_by_journal (context, issn, term_map={}):
    journal_title = ''
    queued_records = []
    issn_terms = term_map['issn']
    issn_clause = ' OR '.join(['queued_record_metadata.term_id=%s' % term.uid for term in issn_terms])
    
    cursor = context.get_cursor()
    cursor.execute("""
        SELECT journal_title
        FROM medline_journals
        WHERE issn = %s
        """, issn)
    try:
        rows = cursor.fetchall()
        if len(rows) != 1:
            raise Exception('Journal %s not found' % issn)
        
        journal_title = rows[0][0]
        
        select_clause = """
            SELECT queued_records.uid
            FROM queued_records, queued_record_metadata, studies
            WHERE queued_record_metadata.queued_record_id = queued_records.uid
            AND queued_records.uid = studies.record_id
            AND queued_records.status = 2
            AND studies.article_type >= 2
            AND studies.article_type < 8
            AND (%s)
            """ % issn_clause
        cursor.execute(select_clause + """
            AND queued_record_metadata.value = %s
            """, issn
            )
        rows = cursor.fetchall()
        for row in rows:
            queued_record = QueuedRecord(context, row[0])
            queued_records.append(queued_record)
    except Exception, e:
        context.logger.error('Records by journal: %s', e)
Example #4
0
def records_by_methodology (context, methodology_id):
    queued_records = []
    
    cursor = context.get_cursor()
    try:
        cursor.execute("""
            SELECT queued_records.uid
            FROM queued_records, studies, methodologies
            WHERE queued_records.uid = studies.record_id
            AND studies.uid = methodologies.study_id
            AND queued_records.status = 2
            AND studies.article_type >= 2
            AND studies.article_type < 8
            AND methodologies.study_type_id = %s        
            """, methodology_id
            )
        rows = cursor.fetchall()
        for row in rows:
            queued_record = QueuedRecord(context, row[0])
            queued_records.append(queued_record)
    except Exception, e:
        context.logger.error('Records by methodology: %s', e)
Example #5
0
def records_by_concept (context, concept, concept_id):
    cursor = context.get_cursor()
    queued_records = []
    table_name = concept_tables[concept]
    try:
        select_clause = """
            SELECT queued_records.uid
            FROM queued_records, studies, %s
            WHERE %s.study_id = studies.uid
            AND studies.record_id = queued_records.uid
            AND %s.concept_id = %s
            AND queued_records.status = 2
            AND studies.article_type >= 2
            AND studies.article_type < 8
            """ % (table_name, 
                table_name,
                table_name, concept_id)
        cursor.execute(select_clause)
        rows = cursor.fetchall()
        for row in rows:
            queued_record = QueuedRecord(context, row[0])
            queued_records.append(queued_record)
    except Exception, e:
        context.logger.error('Records by concept: %s', e)
Example #6
0
        sys.exit(0)

    # Build a single query string: every positional argument is quoted and
    # tagged with options.field, and the pieces are joined using
    # options.boolean as the separator.
    query_str = options.boolean.join(
        [' "%s" [%s] ' % (term, options.field) for term in args])
    #print query_str.strip()

    # Run the search, keep only the 'uid' field of each hit, and close the
    # searcher once the hits have been read.
    search_index = SearchIndex(context)
    hit_list = []
    hits, searcher = search_index.search(query_str)
    for i, doc in hits:
        hit_list.append(doc.get('uid'))
    searcher.close()

    # Collect one output line per result.
    # NOTE(review): the loop variable `id` shadows the builtin of that name.
    output = []
    for id in hit_list:
        rec = QueuedRecord(context, int(id))
        if options.locations:
            # Location mode: one tab-separated line per location of the
            # record's study, holding the hit id, the location's uid,
            # study_id and feature_id, then the feature's latitude,
            # longitude, name, feature_type and country_code.
            study = Study(context, rec.study_id)
            for loc in study.locations:
                out = []
                out.extend((id, loc.uid, loc.study_id, loc.feature_id))
                feature = Feature(uid=loc.feature_id)
                # presumably populates the feature's attributes from storage
                # -- TODO confirm against Feature.load
                feature.load(context)
                out.extend((feature.latitude, feature.longitude, feature.name,
                            feature.feature_type, feature.country_code))
                output.append('\t'.join([str(v) for v in out]))
        else:
            # Metadata mode: work from the record's mapped metadata.
            # NOTE(review): `ctm` is not assigned anywhere in this excerpt;
            # confirm it is defined earlier in the enclosing scope.
            mm = rec.get_mapped_metadata(ctm)
            if mm['author']:
                first_author = mm['author'][0]
            else:
Example #7
0
# for query by ArcGIS.
#
# $Id$

import canary.context
from canary.gazeteer import Feature
from canary.loader import QueuedRecord
from canary.study import Study

# Script entry point: walks record ids 0..3999 and gathers location data for
# the records and studies that pass the checks below.
if __name__ == '__main__':
    # NOTE(review): no close() for out_file is visible in this excerpt.
    out_file = open('/tmp/export-location-info.txt', 'w')
    # NOTE(review): `all` shadows the builtin of the same name.
    all = {}
    context = canary.context.Context()
    # The upper bound of 4000 ids is hard-coded.
    for i in range(4000):
        try:
            rec = QueuedRecord(context, i)

            # Reject records that are falsy or whose status is not
            # STATUS_CURATED.
            # NOTE(review): raising a plain string is a "string exception";
            # Python 2.6 and later reject it with a TypeError instead of
            # raising 'ValueError'.  The matching except clause is outside
            # this excerpt -- confirm how it is meant to be handled.
            if not rec \
                or not rec.status == rec.STATUS_CURATED:
                raise 'ValueError'

            # Reject studies that are not in the 'curated' status or whose
            # article_type lies outside the inclusive range from
            # ARTICLE_TYPES['traditional'] to ARTICLE_TYPES['curated'].
            study = Study(context, rec.study_id)
            if not study.status == study.STATUS_TYPES['curated'] \
                or not (study.article_type >= study.ARTICLE_TYPES['traditional'] \
                    and study.article_type <= study.ARTICLE_TYPES['curated']):
                raise 'ValueError'

            # Start one output row per study location with the location's
            # uid, study_id and feature_id, then create the Feature it
            # refers to.
            for loc in study.locations:
                out = []
                out.extend((loc.uid, loc.study_id, loc.feature_id))
                feature = Feature(uid=loc.feature_id)
    # Run a fixed list of queries against the search index and hand the
    # curated records found for each query to makeplot().
    search_index = SearchIndex(context)
    # Query strings; most tag a term with a bracketed field name such as
    # [exposure], [species] or [methodology], and one ('cats and dogs') has
    # no field tag.
    searches = (
        'Lead [exposure]',
        'Hantavirus [exposure]',
        'peromyscus [species]',
        'Michigan [location]',
        'DDT [exposure]',
        '2003 [year]',
        'cats and dogs',
        '"Burger J" [author]',
        'cross-sectional [methodology]',
        'case-control [methodology] and cattle [species]',
        'disease-model [methodology]',
        '"age factors" [risk_factors]',
        'Sarin [exposure]',
        'Arsenic [exposure]',
        '"Bacillus anthracis" [exposure]',
        '"West Nile Virus" [exposure]',
        '"Water Pollutants, Chemical" [exposure]',
    )
    for t in searches:
        hits, searcher = search_index.search(t)
        result_set = []
        for i, doc in hits:
            # presumably str() forces a byte-string field name for the
            # index API -- TODO confirm
            uid = doc.get(str('uid'))
            record = QueuedRecord(context, uid)
            # Keep only records whose status is STATUS_CURATED.
            if record.status == record.STATUS_CURATED:
                result_set.append(record)
        # Close the searcher once this query's hits have been consumed.
        searcher.close()
        makeplot(context, t, result_set)
Example #9
0
    # NOTE(review): 13 is a hard-coded source id; presumably the PubMed
    # source in the catalog -- confirm.
    pubmed_source = source_catalog.get_source(13)
    pubmed_search = PubmedSearch()

    # Decide which records to process: the single id given in
    # options.canary_id, wrapped so it has the same row shape as a query
    # result, or every uid in queued_records.
    if options.canary_id:
        rows = [[options.canary_id,],]
    else:
        # Get the uid of every queued record.
        # NOTE(review): the query has no WHERE clause, so it is not limited
        # to active records.
        cursor.execute("""
            SELECT uid
            FROM queued_records
            """)
        rows = cursor.fetchall()

    # Fetch each record's data from PubMed by its unique_identifier and
    # parse it with the PubMed source's parser.
    parser = Parser(pubmed_source)
    for row in rows:
        qr = QueuedRecord(context, row[0])
        print 'Fetching pubmed data for ui %s' % qr.unique_identifier
        pm_data = pubmed_search.fetch(qr.unique_identifier)
        fetched_records = parser.parse(mapped_terms=complete_mapping,
            is_email=False, data=pm_data)

        # Anything other than exactly one parsed record is reported and
        # skipped.
        if len(fetched_records) != 1:
            # NOTE(review): `ui` is not assigned anywhere in this excerpt;
            # presumably qr.unique_identifier was intended -- confirm,
            # otherwise this line raises NameError.
            print 'Fetch for %s (%s) found %s records, ignoring' % (ui,
                qr.uid, len(fetched_records))
        else:
            # Show the stored metadata next to the freshly fetched metadata,
            # both raw and mapped through complete_mapping.
            print 'Orig metadata:', qr.metadata
            fetched_rec = fetched_records[0]
            print 'Fetched metadata:', fetched_rec.metadata
            fetched_rec_metadata = fetched_rec.get_mapped_metadata(complete_mapping)
            print 'Fetched metadata, mapped:', fetched_rec_metadata
            if options.update:
Example #10
0
        # Wrap the single requested id so it has the same row shape as a
        # query result.
        rows = [
            [
                options.canary_id,
            ],
        ]
    else:
        # Get the uid of every queued record.
        # NOTE(review): the query has no WHERE clause, so it is not limited
        # to active records.
        cursor.execute("""
            SELECT uid
            FROM queued_records
            """)
        rows = cursor.fetchall()

    # Fetch each record's data from PubMed by its unique_identifier and
    # parse it with the PubMed source's parser.
    parser = Parser(pubmed_source)
    for row in rows:
        qr = QueuedRecord(context, row[0])
        print 'Fetching pubmed data for ui %s' % qr.unique_identifier
        pm_data = pubmed_search.fetch(qr.unique_identifier)
        fetched_records = parser.parse(mapped_terms=complete_mapping,
                                       is_email=False,
                                       data=pm_data)

        # Anything other than exactly one parsed record is reported and
        # skipped.
        if len(fetched_records) != 1:
            # NOTE(review): `ui` is not assigned anywhere in this excerpt;
            # presumably qr.unique_identifier was intended -- confirm,
            # otherwise this line raises NameError.
            print 'Fetch for %s (%s) found %s records, ignoring' % (
                ui, qr.uid, len(fetched_records))
        else:
            # Show the stored metadata next to the freshly fetched metadata,
            # then map the fetched metadata through complete_mapping.
            print 'Orig metadata:', qr.metadata
            fetched_rec = fetched_records[0]
            print 'Fetched metadata:', fetched_rec.metadata
            fetched_rec_metadata = fetched_rec.get_mapped_metadata(
                complete_mapping)