Example #1
0
def import_stuff(file_list):
    """Import every article file of the desired type into Neo4j.

    Each file in *file_list* is parsed with ``pq``. Articles whose
    ``article-type`` attribute differs from ``DESIRED_ARTICLE_TYPE`` are
    skipped; the rest are handed to ``save_article_to_neo4j``.

    The import is best-effort: any exception raised while handling one file
    (including a failure to parse it) is logged with its traceback and the
    loop moves on to the next file.

    Args:
        file_list: Iterable of file names passed to ``pq(filename=...)``.
    """
    for filename in file_list:
        try:
            # Parse inside the try so that a single unreadable or malformed
            # file cannot abort the whole batch.
            article_doc = pq(filename=filename)
            article_type = article_doc('article').attr('article-type')
            if article_type != DESIRED_ARTICLE_TYPE:
                logging.debug("Skipping article of type %s", article_type)
                continue
            pmid, pmc = pubmed_ids(article_doc)
            # %s with lazy arguments: a missing or non-integer ID must not
            # raise while building the log line and thereby skip the save.
            logging.info("Processing article %s / %s", pmc, pmid)
            save_article_to_neo4j(article_doc)
        except Exception:
            # Deliberately broad: keep going, but record which file failed
            # and the full traceback.
            logging.exception("Failed to import %s", filename)
Example #2
0
def import_stuff(file_list):
    """Import every article file of the desired type into Neo4j.

    Each file in *file_list* is parsed with ``pq``. Articles whose
    ``article-type`` attribute differs from ``DESIRED_ARTICLE_TYPE`` are
    skipped; the rest are handed to ``save_article_to_neo4j``.

    The import is best-effort: any exception raised while handling one file
    (including a failure to parse it) is logged with its traceback and the
    loop moves on to the next file.

    Args:
        file_list: Iterable of file names passed to ``pq(filename=...)``.
    """
    for filename in file_list:
        try:
            # Parse inside the try so that a single unreadable or malformed
            # file cannot abort the whole batch.
            article_doc = pq(filename=filename)
            article_type = article_doc('article').attr('article-type')
            if article_type != DESIRED_ARTICLE_TYPE:
                logging.debug("Skipping article of type %s", article_type)
                continue
            pmid, pmc = pubmed_ids(article_doc)
            # %s with lazy arguments: a missing or non-integer ID must not
            # raise while building the log line and thereby skip the save.
            logging.info("Processing article %s / %s", pmc, pmid)
            save_article_to_neo4j(article_doc)
        except Exception:
            # Deliberately broad: keep going, but record which file failed
            # and the full traceback.
            logging.exception("Failed to import %s", filename)
Example #3
0
def save_article_to_neo4j(article_doc):
    """Create or update the graph node for one article and index it.

    The article's IDs come from ``pubmed_ids(article_doc)``. The node is
    looked up in ``one_index_to_rule_them_all`` under the ``'pmc'`` key; if
    that lookup has no first element, a new node is created with
    ``gdb.node()``. The node's ``pmc``, ``pmid`` and ``title`` properties are
    then written, and the node is added to the index under both its PMC and
    PMID (as strings).

    Args:
        article_doc: Parsed article document; called with the selector
            ``'article-title'`` to obtain the title text.
    """
    pmid, pmc = pubmed_ids(article_doc)
    # The index is queried and populated with the string form of each ID.
    pmc_key = str(pmc)
    pmid_key = str(pmid)

    # PMC is the lookup key: reuse the first indexed node when there is one.
    matches = one_index_to_rule_them_all.get('pmc', pmc_key)
    try:
        node = matches[0]
    except IndexError:
        node = gdb.node()

    node['pmc'] = pmc
    node['pmid'] = pmid
    title_text = article_doc('article-title').text()
    node['title'] = title_text

    for index_key, index_value in (('pmc', pmc_key), ('pmid', pmid_key)):
        one_index_to_rule_them_all.add(index_key, index_value, node)
Example #4
0
def save_article_to_neo4j(article_doc):
    """Create or update the graph node for one article and index it.

    The article's IDs come from ``pubmed_ids(article_doc)``. The node is
    looked up in ``one_index_to_rule_them_all`` under the ``'pmc'`` key; if
    that lookup has no first element, a new node is created with
    ``gdb.node()``. The node's ``pmc``, ``pmid`` and ``title`` properties are
    then written, and the node is added to the index under both its PMC and
    PMID (as strings).

    Args:
        article_doc: Parsed article document; called with the selector
            ``'article-title'`` to obtain the title text.
    """
    pmid, pmc = pubmed_ids(article_doc)
    # The index is queried and populated with the string form of each ID.
    pmc_key = str(pmc)
    pmid_key = str(pmid)

    # PMC is the lookup key: reuse the first indexed node when there is one.
    matches = one_index_to_rule_them_all.get('pmc', pmc_key)
    try:
        node = matches[0]
    except IndexError:
        node = gdb.node()

    node['pmc'] = pmc
    node['pmid'] = pmid
    title_text = article_doc('article-title').text()
    node['title'] = title_text

    for index_key, index_value in (('pmc', pmc_key), ('pmid', pmid_key)):
        one_index_to_rule_them_all.add(index_key, index_value, node)