# Example no. 1
# 0
def list_articles(target_directory, supplementary_materials=False, skip=None):
    """Yield one metadata dictionary per ``article`` element found.

    Every entry of ``target_directory`` is parsed as an XML document, and
    each element matched by ``iterfind('article')`` on the parsed tree is
    converted into a dictionary by the module's ``_get_*`` helpers.

    Args:
        target_directory: Directory whose entries are all parsed as XML.
        supplementary_materials: When true, each dictionary additionally
            carries a ``'supplementary-materials'`` entry.
        skip: Container of identifiers (as returned by ``_get_pmcid``)
            whose articles are left out.  ``None`` (the default) skips
            nothing.

    Yields:
        dict: Keys ``'name'``, ``'doi'``, ``'article-categories'``,
        ``'article-contrib-authors'``, ``'article-title'``,
        ``'article-abstract'``, ``'journal-title'``, ``'article-year'``,
        ``'article-month'``, ``'article-day'``, ``'article-url'``,
        ``'article-license-url'``, ``'article-license-text'``,
        ``'article-copyright-statement'``, ``'article-copyright-holder'``
        and, on request, ``'supplementary-materials'``.
    """
    # ``None`` replaces the former shared mutable ``[]`` default and is also
    # tolerated when passed explicitly.
    if skip is None:
        skip = ()

    for filename in listdir(target_directory):
        result_tree = ElementTree()
        result_tree.parse(path.join(target_directory, filename))
        for tree in result_tree.iterfind('article'):
            pmcid = _get_pmcid(tree)
            if pmcid in skip:
                continue

            result = {}
            result['name'] = pmcid
            result['doi'] = _get_article_doi(tree)
            result['article-categories'] = _get_article_categories(tree)
            result['article-contrib-authors'] = \
                _get_article_contrib_authors(tree)
            result['article-title'] = _get_article_title(tree)
            result['article-abstract'] = _get_article_abstract(tree)
            result['journal-title'] = _get_journal_title(tree)

            # The date helper returns a 3-item sequence: year, month, day.
            year, month, day = _get_article_date(tree)
            result['article-year'] = year
            result['article-month'] = month
            result['article-day'] = day

            result['article-url'] = _get_article_url(tree)

            # The licensing helper returns a 3-item sequence:
            # license URL, license text, copyright statement.
            license_url, license_text, copyright_statement = \
                _get_article_licensing(tree)
            result['article-license-url'] = license_url
            result['article-license-text'] = license_text
            result['article-copyright-statement'] = copyright_statement

            result['article-copyright-holder'] = \
                _get_article_copyright_holder(tree)

            if supplementary_materials:
                result['supplementary-materials'] = \
                    _get_supplementary_materials(tree)
            yield result
def list_articles(target_directory, supplementary_materials=False, skip=None):
    """Generate metadata dictionaries for the articles in a directory.

    Each entry of ``target_directory`` is parsed as XML; every element
    matched by ``iterfind('article')`` on the parsed tree produces one
    dictionary, unless its ``_get_pmcid`` identifier is listed in ``skip``.

    Args:
        target_directory: Directory whose entries are all parsed as XML.
        supplementary_materials: If true, add a
            ``'supplementary-materials'`` entry to every dictionary.
        skip: Container of identifiers to omit; ``None`` (the default)
            means nothing is omitted.

    Yields:
        dict: Article metadata keyed by ``'name'``, ``'doi'`` and the
        ``'article-*'`` / ``'journal-title'`` fields filled in below.
    """
    # A ``None`` sentinel avoids sharing one mutable list between calls and
    # keeps an explicit ``skip=None`` from failing on the ``in`` test.
    skipped = () if skip is None else skip

    for entry in listdir(target_directory):
        document = ElementTree()
        document.parse(path.join(target_directory, entry))

        for article in document.iterfind('article'):
            pmcid = _get_pmcid(article)
            if pmcid in skipped:
                continue

            record = {
                'name': pmcid,
                'doi': _get_article_doi(article),
                'article-categories': _get_article_categories(article),
                'article-contrib-authors':
                    _get_article_contrib_authors(article),
                'article-title': _get_article_title(article),
                'article-abstract': _get_article_abstract(article),
                'journal-title': _get_journal_title(article),
            }
            (record['article-year'],
             record['article-month'],
             record['article-day']) = _get_article_date(article)
            record['article-url'] = _get_article_url(article)
            (record['article-license-url'],
             record['article-license-text'],
             record['article-copyright-statement']) = \
                _get_article_licensing(article)
            record['article-copyright-holder'] = \
                _get_article_copyright_holder(article)

            if supplementary_materials:
                record['supplementary-materials'] = \
                    _get_supplementary_materials(article)
            yield record
def list_articles(target_directory, supplementary_materials=False, skip=None):
    """Yield the metadata dictionary for ``file.xml`` in ``target_directory``.

    The whole parsed document is handed to the module's ``_get_*`` helpers,
    and exactly one dictionary is yielded.

    Args:
        target_directory: Directory that contains ``file.xml``.
        supplementary_materials: When true, the dictionary additionally
            carries a ``'supplementary-materials'`` entry.
        skip: Not consulted by this implementation.
            NOTE(review): sibling variants filter on ``skip``; confirm
            whether ignoring it here is intentional.

    Yields:
        dict: Keys ``'name'``, ``'doi'``, ``'article-contrib-authors'``,
        ``'article-title'``, ``'article-abstract'``, ``'journal-title'``,
        ``'article-year'``, ``'article-month'``, ``'article-day'``,
        ``'article-url'``, ``'article-license-url'``,
        ``'article-copyright-holder'`` and, on request,
        ``'supplementary-materials'``.
    """
    tree = ElementTree()
    tree.parse(path.join(target_directory, 'file.xml'))

    # The DOI doubles as the record name, so look it up only once.
    doi = _get_article_doi(tree)

    result = {}
    result['name'] = doi
    result['doi'] = doi
    result['article-contrib-authors'] = _get_article_contrib_authors(tree)
    result['article-title'] = _get_article_title(tree)
    result['article-abstract'] = _get_article_abstract(tree)
    result['journal-title'] = _get_journal_title(tree)

    # The date helper returns a 3-item sequence: year, month, day.
    year, month, day = _get_article_date(tree)
    result['article-year'] = year
    result['article-month'] = month
    result['article-day'] = day

    result['article-url'] = _get_article_url(tree)
    result['article-license-url'] = _get_article_license_url(tree)
    result['article-copyright-holder'] = _get_article_copyright_holder(tree)

    if supplementary_materials:
        result['supplementary-materials'] = _get_supplementary_materials(tree)
    yield result
# Example no. 4
# 0
def list_articles(target_directory, supplementary_materials=False, skip=None):
    """Generate the single metadata dictionary read from ``file.xml``.

    ``file.xml`` is looked up inside ``target_directory``, parsed, and the
    parsed document is passed to the module's ``_get_*`` helpers to fill
    one dictionary, which is then yielded.

    Args:
        target_directory: Directory that contains ``file.xml``.
        supplementary_materials: If true, add a
            ``'supplementary-materials'`` entry to the dictionary.
        skip: Not consulted by this implementation.
            NOTE(review): confirm that ignoring ``skip`` is intended.

    Yields:
        dict: Article metadata; ``'name'`` and ``'doi'`` both hold the
        value returned by ``_get_article_doi``.
    """
    document = ElementTree()
    document.parse(path.join(target_directory, 'file.xml'))

    # One lookup serves both the 'name' and the 'doi' entry.
    doi = _get_article_doi(document)
    record = {
        'name': doi,
        'doi': doi,
        'article-contrib-authors': _get_article_contrib_authors(document),
        'article-title': _get_article_title(document),
        'article-abstract': _get_article_abstract(document),
        'journal-title': _get_journal_title(document),
    }
    (record['article-year'],
     record['article-month'],
     record['article-day']) = _get_article_date(document)
    record['article-url'] = _get_article_url(document)
    record['article-license-url'] = _get_article_license_url(document)
    record['article-copyright-holder'] = \
        _get_article_copyright_holder(document)

    if supplementary_materials:
        record['supplementary-materials'] = \
            _get_supplementary_materials(document)
    yield record