def list_articles(target_directory, supplementary_materials=False, skip=None):
    """Yield one metadata dictionary per article found in a directory.

    Every entry returned by ``listdir(target_directory)`` is parsed as an
    XML document, and each element matched by ``iterfind('article')`` on
    the parsed tree is turned into one dictionary.

    Parameters:
        target_directory: directory whose entries are all parsed as XML.
        supplementary_materials: when true, each dictionary additionally
            gets a ``'supplementary-materials'`` entry.
        skip: container of identifiers to leave out; an article is skipped
            when the value returned by ``_get_pmcid`` is ``in skip``.
            ``None`` (the default) skips nothing.

    Yields:
        dict with the keys ``name``, ``doi``, ``article-categories``,
        ``article-contrib-authors``, ``article-title``,
        ``article-abstract``, ``journal-title``, ``article-year``,
        ``article-month``, ``article-day``, ``article-url``,
        ``article-license-url``, ``article-license-text``,
        ``article-copyright-statement``, ``article-copyright-holder`` and,
        optionally, ``supplementary-materials``.
    """
    # A ``None`` sentinel replaces the former mutable default ``[]``, which
    # was a single list object shared by every call.
    if skip is None:
        skip = ()

    for filename in listdir(target_directory):
        result_tree = ElementTree()
        result_tree.parse(path.join(target_directory, filename))

        for tree in result_tree.iterfind('article'):
            pmcid = _get_pmcid(tree)
            if pmcid in skip:
                continue

            result = {}
            result['name'] = pmcid
            result['doi'] = _get_article_doi(tree)
            result['article-categories'] = _get_article_categories(tree)
            result['article-contrib-authors'] = \
                _get_article_contrib_authors(tree)
            result['article-title'] = _get_article_title(tree)
            result['article-abstract'] = _get_article_abstract(tree)
            result['journal-title'] = _get_journal_title(tree)
            # The date helper returns a 3-item sequence: year, month, day.
            (result['article-year'],
             result['article-month'],
             result['article-day']) = _get_article_date(tree)
            result['article-url'] = _get_article_url(tree)
            # The licensing helper returns a 3-item sequence as well.
            (result['article-license-url'],
             result['article-license-text'],
             result['article-copyright-statement']) = \
                _get_article_licensing(tree)
            result['article-copyright-holder'] = \
                _get_article_copyright_holder(tree)
            if supplementary_materials:
                result['supplementary-materials'] = \
                    _get_supplementary_materials(tree)
            yield result
def list_articles(target_directory, supplementary_materials=False, skip=None):
    """Generate article metadata for all XML files in ``target_directory``.

    Each name returned by ``listdir(target_directory)`` is joined to the
    directory and parsed with ``ElementTree``; every element found by
    ``iterfind('article')`` produces one yielded dictionary.

    Parameters:
        target_directory: directory whose entries are all parsed as XML.
        supplementary_materials: if true, add a
            ``'supplementary-materials'`` entry to each dictionary.
        skip: container tested with ``pmcid in skip``; matching articles
            are not yielded. Defaults to ``None``, meaning nothing is
            skipped.

    Yields:
        dict describing one article (identifier, DOI, categories,
        contributing authors, title, abstract, journal title, date parts,
        URL, licensing fields and copyright holder).
    """
    # Avoid a mutable default argument: ``[]`` in the signature would be
    # one list instance shared between all calls.
    excluded = () if skip is None else skip

    for entry in listdir(target_directory):
        document = ElementTree()
        document.parse(path.join(target_directory, entry))

        for tree in document.iterfind('article'):
            pmcid = _get_pmcid(tree)
            if pmcid in excluded:
                continue

            result = {}
            result['name'] = pmcid
            result['doi'] = _get_article_doi(tree)
            result['article-categories'] = _get_article_categories(tree)
            result['article-contrib-authors'] = \
                _get_article_contrib_authors(tree)
            result['article-title'] = _get_article_title(tree)
            result['article-abstract'] = _get_article_abstract(tree)
            result['journal-title'] = _get_journal_title(tree)

            # Unpack into locals first so the three keys are filled in the
            # same order as before (year, month, day).
            year, month, day = _get_article_date(tree)
            result['article-year'] = year
            result['article-month'] = month
            result['article-day'] = day

            result['article-url'] = _get_article_url(tree)

            license_url, license_text, copyright_statement = \
                _get_article_licensing(tree)
            result['article-license-url'] = license_url
            result['article-license-text'] = license_text
            result['article-copyright-statement'] = copyright_statement

            result['article-copyright-holder'] = \
                _get_article_copyright_holder(tree)

            if supplementary_materials:
                result['supplementary-materials'] = \
                    _get_supplementary_materials(tree)

            yield result
def list_articles(target_directory, supplementary_materials=False, skip=None):
    """Yield the metadata dictionary for ``file.xml`` in ``target_directory``.

    The file ``<target_directory>/file.xml`` is parsed and the resulting
    tree is handed to the ``_get_*`` helpers. Exactly one dictionary is
    yielded.

    Parameters:
        target_directory: directory that contains ``file.xml``.
        supplementary_materials: when true, the dictionary additionally
            gets a ``'supplementary-materials'`` entry.
        skip: accepted for signature compatibility; this implementation
            never reads it.

    Yields:
        dict with the keys ``name`` and ``doi`` (both the article DOI),
        ``article-contrib-authors``, ``article-title``,
        ``article-abstract``, ``journal-title``, ``article-year``,
        ``article-month``, ``article-day``, ``article-url``,
        ``article-license-url``, ``article-copyright-holder`` and,
        optionally, ``supplementary-materials``.
    """
    # ``skip`` now defaults to None instead of a shared mutable ``[]``.
    tree = ElementTree()
    tree.parse(path.join(target_directory, 'file.xml'))

    # The DOI doubles as the record name; look it up once.
    doi = _get_article_doi(tree)

    result = {}
    result['name'] = doi
    result['doi'] = doi
    result['article-contrib-authors'] = _get_article_contrib_authors(tree)
    result['article-title'] = _get_article_title(tree)
    result['article-abstract'] = _get_article_abstract(tree)
    result['journal-title'] = _get_journal_title(tree)
    # The date helper returns a 3-item sequence: year, month, day.
    (result['article-year'],
     result['article-month'],
     result['article-day']) = _get_article_date(tree)
    result['article-url'] = _get_article_url(tree)
    result['article-license-url'] = _get_article_license_url(tree)
    result['article-copyright-holder'] = _get_article_copyright_holder(tree)
    if supplementary_materials:
        result['supplementary-materials'] = _get_supplementary_materials(tree)
    yield result
def list_articles(target_directory, supplementary_materials=False, skip=None):
    """Generate a single metadata record from ``file.xml``.

    Parses ``file.xml`` located in ``target_directory`` and yields one
    dictionary assembled from the ``_get_*`` helper results.

    Parameters:
        target_directory: directory that contains ``file.xml``.
        supplementary_materials: if true, add a
            ``'supplementary-materials'`` entry to the dictionary.
        skip: not used by this implementation; kept so the signature
            stays call-compatible.

    Yields:
        dict describing the article. ``name`` and ``doi`` both hold the
        value returned by ``_get_article_doi``.
    """
    # Default is None rather than ``[]`` to avoid a mutable default argument.
    document = ElementTree()
    document.parse(path.join(target_directory, 'file.xml'))

    # One lookup serves both the 'name' and the 'doi' entry.
    article_doi = _get_article_doi(document)
    year, month, day = None, None, None

    record = {}
    record['name'] = article_doi
    record['doi'] = article_doi
    record['article-contrib-authors'] = \
        _get_article_contrib_authors(document)
    record['article-title'] = _get_article_title(document)
    record['article-abstract'] = _get_article_abstract(document)
    record['journal-title'] = _get_journal_title(document)

    year, month, day = _get_article_date(document)
    record['article-year'] = year
    record['article-month'] = month
    record['article-day'] = day

    record['article-url'] = _get_article_url(document)
    record['article-license-url'] = _get_article_license_url(document)
    record['article-copyright-holder'] = \
        _get_article_copyright_holder(document)

    if supplementary_materials:
        record['supplementary-materials'] = \
            _get_supplementary_materials(document)

    yield record