def _get_page_count(self):
    """Return the first 'count' attribute reported for a 'page-count' tag.

    The lookup is delegated to ``get_attribute_in_tag`` on
    ``self.document``. If taking the first item raises ``IndexError``,
    a notice is written to stderr and an empty string is returned.
    """
    try:
        counts = get_attribute_in_tag(self.document, 'page-count', 'count')
        page_count = counts[0]
    except IndexError:
        # A missing page count is reported but is not treated as fatal.
        print("Can't find page count", file=sys.stderr)
        page_count = ''
    return page_count
def _get_license(self):
    """Return ``(license, license_type, license_url)`` for the document.

    Every 'license' element of ``self.document`` is visited in order and
    the values read from the last one are the ones returned:

    * license      -- result of ``get_value_in_tag(tag, 'ext-link')``
    * license_type -- the element's 'license-type' attribute
    * license_url  -- first 'xlink:href' attribute found on an 'ext-link'

    All three items are empty strings when the document has no 'license'
    element. ``license_url`` is also an empty string when the element has
    no 'ext-link' href, so it is never handed back as an empty lookup
    result.
    """
    # Named license_text so the local does not shadow the ``license``
    # builtin installed by the ``site`` module.
    license_text = ''
    license_type = ''
    license_url = ''
    for tag in self.document.getElementsByTagName('license'):
        license_text = get_value_in_tag(tag, 'ext-link')
        license_type = tag.getAttribute('license-type')
        hrefs = get_attribute_in_tag(tag, 'ext-link', 'xlink:href')
        # Normalise a miss to '' (the same sentinel used as the default
        # above) instead of leaking the falsy lookup result to callers.
        license_url = hrefs[0] if hrefs else ''
    return license_text, license_type, license_url
def _get_article_type(self):
    """Return the 'article-type' attribute of the document's 'article' tag.

    The lookup is delegated to ``get_attribute_in_tag`` on
    ``self.document`` and the first value found is returned. When the
    lookup yields nothing, an empty string is returned so that the result
    is a string-like value in both cases rather than the helper's falsy
    lookup result.
    """
    article_types = get_attribute_in_tag(
        self.document, 'article', 'article-type')
    # '' is the same "not found" sentinel that _get_page_count returns.
    return article_types[0] if article_types else ''
def test_get_attribute_in_tag(self):
    """Check get_attribute_in_tag against the ``self.document`` fixture.

    Looking up attribute "name" on tag "Bar" must give ["a", "b"]; the
    other two tag/attribute combinations must give an empty list.
    """
    # (tag name, attribute name, expected result), checked in this order.
    expectations = (
        ("Bar", "name", ["a", "b"]),
        ("Bar", "A", []),
        ("A", "Bar", []),
    )
    for tag_name, attribute_name, expected in expectations:
        self.assertEqual(
            get_attribute_in_tag(self.document, tag_name, attribute_name),
            expected)