def test_editions(self):
    # The edition fixture must describe exactly two editions: the tuple
    # unpacking below raises if get_edition_list() yields any other count.
    edition_data = Pandata(EDITIONTEST_FILENAME)
    first_ed, second_ed = edition_data.get_edition_list()

    # Publisher and ISBN are checked per edition; the first has no ISBN.
    self.assertEqual(first_ed.publisher, "Project Gutenberg")
    self.assertEqual(second_ed.publisher, "Recovering the Classics")
    self.assertEqual(second_ed.isbn, "9781111122223")
    self.assertEqual(first_ed.isbn, "")

    # The edition id ends in "#default" for the first edition and in the
    # ISBN for the second.
    first_ids = first_ed.edition_identifiers
    self.assertEqual(
        first_ids['edition_id'],
        "repo:Space-Viking_20728#default",
    )
    second_ids = second_ed.edition_identifiers
    self.assertEqual(
        second_ids['edition_id'],
        u'repo:Space-Viking_20728#9781111122223',
    )

    # The sample addressed by URL must yield exactly one edition; the
    # one-element unpacking is the assertion.
    remote_data = Pandata(
        'https://github.com/gitenberg-dev/metadata/raw/master/samples/pandata.yaml'
    )
    (only_edition,) = remote_data.get_edition_list()
def test_smart_properties(self):
    # publication_date is checked before and after clearing the
    # "gutenberg_issued" metadata entry: the value must change.
    sample = Pandata(TESTDATA_FILENAME)
    self.assertEqual(sample.publication_date, '2007-03-03')

    sample.metadata["gutenberg_issued"] = None
    self.assertNotEqual(sample.publication_date, '2007-03-03')

    self.assertEqual(sample._edition, 'Space-Viking')

    # The first element of the first subject entry must be one of the two
    # accepted tags.
    first_subject_tag = sample.subjects[0][0]
    self.assertTrue(first_subject_tag in ('lcsh', 'lcc'))
def add_by_webpage(url, work=None, user=None):
    """Load the editions scraped from a web page and attach their ebooks.

    Args:
        url: page address, handed to both ``get_scraper`` and
            ``BasePandataLoader``.
        work: optional work passed to the loader for the first edition;
            each later edition is loaded against the previous edition's work.
        user: forwarded to ``loader.load_ebooks``.

    Returns:
        The last edition loaded, or None when that edition is falsy or no
        edition was listed.
    """
    edition = None
    scraped = get_scraper(url)
    importer = BasePandataLoader(url)

    # Wrap the scraped metadata in a Pandata so it can be split by edition.
    page_data = Pandata()
    page_data.metadata = scraped.metadata

    for edition_metadata in page_data.get_edition_list():
        edition = importer.load_from_pandata(edition_metadata, work)
        # Chain subsequent editions onto the work that was just used/created.
        work = edition.work

    # Ebooks are loaded once, from the whole document, for the last edition.
    importer.load_ebooks(page_data, edition, user=user)

    if not edition:
        return None
    return edition
def load_from_yaml(yaml_url, test_mode=False):
    """Load every edition described by a Pandata YAML document.

    This really should be called 'load_from_github_yaml'.

    Args:
        yaml_url: location of the YAML metadata; passed to both ``Pandata``
            and ``GithubLoader``.
        test_mode: forwarded unchanged to ``loader.load_ebooks``. The earlier
            docstring described a flag named ``mock_ebook`` ("don't construct
            list of ebooks from a release -- rather use an epub");
            presumably that is this parameter -- TODO confirm against
            ``GithubLoader.load_ebooks``.

    Returns:
        The ``work_id`` of the last edition loaded, or None when that edition
        is falsy or the document lists no editions.
    """
    # Bind up front so an empty edition list returns None instead of raising
    # UnboundLocalError at the return statement.
    edition = None
    all_metadata = Pandata(yaml_url)
    loader = GithubLoader(yaml_url)
    for metadata in all_metadata.get_edition_list():
        edition = loader.load_from_pandata(metadata)
        loader.load_ebooks(metadata, edition, test_mode)
    return edition.work_id if edition else None
def add_by_sitemap(url, maxnum=None):
    """Load editions for every book found by scraping a sitemap.

    Args:
        url: sitemap address, passed to ``scrape_sitemap``.
        maxnum: forwarded to ``scrape_sitemap`` as ``maxnum``.

    Returns:
        A list holding, for each scraped book, the last edition loaded;
        books that produced no truthy edition are left out.
    """
    loaded = []
    for scraped in scrape_sitemap(url, maxnum=maxnum):
        # Each book starts with no work; the first edition loaded sets it.
        edition = work = None
        importer = BasePandataLoader(scraped.base)

        book_data = Pandata()
        book_data.metadata = scraped.metadata

        for edition_metadata in book_data.get_edition_list():
            edition = importer.load_from_pandata(edition_metadata, work)
            work = edition.work

        # Ebooks are loaded once per book, against the last edition.
        importer.load_ebooks(book_data, edition)

        if not edition:
            continue
        loaded.append(edition)
    return loaded
def repo_metadata():
    """Summarise ``metadata.yaml`` as a flat dict.

    Returns:
        A dict with the keys ``repo_name``, ``version``, ``title``,
        ``author`` (names joined with "; "), ``author_for_calibre`` (names
        joined with " & "), ``cover`` and ``book_id``.
    """
    md = Pandata("metadata.yaml")

    # The last listed cover wins; stays None when there are no covers or the
    # last one has no 'image_path'.
    cover = None
    for cover_entry in md.covers:
        cover = cover_entry.get('image_path', None)

    summary = {}
    summary['repo_name'] = md._repo
    summary['version'] = md._version
    summary['title'] = md.title
    summary['author'] = "; ".join(md.authnames())
    summary['author_for_calibre'] = " & ".join(md.authnames())
    summary['cover'] = cover
    # Falls back to '0' when no 'gutenberg' identifier is present.
    summary['book_id'] = md.identifiers.get('gutenberg', '0')
    return summary
def add_from_bookdatas(bookdatas):
    """Load editions for each item of ``bookdatas``.

    bookdatas are iterators of scrapers; each item must expose ``base`` and
    ``metadata``.

    Returns:
        A list holding, for each item, the last edition loaded; items that
        produced no truthy edition are left out.
    """
    loaded = []
    for scraped in bookdatas:
        # Each book starts with no work; the first edition loaded sets it.
        edition = work = None
        importer = BasePandataLoader(scraped.base)

        book_data = Pandata()
        book_data.metadata = scraped.metadata

        for edition_metadata in book_data.get_edition_list():
            edition = importer.load_from_pandata(edition_metadata, work)
            work = edition.work

        # Ebooks are loaded once per book, against the last edition.
        importer.load_ebooks(book_data, edition)

        if not edition:
            continue
        loaded.append(edition)
    return loaded
def test_load_from_string(self):
    # Start from an empty Pandata and feed it YAML text through load().
    from_string = Pandata()
    from_string.load(TEST_YAML_STRING)

    first_author = from_string.authnames()[0]
    self.assertEqual(first_author, 'Kafka, Franz')
def test_load_from_url(self):
    # Construct a Pandata directly from a URL and check one parsed field.
    sample_url = (
        'https://github.com/gitenberg-dev/metadata/raw/master/samples/pandata.yaml'
    )
    from_url = Pandata(sample_url)
    self.assertEqual(from_url._edition, 'Space-Viking')
def test_conversion(self):
    # Convert the RDF fixture to YAML, write it to disk, then read it back
    # through Pandata and check two parsed fields.
    yaml_text = pg_rdf_to_yaml(TESTDATA_PGRDFFILENAME)

    # Close the file before Pandata reads it: a bare open(...).write(...)
    # leaves flushing to garbage collection, which is not guaranteed to run
    # before the read below.
    with open(TESTDATA_YAMLFILENAME, "w+") as yaml_file:
        yaml_file.write(yaml_text)

    pandata = Pandata(TESTDATA_YAMLFILENAME)
    self.assertEqual(pandata._edition, 'book')
    self.assertTrue(pandata.subjects[0][0] in ('lcsh', 'lcc'))
def setUp(self):
    # Build the shared fixture from the test data file before each test.
    fixture = Pandata(TESTDATA_FILENAME)
    self.pandata = fixture
def metadata(self):
    """Return the parsed metadata, building the Pandata on first use.

    When ``self._pandata`` is falsy, a new ``Pandata`` is stored on the
    instance and then loaded from ``self.yaml``; otherwise the stored object
    is reused as-is.
    """
    if self._pandata:
        return self._pandata.metadata

    # Store the object before loading, so the attribute is set even if
    # load() raises.
    self._pandata = Pandata()
    self._pandata.load(self.yaml)
    return self._pandata.metadata
def make_gitberg_info():
    """Render the ``ABOUT`` template with the contents of ``metadata.yaml``.

    Returns:
        The result of rendering the template with the loaded Pandata passed
        in as ``metadata``.
    """
    book_metadata = Pandata("metadata.yaml")

    # Templates are looked up in the 'templates/' directory next to this
    # module first, then relative to '/'.
    module_dir = os.path.dirname(__file__)
    template_dir = os.path.join(module_dir, 'templates/')
    search_path = [template_dir, '/']

    environment = Environment(loader=FileSystemLoader(search_path))
    about_template = environment.get_template(ABOUT)
    return about_template.render(metadata=book_metadata)