def get_page_ident_hash(self, page_uuid, page_version,
                        book_uuid, book_version, latest=None):
    """Return the uuid of the page and the full ident_hash of the page,
    which may or may not include the book uuid depending on whether the
    page is within the book.

    :param page_uuid: uuid of the page
    :param page_version: version of the page; ``None`` matches any version
        found in the book tree
    :param book_uuid: uuid of the candidate containing book
    :param book_version: version of the candidate containing book
    :param latest: when truthy, the book portion of the resulting
        ident_hash is the bare book uuid (i.e. points at the latest book)
        rather than the version-pinned book ident_hash
    :returns: a ``(uuid, ident_hash)`` two-tuple
    """
    from cnxepub import flatten_tree_to_ident_hashes  # XXX
    plan = self.plpy.prepare(
        'SELECT tree_to_json($1, $2, FALSE)::json', ('text', 'text'))
    tree = self.plpy.execute(
        plan, (book_uuid, book_version))[0]['tree_to_json']
    # tree_to_json may come back as a JSON string rather than a parsed
    # structure, depending on the plpy driver.
    if isinstance(tree, basestring):
        tree = json.loads(tree)
    pages = list(flatten_tree_to_ident_hashes(tree))
    book_ident_hash = join_ident_hash(book_uuid, book_version)
    page_ident_hash = join_ident_hash(page_uuid, page_version)
    for p_ident_hash in pages:
        p_id, p_version = split_ident_hash(p_ident_hash)
        if (p_id == page_uuid and
                (page_version is None or page_version == p_version)):
            # The page is inside the book: scope the page's ident_hash
            # with the book, pinned or latest as requested.
            book_part = book_uuid if latest else book_ident_hash
            return book_uuid, '{}:{}'.format(book_part, page_ident_hash)
    # The page isn't in the given book, so only return the page.
    return page_uuid, page_ident_hash
def fix_anchor_references(self): """Fix references to internal documents and resources.""" # Catch the invalid, unparsable, etc. references. bad_references = [] for anchor in self.apply_xpath('//html:a'): ref = anchor.get('href') if not ref or self._should_ignore_reference(ref): continue try: ref_type, payload = parse_legacy_reference(ref) except ValueError: exc = InvalidReference(self.document_ident, ref) bad_references.append(exc) continue if ref_type == MODULE_REFERENCE: module_id, version, collection_id,\ collection_version, url_frag = payload if version: uuid, version = self.get_uuid_n_version(module_id, version) else: uuid, _ = self.get_uuid_n_version(module_id) if uuid is None: bad_references.append( ReferenceNotFound("Unable to find a reference to " "'{}' at version '{}'." .format(module_id, version), self.document_ident, ref)) break ident_hash = join_ident_hash(uuid, version) if collection_id: book_uuid, book_version = self.get_uuid_n_version( collection_id, collection_version) if book_uuid: uuid, ident_hash = self.get_page_ident_hash( uuid, version, book_uuid, book_version, latest=collection_version is None) if uuid: url_frag = url_frag and url_frag or '' path = '/contents/{}{}'.format(ident_hash, url_frag) anchor.set('href', path) elif ref_type == RESOURCE_REFERENCE: try: filename, module_id, version = payload info = self.get_resource_info(filename, module_id, version) except ReferenceNotFound as exc: bad_references.append(exc) else: anchor.set('href', '/resources/{}/{}'. format(info['hash'], filename)) else: exc = InvalidReference(self.document_ident, ref) bad_references.append(exc) return bad_references
def test_document_insertion_w_id_n_version_provided(self):
    """Insert a document with a predetermined id and version and verify
    the metadata is stored under exactly that ident_hash.
    """
    id, version = '3a70f722-b7b0-4b41-83dd-2790cee98c39', '1'
    expected_ident_hash = join_ident_hash(id, version)
    metadata = {
        'version': version,
        'title': "Dingbat's Dilemma",
        'language': 'en-us',
        'summary': "The options are limitless.",
        'created': '1420-02-03 23:36:20.583149-05',
        'revised': '1420-02-03 23:36:20.583149-05',
        'license_url': 'http://creativecommons.org/licenses/by/3.0/',
        # XXX We don't have a mapping.
        'publishers': [{'id': 'ream', 'type': None}],
        'authors': [{'id': 'rbates', 'type': 'cnx-id',
                     'name': 'Richard Bates'}],
        'editors': [{'id': 'jone', 'type': None},
                    {'id': 'kahn', 'type': None}],
        # XXX We don't have a mapping.
        'illustrators': [{'id': 'AbagaleBates', 'type': None}],
        'translators': [{'id': 'RhowandaOkofarBates', 'type': None},
                        {'id': 'JamesOrwel', 'type': None}],
        'copyright_holders': [{'id': 'ream', 'type': None}],
        'subjects': ['Business', 'Arts', 'Mathematics and Statistics'],
        'keywords': ['dingbat', 'bates', 'dilemma'],
        # NOTE: a duplicate 'version' key was removed here; dict literals
        # silently keep only the last occurrence of a repeated key.
        'print_style': None,
        }
    publisher = 'ream'
    message = 'no msg'
    document = self.make_document(id=id, metadata=metadata)

    from ..publish import _insert_metadata
    with self.db_connect() as db_conn:
        with db_conn.cursor() as cursor:
            ident_hash = _insert_metadata(cursor, document,
                                          publisher, message)[1]
    self.assertEqual(ident_hash, expected_ident_hash)

    # Verify the row landed in ``modules`` under the expected ident_hash.
    with self.db_connect() as db_conn:
        with db_conn.cursor() as cursor:
            cursor.execute("""\
SELECT m.name, uuid::text, m.major_version, m.minor_version
FROM modules AS m
WHERE ident_hash(m.uuid,m.major_version,m.minor_version) = %s
""", (ident_hash,))
            module = cursor.fetchone()
    self.assertEqual(module[0], metadata['title'])
    self.assertEqual(module[1], id)
    self.assertEqual(module[2], int(version))
    # Documents (pages) have no minor version.
    self.assertEqual(module[3], None)
def test_document_insertion_w_id_n_version_provided(self):
    """Insert a document with a predetermined id and version and verify
    the metadata is stored under exactly that ident_hash.
    """
    id, version = '3a70f722-b7b0-4b41-83dd-2790cee98c39', '1'
    expected_ident_hash = join_ident_hash(id, version)
    metadata = {
        'version': version,
        'title': "Dingbat's Dilemma",
        'language': 'en-us',
        'summary': "The options are limitless.",
        'created': '1420-02-03 23:36:20.583149-05',
        'revised': '1420-02-03 23:36:20.583149-05',
        'license_url': 'http://creativecommons.org/licenses/by/3.0/',
        # XXX We don't have a mapping.
        'publishers': [{'id': 'ream', 'type': None}],
        'authors': [{'id': 'rbates', 'type': 'cnx-id',
                     'name': 'Richard Bates'}],
        'editors': [{'id': 'jone', 'type': None},
                    {'id': 'kahn', 'type': None}],
        # XXX We don't have a mapping.
        'illustrators': [{'id': 'AbagaleBates', 'type': None}],
        'translators': [{'id': 'RhowandaOkofarBates', 'type': None},
                        {'id': 'JamesOrwel', 'type': None}],
        'copyright_holders': [{'id': 'ream', 'type': None}],
        'subjects': ['Business', 'Arts', 'Mathematics and Statistics'],
        'keywords': ['dingbat', 'bates', 'dilemma'],
        # NOTE: a duplicate 'version' key was removed here; dict literals
        # silently keep only the last occurrence of a repeated key.
        'print_style': None,
        }
    publisher = 'ream'
    message = 'no msg'
    document = self.make_document(id=id, metadata=metadata)

    from ..publish import _insert_metadata
    with self.db_connect() as db_conn:
        with db_conn.cursor() as cursor:
            ident_hash = _insert_metadata(cursor, document,
                                          publisher, message)[1]
    self.assertEqual(ident_hash, expected_ident_hash)

    # Verify the row landed in ``modules`` under the expected ident_hash.
    with self.db_connect() as db_conn:
        with db_conn.cursor() as cursor:
            cursor.execute("""\
SELECT m.name, uuid::text, m.major_version, m.minor_version
FROM modules AS m
WHERE m.uuid||'@'||concat_ws('.',m.major_version,m.minor_version) = %s
""", (ident_hash,))
            module = cursor.fetchone()
    self.assertEqual(module[0], metadata['title'])
    self.assertEqual(module[1], id)
    self.assertEqual(module[2], int(version))
    # Documents (pages) have no minor version.
    self.assertEqual(module[3], None)
def verify_id_n_version(id, version):
    """Given an ``id`` and ``version``, verify the identified content
    exists.

    Raises ``NotFound`` when no matching content exists; otherwise
    returns ``True``.
    """
    statement = _get_sql('verify-id-and-version.sql')
    params = dict(id=id, version=version)
    with db_connect() as db_conn:
        with db_conn.cursor() as cursor:
            cursor.execute(statement, params)
            row = cursor.fetchone()
            # fetchone() yields None when nothing matched.
            if row is None:
                raise NotFound(join_ident_hash(id, version))
    return True
def get_id_n_version(ident_hash):
    """From the given ``ident_hash`` return the id and version.

    When the hash lacks a version, the latest version is looked up;
    ``NotFound`` is raised if the content does not exist.
    """
    try:
        id, version = split_ident_hash(ident_hash)
    except IdentHashMissingVersion:
        # XXX Don't import from views... And don't use httpexceptions
        from pyramid.httpexceptions import HTTPNotFound
        from cnxarchive.views.helpers import get_latest_version
        try:
            latest = get_latest_version(ident_hash)
        except HTTPNotFound:
            raise NotFound(ident_hash)
        full_hash = join_ident_hash(ident_hash, latest)
        id, version = split_ident_hash(full_hash)
    else:
        # A version was supplied; make sure that exact content exists.
        verify_id_n_version(id, version)
    return id, version
def get_id_n_version(ident_hash):
    """From the given ``ident_hash`` return the id and version.

    A version-less hash resolves to the latest version; ``NotFound`` is
    raised when the content does not exist.
    """
    try:
        id, version = split_ident_hash(ident_hash)
    except IdentHashMissingVersion:
        # XXX Don't import from views... And don't use httpexceptions
        from pyramid.httpexceptions import HTTPNotFound
        from cnxarchive.views import get_latest_version
        try:
            latest = get_latest_version(ident_hash)
        except HTTPNotFound:
            raise NotFound(ident_hash)
        full_hash = join_ident_hash(ident_hash, latest)
        id, version = split_ident_hash(full_hash)
    else:
        # A version was supplied; make sure that exact content exists.
        verify_id_n_version(id, version)
    return id, version
def lookup_module_ident(id, version):
    """Return the ``module_ident`` for the given ``id`` & major and minor
    version as a tuple.

    Raises ``RuntimeError`` when no matching module exists.
    """
    with db_connect() as db_conn:
        with db_conn.cursor() as cursor:
            cursor.execute(
                "SELECT module_ident FROM modules "
                "WHERE uuid = %s "
                "AND CONCAT_WS('.', major_version, minor_version) = %s",
                (id, version))
            row = cursor.fetchone()
    # fetchone() yields None when nothing matched.
    if row is None:
        ident_hash = join_ident_hash(id, version)
        raise RuntimeError("Content at {} does not exist."
                           .format(ident_hash))
    return row[0]
def lookup_module_ident(id, version):
    """Return the ``module_ident`` for the given ``id`` & major and minor
    version as a tuple.

    Raises ``RuntimeError`` when no matching module exists.
    """
    query = (
        "SELECT module_ident FROM modules "
        "WHERE uuid = %s "
        "AND CONCAT_WS('.', major_version, minor_version) = %s")
    with db_connect() as db_conn:
        with db_conn.cursor() as cursor:
            cursor.execute(query, (id, version))
            try:
                return cursor.fetchone()[0]
            except (IndexError, TypeError):
                # fetchone() returned None -- nothing matched.
                ident_hash = join_ident_hash(id, version)
                raise RuntimeError(
                    "Content at {} does not exist.".format(ident_hash))
def check_REVISED_BOOK_in_archive(test_case, cursor):
    """This checker assumes that the only content in the database
    is the content within the BOOK and REVISED_BOOK use cases.

    Verifies the module version records, the published tree, and the
    resource file attached to the revised document.
    """
    binder = REVISED_BOOK
    document = REVISED_BOOK[0][0]

    # Check the module records...
    cursor.execute("""\
SELECT uuid, moduleid, major_version, minor_version, version
FROM modules ORDER BY major_version ASC""")
    # Group rows by (uuid, moduleid) so each content item maps to its
    # ordered list of version tuples.
    records = {}
    key_sep = '--'
    for row in cursor.fetchall():
        key = key_sep.join([str(x) for x in row[:2]])
        value = list(row[2:])
        if key not in records:
            records[key] = []
        records[key].append(value)

    binder_uuid = split_ident_hash(binder.id)[0]
    document_uuid = split_ident_hash(document.id)[0]
    expected_records = {
        # [uuid, moduleid]: [[major_version, minor_version, version], ...]
        key_sep.join([binder_uuid, 'col10000']): [
            [1, 1, '1.1'],  # BOOK
            [2, 1, '1.2'],  # REVISED_BOOK
            ],
        key_sep.join([document_uuid, 'm10000']): [
            [1, None, '1.1'],
            [2, None, '1.2'],
            ],
        }
    test_case.assertEqual(expected_records, records)

    # Check the tree...
    # This also proves that the REVISED_BOOK is in latest_modules
    # by virtual of using the tree_to_json function.
    binder_ident_hash = join_ident_hash(split_ident_hash(binder.id)[0],
                                        (2, 1,))
    document_ident_hash = join_ident_hash(split_ident_hash(document.id)[0],
                                          (2, None,))
    expected_tree = {
        u"id": unicode(binder_ident_hash),
        u"title": u"Book of Infinity",
        u"contents": [
            {u"id": u"subcol",
             u"title": REVISED_BOOK[0].metadata['title'],
             u"contents": [
                 {u"id": unicode(document_ident_hash),
                  u"title": REVISED_BOOK[0].get_title_for_node(document)}]}]}
    cursor.execute("""\
SELECT tree_to_json(uuid::text, concat_ws('.', major_version, minor_version))
FROM latest_modules
WHERE portal_type = 'Collection'""")
    tree = json.loads(cursor.fetchone()[0])
    test_case.assertEqual(expected_tree, tree)

    resource_hash = hashlib.new(cnxepub.RESOURCE_HASH_TYPE,
                                _read_file(RESOURCE_ONE_FILEPATH).read()) \
        .hexdigest()
    # FIXME Remove and change assertion after cnx-archive switches to
    #       ``cnxepub.RESOURCE_HASH_TYPE`` as hash. Use ``resource_hash``
    #       in the check instead of ``file_md5``.
    file_md5 = hashlib.new('md5',
                           _read_file(RESOURCE_ONE_FILEPATH).read()) \
        .hexdigest()
    # Look up the stored resource by md5 and check it is attached to the
    # revised document.
    cursor.execute("""\
SELECT f.file, mf.mimetype,
       m.uuid||'@'||concat_ws('.',m.major_version,m.minor_version)
FROM files as f natural join module_files as mf, latest_modules as m
WHERE mf.module_ident = m.module_ident
AND f.md5 = %s""", (file_md5,))
    file, mime_type, ident_hash = cursor.fetchone()
    test_case.assertEqual(mime_type, 'image/png')
    test_case.assertEqual(ident_hash, document_ident_hash)
    test_case.assertEqual(file[:], _read_file(RESOURCE_ONE_FILEPATH).read())
def republish_binders(cursor, models):
    """Republish the Binders that share Documents in the publication
    context.

    This needs to be given all the models in the publication context.

    :param cursor: a database cursor used for all lookups and updates
    :param models: sequence (list/tuple/set) of model objects -- Binders
        and/or Documents -- involved in the publication
    :returns: list of ident_hashes of the binders that were republished
    :raises TypeError: when ``models`` is not a list, tuple or set
    """
    documents = set([])
    binders = set([])
    history_mapping = {}  # <previous-ident-hash>: <current-ident-hash>
    if not isinstance(models, (list, tuple, set,)):
        # BUGFIX: the original message lacked a space between the two
        # concatenated literals ("objects.We were given").
        raise TypeError("``models`` Must be a sequence of model objects. "
                        "We were given: {}".format(models))
    # Collect the (uuid, version) pairs for every binder and document in
    # the publication context.
    for model in models:
        if isinstance(model, (cnxepub.Binder,)):
            binders.add(split_ident_hash(model.ident_hash))
            for doc in cnxepub.flatten_to_documents(model):
                documents.add(split_ident_hash(doc.ident_hash))
        else:
            documents.add(split_ident_hash(model.ident_hash))

    to_be_republished = []
    # What binders are these documents a part of?
    for (uuid, version) in documents:
        ident_hash = join_ident_hash(uuid, version)
        previous_ident_hash = get_previous_publication(cursor, ident_hash)
        if previous_ident_hash is None:
            # Has no prior existence.
            continue
        else:
            history_mapping[previous_ident_hash] = ident_hash
        # Walk the tree upward from the previous publication of this
        # document to find the root collections that contain it.
        cursor.execute("""\
WITH RECURSIVE t(nodeid, parent_id, documentid, path) AS (
  SELECT tr.nodeid, tr.parent_id, tr.documentid, ARRAY[tr.nodeid]
  FROM trees tr
  WHERE tr.documentid = (
    SELECT module_ident FROM modules
    WHERE uuid||'@'||concat_ws('.', major_version, minor_version) = %s)
UNION ALL
  SELECT c.nodeid, c.parent_id, c.documentid, path || ARRAY[c.nodeid]
  FROM trees c JOIN t ON (c.nodeid = t.parent_id)
  WHERE not c.nodeid = ANY(t.path)
)
SELECT uuid||'@'||concat_ws('.', major_version, minor_version)
FROM t JOIN latest_modules m ON (t.documentid = m.module_ident)
WHERE t.parent_id IS NULL
""", (previous_ident_hash,))
        to_be_republished.extend([split_ident_hash(x[0])
                                  for x in cursor.fetchall()])
    to_be_republished = set(to_be_republished)

    republished_ident_hashes = []
    # Republish the Collections set.
    for (uuid, version) in to_be_republished:
        if (uuid, version,) in binders:
            # This binder is already in the publication context,
            # don't try to publish it again.
            continue
        ident_hash = join_ident_hash(uuid, version)
        bumped_version = bump_version(cursor, uuid, is_minor_bump=True)
        republished_ident_hash = republish_collection(
            cursor, ident_hash, version=bumped_version)
        # Set the identifier history.
        history_mapping[ident_hash] = republished_ident_hash
        rebuild_collection_tree(cursor, ident_hash, history_mapping)
        republished_ident_hashes.append(republished_ident_hash)

    return republished_ident_hashes
def test_document_w_derived_from(self):
    """Insert a document, then a second document derived from it, and
    check that parentage, parent authors and print styles are recorded.
    """
    id, version = '3a70f722-b7b0-4b41-83dd-2790cee98c39', '1'
    expected_ident_hash = join_ident_hash(id, version)
    metadata = {
        'version': version,
        'title': "Dingbat's Dilemma",
        'language': 'en-us',
        'summary': "The options are limitless.",
        'created': '1420-02-03 23:36:20.583149-05',
        'revised': '1420-02-03 23:36:20.583149-05',
        'license_url': 'http://creativecommons.org/licenses/by/3.0/',
        'publishers': [{'id': 'ream', 'type': None}],
        # XXX We don't have a mapping.
        'authors': [{'id': 'rbates', 'type': 'cnx-id',
                     'name': 'Richard Bates'}],
        'editors': [{'id': 'jone', 'type': None},
                    {'id': 'kahn', 'type': None}],
        'illustrators': [{'id': 'AbagaleBates', 'type': None}],
        # XXX We don't have a mapping.
        'translators': [{'id': 'RhowandaOkofarBates', 'type': None},
                        {'id': 'JamesOrwel', 'type': None}],
        'copyright_holders': [{'id': 'ream', 'type': None}],
        'subjects': ['Business', 'Arts', 'Mathematics and Statistics'],
        'keywords': ['dingbat', 'bates', 'dilemma'],
        'print_style': '* first print style* ',
        }
    publisher = 'ream'
    message = 'no msg'
    document = self.make_document(id=id, metadata=metadata)

    from ..publish import _insert_metadata
    with self.db_connect() as db_conn:
        with db_conn.cursor() as cursor:
            ident_hash = _insert_metadata(cursor, document,
                                          publisher, message)[1]
    self.assertEqual(ident_hash, expected_ident_hash)

    # Now derive a copy that points back at the first document via
    # ``derived_from_uri``.
    metadata = {
        'title': "Copy of Dingbat's Dilemma",
        'language': 'en-us',
        'summary': "The options are limitless.",
        'created': '1420-02-03 23:36:20.583149-05',
        'revised': '1420-02-03 23:36:20.583149-05',
        'license_url': 'http://creativecommons.org/licenses/by/3.0/',
        'publishers': [{'id': 'someone', 'type': None}],
        # XXX We don't have a mapping.
        'authors': [{'id': 'someone', 'type': 'cnx-id',
                     'name': 'Someone'}],
        'editors': [],
        'illustrators': [],
        # XXX We don't have a mapping.
        'translators': [],
        'copyright_holders': [{'id': 'someone', 'type': None}],
        'subjects': ['Business', 'Arts', 'Mathematics and Statistics'],
        'keywords': ['dingbat', 'bates', 'dilemma'],
        'derived_from_uri': 'http://cnx.org/contents/{}'.format(ident_hash),
        'print_style': '* second print style* ',
        }
    publisher = 'someone'
    message = 'derived a copy'
    # No id is given; a fresh uuid is minted for the derived copy.
    document = self.make_document(metadata=metadata)

    from ..publish import _insert_metadata
    with self.db_connect() as db_conn:
        with db_conn.cursor() as cursor:
            derived_ident_hash = _insert_metadata(cursor, document,
                                                  publisher, message)[1]
    # The derived copy must not reuse the original document's uuid.
    self.assertNotEqual(
        derived_ident_hash.split('@')[0],
        ident_hash.split('@')[0])

    # Check the parent linkage and inherited parent authors.
    with self.db_connect() as db_conn:
        with db_conn.cursor() as cursor:
            cursor.execute(
                """\
SELECT m.name,
       ident_hash(pm.uuid, pm.major_version, pm.minor_version),
       m.parentauthors
FROM modules m JOIN modules pm ON m.parent = pm.module_ident
WHERE ident_hash(m.uuid, m.major_version, m.minor_version) = %s
""", (derived_ident_hash, ))
            title, parent, parentauthors = cursor.fetchone()
    self.assertEqual(title, "Copy of Dingbat's Dilemma")
    self.assertEqual(parent, ident_hash)
    self.assertEqual(parentauthors, ['rbates'])

    # Each document keeps its own print style.
    with self.db_connect() as db_conn:
        with db_conn.cursor() as cursor:
            cursor.execute(
                "SELECT print_style FROM modules m"
                " WHERE ident_hash(m.uuid, m.major_version, m.minor_version) = %s",
                (ident_hash, ))
            print_style = cursor.fetchone()[0]
            self.assertEqual(print_style, '* first print style* ')
            cursor.execute(
                "SELECT print_style FROM modules m"
                " WHERE ident_hash(m.uuid, m.major_version, m.minor_version) = %s",
                (derived_ident_hash, ))
            print_style = cursor.fetchone()[0]
            self.assertEqual(print_style, '* second print style* ')
def test_document_w_derived_from(self):
    """Insert a document, then a second document derived from it, and
    check that parentage and parent authors are recorded.
    """
    id, version = '3a70f722-b7b0-4b41-83dd-2790cee98c39', '1'
    expected_ident_hash = join_ident_hash(id, version)
    metadata = {
        'version': version,
        'title': "Dingbat's Dilemma",
        'language': 'en-us',
        'summary': "The options are limitless.",
        'created': '1420-02-03 23:36:20.583149-05',
        'revised': '1420-02-03 23:36:20.583149-05',
        'license_url': 'http://creativecommons.org/licenses/by/3.0/',
        'publishers': [{'id': 'ream', 'type': None}],
        # XXX We don't have a mapping.
        'authors': [{'id': 'rbates', 'type': 'cnx-id',
                     'name': 'Richard Bates'},],
        'editors': [{'id': 'jone', 'type': None},
                    {'id': 'kahn', 'type': None}],
        'illustrators': [{'id': 'AbagaleBates', 'type': None}],
        # XXX We don't have a mapping.
        'translators': [{'id': 'RhowandaOkofarBates', 'type': None},
                        {'id': 'JamesOrwel', 'type': None}],
        'copyright_holders': [{'id': 'ream', 'type': None}],
        'subjects': ['Business', 'Arts', 'Mathematics and Statistics'],
        'keywords': ['dingbat', 'bates', 'dilemma'],
        }
    publisher = 'ream'
    message = 'no msg'
    document = self.make_document(id=id, metadata=metadata)

    from ..publish import _insert_metadata
    with self.db_connect() as db_conn:
        with db_conn.cursor() as cursor:
            ident_hash = _insert_metadata(cursor, document,
                                          publisher, message)[1]
    self.assertEqual(ident_hash, expected_ident_hash)

    # Now derive a copy that points back at the first document via
    # ``derived_from_uri``.
    metadata = {
        'title': "Copy of Dingbat's Dilemma",
        'language': 'en-us',
        'summary': "The options are limitless.",
        'created': '1420-02-03 23:36:20.583149-05',
        'revised': '1420-02-03 23:36:20.583149-05',
        'license_url': 'http://creativecommons.org/licenses/by/3.0/',
        'publishers': [{'id': 'someone', 'type': None}],
        # XXX We don't have a mapping.
        'authors': [{'id': 'someone', 'type': 'cnx-id',
                     'name': 'Someone'},],
        'editors': [],
        'illustrators': [],
        # XXX We don't have a mapping.
        'translators': [],
        'copyright_holders': [{'id': 'someone', 'type': None}],
        'subjects': ['Business', 'Arts', 'Mathematics and Statistics'],
        'keywords': ['dingbat', 'bates', 'dilemma'],
        'derived_from_uri': 'http://cnx.org/contents/{}'.format(ident_hash),
        }
    publisher = 'someone'
    message = 'derived a copy'
    # No id is given; a fresh uuid is minted for the derived copy.
    document = self.make_document(metadata=metadata)

    from ..publish import _insert_metadata
    with self.db_connect() as db_conn:
        with db_conn.cursor() as cursor:
            derived_ident_hash = _insert_metadata(cursor, document,
                                                  publisher, message)[1]
    # The derived copy must not reuse the original document's uuid.
    self.assertNotEqual(derived_ident_hash.split('@')[0],
                        ident_hash.split('@')[0])

    # Check the parent linkage and inherited parent authors.
    with self.db_connect() as db_conn:
        with db_conn.cursor() as cursor:
            cursor.execute("""\
SELECT m.name,
       pm.uuid || '@' || concat_ws('.', pm.major_version, pm.minor_version),
       m.parentauthors
FROM modules m JOIN modules pm ON m.parent = pm.module_ident
WHERE m.uuid || '@' || concat_ws('.', m.major_version, m.minor_version) = %s
""", (derived_ident_hash,))
            title, parent, parentauthors = cursor.fetchone()
    self.assertEqual(title, "Copy of Dingbat's Dilemma")
    self.assertEqual(parent, ident_hash)
    self.assertEqual(parentauthors, ['rbates'])