def publish_litezip(struct, submission, db_conn):
    """Publish the contents of a litezip structured set of data.

    :param struct: a litezip struct (probably from
                   :func:`litezip.parse_litezip`)
    :param submission: a two-value tuple containing a userid and a
                       submit message
    :type submission: tuple
    :param db_conn: a database connection object
    :type db_conn: :class:`sqlalchemy.engine.Connection`

    """
    # Dissect objects from litezip struct.
    try:
        collection = [x for x in struct if isinstance(x, Collection)][0]
    except IndexError:  # pragma: no cover
        raise NotImplementedError('litezip without collection')
    id_map = {}  # pragma: no cover

    # Parse Collection tree to update the newly published Modules.
    with collection.file.open('rb') as fb:
        xml = etree.parse(fb)

    # Publish the Modules.
    for module in [x for x in struct if isinstance(x, Module)]:
        metadata = parse_module_metadata(module)
        old_id = module.id
        try:
            (id, version), ident = publish_legacy_page(module, metadata,
                                                       submission, db_conn)
            id_map[old_id] = (id, version)

            # Update the Collection tree
            xpath = '//col:module[@document="{}"]'.format(old_id)
            for elm in xml.xpath(xpath, namespaces=COLLECTION_NSMAP):
                elm.attrib['document'] = id
                version_attrib_name = (
                    '{{{}}}version-at-this-collection-version'
                    .format(COLLECTION_NSMAP['cnxorg']))
                legacy_version = convert_version_to_legacy_version(version)
                elm.attrib[version_attrib_name] = legacy_version
        except Unchanged:
            pass  # only publish content that has changed.

    modules_changed = bool(id_map)
    if modules_changed:
        # Rebuild the Collection tree from the newly published Modules.
        with collection.file.open('wb') as fb:
            fb.write(etree.tounicode(xml).encode('utf8'))

    # Maybe publish the Collection.
    metadata = parse_collection_metadata(collection)
    old_id = collection.id
    (id, version), ident = publish_legacy_book(
        collection, metadata, submission, db_conn,
        modules_changed=modules_changed)
    id_map[old_id] = (id, version)

    return id_map
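# A minimal usage sketch for publish_litezip, assuming the struct comes from
# :func:`litezip.parse_litezip` (as the docstring above suggests) and that the
# connection is obtained from a SQLAlchemy engine the same way the tests below
# obtain theirs. The ``litezip_dir`` and ``engine`` names are hypothetical
# placeholders for illustration only.
def example_publish_litezip(litezip_dir, engine):
    from pathlib import Path

    from litezip import parse_litezip

    # Parse the unpacked litezip directory into Collection/Module objects.
    struct = parse_litezip(Path(litezip_dir))
    # A submission is a two-value tuple: (userid, submit message).
    submission = ('user1', 'test publish')
    with engine.begin() as db_conn:
        # Returns a mapping of old ids to newly published (id, version) pairs.
        return publish_litezip(struct, submission, db_conn)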
def test_publish_with_no_significant_changes_raises_unchanged(
        content_util, persist_util, app, db_engines, db_tables):
    """Making changes other than those that cause a major or minor version
    rev does cause `Unchanged` to be raised.

    NOTE: If this test ever fails in the future for no apparent reason,
    feel free to delete it, because there is an existing view test that
    ultimately tests the same outcome, in
    test_legacy_publishing.py#test_publishing_no_significant_changes

    See: https://github.com/openstax/cnx/issues/325

    """
    collection, tree, modules = content_util.gen_collection()
    collection = persist_util.insert_collection(collection)
    metadata = parse_collection_metadata(collection)

    # TARGET
    with element_tree_from_model(collection) as xml:
        elem = xml.xpath('//md:created', namespaces=COLLECTION_NSMAP)[0]
        elem.text = 'something different'

    with pytest.raises(Unchanged):
        with db_engines['common'].begin() as conn:
            (id, version), ident = publish_legacy_book(
                collection,
                metadata,
                (
                    'user1',
                    'test publish',
                ),
                conn,
            )
def test_publish_revision_with_new_resources(content_util, persist_util, app,
                                             db_engines, db_tables):
    # Insert initial collection and modules.
    resources = list([content_util.gen_resource() for x in range(0, 2)])
    collection, tree, modules = content_util.gen_collection(
        resources=resources)
    modules = list([persist_util.insert_module(m) for m in modules])
    collection, tree, modules = content_util.rebuild_collection(
        collection, tree)
    collection = persist_util.insert_collection(collection)
    metadata = parse_collection_metadata(collection)

    filename = 'book-cover.png'
    book_cover_img = generate_random_image_by_size(10)
    book_cover = content_util.gen_resource(
        data=book_cover_img,
        filename=filename,
        media_type='image/png',
    )
    collection.resources.append(book_cover)

    # Collect control data for this current version
    stmt = (db_tables.module_files.join(db_tables.files).select().where(
        db_tables.modules.c.moduleid == metadata.id))
    control_data = db_engines['common'].execute(stmt).fetchall()

    # Ensure the resource is not in the content
    # prior to our publication
    control_files = {x.filename: x for x in control_data}
    assert book_cover.filename not in control_files

    # make it publishable
    with element_tree_from_model(collection) as xml:
        elem = xml.xpath('//md:title', namespaces=COLLECTION_NSMAP)[0]
        elem.text = 'a different collection title'

    # TARGET
    with db_engines['common'].begin() as conn:
        (id, version), ident = publish_legacy_book(
            collection,
            metadata,
            (
                'user1',
                'test publish',
            ),
            conn,
        )

    # Check for file insertion
    stmt = (db_tables.module_files.join(db_tables.files).select().where(
        db_tables.module_files.c.module_ident == ident))
    result = db_engines['common'].execute(stmt).fetchall()
    files = {x.filename: x for x in result}
    assert book_cover.filename in files
    assert files[book_cover.filename].sha1 == book_cover.sha1
    assert files[book_cover.filename].file == book_cover.data.read_bytes()
def test_publish_revision_with_created_value_changed(content_util,
                                                     persist_util, app,
                                                     db_engines, db_tables):
    # Insert initial collection and modules.
    resources = list([content_util.gen_resource() for x in range(0, 2)])
    collection, tree, modules = content_util.gen_collection(
        resources=resources)
    modules = list([persist_util.insert_module(m) for m in modules])
    collection, tree, modules = content_util.rebuild_collection(
        collection, tree)
    collection = persist_util.insert_collection(collection)

    with element_tree_from_model(collection) as xml:
        elm = xml.xpath('//md:created', namespaces=COLLECTION_NSMAP)[0]
        actual_created = parse_date(elm.text)
        changed_created = actual_created - (timedelta(days=365) * 8)
        elm.text = changed_created.isoformat()

    metadata = parse_collection_metadata(collection)

    # Collect control data for non-legacy metadata
    stmt = (db_tables.modules.select().where(
        db_tables.modules.c.moduleid == metadata.id))
    control_metadata = db_engines['common'].execute(stmt).fetchone()
    assert control_metadata.created == actual_created

    # Insert a new module ...
    new_module = content_util.gen_module()
    new_module = persist_util.insert_module(new_module)
    # ... remove second element from the tree ...
    tree.pop(1)
    # ... and append the new module to the tree.
    tree.append(content_util.make_tree_node_from(new_module))
    collection, tree, modules = content_util.rebuild_collection(
        collection, tree)

    # TARGET
    with db_engines['common'].begin() as conn:
        (id, version), ident = publish_legacy_book(
            collection,
            metadata,
            (
                'user1',
                'test publish',
            ),
            conn,
        )

    # Check core metadata insertion
    stmt = (db_tables.modules.select().where(
        db_tables.modules.c.module_ident == ident))
    result = db_engines['common'].execute(stmt).fetchone()
    assert result.created == control_metadata.created
    assert result.created != changed_created
def insert_collection(self, model, _derived_from=None):
    # This is validly used here because the tests associated with
    # this parser function are outside the scope of persistent
    # actions dealing with the database.
    from press.parsers import parse_collection_metadata
    metadata = parse_collection_metadata(model)

    engine = self.db_engines['common']
    t = self.db_tables

    # Anything inserted with this tool must already have a valid id
    assert metadata.id is not None

    with engine.begin() as trans:
        if self._already_exists(trans, model, metadata):
            return model

        # Insert metadata
        ident, id = self._insert_module_metadata(
            trans,
            metadata,
            'Collection',
            _derived_from=_derived_from,
        )

        # Rewrite the content with the id
        with model.file.open('rb') as fb:
            xml = etree.parse(fb)
        elm = xml.xpath('//md:content-id', namespaces=COLLECTION_NSMAP)[0]
        elm.text = id
        with model.file.open('wb') as fb:
            fb.write(etree.tounicode(xml).encode('utf8'))

        # Insert content files
        with model.file.open('rb') as fb:
            result = trans.execute(t.files.insert().values(
                file=fb.read(),
                media_type='text/xml',
            ))
        fileid = result.inserted_primary_key[0]
        result = trans.execute(t.module_files.insert().values(
            module_ident=ident,
            fileid=fileid,
            filename='collection.xml',
        ))

        self._set_state(trans, metadata.id, metadata.version, 'current')

        # Insert resource files (recipes, cover image, etc.)
        for resource in model.resources:
            self._insert_module_file(resource, ident)

    return Collection(id, model.file, model.resources)
def test_parse_collection_metadata_without_print_style(tmpdir,
                                                       litezip_valid_litezip):
    working_dir = tmpdir.mkdir('col')
    collection_file = working_dir.join('collection.xml')

    # Copy over and modify the collection.xml file.
    with (litezip_valid_litezip / 'collection.xml').open() as origin:
        xml = etree.parse(origin)
    elm = xml.xpath('//col:param[@name="print-style"]',
                    namespaces=COLLECTION_NSMAP)[0]
    elm.getparent().remove(elm)
    collection_file.write(etree.tounicode(xml).encode('utf8'))
    assert 'print-style' not in collection_file.read()

    # Test the parser doesn't error when a print-style is missing.
    # given a Collection object,
    model = parse_collection(Path(working_dir))
    # parse the metadata into a CollectionMetadata,
    md = parse_collection_metadata(model)

    assert md.print_style is None
def test_parse_collection_metadata(litezip_valid_litezip):
    # given a Collection object,
    model = parse_collection(litezip_valid_litezip)

    # parse the metadata into a CollectionMetadata,
    md = parse_collection_metadata(model)

    # which we then test for data point information
    assert md.id == 'col11405'
    assert md.version == '1.2'
    assert md.created == '2011/05/24 10:31:56.888 GMT-5'
    assert md.revised == '2013/03/11 22:52:33.244 GMT-5'
    assert md.title == 'Intro to Computational Engineering: Elec 220 Labs'
    assert md.license_url == 'http://creativecommons.org/licenses/by/3.0/'
    assert md.language == 'en'
    assert md.authors == ('mwjhnsn', 'jedifan42')
    assert md.maintainers == ('mwjhnsn', 'jedifan42', 'cavallar')
    assert md.licensors == ('mwjhnsn', 'jedifan42', 'cavallar')
    assert md.keywords == (
        'Calculator', 'Cavallaro', 'Elec 220', 'Gate', 'Interrupt',
        'LC-3', 'Loop', 'Microcontroller', 'MSP 430', 'Rice',
    )
    assert md.subjects == ('Science and Technology', )
    assert md.abstract == ("This collection houses all the documentation "
                           "for the lab component of Rice Universities Elec "
                           "220 lab component. The labs cover topics such "
                           "as gates, simulation, basic digital I/O, "
                           "interrupt driven embedded programming, C "
                           "language programming, and finally a/d "
                           "interfacing and touch sensors.")
    # This test case uses ``value=""`` in the xml, so no usable
    # print-style value is found.
    assert md.print_style is None
def test_publish_revision_that_overwrites_existing_resources(
        content_util, persist_util, app, db_engines, db_tables):
    # Insert initial collection and modules.
    resources = list([content_util.gen_resource() for x in range(0, 2)])

    # Create a book-cover resource
    book_cover_filename = 'book-cover.png'
    book_cover_media_type = 'image/png'
    book_cover_img = generate_random_image_by_size(10)
    book_cover = content_util.gen_resource(
        data=book_cover_img,
        filename=book_cover_filename,
        media_type=book_cover_media_type,
    )
    resources.append(book_cover)

    collection, tree, modules = content_util.gen_collection(
        resources=resources)
    modules = list([persist_util.insert_module(m) for m in modules])
    collection, tree, modules = content_util.rebuild_collection(
        collection, tree)
    collection = persist_util.insert_collection(collection)
    metadata = parse_collection_metadata(collection)

    # Override the existing book-cover resource
    new_book_cover_img = generate_random_image_by_size(10)
    new_book_cover = content_util.gen_resource(
        data=new_book_cover_img,
        filename=book_cover_filename,
        media_type=book_cover_media_type,
    )
    collection.resources.pop()  # pop off the old book-cover
    collection.resources.append(new_book_cover)
    assert book_cover.sha1 != new_book_cover.sha1

    # Collect control data for this current version
    stmt = (db_tables.module_files.join(db_tables.files).select().where(
        db_tables.modules.c.moduleid == metadata.id))
    control_data = db_engines['common'].execute(stmt).fetchall()

    # Ensure the replaced resource really was in the content
    # prior to our publication
    control_files = {x.filename: x for x in control_data}
    replaced_file_record = control_files[book_cover_filename]
    # FIXME: temporarily skipping this check. Re-enable when we add a full
    #        implementation of the collxml diffing code.
    assert replaced_file_record.sha1 == book_cover.sha1
    assert replaced_file_record.file == book_cover.data.read_bytes()

    # make it publishable
    with element_tree_from_model(collection) as xml:
        elem = xml.xpath('//md:title', namespaces=COLLECTION_NSMAP)[0]
        elem.text = 'a different collection title'

    # TARGET
    with db_engines['common'].begin() as conn:
        (id, version), ident = publish_legacy_book(
            collection,
            metadata,
            (
                'user1',
                'test publish',
            ),
            conn,
        )

    # Check for file insertion
    stmt = (db_tables.module_files.join(db_tables.files).select().where(
        db_tables.module_files.c.module_ident == ident))
    result = db_engines['common'].execute(stmt).fetchall()
    files = {x.filename: x for x in result}
    assert new_book_cover.filename in files
    assert files[new_book_cover.filename].sha1 == new_book_cover.sha1
    assert files[new_book_cover.filename].file == \
        new_book_cover.data.read_bytes()
def test_publish_derived(content_util, persist_util, app, db_engines,
                         db_tables):
    # Insert initial collection and modules.
    resources = list([content_util.gen_resource() for x in range(0, 2)])
    collection, tree, modules = content_util.gen_collection(
        resources=resources)
    modules = list([persist_util.insert_module(m) for m in modules])
    collection, tree, modules = content_util.rebuild_collection(
        collection, tree)
    collection = persist_util.insert_collection(collection)
    metadata = parse_collection_metadata(collection)

    # Derive a copy of the collection
    derived_collection = persist_util.derive_from(collection)
    derived_metadata = parse_collection_metadata(derived_collection)

    # Collect control data for non-legacy metadata
    stmt = (db_tables.modules.select().where(
        db_tables.modules.c.moduleid == derived_metadata.id))
    control_metadata = db_engines['common'].execute(stmt).fetchone()

    # Make some change to the collection xml,
    # because republishing an unchanged collection is no longer valid
    with derived_collection.file.open('r+') as fb:
        new_content = fb.read().replace('Derived copy of',
                                        'Deerived copy of')
        fb.seek(0)
        fb.write(new_content)

    # TARGET
    with db_engines['common'].begin() as conn:
        now = conn.execute('SELECT CURRENT_TIMESTAMP as now').fetchone().now
        (id, version), ident = publish_legacy_book(
            derived_collection,
            derived_metadata,
            (
                'user1',
                'test publish',
            ),
            conn,
        )

    # Lookup parent collection's metadata (ident and authors)
    # for parentage checks against the derived-copy metadata.
    stmt = (db_tables.latest_modules.select().where(
        db_tables.latest_modules.c.moduleid == collection.id))
    parent_metadata_result = db_engines['common'].execute(stmt).fetchone()

    # Check core metadata insertion
    stmt = (db_tables.modules.join(db_tables.abstracts).select().where(
        db_tables.modules.c.module_ident == ident))
    result = db_engines['common'].execute(stmt).fetchone()
    assert result.uuid == control_metadata.uuid
    assert result.major_version == 2
    assert result.minor_version == 1
    assert result.version == '1.2'
    assert result.abstract == derived_metadata.abstract
    assert result.created == parse_date(derived_metadata.created)
    assert result.revised == now
    assert result.portal_type == 'Collection'
    assert result.name == derived_metadata.title
    assert result.licenseid == 13
    assert result.print_style == derived_metadata.print_style
    assert result.submitter == 'user1'
    assert result.submitlog == 'test publish'
    assert result.authors == list(derived_metadata.authors)
    assert result.maintainers == list(derived_metadata.maintainers)
    assert result.licensors == list(derived_metadata.licensors)
    assert result.google_analytics == GOOGLE_ANALYTICS_CODE

    # Check for derived metadata (parent and parent authors)
    assert result.parent == parent_metadata_result.module_ident
    assert result.parentauthors == parent_metadata_result.authors

    # Check the derived-from metadata tag was correctly inserted
    # into the document.
    stmt = (db_tables.module_files.join(db_tables.files).select().where(
        db_tables.module_files.c.module_ident == ident).where(
            db_tables.module_files.c.filename == 'collection.xml'))
    collection_doc = db_engines['common'].execute(stmt).fetchone()
    expected = bytes(
        ('<md:derived-from url="http://cnx.org/content/{}/{}"/>'.format(
            metadata.id, metadata.version)),
        'utf-8',
    )
    assert expected in collection_doc.file
def test_publish_revision_base_case(content_util, persist_util, app,
                                    db_engines, db_tables):
    # Insert initial collection and modules.
    resources = list([content_util.gen_resource() for x in range(0, 2)])
    collection, tree, modules = content_util.gen_collection(
        resources=resources)
    modules = list([persist_util.insert_module(m) for m in modules])
    collection, tree, modules = content_util.rebuild_collection(
        collection, tree)
    collection = persist_util.insert_collection(collection)
    metadata = parse_collection_metadata(collection)

    # Collect control data for non-legacy metadata
    stmt = (db_tables.modules.select().where(
        db_tables.modules.c.moduleid == metadata.id))
    control_metadata = db_engines['common'].execute(stmt).fetchone()

    # Insert a new module (will cause a major version rev!)
    new_module = content_util.gen_module()
    new_module = persist_util.insert_module(new_module)
    # ... remove second element from the tree ...
    tree.pop(1)
    # ... and append the new module to the tree.
    tree.append(content_util.make_tree_node_from(new_module))
    collection, tree, modules = content_util.rebuild_collection(
        collection, tree)

    # TARGET
    with db_engines['common'].begin() as conn:
        now = conn.execute('SELECT CURRENT_TIMESTAMP as now').fetchone().now
        (id, version), ident = publish_legacy_book(
            collection,
            metadata,
            (
                'user1',
                'test publish',
            ),
            conn,
        )

    # Check core metadata insertion
    stmt = (db_tables.modules.select().where(
        db_tables.modules.c.module_ident == ident))
    result = db_engines['common'].execute(stmt).fetchone()
    assert result.uuid == control_metadata.uuid
    assert result.major_version == 2
    assert result.minor_version == 1
    assert result.version == '1.2'
    assert result.abstractid == control_metadata.abstractid
    assert result.created == parse_date(metadata.created)
    assert result.revised == now
    assert result.portal_type == 'Collection'
    assert result.name == metadata.title
    assert result.licenseid == 13
    assert result.print_style == metadata.print_style
    assert result.submitter == 'user1'
    assert result.submitlog == 'test publish'
    assert result.authors == list(metadata.authors)
    assert result.maintainers == list(metadata.maintainers)
    assert result.licensors == list(metadata.licensors)
    assert result.google_analytics == GOOGLE_ANALYTICS_CODE

    # Check subject metadata insertion
    stmt = (db_tables.moduletags.join(db_tables.tags).select().where(
        db_tables.moduletags.c.module_ident == ident))
    results = db_engines['common'].execute(stmt)
    subjects = [x.tag for x in results]
    assert sorted(subjects) == sorted(metadata.subjects)

    # Check keyword metadata insertion
    stmt = (db_tables.modulekeywords.join(db_tables.keywords).select().where(
        db_tables.modulekeywords.c.module_ident == ident))
    results = db_engines['common'].execute(stmt)
    keywords = [x.word for x in results]
    assert sorted(keywords) == sorted(metadata.keywords)

    # Check for file insertion
    stmt = (db_tables.module_files.join(db_tables.files).select().where(
        db_tables.module_files.c.module_ident == ident))
    result = db_engines['common'].execute(stmt).fetchall()
    filenames = [x.filename for x in result]
    assert len(filenames) == len(resources) + 1  # content file
    assert 'collection.xml' in filenames

    # Check for resource file insertion
    for resource in resources:
        assert resource.filename in filenames

    # Check the tree for accuracy (even though this is out of scope)
    stmt = (text(
        "SELECT tree_to_json_for_legacy("
        " m.uuid::text, "
        " concat_ws('.', m.major_version, m.minor_version)::text"
        ")::json "
        "FROM modules AS m "
        "WHERE m.module_ident = :module_ident").bindparams(
            module_ident=ident))
    inserted_tree = db_engines['common'].execute(stmt).fetchone()[0]
    compare_legacy_tree_similarity(inserted_tree['contents'], tree)
def _parse_collection_metadata(self, *args, **kwargs):
    # The parser is validly used here because it is unit tested
    # without using this utility.
    from press.parsers import parse_collection_metadata
    return parse_collection_metadata(*args, **kwargs)