def publish_litezip(struct, submission, db_conn):
    """Publish the contents of a litezip structured set of data.

    :param struct: a litezip struct (probably from
                   :func:`litezip.parse_litezip`)
    :param submission: a two-value tuple containing a userid and a
                       submit message
    :type submission: tuple
    :param db_conn: a database connection object
    :type db_conn: :class:`sqlalchemy.engine.Connection`

    """
    # Dissect objects from litezip struct.
    try:
        collection = [x for x in struct if isinstance(x, Collection)][0]
    except IndexError:  # pragma: no cover
        raise NotImplementedError('litezip without collection')
    id_map = {}  # pragma: no cover

    # Parse Collection tree to update the newly published Modules.
    with collection.file.open('rb') as fb:
        xml = etree.parse(fb)

    # Publish the Modules.
    for module in [x for x in struct if isinstance(x, Module)]:
        metadata = parse_module_metadata(module)
        old_id = module.id
        try:
            (id, version), ident = publish_legacy_page(module, metadata,
                                                       submission, db_conn)
            id_map[old_id] = (id, version)

            # Update the Collection tree
            xpath = '//col:module[@document="{}"]'.format(old_id)
            for elm in xml.xpath(xpath, namespaces=COLLECTION_NSMAP):
                elm.attrib['document'] = id
                version_attrib_name = (
                    '{{{}}}version-at-this-collection-version'
                    .format(COLLECTION_NSMAP['cnxorg']))
                legacy_version = convert_version_to_legacy_version(version)
                elm.attrib[version_attrib_name] = legacy_version
        except Unchanged:
            pass  # only publish content that has changed.

    modules_changed = bool(id_map)
    if modules_changed:
        # Rebuild the Collection tree from the newly published Modules.
        with collection.file.open('wb') as fb:
            fb.write(etree.tounicode(xml).encode('utf8'))

    # Maybe publish the Collection.
    metadata = parse_collection_metadata(collection)
    old_id = collection.id
    (id, version), ident = publish_legacy_book(
        collection, metadata, submission, db_conn,
        modules_changed=modules_changed)
    id_map[old_id] = (id, version)

    return id_map
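# A minimal usage sketch for publish_litezip, assuming the struct comes from
# :func:`litezip.parse_litezip` (as the docstring above suggests) and that the
# connection is obtained from a SQLAlchemy engine the same way the tests below
# obtain theirs. The ``litezip_dir`` and ``engine`` names are hypothetical
# placeholders for illustration only.
def example_publish_litezip(litezip_dir, engine):
    from pathlib import Path

    from litezip import parse_litezip

    # Parse the unpacked litezip directory into Collection/Module objects.
    struct = parse_litezip(Path(litezip_dir))
    # A submission is a two-value tuple: (userid, submit message).
    submission = ('user1', 'test publish')
    with engine.begin() as db_conn:
        # Returns a mapping of old ids to newly published (id, version) pairs.
        return publish_litezip(struct, submission, db_conn)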
def test_publish_with_no_significant_changes_raises_unchanged(
        content_util, persist_util, app, db_engines, db_tables):
    """Making changes other than those that cause a major or minor version
    rev does cause `Unchanged` to be raised.

    NOTE: If this test ever fails in the future for no apparent reason,
    feel free to delete it, because there is an existing view test that
    ultimately tests the same outcome, in
    test_legacy_publishing.py#test_publishing_no_significant_changes

    See: https://github.com/openstax/cnx/issues/325

    """
    collection, tree, modules = content_util.gen_collection()
    collection = persist_util.insert_collection(collection)
    metadata = parse_collection_metadata(collection)

    # TARGET
    with element_tree_from_model(collection) as xml:
        elem = xml.xpath('//md:created', namespaces=COLLECTION_NSMAP)[0]
        elem.text = 'something different'

    with pytest.raises(Unchanged):
        with db_engines['common'].begin() as conn:
            (id, version), ident = publish_legacy_book(
                collection,
                metadata,
                (
                    'user1',
                    'test publish',
                ),
                conn,
            )
def test_publish_revision_with_new_resources(content_util, persist_util, app,
                                             db_engines, db_tables):
    # Insert initial collection and modules.
    resources = list([content_util.gen_resource() for x in range(0, 2)])
    collection, tree, modules = content_util.gen_collection(
        resources=resources)
    modules = list([persist_util.insert_module(m) for m in modules])
    collection, tree, modules = content_util.rebuild_collection(
        collection, tree)
    collection = persist_util.insert_collection(collection)
    metadata = parse_collection_metadata(collection)

    filename = 'book-cover.png'
    book_cover_img = generate_random_image_by_size(10)
    book_cover = content_util.gen_resource(
        data=book_cover_img,
        filename=filename,
        media_type='image/png',
    )
    collection.resources.append(book_cover)

    # Collect control data for this current version
    stmt = (db_tables.module_files.join(db_tables.files).select().where(
        db_tables.modules.c.moduleid == metadata.id))
    control_data = db_engines['common'].execute(stmt).fetchall()

    # Ensure the resource is not in the content
    # prior to our publication
    control_files = {x.filename: x for x in control_data}
    assert book_cover.filename not in control_files

    # make it publishable
    with element_tree_from_model(collection) as xml:
        elem = xml.xpath('//md:title', namespaces=COLLECTION_NSMAP)[0]
        elem.text = 'a different collection title'

    # TARGET
    with db_engines['common'].begin() as conn:
        (id, version), ident = publish_legacy_book(
            collection,
            metadata,
            (
                'user1',
                'test publish',
            ),
            conn,
        )

    # Check for file insertion
    stmt = (db_tables.module_files.join(db_tables.files).select().where(
        db_tables.module_files.c.module_ident == ident))
    result = db_engines['common'].execute(stmt).fetchall()
    files = {x.filename: x for x in result}
    assert book_cover.filename in files
    assert files[book_cover.filename].sha1 == book_cover.sha1
    assert files[book_cover.filename].file == book_cover.data.read_bytes()
def test_publish_revision_with_created_value_changed(content_util,
                                                     persist_util, app,
                                                     db_engines, db_tables):
    # Insert initial collection and modules.
    resources = list([content_util.gen_resource() for x in range(0, 2)])
    collection, tree, modules = content_util.gen_collection(
        resources=resources)
    modules = list([persist_util.insert_module(m) for m in modules])
    collection, tree, modules = content_util.rebuild_collection(
        collection, tree)
    collection = persist_util.insert_collection(collection)

    with element_tree_from_model(collection) as xml:
        elm = xml.xpath('//md:created', namespaces=COLLECTION_NSMAP)[0]
        actual_created = parse_date(elm.text)
        changed_created = actual_created - (timedelta(days=365) * 8)
        elm.text = changed_created.isoformat()

    metadata = parse_collection_metadata(collection)

    # Collect control data for non-legacy metadata
    stmt = (db_tables.modules.select().where(
        db_tables.modules.c.moduleid == metadata.id))
    control_metadata = db_engines['common'].execute(stmt).fetchone()
    assert control_metadata.created == actual_created

    # Insert a new module ...
    new_module = content_util.gen_module()
    new_module = persist_util.insert_module(new_module)
    # ... remove second element from the tree ...
    tree.pop(1)
    # ... and append the new module to the tree.
    tree.append(content_util.make_tree_node_from(new_module))
    collection, tree, modules = content_util.rebuild_collection(
        collection, tree)

    # TARGET
    with db_engines['common'].begin() as conn:
        (id, version), ident = publish_legacy_book(
            collection,
            metadata,
            (
                'user1',
                'test publish',
            ),
            conn,
        )

    # Check core metadata insertion
    stmt = (db_tables.modules.select().where(
        db_tables.modules.c.module_ident == ident))
    result = db_engines['common'].execute(stmt).fetchone()
    assert result.created == control_metadata.created
    assert result.created != changed_created
def insert_collection(self, model, _derived_from=None):
    # This is validly used here because the tests associated with
    # this parser function are outside the scope of persistent
    # actions dealing with the database.
    from press.parsers import parse_collection_metadata
    metadata = parse_collection_metadata(model)

    engine = self.db_engines['common']
    t = self.db_tables

    # Anything inserted with this tool must already have a valid id
    assert metadata.id is not None

    with engine.begin() as trans:
        if self._already_exists(trans, model, metadata):
            return model

        # Insert metadata
        ident, id = self._insert_module_metadata(
            trans,
            metadata,
            'Collection',
            _derived_from=_derived_from,
        )

        # Rewrite the content with the id
        with model.file.open('rb') as fb:
            xml = etree.parse(fb)
        elm = xml.xpath('//md:content-id', namespaces=COLLECTION_NSMAP)[0]
        elm.text = id
        with model.file.open('wb') as fb:
            fb.write(etree.tounicode(xml).encode('utf8'))

        # Insert content files
        with model.file.open('rb') as fb:
            result = trans.execute(t.files.insert().values(
                file=fb.read(),
                media_type='text/xml',
            ))
        fileid = result.inserted_primary_key[0]
        result = trans.execute(t.module_files.insert().values(
            module_ident=ident,
            fileid=fileid,
            filename='collection.xml',
        ))

        self._set_state(trans, metadata.id, metadata.version, 'current')

        # Insert resource files (recipes, cover image, etc.)
        for resource in model.resources:
            self._insert_module_file(resource, ident)

    return Collection(id, model.file, model.resources)
def test_parse_collection_metadata_without_print_style(tmpdir,
                                                       litezip_valid_litezip):
    working_dir = tmpdir.mkdir('col')
    collection_file = working_dir.join('collection.xml')

    # Copy over and modify the collection.xml file.
    with (litezip_valid_litezip / 'collection.xml').open() as origin:
        xml = etree.parse(origin)
    elm = xml.xpath('//col:param[@name="print-style"]',
                    namespaces=COLLECTION_NSMAP)[0]
    elm.getparent().remove(elm)
    collection_file.write(etree.tounicode(xml).encode('utf8'))
    assert 'print-style' not in collection_file.read()

    # Test the parser doesn't error when a print-style is missing.
    # given a Collection object,
    model = parse_collection(Path(working_dir))
    # parse the metadata into a CollectionMetadata,
    md = parse_collection_metadata(model)

    assert md.print_style is None
def test_parse_collection_metadata(litezip_valid_litezip):
    # given a Collection object,
    model = parse_collection(litezip_valid_litezip)

    # parse the metadata into a CollectionMetadata,
    md = parse_collection_metadata(model)

    # which we then test for data point information
    assert md.id == 'col11405'
    assert md.version == '1.2'
    assert md.created == '2011/05/24 10:31:56.888 GMT-5'
    assert md.revised == '2013/03/11 22:52:33.244 GMT-5'
    assert md.title == 'Intro to Computational Engineering: Elec 220 Labs'
    assert md.license_url == 'http://creativecommons.org/licenses/by/3.0/'
    assert md.language == 'en'
    assert md.authors == ('mwjhnsn', 'jedifan42')
    assert md.maintainers == ('mwjhnsn', 'jedifan42', 'cavallar')
    assert md.licensors == ('mwjhnsn', 'jedifan42', 'cavallar')
    assert md.keywords == (
        'Calculator', 'Cavallaro', 'Elec 220', 'Gate', 'Interrupt',
        'LC-3', 'Loop', 'Microcontroller', 'MSP 430', 'Rice',
    )
    assert md.subjects == ('Science and Technology', )
    assert md.abstract == ("This collection houses all the documentation "
                           "for the lab component of Rice Universities Elec "
                           "220 lab component. The labs cover topics such "
                           "as gates, simulation, basic digital I/O, "
                           "interrupt driven embedded programming, C "
                           "language programming, and finally a/d "
                           "interfacing and touch sensors.")
    # This test case uses ``value=""`` in the xml, so no usable
    # print-style value is found.
    assert md.print_style is None
def test_publish_revision_that_overwrites_existing_resources(
        content_util, persist_util, app, db_engines, db_tables):
    # Insert initial collection and modules.
    resources = list([content_util.gen_resource() for x in range(0, 2)])

    # Create a book-cover resource
    book_cover_filename = 'book-cover.png'
    book_cover_media_type = 'image/png'
    book_cover_img = generate_random_image_by_size(10)
    book_cover = content_util.gen_resource(
        data=book_cover_img,
        filename=book_cover_filename,
        media_type=book_cover_media_type,
    )
    resources.append(book_cover)

    collection, tree, modules = content_util.gen_collection(
        resources=resources)
    modules = list([persist_util.insert_module(m) for m in modules])
    collection, tree, modules = content_util.rebuild_collection(
        collection, tree)
    collection = persist_util.insert_collection(collection)
    metadata = parse_collection_metadata(collection)

    # Override the existing book-cover resource
    new_book_cover_img = generate_random_image_by_size(10)
    new_book_cover = content_util.gen_resource(
        data=new_book_cover_img,
        filename=book_cover_filename,
        media_type=book_cover_media_type,
    )
    collection.resources.pop()  # pop off the old book-cover
    collection.resources.append(new_book_cover)
    assert book_cover.sha1 != new_book_cover.sha1

    # Collect control data for this current version
    stmt = (db_tables.module_files.join(db_tables.files).select().where(
        db_tables.modules.c.moduleid == metadata.id))
    control_data = db_engines['common'].execute(stmt).fetchall()

    # Ensure the replaced resource really was in the content
    # prior to our publication
    control_files = {x.filename: x for x in control_data}
    replaced_file_record = control_files[book_cover_filename]
    # FIXME: temporarily skipping this check. Re-enable when we add a full
    #        implementation of the collxml diffing code.
    assert replaced_file_record.sha1 == book_cover.sha1
    assert replaced_file_record.file == book_cover.data.read_bytes()

    # make it publishable
    with element_tree_from_model(collection) as xml:
        elem = xml.xpath('//md:title', namespaces=COLLECTION_NSMAP)[0]
        elem.text = 'a different collection title'

    # TARGET
    with db_engines['common'].begin() as conn:
        (id, version), ident = publish_legacy_book(
            collection,
            metadata,
            (
                'user1',
                'test publish',
            ),
            conn,
        )

    # Check for file insertion
    stmt = (db_tables.module_files.join(db_tables.files).select().where(
        db_tables.module_files.c.module_ident == ident))
    result = db_engines['common'].execute(stmt).fetchall()
    files = {x.filename: x for x in result}
    assert new_book_cover.filename in files
    assert files[new_book_cover.filename].sha1 == new_book_cover.sha1
    assert files[new_book_cover.filename].file == \
        new_book_cover.data.read_bytes()
def test_publish_derived(content_util, persist_util, app, db_engines,
                         db_tables):
    # Insert initial collection and modules.
    resources = list([content_util.gen_resource() for x in range(0, 2)])
    collection, tree, modules = content_util.gen_collection(
        resources=resources)
    modules = list([persist_util.insert_module(m) for m in modules])
    collection, tree, modules = content_util.rebuild_collection(
        collection, tree)
    collection = persist_util.insert_collection(collection)
    metadata = parse_collection_metadata(collection)

    # Derive a copy of the collection
    derived_collection = persist_util.derive_from(collection)
    derived_metadata = parse_collection_metadata(derived_collection)

    # Collect control data for non-legacy metadata
    stmt = (db_tables.modules.select().where(
        db_tables.modules.c.moduleid == derived_metadata.id))
    control_metadata = db_engines['common'].execute(stmt).fetchone()

    # Make some change to the collection xml,
    # because republishing an unchanged collection is no longer valid
    with derived_collection.file.open('r+') as fb:
        new_content = fb.read().replace('Derived copy of',
                                        'Deerived copy of')
        fb.seek(0)
        fb.write(new_content)

    # TARGET
    with db_engines['common'].begin() as conn:
        now = conn.execute('SELECT CURRENT_TIMESTAMP as now').fetchone().now
        (id, version), ident = publish_legacy_book(
            derived_collection,
            derived_metadata,
            (
                'user1',
                'test publish',
            ),
            conn,
        )

    # Lookup parent collection's metadata (ident and authors)
    # for parentage checks against the derived-copy metadata.
    stmt = (db_tables.latest_modules.select().where(
        db_tables.latest_modules.c.moduleid == collection.id))
    parent_metadata_result = db_engines['common'].execute(stmt).fetchone()

    # Check core metadata insertion
    stmt = (db_tables.modules.join(db_tables.abstracts).select().where(
        db_tables.modules.c.module_ident == ident))
    result = db_engines['common'].execute(stmt).fetchone()
    assert result.uuid == control_metadata.uuid
    assert result.major_version == 2
    assert result.minor_version == 1
    assert result.version == '1.2'
    assert result.abstract == derived_metadata.abstract
    assert result.created == parse_date(derived_metadata.created)
    assert result.revised == now
    assert result.portal_type == 'Collection'
    assert result.name == derived_metadata.title
    assert result.licenseid == 13
    assert result.print_style == derived_metadata.print_style
    assert result.submitter == 'user1'
    assert result.submitlog == 'test publish'
    assert result.authors == list(derived_metadata.authors)
    assert result.maintainers == list(derived_metadata.maintainers)
    assert result.licensors == list(derived_metadata.licensors)
    assert result.google_analytics == GOOGLE_ANALYTICS_CODE

    # Check for derived metadata (parent and parent authors)
    assert result.parent == parent_metadata_result.module_ident
    assert result.parentauthors == parent_metadata_result.authors

    # Check the derived-from metadata tag was correctly inserted
    # into the document.
    stmt = (db_tables.module_files.join(db_tables.files).select().where(
        db_tables.module_files.c.module_ident == ident).where(
            db_tables.module_files.c.filename == 'collection.xml'))
    collection_doc = db_engines['common'].execute(stmt).fetchone()
    expected = bytes(
        ('<md:derived-from url="http://cnx.org/content/{}/{}"/>'.format(
            metadata.id, metadata.version)),
        'utf-8',
    )
    assert expected in collection_doc.file
def test_publish_revision_base_case(content_util, persist_util, app,
                                    db_engines, db_tables):
    # Insert initial collection and modules.
    resources = list([content_util.gen_resource() for x in range(0, 2)])
    collection, tree, modules = content_util.gen_collection(
        resources=resources)
    modules = list([persist_util.insert_module(m) for m in modules])
    collection, tree, modules = content_util.rebuild_collection(
        collection, tree)
    collection = persist_util.insert_collection(collection)
    metadata = parse_collection_metadata(collection)

    # Collect control data for non-legacy metadata
    stmt = (db_tables.modules.select().where(
        db_tables.modules.c.moduleid == metadata.id))
    control_metadata = db_engines['common'].execute(stmt).fetchone()

    # Insert a new module (will cause a major version rev!)
    new_module = content_util.gen_module()
    new_module = persist_util.insert_module(new_module)
    # ... remove second element from the tree ...
    tree.pop(1)
    # ... and append the new module to the tree.
    tree.append(content_util.make_tree_node_from(new_module))
    collection, tree, modules = content_util.rebuild_collection(
        collection, tree)

    # TARGET
    with db_engines['common'].begin() as conn:
        now = conn.execute('SELECT CURRENT_TIMESTAMP as now').fetchone().now
        (id, version), ident = publish_legacy_book(
            collection,
            metadata,
            (
                'user1',
                'test publish',
            ),
            conn,
        )

    # Check core metadata insertion
    stmt = (db_tables.modules.select().where(
        db_tables.modules.c.module_ident == ident))
    result = db_engines['common'].execute(stmt).fetchone()
    assert result.uuid == control_metadata.uuid
    assert result.major_version == 2
    assert result.minor_version == 1
    assert result.version == '1.2'
    assert result.abstractid == control_metadata.abstractid
    assert result.created == parse_date(metadata.created)
    assert result.revised == now
    assert result.portal_type == 'Collection'
    assert result.name == metadata.title
    assert result.licenseid == 13
    assert result.print_style == metadata.print_style
    assert result.submitter == 'user1'
    assert result.submitlog == 'test publish'
    assert result.authors == list(metadata.authors)
    assert result.maintainers == list(metadata.maintainers)
    assert result.licensors == list(metadata.licensors)
    assert result.google_analytics == GOOGLE_ANALYTICS_CODE

    # Check subject metadata insertion
    stmt = (db_tables.moduletags.join(db_tables.tags).select().where(
        db_tables.moduletags.c.module_ident == ident))
    results = db_engines['common'].execute(stmt)
    subjects = [x.tag for x in results]
    assert sorted(subjects) == sorted(metadata.subjects)

    # Check keyword metadata insertion
    stmt = (db_tables.modulekeywords.join(db_tables.keywords).select().where(
        db_tables.modulekeywords.c.module_ident == ident))
    results = db_engines['common'].execute(stmt)
    keywords = [x.word for x in results]
    assert sorted(keywords) == sorted(metadata.keywords)

    # Check for file insertion
    stmt = (db_tables.module_files.join(db_tables.files).select().where(
        db_tables.module_files.c.module_ident == ident))
    result = db_engines['common'].execute(stmt).fetchall()
    filenames = [x.filename for x in result]
    assert len(filenames) == len(resources) + 1  # content file
    assert 'collection.xml' in filenames

    # Check for resource file insertion
    for resource in resources:
        assert resource.filename in filenames

    # Check the tree for accuracy (even though this is out of scope)
    stmt = (text(
        "SELECT tree_to_json_for_legacy("
        " m.uuid::text, "
        " concat_ws('.', m.major_version, m.minor_version)::text"
        ")::json "
        "FROM modules AS m "
        "WHERE m.module_ident = :module_ident").bindparams(
            module_ident=ident))
    inserted_tree = db_engines['common'].execute(stmt).fetchone()[0]
    compare_legacy_tree_similarity(inserted_tree['contents'], tree)
def _parse_collection_metadata(self, *args, **kwargs):
    # The parser is validly used here because it is unit tested
    # without using this utility.
    from press.parsers import parse_collection_metadata
    return parse_collection_metadata(*args, **kwargs)