Esempio n. 1
0
 def are_contained_publishable(self):
     """Flag to say whether any contained models are publishable.

     Walks the documents yielded by ``cnxepub.flatten_to_documents(self)``
     and stops at the first one whose ``is_publishable`` is truthy.

     :returns: ``True`` if at least one contained document is publishable,
         ``False`` otherwise (including when there are no documents).
     """
     return any(
         doc.is_publishable for doc in cnxepub.flatten_to_documents(self)
     )
def main():
    """Write a JSON file of per-document abstract and revised metadata.

    Command-line arguments (read from ``sys.argv``):

    1. path to the assembled XHTML file (must exist)
    2. path to a JSON file mapping document ids to revised values
       (must exist)
    3. path of the JSON file to write

    The output maps each document's ``ident_hash`` to a dict with the keys
    ``abstract`` and ``revised``.
    """
    assembled_path = Path(sys.argv[1]).resolve(strict=True)
    revised_map_path = Path(sys.argv[2]).resolve(strict=True)
    destination = sys.argv[3]

    with open(revised_map_path, 'r') as map_file:
        revised_by_id = json.load(map_file)

    with open(assembled_path, "r") as assembled:
        book = reconstitute(assembled)

    results = {}
    for document in flatten_to_documents(book):
        summary = document.metadata.get("summary")
        # Prefer the revised value from the map; when the map has no
        # (truthy) entry, the metadata parsed from the assembled XHTML
        # is expected to provide it.
        mapped = revised_by_id.get(document.id)
        revised = mapped if mapped else document.metadata["revised"]
        entry = {
            "abstract": summary,
            "revised": utils.ensure_isoformat(revised),
        }
        results[document.ident_hash] = entry

    with open(destination, "w") as sink:
        json.dump(results, sink)
Esempio n. 3
0
 def are_contained_publishable(self):
     """Flag to say whether any contained models are publishable.

     Checks the documents yielded by ``cnxepub.flatten_to_documents(self)``
     one by one and stops as soon as one reports a truthy
     ``is_publishable``.

     :returns: ``True`` if at least one contained document is publishable,
         ``False`` otherwise (including when there are no documents).
     """
     return any(
         doc.is_publishable for doc in cnxepub.flatten_to_documents(self)
     )
def create_canonical_map(binders):
    """Build a mapping of document id to canonical book uuid.

    Every document found by flattening each binder contributes one entry
    keyed by its ``id`` with the ``canonical_book_uuid`` from its metadata.
    When the same id appears more than once, the last occurrence wins.

    :param binders: iterable of binders to flatten
    :returns: dict of ``doc.id`` -> ``doc.metadata['canonical_book_uuid']``
    """
    return {
        document.id: document.metadata['canonical_book_uuid']
        for book in binders
        for document in flatten_to_documents(book)
    }
Esempio n. 5
0
 def publish_prep(self):
     """Prepare this model and its contained documents for publication.

     Fills in the derived metadata fields (``license_url``,
     ``license_text`` and ``summary``), sets the ``cnx-archive`` uri to
     this model's id, then calls ``publish_prep()`` once for each distinct
     document id found by ``cnxepub.flatten_to_documents(self)``.
     """
     license_info = self.metadata['license']
     self.metadata['license_url'] = license_info.url
     self.metadata['license_text'] = ' '.join(
         [license_info.name, license_info.abbr, license_info.version])
     self.metadata['summary'] = self.metadata['abstract']
     self.set_uri('cnx-archive', self.id)
     # A set keeps the "already prepared" check O(1) per document; the
     # same document id can show up more than once in the flattened tree.
     seen_ids = set()
     for document in cnxepub.flatten_to_documents(self):
         if document.id not in seen_ids:
             seen_ids.add(document.id)
             document.publish_prep()
Esempio n. 6
0
def provide_supporting_files(input_dir, output_dir, binder):
    """Link each document's source directory and write its XHTML.

    For every id found by ``scan_for_id_mapping`` / ``scan_for_uuid_mapping``
    under ``input_dir`` that is also a document in ``binder``:

    * ``output_dir/<id>`` is (re)created as a symlink pointing at the
      directory containing the mapped file, expressed relative to
      ``output_dir``;
    * ``output_dir/<id>.xhtml`` is written with the bytes produced by
      ``HTMLFormatter`` for that document.

    :param input_dir: directory scanned for id/uuid -> file path mappings
    :param output_dir: ``pathlib.Path``-like directory receiving the output
    :param binder: binder whose flattened documents are rendered
    """
    documents = {doc.id: doc for doc in flatten_to_documents(binder)}
    id_to_filepath_mapping = scan_for_id_mapping(input_dir)
    id_to_filepath_mapping.update(scan_for_uuid_mapping(input_dir))
    for doc_id, filepath in id_to_filepath_mapping.items():
        if doc_id not in documents:
            continue
        link = output_dir / doc_id
        # exists() follows symlinks, so a dangling link left behind by an
        # earlier run reports False and symlink_to() would then fail with
        # FileExistsError; check is_symlink() as well.
        if link.is_symlink() or link.exists():
            link.unlink()
        link.symlink_to(relative_path(filepath.parent, output_dir))
        with (output_dir / '{}.xhtml'.format(doc_id)).open('wb') as fb:
            fb.write(bytes(HTMLFormatter(documents[doc_id])))
Esempio n. 7
0
    def publish_prep(self):
        """Prepare this model and its contained documents for publication.

        Fills in the derived metadata fields (``license_url``,
        ``license_text`` and ``summary``), resets a ``'default'`` print
        style to ``None``, sets the ``cnx-archive`` uri to this model's id,
        then calls ``publish_prep()`` once for each distinct document id
        found by ``cnxepub.flatten_to_documents(self)``.
        """
        license_info = self.metadata['license']
        self.metadata['license_url'] = license_info.url
        self.metadata['license_text'] = ' '.join(
            [license_info.name, license_info.code, license_info.version])
        self.metadata['summary'] = self.metadata['abstract']
        if self.metadata['print_style'] == 'default':
            self.metadata['print_style'] = None

        self.set_uri('cnx-archive', self.id)
        # A set keeps the "already prepared" check O(1) per document; the
        # same document id can show up more than once in the flattened tree.
        seen_ids = set()
        for document in cnxepub.flatten_to_documents(self):
            if document.id not in seen_ids:
                seen_ids.add(document.id)
                document.publish_prep()
def main():
    """Split a book XHTML file into per-document XHTML and metadata files.

    Command-line arguments (read from ``sys.argv``):

    1. path to the book XHTML file (must exist)
    2. path to the baked metadata JSON file (must exist)
    3. book slug, used to name the table-of-contents outputs
    4. output directory

    For every document in the binder this writes
    ``<binder.ident_hash>:<doc.id>.xhtml`` and a matching
    ``-metadata.json`` file into the output directory. Afterwards it writes
    ``<book_slug>.toc.xhtml`` and ``<book_slug>.toc-metadata.json``.
    """
    xhtml_file = Path(sys.argv[1]).resolve(strict=True)
    metadata_file = Path(sys.argv[2]).resolve(strict=True)
    book_slug = sys.argv[3]
    out_dir = Path(sys.argv[4])

    with open(xhtml_file, "rb") as file:
        html_root = etree.parse(file)
        # etree.parse() consumed the stream; rewind so reconstitute() reads
        # the document from the beginning rather than from EOF.
        file.seek(0)
        binder = reconstitute(file)
        slugs = extract_slugs_from_binder(binder)

    with open(metadata_file, "r") as baked_json:
        baked_metadata = json.load(baked_json)
        book_toc_metadata = baked_metadata.get(binder.ident_hash)

    nav = html_root.xpath("//xhtml:nav",
                          namespaces=HTML_DOCUMENT_NAMESPACES)[0]

    toc_maker = ElementMaker(namespace=None,
                             nsmap={None: "http://www.w3.org/1999/xhtml"})
    toc = toc_maker.html(E.head(E.title("Table of Contents")), E.body(nav))

    nav_links = toc.xpath("//xhtml:a", namespaces=HTML_DOCUMENT_NAMESPACES)

    for doc in flatten_to_documents(binder):
        id_with_context = f'{binder.ident_hash}:{doc.id}'

        # Point in-page ToC anchors (href="#...") at this document's output
        # file when the anchor target is a top-level div of the document.
        module_etree = content_to_etree(doc.content)
        for link in nav_links:
            link_href = link.attrib['href']
            if not link_href.startswith('#'):
                continue
            if module_etree.xpath(
                    f"/xhtml:body/xhtml:div[@id='{link_href[1:]}']",
                    namespaces=HTML_DOCUMENT_NAMESPACES):
                link.attrib['href'] = f'./{id_with_context}.xhtml'

        # Add metadata to same-book-different-module links.
        # The module in which same-book link targets reside is only fully known
        # at time of disassembly. Different pipelines can make use of this
        # metadata in different ways
        for node in module_etree.xpath(
                '//xhtml:a[@href and starts-with(@href, "/contents/")]',
                namespaces=HTML_DOCUMENT_NAMESPACES):
            print('BEFORE:')
            print(node.attrib)

            page_link = node.attrib["href"].split("/")[-1]
            # Link may have a fragment. partition() yields an empty fragment
            # when there is no '#', and does not fail if more than one '#'
            # is present (everything after the first one is the fragment).
            page_uuid, _, page_fragment = page_link.partition("#")

            # This is either an intra-book link or inter-book link. We can
            # differentiate the latter by data-book-uuid attrib).
            if not node.attrib.get("data-book-uuid"):
                # NOTE(review): slugs.get() returns None for an unknown
                # uuid, which lxml would presumably reject as an attribute
                # value -- confirm every linked page has a slug.
                node.attrib["data-page-slug"] = slugs.get(page_uuid)
                node.attrib["data-page-uuid"] = page_uuid
                node.attrib["data-page-fragment"] = page_fragment

            print('AFTER:')
            print(node.attrib)

        doc.content = etree_to_content(module_etree)

        # Inject some styling and JS for QA
        xml_parser = etree.XMLParser(ns_clean=True)
        root = etree.XML(bytes(DocumentContentFormatter(doc)), xml_parser)
        heads = root.xpath("//xhtml:head", namespaces=HTML_DOCUMENT_NAMESPACES)

        # xpath() returns a list: use the matched element itself so the
        # style/script below are attached to the tree, not to that list.
        if heads:
            head = heads[0]
        else:
            head = etree.Element("head")
            root.insert(0, head)

        style = etree.Element("style")
        script = etree.Element("script")

        style.text = u'''
            /* STYLING_FOR_DEVS */
            /* Linking to a specific element should highlight the element */
            :target {
                background-color: #ffffcc;
                border: 1px dotted #000000;

                animation-name: cssAnimation;
                animation-duration: 10s;
                animation-timing-function: ease-out;
                animation-delay: 0s;
                animation-fill-mode: forwards;
            }
            @keyframes cssAnimation {
                to {
                    background-color: initial;
                    border: initial;
                }
            }

            /* Style footnotes so that they stand out */
            [role="doc-footnote"] {
                background-color: #ffcccc;
                border: 1px dashed #ff0000;
            }
            [role="doc-footnote"]:before { content: "FOOTNOTE " ; }

            /* Show a permalink when hovering over a heading or paragraph */
            *:not(:hover) > a.-dev-permalinker { display: none; }
            * > a.-dev-permalinker {
                margin-left: .1rem;
                text-decoration: none;
            }
        '''

        script.text = u'''//<![CDATA[
            // SCRIPTS_FOR_DEVS
            window.addEventListener('load', () => {
                const pilcrow = '¶'

                function addPermalink(parent, id) {
                    const link = window.document.createElement('a')
                    link.classList.add('-dev-permalinker')
                    link.setAttribute('href', '#' + id)
                    link.textContent = pilcrow
                    parent.appendChild(link)
                }

                const paragraphs = Array.from(
                    document.querySelectorAll('p[id]')
                )
                paragraphs.forEach(p => addPermalink(p, p.getAttribute('id')) )

                const headings = Array.from(
                    document.querySelectorAll(
                        '*[id] > h1, *[id] > h2, *[id] > h3, ' +
                        '*[id] > h4, *[id] > h5, *[id] > h6'
                    )
                )
                headings.forEach(h => addPermalink(
                    h, h.parentElement.getAttribute('id'))
                )
            })
        // ]]>'''

        head.append(style)
        head.append(script)

        with open(f"{out_dir / id_with_context}.xhtml", "wb") as out:
            out.write(etree.tostring(root))

        with open(f"{out_dir / id_with_context}-metadata.json",
                  "w") as json_out:
            # Incorporate metadata from disassemble step while setting defaults
            # for cases like composite pages which may not have metadata from
            # previous stages
            json_metadata = {
                "slug": slugs.get(doc.id),
                "title": doc.metadata.get("title"),
                "abstract": None,
                "id": doc.id,
                "revised": datetime.now(timezone.utc).isoformat()
            }

            # Add / override metadata from baking if available
            json_metadata.update(baked_metadata.get(doc.ident_hash, {}))

            json.dump(json_metadata, json_out)

    with open(f"{out_dir}/{book_slug}.toc.xhtml", "wb") as out:
        out.write(etree.tostring(toc, encoding="utf8", pretty_print=True))

    with open(f"{out_dir}/{book_slug}.toc-metadata.json", "w") as toc_json:
        json.dump(book_toc_metadata, toc_json)
Esempio n. 9
0
    def test_from_git_collection_xml(self, git_collection_data):
        """Check ``Binder.from_collection_xml`` on the git collection fixture.

        Builds a binder from ``collection.xml`` under ``git_collection_data``
        and asserts on the resulting tree, the binder metadata, the ids of
        the flattened documents, the title overrides and each document's
        ``cnx-archive-uri``.
        """
        filepath = git_collection_data / 'collection.xml'

        # Hit the target
        binder = Binder.from_collection_xml(filepath)

        # Verify the tree structure
        # NOTE(review): several ids below read '[email protected]', which
        # looks like an email-obfuscation artifact standing in for an
        # '<uuid>@<version>' string -- confirm against the fixture data.
        expected_tree = {
            'contents': [
                {'id': '[email protected]',
                 'shortId': None,
                 'title': 'Preface'},
                {'contents': [{'id': 'd93df8ff-6e4a-4a5e-befc-ba5a144f309c@',
                               'shortId': None,
                               'title': 'Introduction'},
                              {'id': 'cb418599-f69b-46c1-b0ef-60d9e36e677f@',
                               'shortId': None,
                               'title': 'Definitions of '
                               'Statistics, Probability, '
                               'and Key Terms'},
                              {'id': '[email protected]',
                               'shortId': None,
                               'title': 'Data, Sampling, and '
                               'Variation in Data and '
                               'Sampling'},
                              {'id': '3fb20c92-9515-420b-ab5e-6de221b89e99@',
                               'shortId': None,
                               'title': 'Frequency, Frequency '
                               'Tables, and Levels of '
                               'Measurement'},
                              {'id': '[email protected]',
                               'shortId': None,
                               'title': 'Experimental Design and '
                               'Ethics'}],
                 'id': 'subcol',
                 'shortId': None,
                 'title': 'Sampling and Data'},
                {'contents': [{'id': '[email protected]',
                               'shortId': None,
                               'title': 'Introduction'},
                              {'id': '[email protected]',
                               'shortId': None,
                               'title': 'Stem-and-Leaf Graphs '
                               '(Stemplots), Line Graphs, '
                               'and Bar Graphs'}],
                 'id': 'subcol',
                 'shortId': None,
                 'title': 'Descriptive Statistics'},
                {'id': '[email protected]',
                 'shortId': None,
                 'title': 'Review Exercises (Ch 3-13)'},
                {'id': '[email protected]',
                 'shortId': None,
                 'title': 'Practice Tests (1-4) and Final Exams'},
                {'id': '[email protected]',
                 'shortId': None,
                 'title': 'Data Sets'}],
            'id': '30189442-6998-4686-ac05-ed152b91b9de@af89d35',
            'shortId': None,
            'title': 'Introductory Statistics',
        }
        assert model_to_tree(binder) == expected_tree

        # Verify the metadata
        expected_metadata = {
            'authors': [],
            'cnx-archive-shortid': None,
            'cnx-archive-uri': '30189442-6998-4686-ac05-ed152b91b9de@af89d35',
            'copyright_holders': [],
            'created': None,
            'derived_from_title': None,
            'derived_from_uri': None,
            'editors': [],
            'illustrators': [],
            'keywords': (),
            'language': None,
            'license_text': 'Creative Commons Attribution License',
            'license_url': 'http://creativecommons.org/licenses/by/4.0/',
            'print_style': 'statistics',
            'publishers': [],
            'revised': '2019-02-22T14:15:14.840187-06:00',
            'subjects': (),
            'summary': None,
            'title': 'Introductory Statistics',
            'translators': [],
            'version': 'af89d35',
            'uuid': '30189442-6998-4686-ac05-ed152b91b9de',
            'canonical_book_uuid': None,
            'slug': 'introductory-statistics',
        }
        assert binder.metadata == expected_metadata

        # Verify documents have been created
        # (the comparison is on a list, so the order of the ids matters)
        expected = [
            'd93df8ff-6e4a-4a5e-befc-ba5a144f309c',
            'cb418599-f69b-46c1-b0ef-60d9e36e677f',
            '3fb20c92-9515-420b-ab5e-6de221b89e99'
        ]
        assert [x.id for x in flatten_to_documents(binder)] == expected

        # Verify the collection title overrides
        custom_title_doc = [
            doc
            for doc in flatten_to_documents(binder)
            if doc.id == 'd93df8ff-6e4a-4a5e-befc-ba5a144f309c'
        ][0]
        # the page believes its title is...
        title = 'Introduction to Statistics'
        assert custom_title_doc.metadata['title'] == title
        # ...and the book believes the title is...
        title = 'Introduction'
        assert binder[1].get_title_for_node(custom_title_doc) == title

        # Verify the DocumentPointer objects have a title set on the object
        doc_pt = binder[0]
        title = 'Preface'
        assert doc_pt.metadata['title'] == title

        # Verify cnx-archive-uri is set in modules with metadata
        expected = {
            '3fb20c92-9515-420b-ab5e-6de221b89e99':
                '3fb20c92-9515-420b-ab5e-6de221b89e99@',
            'cb418599-f69b-46c1-b0ef-60d9e36e677f':
                'cb418599-f69b-46c1-b0ef-60d9e36e677f@',
            'd93df8ff-6e4a-4a5e-befc-ba5a144f309c':
                'd93df8ff-6e4a-4a5e-befc-ba5a144f309c@'
        }
        for doc in flatten_to_documents(binder):
            # Fails if a flattened document has no expected entry.
            assert expected.get(doc.id)
            assert expected[doc.id] == doc.metadata['cnx-archive-uri']
Esempio n. 10
0
    def test_from_collection_xml(self, neb_collection_data):
        """Check ``Binder.from_collection_xml`` on the neb collection fixture.

        Builds a binder from ``collection.xml`` under ``neb_collection_data``
        and asserts on the resulting tree, the binder metadata, the ids of
        the flattened documents, the title overrides, each document's
        ``cnx-archive-uri`` and the uris of each document's references.
        """
        filepath = neb_collection_data / 'collection.xml'

        # Hit the target
        binder = Binder.from_collection_xml(filepath)

        # Verify the tree structure
        # NOTE(review): several ids below read '[email protected]', which
        # looks like an email-obfuscation artifact standing in for an
        # '<uuid>@<version>' string -- confirm against the fixture data.
        expected_tree = {
            'contents': [
                {'id': '[email protected]',
                 'shortId': None,
                 'title': 'Preface'},
                {'contents': [{'id': 'd93df8ff-6e4a-4a5e-befc-ba5a144f309c@14',
                               'shortId': None,
                               'title': 'Introduction'},
                              {'id': 'cb418599-f69b-46c1-b0ef-60d9e36e677f@12',
                               'shortId': None,
                               'title': 'Definitions of '
                               'Statistics, Probability, '
                               'and Key Terms'},
                              {'id': '[email protected]',
                               'shortId': None,
                               'title': 'Data, Sampling, and '
                               'Variation in Data and '
                               'Sampling'},
                              {'id': '3fb20c92-9515-420b-ab5e-6de221b89e99@17',
                               'shortId': None,
                               'title': 'Frequency, Frequency '
                               'Tables, and Levels of '
                               'Measurement'},
                              {'id': '[email protected]',
                               'shortId': None,
                               'title': 'Experimental Design and '
                               'Ethics'}],
                 'id': 'subcol',
                 'shortId': None,
                 'title': 'Sampling and Data'},
                {'contents': [{'id': '[email protected]',
                               'shortId': None,
                               'title': 'Introduction'},
                              {'id': '[email protected]',
                               'shortId': None,
                               'title': 'Stem-and-Leaf Graphs '
                               '(Stemplots), Line Graphs, '
                               'and Bar Graphs'}],
                 'id': 'subcol',
                 'shortId': None,
                 'title': 'Descriptive Statistics'},
                {'id': '[email protected]',
                 'shortId': None,
                 'title': 'Review Exercises (Ch 3-13)'},
                {'id': '[email protected]',
                 'shortId': None,
                 'title': 'Practice Tests (1-4) and Final Exams'},
                {'id': '[email protected]',
                 'shortId': None,
                 'title': 'Data Sets'}],
            'id': '[email protected]',
            'shortId': None,
            'title': 'Introductory Statistics',
        }
        assert model_to_tree(binder) == expected_tree

        # Verify the metadata
        expected_metadata = {
            'authors': [{'id': 'OpenStaxCollege',
                         'name': 'OpenStaxCollege',
                         'type': 'cnx-id'}],
            'cnx-archive-shortid': None,
            'cnx-archive-uri': '[email protected]',
            'copyright_holders': [{'id': 'OpenStaxCollege',
                                   'name': 'OpenStaxCollege',
                                   'type': 'cnx-id'}],
            'created': '2013-07-18T19:30:26-05:00',
            'derived_from_title': 'Principles of Economics',
            'derived_from_uri': 'https://legacy.cnx.org/content/col11613/1.2',
            'editors': [],
            'illustrators': [],
            'keywords': (),
            'language': 'en',
            'license_text': 'Creative Commons Attribution License',
            'license_url': 'http://creativecommons.org/licenses/by/4.0/',
            'print_style': 'statistics',
            'publishers': [{'id': 'OpenStaxCollege',
                            'name': 'OpenStaxCollege',
                            'type': 'cnx-id'},
                           {'id': 'cnxstats',
                            'name': 'cnxstats',
                            'type': 'cnx-id'}],
            'revised': '2019-02-22T14:15:14.840187-06:00',
            # FIXME: Subject from derived-from is duplicated here
            # This is a problem with the cnxml library, not neb
            # Same problem will exist with keywords and potentially roles
            'subjects': ('Mathematics and Statistics',
                         'Mathematics and Statistics'),
            'summary': None,
            'title': 'Introductory Statistics',
            'translators': [],
            'version': '23.41',
            'uuid': None,
            'canonical_book_uuid': None,
            'slug': None,
        }
        assert binder.metadata == expected_metadata

        # Verify documents have been created
        # (the comparison is on a list, so the order of the ids matters)
        expected = [
            'd93df8ff-6e4a-4a5e-befc-ba5a144f309c',
            'cb418599-f69b-46c1-b0ef-60d9e36e677f',
            '3fb20c92-9515-420b-ab5e-6de221b89e99'
        ]
        assert [x.id for x in flatten_to_documents(binder)] == expected

        # Verify the collection title overrides
        custom_title_doc = [
            doc
            for doc in flatten_to_documents(binder)
            if doc.id == 'd93df8ff-6e4a-4a5e-befc-ba5a144f309c'
        ][0]
        # the page believes its title is...
        title = 'Introduction to Statistics'
        assert custom_title_doc.metadata['title'] == title
        # ...and the book believes the title is...
        title = 'Introduction'
        assert binder[1].get_title_for_node(custom_title_doc) == title

        # Verify the DocumentPointer objects have a title set on the object
        doc_pt = binder[0]
        title = 'Preface'
        assert doc_pt.metadata['title'] == title

        # Verify cnx-archive-uri is set in modules with metadata
        expected = {
            '3fb20c92-9515-420b-ab5e-6de221b89e99':
                '3fb20c92-9515-420b-ab5e-6de221b89e99@17',
            'cb418599-f69b-46c1-b0ef-60d9e36e677f':
                'cb418599-f69b-46c1-b0ef-60d9e36e677f@12',
            'd93df8ff-6e4a-4a5e-befc-ba5a144f309c':
                'd93df8ff-6e4a-4a5e-befc-ba5a144f309c@14'
        }
        for doc in flatten_to_documents(binder):
            # Fails if a flattened document has no expected entry.
            assert expected.get(doc.id)
            assert expected[doc.id] == doc.metadata['cnx-archive-uri']

        # Verify reference uris are updated based upon metadata
        expected = {
            'd93df8ff-6e4a-4a5e-befc-ba5a144f309c': [
                'd93df8ff-6e4a-4a5e-befc-ba5a144f309c/CNX_Stats_C01_COs.jpg'
            ],
            'cb418599-f69b-46c1-b0ef-60d9e36e677f': [
                'cb418599-f69b-46c1-b0ef-60d9e36e677f/fig-ch01_02_01n.png',
                'cb418599-f69b-46c1-b0ef-60d9e36e677f'
                '/m16020_DotPlot_description.html',
                'cb418599-f69b-46c1-b0ef-60d9e36e677f'
                '/m16020_DotPlot_description.html'
            ],
            '3fb20c92-9515-420b-ab5e-6de221b89e99': [
                '/contents/[email protected]',
                'http://en.wikibooks.org/',
                '3fb20c92-9515-420b-ab5e-6de221b89e99'
                '/CNX_Stats_C01_M10_003.jpg',
                'foobar.png',
                '/contents/cb418599-f69b-46c1-b0ef-60d9e36e677f',
                '/contents/d93df8ff-6e4a-4a5e-befc-ba5a144f309c#pagelocation'
            ]
        }

        # Only membership is checked here: every reference a document has
        # must be listed above, but a listed uri that is absent from
        # doc.references does not fail the test.
        for doc in flatten_to_documents(binder):
            assert expected.get(doc.id)
            for reference in doc.references:
                assert reference.uri in expected[doc.id]
import sys
import json
from cnxepub.collation import reconstitute
from cnxepub.models import flatten_to_documents

# Command-line arguments: the input file handed to reconstitute() and the
# path of the JSON file to write.
in_path, out_path = sys.argv[1:3]

with open(in_path, "r") as in_file:
    binder = reconstitute(in_file)

# Map every document's ident_hash to its abstract, taken from the
# "summary" metadata entry (None when the entry is missing).
json_data = {
    doc.ident_hash: {"abstract": doc.metadata.get("summary")}
    for doc in flatten_to_documents(binder)
}

with open(out_path, "w") as out_file:
    json.dump(json_data, out_file)