def build_digest(file_name, temp_dir="tmp", digest_config=None, image_file_name=None):
    """Build a Digest object from a DOCX input file.

    handle_zip is called with file_name and temp_dir and returns the DOCX
    file name plus an image file name; the latter is only used when the
    caller did not supply image_file_name. digest_config is passed on to
    build_doi. Returns None when parse_content produces no content.
    """
    docx_file_name, zip_image_file_name = handle_zip(file_name, temp_dir)
    LOGGER.info("build_digest file '%s' has docx_file_name: '%s'", file_name, docx_file_name)
    LOGGER.info(
        "build_digest file '%s' has zip_image_file_name: '%s'",
        file_name,
        zip_image_file_name,
    )

    # an image file name supplied by the caller takes priority
    image_file_name = image_file_name or zip_image_file_name

    content = parse_content(docx_file_name)
    if not content:
        # nothing was parsed, so there is no digest to build
        return None

    digest = Digest()
    digest.author = build_author(content)
    digest.title = build_title(content)
    digest.summary = build_summary(content)
    digest.keywords = build_keywords(content)
    digest.manuscript_number = build_manuscript_number(content)
    digest.doi = build_doi(content, digest_config)
    digest.text = build_text(content)
    digest.image = build_image(content, image_file_name)
    return digest
def test_docx_file_name(self, test_data):
    "check the docx file name generated, and the file created, for each scenario"
    scenario = test_data.get("scenario")
    expected_file_name = test_data.get("expected_file_name")

    # populate a Digest object from the scenario data
    digest = Digest()
    digest.author = test_data.get("author")
    digest.doi = test_data.get("doi")

    # a config is only parsed when the scenario asks for one
    if test_data.get("use_config"):
        config_text = raw_config(test_data.get("config_section"))
        digest_config = parse_raw_config(config_text)
    else:
        digest_config = None

    # first check: the generated file name
    file_name = output.docx_file_name(digest, digest_config)
    name_message = u"failed in scenario '{scenario}', got file_name {file_name}".format(
        scenario=scenario, file_name=file_name
    )
    self.assertEqual(file_name, expected_file_name, name_message)

    # second check: the file name returned after writing the docx to disk
    output_file_name = output.digest_docx(digest, os.path.join("tmp", file_name))
    expected_path = os.path.join("tmp", expected_file_name)
    file_message = (
        u"failed creating file in scenario '{scenario}', got file_name {file_name}".format(
            scenario=scenario, file_name=output_file_name
        )
    )
    self.assertEqual(expected_path, output_file_name, file_message)
def test_digest_figure_image_url(self):
    "check the image url produced by digest_figure_image_url for a jpg image"
    image = build_image(file_value="test.jpg")
    digest = Digest()
    digest.doi = "10.7554/eLife.99999"
    image_url = medium_post.digest_figure_image_url(self.digest_config, image, digest)
    self.assertEqual(
        image_url,
        u"https://iiif.elifesciences.org/digests/99999%2Ftest.jpg/full/full/0/default.jpg",
    )
def test_digest_json_published_value(self):
    "json output of a digest which only has its published value set"
    digest = Digest()
    digest.published = "2018-10-29"

    # keys are added in the order they are expected in the output
    expected = OrderedDict()
    expected["id"] = "None"
    expected["title"] = None
    expected["impactStatement"] = None
    expected["published"] = "2018-10-29"
    expected["content"] = []

    digest_json = json_output.digest_json(digest, None)
    self.assertEqual(digest_json, expected)
def test_digest_docx(self):
    "call digest_docx directly so the setting of bold tags is covered"
    digest = Digest()
    digest.text = ["<b>Test</b>"]
    docx_path = os.path.join("tmp", "bold_tag_test.docx")
    # write the docx file, then parse it back to compare the content
    written_file = output.digest_docx(digest, docx_path)
    self.assertEqual(parse_content(written_file), "DIGEST\n<b>Test</b>\n")
def test_digest_json_empty(self):
    "json output of an empty digest, with no text and no image file"
    digest = Digest()
    # blank out the list attributes so they are None for this test
    for attr_name in ("text", "keywords", "subjects"):
        setattr(digest, attr_name, None)

    expected = OrderedDict()
    expected["id"] = "None"
    expected["title"] = None
    expected["impactStatement"] = None
    expected["content"] = []

    digest_json = json_output.digest_json(digest, None)
    self.assertEqual(digest_json, expected)
def test_digest_jats(self):
    "convert digest text, including blank and None entries, to JATS XML content"
    paragraphs = ["First <b>paragraph</b>.", "Second <i>paragraph</i>."]
    # entries expected to add nothing to the output
    empty_values = ["", " ", None]
    digest = Digest()
    digest.text = paragraphs + empty_values
    expected_content = (
        "<p>First <bold>paragraph</bold>.</p><p>Second "
        "<italic>paragraph</italic>.</p>"
    )
    self.assertEqual(jats.digest_jats(digest), expected_content)
def test_digest_figure_content(self):
    "check the figure markup, image tag and caption, from digest_figure_content"
    image = build_image(caption=u"Caption. Anonymous (CC BY\xa04.0)", file_value="test.jpg")
    digest = Digest()
    digest.doi = "10.7554/eLife.99999"
    # expected markup: the img tag followed by the figcaption
    expected = (
        u"<figure>"
        u'<img src="https://iiif.elifesciences.org/digests/99999%2Ftest.jpg/full/full/0/default.jpg" />'
        u"<figcaption>Caption. Anonymous (CC BY\xa04.0)</figcaption></figure>"
    )
    figure_content = medium_post.digest_figure_content(self.digest_config, image, digest)
    self.assertEqual(figure_content, expected)