def treat_metadatas(name, metadatas):
    """Resolve the origin ID and stamp the script metadata with this connector.

    Each entry of ``metadatas`` is handled as follows:

    * an ``ft:forcedOriginId`` entry replaces ``name`` with its first value and
      is left out of the returned list;
    * a ``METADATA_SCRIPT`` entry is rebuilt with the connector identifier
      (``antidot-html-connector-<version>``) appended to its first value;
    * any other entry is kept unchanged.

    When no ``METADATA_SCRIPT`` entry is present, one containing only the
    connector identifier is appended. When no forced origin ID is present, a
    warning is logged because the default ``name`` is used.

    :param name: default origin ID, used unless a forced one is supplied.
    :param metadatas: iterable of metadata objects exposing ``key`` and
        ``first_value``.
    :return: a ``(name, new_metadatas)`` tuple.
    """
    script_name = "{}-{}".format(
        "antidot-html-connector",
        pkg_resources.get_distribution("antidot-html-connector").version)
    found_origin_id = False
    found_script = False
    new_metadatas = []
    for metadata in metadatas:
        if metadata.key == "ft:forcedOriginId":
            LOGGER.debug("Forcing the origin ID to '%s'.", metadata.first_value)
            name = metadata.first_value
            found_origin_id = True
            # The forced origin ID is consumed here and not forwarded.
            continue
        if metadata.key == METADATA_SCRIPT:
            found_script = True
            metadata = Metadata.string(
                METADATA_SCRIPT,
                ["{}-{}".format(metadata.first_value, script_name)])
        new_metadatas.append(metadata)
    if not found_script:
        new_metadatas.append(Metadata.string(METADATA_SCRIPT, [script_name]))
    # No explicit level check is needed: LOGGER.warning() already filters on
    # the logger's effective level. (Testing the constant ``logging.WARNING``
    # would always be truthy and therefore a no-op.)
    if not found_origin_id:
        LOGGER.warning(
            "For <%s>, we used a default origin_id based on the file name and its metadatas."
            " Sending the same file with the same metadata will replace it.",
            name,
        )
    return name, new_metadatas
def test_excluded_metadata_name(self):
    """Ignored metadata must not alter the default name built from metadata."""
    script_metadata = Metadata.string(METADATA_SCRIPT, ["this test script"])
    style_metadata = Metadata.string("style-map-hash", [hash("STYLE_MAP")])
    this_file = Path(__file__)
    # Second element of the returned pair is the generated name.
    _, name_with_ignored = get_html_from_path(
        this_file, [script_metadata, style_metadata])
    _, name_without = get_html_from_path(this_file, [])
    self.assertEqual(
        name_with_ignored,
        name_without,
        "The ignored metadata should not affect the name!",
    )
def test_treat_metadatas(self):
    """An ``ft:forcedOriginId`` metadata overrides the name.

    The returned metadata list is expected to hold only the script entry,
    stamped with the connector version.
    """
    forced_name = "This is the forced origin_id"
    forced_origin = Metadata.string("ft:forcedOriginId", [forced_name])
    self.metadatas.append(forced_origin)
    result_name, result_metadatas = treat_metadatas("name", self.metadatas)
    stamped_script = "this test script-{}".format(self.connector_version)
    expected = [Metadata.string(METADATA_SCRIPT, {stamped_script})]
    self.assertEqual(forced_name, result_name)
    self.assertEqual(result_metadatas, expected)
def test_no_metadata_script(self):
    """A script metadata is added when the input list is empty."""
    result_name, result_metadatas = treat_metadatas("name", [])
    only_script = Metadata.string(METADATA_SCRIPT, {self.connector_version})
    expected = [only_script]
    # Without a forced origin ID the name comes back untouched.
    self.assertEqual("name", result_name)
    self.assertEqual(result_metadatas, expected)
def test_publish_with_meta(self):
    """Forced title and origin ID metadata show up on the published result."""
    credentials = LoginAuthentication("login", "password")
    client = RemoteClient(
        url="url", authentication=credentials, source_id="source_id")
    origin_id = "Forced joke origin id"
    title = "THE LITTLE FORCED TITLE IN THE PRAIRIE"
    forced = [
        Metadata.string("ft:forcedTitle", [title]),
        Metadata.string("ft:forcedOriginId", [origin_id]),
    ]
    response = publish_html_with_client(
        HTML_PATHS[0], client, metadatas=forced)
    publication = response.publications
    self.assertIsNotNone(publication)
    self.assertEqual(publication.id, origin_id)
    self.assertEqual(publication.title, title)
    self.assertEqual(len(publication.metadata), 1)
def setUp(self) -> None:
    """Build the shared fixtures and swap ``sys.argv`` for a fake command line."""
    # Keep a reference to the real argv before it is overwritten below
    # (presumably restored in tearDown, which is not visible here).
    self.temp_sys = sys.argv
    self.script_name = "script_name"
    distribution = pkg_resources.get_distribution("antidot-html-connector")
    self.connector_version = "antidot-html-connector-{}".format(
        distribution.version)
    script_metadata = Metadata.string(METADATA_SCRIPT, ["this test script"])
    self.metadatas = [script_metadata]
    sys.argv = [self.script_name, "myhtml.html"]
def test_preexiting_metadata_script(self):
    """An existing script metadata gets the connector version appended."""
    result_name, result_metadatas = treat_metadatas("name", self.metadatas)
    stamped_script = "this test script-{}".format(self.connector_version)
    expected = [Metadata.string(METADATA_SCRIPT, {stamped_script})]
    # No forced origin ID was supplied, so the name must be unchanged.
    self.assertEqual("name", result_name)
    self.assertEqual(result_metadatas, expected)
def test_get_html_from_path(self):
    """The origin ID stays short with many metadata, yet depends on them."""
    metadatas = [Metadata.string("i", [-i, "long"]) for i in range(1000)]
    this_file = Path(__file__)
    _, name = get_html_from_path(this_file, metadatas)
    self.assertLess(len(name), ORIGIN_ID_MAX_SIZE)
    # Dropping a single metadata must be enough to yield another origin ID.
    all_but_last = metadatas[:-1]
    _, other_name = get_html_from_path(this_file, all_but_last)
    self.assertNotEqual(
        name, other_name, "Different metadatas, same origin id !")
def ft_content_from_html_content(html_content, title, use_ftml, render_cover_page, path):
    """Turn HTML into a ``StructuredContent`` plus its list of resources.

    With ``use_ftml`` set, a single topic is built from ``html_content`` and
    ``title``, split with ``TopicsSplitter`` and converted to a table of
    contents; no resources are returned in that mode. Otherwise the HTML is
    split with ``HtmlSplitter`` (from ``path`` when it is truthy, else from
    ``html_content``) and converted through ``HtmlToTopics``.

    :param html_content: HTML content to convert.
    :param title: title given to the topic in FTML mode.
    :param use_ftml: whether to go through the FTML connector.
    :param render_cover_page: forwarded to ``HtmlToTopics``.
    :param path: optional path handed to ``HtmlSplitter`` instead of the content.
    :return: a ``(content, resources)`` tuple.
    :raises ModuleNotFoundError: if ``use_ftml`` is set but FTML is unavailable.
    """
    if use_ftml:
        if not FTML_AVAILABLE:
            raise ModuleNotFoundError(
                "Please install the FTML connector in order to use FTML.")
        builder = TopicBuilder().title(Metadata.title(title))
        topic = builder.content(html_content).origin_id("0").build()
        split_topics = [TopicsSplitter().split(topic)]
        toc = PublicationConverter().convert_toc(split_topics)
        ftml_content = StructuredContent(
            toc=toc, editorial_type=EditorialType.DEFAULT)
        return ftml_content, []

    # Plain HTML mode: a path, when given, takes precedence over raw content.
    splitter = HtmlSplitter(path=path) if path else HtmlSplitter(content=html_content)
    converter = HtmlToTopics(splitter, render_cover_page=render_cover_page)
    content = StructuredContent(
        toc=converter.topics, editorial_type=EditorialType.DEFAULT)
    return content, converter.resources