예제 #1
0
def treat_metadatas(name, metadatas):
    """Resolve the origin ID and stamp the script metadata with this connector.

    Args:
        name: Default origin ID, returned as-is unless a
            ``ft:forcedOriginId`` metadata overrides it.
        metadatas: Iterable of metadata objects exposing ``key`` and
            ``first_value``.

    Returns:
        A ``(name, new_metadatas)`` tuple. ``name`` is the first value of the
        last ``ft:forcedOriginId`` metadata when one is present, otherwise the
        ``name`` argument. ``new_metadatas`` is a new list holding every input
        metadata except ``ft:forcedOriginId`` ones; each ``METADATA_SCRIPT``
        entry is replaced by one suffixed with the connector name and version,
        and a ``METADATA_SCRIPT`` entry is appended when none was given.
    """
    script_name = "{}-{}".format(
        "antidot-html-connector",
        pkg_resources.get_distribution("antidot-html-connector").version)
    found_origin_id = False
    found_script = False
    new_metadatas = []
    for metadata in metadatas:
        if metadata.key == "ft:forcedOriginId":
            LOGGER.debug("Forcing the origin ID to '%s'.",
                         metadata.first_value)
            name = metadata.first_value
            found_origin_id = True
            # The forced origin ID is consumed here and not forwarded.
            continue
        if metadata.key == METADATA_SCRIPT:
            found_script = True
            metadata = Metadata.string(
                METADATA_SCRIPT,
                ["{}-{}".format(metadata.first_value, script_name)])
        new_metadatas.append(metadata)
    if not found_script:
        new_metadatas.append(Metadata.string(METADATA_SCRIPT, [script_name]))
    # The original also tested ``logging.WARNING`` here, which is a non-zero
    # constant and therefore always true; the logger filters by level itself.
    if not found_origin_id:
        LOGGER.warning(
            "For <%s>, we used a default origin_id based on the file name and its metadatas."
            " Sending the same file with the same metadata will replace it.",
            name,
        )
    return name, new_metadatas
예제 #2
0
 def test_excluded_metadata_name(self):
     """Ignored metadata must not alter the default name built from metadata."""
     this_file = Path(__file__)
     script_metadata = Metadata.string(METADATA_SCRIPT, ["this test script"])
     style_metadata = Metadata.string("style-map-hash", [hash("STYLE_MAP")])
     _, name_with_ignored = get_html_from_path(
         this_file, [script_metadata, style_metadata])
     _, name_without = get_html_from_path(this_file, [])
     self.assertEqual(
         name_with_ignored,
         name_without,
         "The ignored metadata should not affect the name!",
     )
예제 #3
0
 def test_treat_metadatas(self):
     """The ft:forcedOriginId metadata overrides the name and is not passed on."""
     forced_name = "This is the forced origin_id"
     forced_origin = Metadata.string("ft:forcedOriginId", [forced_name])
     self.metadatas.append(forced_origin)
     new_name, new_metadatas = treat_metadatas("name", self.metadatas)
     stamped_script = "this test script-{}".format(self.connector_version)
     expected_metadatas = [Metadata.string(METADATA_SCRIPT, {stamped_script})]
     self.assertEqual(forced_name, new_name)
     self.assertEqual(new_metadatas, expected_metadatas)
예제 #4
0
 def test_no_metadata_script(self):
     """Script metadata is added when no metadata is supplied at all."""
     new_name, new_metadatas = treat_metadatas("name", [])
     script_only = Metadata.string(METADATA_SCRIPT, {self.connector_version})
     expected_metadatas = [script_only]
     self.assertEqual("name", new_name)
     self.assertEqual(new_metadatas, expected_metadatas)
 def test_publish_with_meta(self):
     """Forced title and origin ID metadata are reflected in the response."""
     origin_id = "Forced joke origin id"
     title = "THE LITTLE FORCED TITLE IN THE PRAIRIE"
     authentication = LoginAuthentication("login", "password")
     client = RemoteClient(
         url="url",
         authentication=authentication,
         source_id="source_id",
     )
     forced_title = Metadata.string("ft:forcedTitle", [title])
     forced_origin = Metadata.string("ft:forcedOriginId", [origin_id])
     response = publish_html_with_client(
         HTML_PATHS[0],
         client,
         metadatas=[forced_title, forced_origin],
     )
     self.assertIsNotNone(response.publications)
     self.assertEqual(response.publications.id, origin_id)
     self.assertEqual(response.publications.title, title)
     self.assertEqual(len(response.publications.metadata), 1)
예제 #6
0
 def setUp(self) -> None:
     """Save ``sys.argv``, prepare fixture values and install a fake argv."""
     self.temp_sys = sys.argv
     self.script_name = "script_name"
     installed = pkg_resources.get_distribution("antidot-html-connector")
     self.connector_version = "antidot-html-connector-{}".format(
         installed.version)
     script_metadata = Metadata.string(METADATA_SCRIPT, ["this test script"])
     self.metadatas = [script_metadata]
     # Replace argv last, once every lookup above has succeeded.
     sys.argv = [self.script_name, "myhtml.html"]
예제 #7
0
 def test_preexiting_metadata_script(self):
     """An existing script metadata gets the connector version appended."""
     new_name, new_metadatas = treat_metadatas("name", self.metadatas)
     stamped_script = "this test script-{}".format(self.connector_version)
     expected_metadatas = [Metadata.string(METADATA_SCRIPT, {stamped_script})]
     self.assertEqual("name", new_name)
     self.assertEqual(new_metadatas, expected_metadatas)
예제 #8
0
 def test_get_html_from_path(self):
     """The origin ID stays under the size limit even with many metadata."""
     this_file = Path(__file__)
     metadatas = [
         Metadata.string("i", [-index, "long"]) for index in range(1000)
     ]
     _, name = get_html_from_path(this_file, metadatas)
     self.assertLess(len(name), ORIGIN_ID_MAX_SIZE)
     # Dropping a single metadata must yield a different origin ID.
     all_but_last = metadatas[:-1]
     _, other_name = get_html_from_path(this_file, all_but_last)
     self.assertNotEqual(
         name,
         other_name,
         "Different metadatas, same origin id !",
     )
예제 #9
0
def ft_content_from_html_content(html_content, title, use_ftml,
                                 render_cover_page, path):
    """Build the structured content and its resources from HTML.

    Args:
        html_content: HTML used as the topic content (FTML mode) or handed to
            the splitter when ``path`` is falsy.
        title: Title of the single topic built in FTML mode.
        use_ftml: When truthy, go through the FTML topic pipeline.
        render_cover_page: Forwarded to ``HtmlToTopics`` in non-FTML mode.
        path: When truthy, the splitter is built from this path instead of
            from ``html_content`` (non-FTML mode only).

    Returns:
        A ``(content, resources)`` tuple; ``resources`` is an empty list in
        FTML mode.

    Raises:
        ModuleNotFoundError: If ``use_ftml`` is truthy while
            ``FTML_AVAILABLE`` is falsy.
    """
    if not use_ftml:
        if path:
            splitter = HtmlSplitter(path=path)
        else:
            splitter = HtmlSplitter(content=html_content)
        html2topics = HtmlToTopics(
            splitter, render_cover_page=render_cover_page)
        structured = StructuredContent(
            toc=html2topics.topics,
            editorial_type=EditorialType.DEFAULT,
        )
        return structured, html2topics.resources

    if not FTML_AVAILABLE:
        raise ModuleNotFoundError(
            "Please install the FTML connector in order to use FTML.")

    topic_builder = TopicBuilder().title(Metadata.title(title))
    topic = topic_builder.content(html_content).origin_id("0").build()
    split_topics = [TopicsSplitter().split(topic)]
    toc_nodes = PublicationConverter().convert_toc(split_topics)
    structured = StructuredContent(
        toc=toc_nodes,
        editorial_type=EditorialType.DEFAULT,
    )
    return structured, []