Example #1
0
    def test6_dbpedia_noredirects(self):
        """Expect ``check_uri_redirects`` to hand this frame back unchanged.

        The call uses ``replace=True``, ``bundled_mode=True`` and
        ``uri_data_model=False``; the expected frame carries exactly the
        same values as the input frame.
        """
        uris = [
            "http://dbpedia.org/resource/Mainz",
            "http://dbpedia.org/resource/Paris",
            "http://dbpedia.org/resource/Bremen",
            "http://dbpedia.org/resource/New_York",
            "http://dbpedia.org/resource/New_YorkXXXX",
            np.nan,
        ]
        filler = ["abc"] * len(uris)

        input_frame = pd.DataFrame({
            "random_string": list(filler),
            "url": list(uris),
        })

        # Built from separate list copies so the expectation does not share
        # any object with the frame handed to the function under test.
        expected = pd.DataFrame({
            "random_string": list(filler),
            "url": list(uris),
        })

        actual = check_uri_redirects(
            input_frame,
            "url",
            replace=True,
            bundled_mode=True,
            uri_data_model=False,
        )

        # check_like=True: index/column order is ignored in the comparison.
        pd.testing.assert_frame_equal(actual, expected, check_like=True)
Example #2
0
 def transform(self, X, y=None):
     """Run ``check_uri_redirects`` on ``X`` with this object's settings.

     Every option is read from the attribute of the same name on ``self``.
     ``y`` is accepted but not used. The value returned by
     ``check_uri_redirects`` is returned as-is.
     """
     options = {
         "column": self.column,
         "replace": self.replace,
         "custom_name_postfix": self.custom_name_postfix,
         "redirection_property": self.redirection_property,
         "endpoint": self.endpoint,
         "regex_filter": self.regex_filter,
         "bundled_mode": self.bundled_mode,
         "uri_data_model": self.uri_data_model,
         "progress": self.progress,
         "caching": self.caching,
     }
     return check_uri_redirects(X, **options)
Example #3
0
    def test5_dbpedia_bundled_noreplace_postfix(self):
        """Expect an added ``url_checked`` column and an untouched ``url``.

        The call uses ``replace=False``, ``bundled_mode=True``,
        ``uri_data_model=False`` and ``custom_name_postfix="_checked"``.
        The expected frame keeps the input columns and adds ``url_checked``,
        which holds the Saxony/Hesse URIs for the first two rows and NaN
        for the remaining four.
        """
        uris = [
            "http://dbpedia.org/resource/Sachsen",
            "http://dbpedia.org/resource/Hessen",
            "http://dbpedia.org/resource/Bremen",
            "http://dbpedia.org/resource/New_York",
            "http://dbpedia.org/resource/New_YorkXXXX",
            np.nan,
        ]
        filler = ["abc"] * len(uris)

        # Only the first two rows are expected to receive a value.
        checked = [
            "http://dbpedia.org/resource/Saxony",
            "http://dbpedia.org/resource/Hesse",
        ] + [np.nan] * 4

        input_frame = pd.DataFrame({
            "random_string": list(filler),
            "url": list(uris),
        })

        expected = pd.DataFrame({
            "random_string": list(filler),
            "url": list(uris),
            "url_checked": checked,
        })

        actual = check_uri_redirects(
            input_frame,
            "url",
            replace=False,
            bundled_mode=True,
            uri_data_model=False,
            custom_name_postfix="_checked",
        )

        # check_like=True: index/column order is ignored in the comparison.
        pd.testing.assert_frame_equal(actual, expected, check_like=True)