def test6_dbpedia_noredirects(self):
    """Expect the input frame back unchanged for this set of DBpedia URIs.

    Calls ``check_uri_redirects`` on the ``url`` column with
    ``replace=True``, ``bundled_mode=True`` and ``uri_data_model=False``
    and asserts that the result equals a frame holding exactly the same
    values as the input (column order is ignored via ``check_like``).
    """
    uris = [
        "http://dbpedia.org/resource/Mainz",
        "http://dbpedia.org/resource/Paris",
        "http://dbpedia.org/resource/Bremen",
        "http://dbpedia.org/resource/New_York",
        "http://dbpedia.org/resource/New_YorkXXXX",
        np.nan,
    ]
    labels = ["abc"] * len(uris)

    frame = pd.DataFrame({"random_string": labels, "url": uris})
    # Built before the call so the expectation cannot be affected by
    # whatever check_uri_redirects does to its input.
    expected = pd.DataFrame({"random_string": labels, "url": uris})

    actual = check_uri_redirects(
        frame,
        "url",
        replace=True,
        bundled_mode=True,
        uri_data_model=False,
    )

    pd.testing.assert_frame_equal(actual, expected, check_like=True)
def transform(self, X, y=None):
    """Run ``check_uri_redirects`` on ``X`` using this object's settings.

    Args:
        X: Data passed as the first positional argument to
            ``check_uri_redirects``.
        y: Accepted but not used by this method.

    Returns:
        Whatever ``check_uri_redirects`` returns for ``X`` and the
        configured options.
    """
    # Every option is forwarded verbatim from the attribute of the same name.
    options = {
        "column": self.column,
        "replace": self.replace,
        "custom_name_postfix": self.custom_name_postfix,
        "redirection_property": self.redirection_property,
        "endpoint": self.endpoint,
        "regex_filter": self.regex_filter,
        "bundled_mode": self.bundled_mode,
        "uri_data_model": self.uri_data_model,
        "progress": self.progress,
        "caching": self.caching,
    }
    return check_uri_redirects(X, **options)
def test5_dbpedia_bundled_noreplace_postfix(self):
    """Expect redirect targets in a new ``url_checked`` column.

    Calls ``check_uri_redirects`` on the ``url`` column with
    ``replace=False``, ``bundled_mode=True``, ``uri_data_model=False`` and
    ``custom_name_postfix="_checked"``. The expected frame keeps ``url``
    as given and adds ``url_checked`` holding a value for the first two
    rows and NaN for the remaining four (column order is ignored via
    ``check_like``).
    """
    uris = [
        "http://dbpedia.org/resource/Sachsen",
        "http://dbpedia.org/resource/Hessen",
        "http://dbpedia.org/resource/Bremen",
        "http://dbpedia.org/resource/New_York",
        "http://dbpedia.org/resource/New_YorkXXXX",
        np.nan,
    ]
    # Expected content of the added column, row-aligned with ``uris``.
    checked = [
        "http://dbpedia.org/resource/Saxony",
        "http://dbpedia.org/resource/Hesse",
        np.nan,
        np.nan,
        np.nan,
        np.nan,
    ]
    labels = ["abc"] * len(uris)

    frame = pd.DataFrame({"random_string": labels, "url": uris})
    expected = pd.DataFrame(
        {
            "random_string": labels,
            "url": uris,
            "url_checked": checked,
        }
    )

    actual = check_uri_redirects(
        frame,
        "url",
        replace=False,
        bundled_mode=True,
        uri_data_model=False,
        custom_name_postfix="_checked",
    )

    pd.testing.assert_frame_equal(actual, expected, check_like=True)