def test_url_to_protocol_urls():
    """Check protocol extraction over URLs with and without a scheme.

    Entries that do not start with an explicit ``http``/``https`` scheme
    are expected to produce NaN.
    """
    primitive = URLToProtocol()

    # (input URL, expected protocol) pairs, kept side by side so each
    # expectation can be read next to the URL it belongs to.
    cases = [
        (
            "https://play.google.com/store/apps/details?id=com.skgames.trafficracer%22",
            "https",
        ),
        ("http://mplay.google.co.in/sadfask/asdkfals?dk=10", "http"),
        ("http://lplay.google.co.in/sadfask/asdkfals?dk=10", "http"),
        ("www.google.co.in/sadfask/asdkfals?dk=10", np.nan),
        ("http://*****:*****@google.com/?a=b#asdd", "http"),
        ("https://www.compzets.com?asd=10", "https"),
        ("www.compzets.com?asd=10", np.nan),
        ("facebook.com", np.nan),
        ("https://www.compzets.net?asd=10", "https"),
        ("http://www.featuretools.org", "http"),
        ("https://featuretools.com", "https"),
    ]
    urls = pd.Series([url for url, _ in cases])
    expected = pd.Series([protocol for _, protocol in cases])

    actual = primitive(urls)
    pd.testing.assert_series_equal(actual, expected)
def test_url_to_protocol_long_url():
    """Check that the protocol is extracted from a very long URL.

    The URL is assembled from adjacent string literals, which Python joins
    at compile time. Backslash continuations inside a single literal would
    embed stray whitespace in the URL, so they are avoided here.
    """
    url_to_protocol = URLToProtocol()
    long_url = (
        "http://chart.apis.google.com/chart?chs=500x500&chma=0,0,100,"
        "100&cht=p&chco=FF0000%2CFFFF00%7CFF8000%2C00FF00%7C00FF00%2C0"
        "000FF&chd=t%3A122%2C42%2C17%2C10%2C8%2C7%2C7%2C7%2C7%2C6%2C6%"
        "2C6%2C6%2C5%2C5&chl=122%7C42%7C17%7C10%7C8%7C7%7C7%7C7%7C7%7C"
        "6%7C6%7C6%7C6%7C5%7C5&chdl=android%7Cjava%7Cstack-trace%7Cbro"
        "adcastreceiver%7Candroid-ndk%7Cuser-agent%7Candroid-webview%7"
        "Cwebview%7Cbackground%7Cmultithreading%7Candroid-source%7Csms"
        "%7Cadb%7Csollections%7Cactivity|Chart"
    )
    urls = pd.Series([long_url])
    correct_urls = ["http"]
    results = url_to_protocol(urls)
    # Compared element-wise against a plain list, so only values are checked.
    np.testing.assert_array_equal(results, correct_urls)
def test_url_to_protocol_nan():
    """Check that inputs without a protocol all yield NaN.

    Covers a scheme-less URL, a missing value and an empty string.
    """
    primitive = URLToProtocol()
    inputs = pd.Series(["www.featuretools.com", np.nan, ""], dtype="object")

    # Every input is expected to map to NaN in an object-dtype Series.
    expected = pd.Series([np.nan] * 3, dtype="object")

    actual = primitive(inputs)
    pd.testing.assert_series_equal(actual, expected)