Esempio n. 1
0
def test_url_to_domain_urls():
    """Check the domain URLToDomain returns for a variety of URL shapes."""
    primitive = URLToDomain()
    # Each entry pairs an input URL with the domain expected for it, so the
    # input and its expectation can be read side by side.
    cases = [
        (
            "https://play.google.com/store/apps/details?id=com.skgames.trafficracer%22",
            "play.google.com",
        ),
        ("http://mplay.google.co.in/sadfask/asdkfals?dk=10", "mplay.google.co.in"),
        ("http://lplay.google.co.in/sadfask/asdkfals?dk=10", "lplay.google.co.in"),
        ("http://play.google.co.in/sadfask/asdkfals?dk=10", "play.google.co.in"),
        ("http://tplay.google.co.in/sadfask/asdkfals?dk=10", "tplay.google.co.in"),
        # A leading "www." is expected to be dropped, with or without a scheme.
        ("http://www.google.co.in/sadfask/asdkfals?dk=10", "google.co.in"),
        ("www.google.co.in/sadfask/asdkfals?dk=10", "google.co.in"),
        # The "user:password@" part is expected to be dropped as well.
        ("http://*****:*****@google.com/?a=b#asdd", "google.com"),
        ("https://www.compzets.com?asd=10", "compzets.com"),
        ("www.compzets.com?asd=10", "compzets.com"),
        ("facebook.com", "facebook.com"),
        ("https://www.compzets.net?asd=10", "compzets.net"),
        ("http://www.featuretools.org", "featuretools.org"),
    ]
    inputs = pd.Series([url for url, _ in cases])
    expected = [domain for _, domain in cases]
    np.testing.assert_array_equal(primitive(inputs), expected)
Esempio n. 2
0
def test_url_to_domain_long_url():
    """Check that URLToDomain returns the host of a very long chart URL."""
    primitive = URLToDomain()
    # NOTE: the backslash continuations below are inside the string literal,
    # so the space before each backslash and the next line's leading
    # indentation are part of the URL value that is passed to the primitive.
    long_url = "http://chart.apis.google.com/chart?chs=500x500&chma=0,0,100, \
                        100&cht=p&chco=FF0000%2CFFFF00%7CFF8000%2C00FF00%7C00FF00%2C0 \
                        000FF&chd=t%3A122%2C42%2C17%2C10%2C8%2C7%2C7%2C7%2C7%2C6%2C6% \
                        2C6%2C6%2C5%2C5&chl=122%7C42%7C17%7C10%7C8%7C7%7C7%7C7%7C7%7C \
                        6%7C6%7C6%7C6%7C5%7C5&chdl=android%7Cjava%7Cstack-trace%7Cbro \
                        adcastreceiver%7Candroid-ndk%7Cuser-agent%7Candroid-webview%7 \
                        Cwebview%7Cbackground%7Cmultithreading%7Candroid-source%7Csms \
                        %7Cadb%7Csollections%7Cactivity|Chart"
    actual = primitive(pd.Series([long_url]))
    expected = ["chart.apis.google.com"]
    np.testing.assert_array_equal(actual, expected)
def test_url_to_domain_urls():
    """Check URLToDomain against a table of URLs and their expected domains."""
    # Maps each input URL to the domain expected for it. Dict insertion order
    # keeps inputs and expectations aligned; every URL key is distinct.
    expected_by_url = {
        'https://play.google.com/store/apps/details?id=com.skgames.trafficracer%22':
            'play.google.com',
        'http://mplay.google.co.in/sadfask/asdkfals?dk=10': 'mplay.google.co.in',
        'http://lplay.google.co.in/sadfask/asdkfals?dk=10': 'lplay.google.co.in',
        'http://play.google.co.in/sadfask/asdkfals?dk=10': 'play.google.co.in',
        'http://tplay.google.co.in/sadfask/asdkfals?dk=10': 'tplay.google.co.in',
        'http://www.google.co.in/sadfask/asdkfals?dk=10': 'google.co.in',
        'www.google.co.in/sadfask/asdkfals?dk=10': 'google.co.in',
        'http://*****:*****@google.com/?a=b#asdd': 'google.com',
        'https://www.compzets.com?asd=10': 'compzets.com',
        'www.compzets.com?asd=10': 'compzets.com',
        'facebook.com': 'facebook.com',
        'https://www.compzets.net?asd=10': 'compzets.net',
        'http://www.featuretools.org': 'featuretools.org',
    }
    primitive = URLToDomain()
    inputs = pd.Series(list(expected_by_url.keys()))
    expected = list(expected_by_url.values())
    np.testing.assert_array_equal(primitive(inputs), expected)
Esempio n. 4
0
def test_url_to_domain_nan():
    """Check URLToDomain on an object-dtype Series that contains a NaN entry."""
    primitive = URLToDomain()
    actual = primitive(
        pd.Series(["www.featuretools.com", np.nan], dtype="object")
    )
    # The missing entry is expected to stay NaN in the same position.
    expected = pd.Series(["featuretools.com", np.nan], dtype="object")
    pd.testing.assert_series_equal(actual, expected)