Example #1
0
def make_map(arg_tuple):
    """Build a UrlMap from a packed argument sequence and populate it.

    arg_tuple is read positionally:
        [0] URL, passed to UrlMap as both its first and third argument
        [1] value forwarded to create_map as total_iterations
        [2] value forwarded to UrlMap as dynamic_pages

    Returns the UrlMap after create_map has been called on it.
    """
    # Read every field up front so a too-short sequence fails before any
    # UrlMap is constructed.
    url, max_nodes, dynamic = arg_tuple[0], arg_tuple[1], arg_tuple[2]

    # Fixed chromedriver path, passed as UrlMap's second argument.
    driver_path = "/usr/lib/chromium-browser/chromedriver"

    url_map = UrlMap(url, driver_path, url, dynamic_pages=dynamic)
    url_map.create_map(total_iterations=max_nodes)
    return url_map
Example #2
0
    def setUp(self):
        """Create a fresh UrlMap and register the article patterns on it."""

        def rettest(*args, **kwargs):
            # Hand back exactly what was received, as (args, kwargs).
            return args, kwargs

        # Registration order: literal year, one positional group,
        # two positional groups, then three named groups.
        article_patterns = (
            r'^articles/2003/$',
            r'^articles/(\d{4})/$',
            r'^articles/(\d{4})/(\d{2})/$',
            r'^articles/(?P<year>\d{4})/(?P<month>\d{2})/(?P<day>\d{2})/$',
        )

        self.urlmap = UrlMap()
        for pattern in article_patterns:
            self.urlmap.add(pattern, rettest)
Example #3
0
from urlmapper import UrlMap
import time
import json
from operator import itemgetter

# Path of the chromedriver binary; passed as UrlMap's second argument.
mypath = "/Users/Tilley/Downloads/chromedriver"
# The same address is passed as UrlMap's first and third arguments.
myurl = "https://sjrfire.com"
myurlstart = "https://sjrfire.com"
# Alternative values for myurl, currently disabled:
# myurl = "https://youtube.com"
# myurl = "https://reddit.com"

# creates our UrlMap object, then runs create_map with total_iterations=5
url_map = UrlMap(myurl, mypath, myurlstart, dynamic_pages=False)
url_map.create_map(total_iterations=5)

# Print the resulting map (the this_map attribute) to stdout.
print(url_map.this_map)
Example #4
0
import unittest
import sys
sys.path.append("..")
from urlmapper import UrlMap

# URLs handed to the UrlMap instances below.
myurl1 = "https://google.com/"
myurl2 = "https://reddit.com/"
# Path of the chromedriver binary; passed as UrlMap's second argument.
mypath = "/Users/Tilley/Downloads/chromedriver"

# NOTE: the three maps below are built at module level, so importing this
# module runs create_map three times before any test executes.

# Map 1: dynamic_pages=False, total_iterations=10.
mymap1 = UrlMap(myurl1, mypath, myurl1, dynamic_pages=False)
mymap1.create_map(total_iterations=10)

# Map 2: dynamic_pages not given, total_iterations=0.
mymap2 = UrlMap(myurl2, mypath, myurl2)
mymap2.create_map(total_iterations=0)

# Map 3: only two positional arguments, and no explicit total_iterations.
mymap3 = UrlMap(myurl1, mypath)
mymap3.create_map()  # total_iterations defaults to 30


# print(mymap1.get_map())
# print(len(mymap1.this_map.keys()))
class TestUrlMap(unittest.TestCase):
    """Checks against the module-level maps mymap1, mymap2 and mymap3."""

    def test_map_max_nodes(self):
        # Each map must hold exactly as many entries as it was asked for.

        # mymap1 was built with total_iterations=10
        ten_nodes = mymap1.this_map.keys()
        self.assertEqual(len(ten_nodes), 10)

        # mymap2 was built with total_iterations=0 (read through get_map())
        no_nodes = mymap2.get_map().keys()
        self.assertEqual(len(no_nodes), 0)

        # mymap3 was built without total_iterations; 30 entries are expected
        default_nodes = mymap3.this_map.keys()
        self.assertEqual(len(default_nodes), 30)
        # -1 goes forever and cannot be tested

    # test to see that our queue remains intact after termination
Example #5
0
# January 2020
# This module displays the network of a previously scraped website
#
from urlmapper import UrlMap
import time
import json
from operator import itemgetter

# Path of the chromedriver binary; passed as UrlMap's second argument.
mypath = "/Users/Tilley/Downloads/chromedriver"
# The same address is passed as UrlMap's first and third arguments.
myurl = "https://sjrfire.com"
myurlstart = "https://sjrfire.com"
# Alternative values for myurl, currently disabled:
# myurl = "https://youtube.com"
# myurl = "https://reddit.com"

# creates our UrlMap object
url_map = UrlMap(myurl, mypath, myurlstart, dynamic_pages=False)
# No total_iterations is passed here, so create_map uses its own default.
url_map.create_map()

# Disabled: read the map and its JSON list, then print it as indented JSON.
# site_mapping = url_map.get_map()
# site_map_json_list = url_map.json_list
# site_map_json = json.dumps(site_map_json_list, indent=4)
# print(site_map_json)

# Disabled: write that JSON out to site_map.json.
# with open('site_map.json', 'w', encoding='utf-8') as f:
#     json.dump(site_map_json, f, ensure_ascii=False, indent=4)

# gets data and formats it so d3.js can read it properly
llu = url_map.d3_json_links_list  # links list attribute of the map
nlu = url_map.d3_json_nodes_list  # nodes list attribute of the map
tlu = url_map.json_time  # json_time attribute of the map
# Starts out empty; presumably filled from the values above - TODO confirm.
nodes_and_links = {}
Example #6
0
class TestUrlMap(unittest.TestCase):
    """Exercise UrlMap.map_path against a small set of article patterns."""

    def setUp(self):
        # Handler shared by every pattern: returns what it was called with.
        def rettest(*args, **kwargs):
            return args, kwargs

        # Registration order: literal year, one positional group,
        # two positional groups, then three named groups.
        article_patterns = (
            r'^articles/2003/$',
            r'^articles/(\d{4})/$',
            r'^articles/(\d{4})/(\d{2})/$',
            r'^articles/(?P<year>\d{4})/(?P<month>\d{2})/(?P<day>\d{2})/$',
        )

        self.urlmap = UrlMap()
        for pattern in article_patterns:
            self.urlmap.add(pattern, rettest)

    def test_noargs(self):
        # A path with nothing captured: the handler gets no arguments at all.
        matched = self.urlmap.map_path('articles/2003/')
        self.assertEqual(matched.call(), ((), {}))

    def test_oneargs(self):
        # One unnamed group: its text arrives as the only positional argument.
        matched = self.urlmap.map_path('articles/2004/')
        self.assertEqual(matched.call(), (('2004',), {}))

    def test_kwargss(self):
        # Named groups: captured text arrives as keyword arguments only.
        matched = self.urlmap.map_path('articles/2004/12/01/')
        expected = ((), {'day': '01', 'month': '12', 'year': '2004'})
        self.assertEqual(matched.call(), expected)

    def test_urlnotmatch(self):
        # 'article/...' (singular) matches no registered pattern.
        with self.assertRaises(UrlNotFound):
            self.urlmap.map_path('article/2004/12/01/')