Example #1
from urlmapper import UrlMap


def make_map(arg_tuple):
    dynamic_path = "/usr/lib/chromium-browser/chromedriver"

    # unpack the (url, max_nodes, dynamic) work item
    url, max_nodes, dynamic = arg_tuple

    # crawl from url, using it as both the root and the starting page
    url_map = UrlMap(url, dynamic_path, url, dynamic_pages=dynamic)
    url_map.create_map(total_iterations=max_nodes)

    return url_map
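make_map takes one tuple rather than three separate parameters, which is the shape a worker passed to multiprocessing.Pool.map usually has. A minimal sketch of that assumed usage (the job list and process count are hypothetical, and it presumes UrlMap instances are picklable so results can travel back to the parent process):

from multiprocessing import Pool

# hypothetical work items: one (url, max_nodes, dynamic) tuple per site
jobs = [
    ("https://sjrfire.com", 10, False),
    ("https://www.google.com/", 10, False),
]

if __name__ == "__main__":
    with Pool(processes=2) as pool:
        # each worker builds and returns its own UrlMap
        maps = pool.map(make_map, jobs)
    for url_map in maps:
        print(len(url_map.this_map.keys()))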
Example #2
from urlmapper import UrlMap

mypath = "/Users/Tilley/Downloads/chromedriver"
myurl = "https://sjrfire.com"
myurlstart = "https://sjrfire.com"
# myurl = "https://youtube.com"
# myurl = "https://reddit.com"

# creates our UrlMap object
url_map = UrlMap(myurl, mypath, myurlstart, dynamic_pages=False)
url_map.create_map(total_iterations=5)

print(url_map.this_map)
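this_map prints as one raw dict; Example #3 counts its keys, one per crawled node, so a pretty-printed dump is easier to scan. A minimal sketch, assuming the map's values are JSON-serializable (default=str covers anything that is not):

import json

print(json.dumps(url_map.this_map, indent=2, default=str))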
Example #3
import unittest
import sys
sys.path.append("..")
from urlmapper import UrlMap

myurl1 = "https://google.com/"
myurl2 = "https://reddit.com/"
mypath = "/Users/Tilley/Downloads/chromedriver"

mymap1 = UrlMap(myurl1, mypath, myurl1, dynamic_pages=False)
mymap1.create_map(total_iterations=10)

mymap2 = UrlMap(myurl2, mypath, myurl2)
mymap2.create_map(total_iterations=0)

mymap3 = UrlMap(myurl1, mypath)
mymap3.create_map()  # total_iterations defaults to 30


# print(mymap1.get_map())
# print(len(mymap1.this_map.keys()))
class TestUrlMap(unittest.TestCase):

    # test to see that we get the correct number of nodes
    def test_map_max_nodes(self):
        self.assertEqual(len(mymap1.this_map.keys()), 10)
        self.assertEqual(len(mymap2.get_map().keys()), 0)
        self.assertEqual(len(mymap3.this_map.keys()), 30)
        # total_iterations=-1 crawls indefinitely, so it cannot be tested here

    # test to see that our queue remains intact after termination
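The snippet cuts off before the second test body. The suite is still runnable as-is, since the three maps above are built at import time; the conventional entry point would close the file:

if __name__ == "__main__":
    unittest.main()

Run the file directly with python so the relative sys.path.append("..") can resolve urlmapper.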
Example #4
from urlmapper import UrlMap
import time

mypath = "/Users/Tilley/Downloads/chromedriver"
myurl = "https://www.google.com/"
myurlstart = "https://www.google.com/"

url_map = UrlMap(myurl, mypath, myurlstart, dynamic_pages=False)
# 2.9747540950775146 sec for 10 nodes
# 37.1645712852478 sec for 100 nodes
# 424.85131096839905 sec for 1000 nodes
start = time.time()
url_map.create_map(total_iterations=1000)
end = time.time()
print(url_map.this_map)
print(end - start)
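The three recorded timings work out to roughly 0.30 to 0.42 seconds per node, so the crawl scales close to linearly over this range. A sketch that collects the same numbers in one loop instead of editing total_iterations by hand (building a fresh UrlMap per run is an assumption, made to keep the runs independent):

import time

from urlmapper import UrlMap

mypath = "/Users/Tilley/Downloads/chromedriver"
myurl = "https://www.google.com/"

for n in (10, 100, 1000):
    url_map = UrlMap(myurl, mypath, myurl, dynamic_pages=False)
    start = time.time()
    url_map.create_map(total_iterations=n)
    print(f"{time.time() - start:.2f} sec for {n} nodes")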
Example #5
# This module serves to display the network of a previously scraped website
#
from urlmapper import UrlMap
import time
import json
from operator import itemgetter

mypath = "/Users/Tilley/Downloads/chromedriver"
myurl = "https://sjrfire.com"
myurlstart = "https://sjrfire.com"
# myurl = "https://youtube.com"
# myurl = "https://reddit.com"

# creates our UrlMap object
url_map = UrlMap(myurl, mypath, myurlstart, dynamic_pages=False)
url_map.create_map()

# site_mapping = url_map.get_map()
# site_map_json_list = url_map.json_list
# site_map_json = json.dumps(site_map_json_list, indent=4)
# print(site_map_json)

# with open('site_map.json', 'w', encoding='utf-8') as f:
#     json.dump(site_map_json, f, ensure_ascii=False, indent=4)

# gets the data and formats it the way d3.js expects
llu = url_map.d3_json_links_list  # d3-formatted link records
nlu = url_map.d3_json_nodes_list  # d3-formatted node records
tlu = url_map.json_time
nodes_and_links = {}
nodes_and_links["nodes"] = nlu