Example 1
def back_to_start():
    logging.info('getting back to start point')
    site = Site()
    site.back_to_start()
    return jsonify(gexf=etree.tostring(site.get_gexf()).decode('utf-8'), current_page=site.current)
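The handlers in these excerpts rely on routing that is not shown. A minimal sketch of how back_to_start() above might be exposed, assuming a Flask app object; the route path and app name are hypothetical, not part of the original project:

from flask import Flask

app = Flask(__name__)  # hypothetical app object; the project's real app is not shown

@app.route('/back_to_start', methods=['POST'])  # hypothetical route path
def back_to_start_route():
    # Delegate to the handler above, which resets the Site state and
    # returns the graph as GEXF plus the current page id
    return back_to_start()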
Example 2
 def test_adding_two_page(self):
     from Site.Site import Site
     site = Site('%s.url' % uuid.uuid4())
     assert len(site._pages) == 1
     site.current_page("<html></html>", 'http://google.com/')
     site.current_page("<html><body></body></html>", 'http://google.com/2')
     assert len(site._pages) == 3
Example 3
def site_temp(request):
    threads_before = [
        obj for obj in gc.get_objects() if isinstance(obj, gevent.Greenlet)
    ]
    with mock.patch("Config.config.data_dir", config.data_dir + "-temp"):
        site_temp = Site("1TeSTvb4w2PWE81S2rEELgmX2GCCExQGT")
        site_temp.settings["serving"] = True
        site_temp.announce = mock.MagicMock(
            return_value=True)  # Don't try to find peers from the net

    def cleanup():
        site_temp.delete()
        site_temp.content_manager.contents.db.close("Test cleanup")
        site_temp.content_manager.contents.db.timer_check_optional.kill()
        db_path = "%s-temp/content.db" % config.data_dir
        os.unlink(db_path)
        del ContentDb.content_dbs[db_path]
        gevent.killall([
            obj for obj in gc.get_objects()
            if isinstance(obj, gevent.Greenlet) and obj not in threads_before
        ])

    request.addfinalizer(cleanup)
    site_temp.log = logging.getLogger("Temp:%s" % site_temp.address_short)
    return site_temp
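A minimal sketch of a test consuming this pytest fixture; the test name is hypothetical and the assertions only restate what the fixture body sets up:

def test_site_temp_setup(site_temp):
    # The fixture marks the temp site as serving and mocks announce(),
    # so no real peer discovery happens during the test
    assert site_temp.settings["serving"] is True
    assert site_temp.announce() is True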
Example 4
def follow_existing_connections():
    infos = request.get_json()
    logging.info('%s' % (infos['target']))
    site = Site()
    actions = site.get_actions_to(infos['target'])
    for action in actions:
        action.do()
    return jsonify(gexf=etree.tostring(site.get_gexf()).decode('utf-8'), current_page=site.current)
Example 5
def add_connection_and_go():
    connection = request.get_json()
    logging.info('%s - %s' % (connection['css'], connection['nb']))
    site = Site()
    connection_id = site.add_connection_to_current_page(Action.ActionType.CLICK, connection['css'], connection['nb'])
    action = site.get_action_from_id(connection_id)
    action.do()
    return jsonify(gexf=etree.tostring(site.get_gexf()).decode('utf-8'), current_page=site.current)
Example 6
def get_from_start():
    starter = request.get_json()
    site = Site()
    if site.current != 'start':
        abort(500)
    browser = Browser()
    browser.get(starter['url'])
    return jsonify(gexf=etree.tostring(site.get_gexf()).decode('utf-8'), current_page=site.current)
Example 7
 def test_repr_getter(self):
     from Site.Site import Site
     name = str(uuid.uuid4())
     site = Site(name)
     assert repr(site) == "<Site ('{0}')>".format(name)
     assert site.name == name
     assert site.current == 'start'
     assert site.current == site._current
Example 8
    def siteCreate(self, use_master_seed=True):
        logging.info("Generating new privatekey (use_master_seed: %s)..." %
                     config.use_master_seed)
        from Crypt import CryptBitcoin
        if use_master_seed:
            from User import UserManager
            user = UserManager.user_manager.get()
            if not user:
                user = UserManager.user_manager.create()
            address, address_index, site_data = user.getNewSiteData()
            privatekey = site_data["privatekey"]
            logging.info(
                "Generated using master seed from users.json, site index: %s" %
                address_index)
        else:
            privatekey = CryptBitcoin.newPrivatekey()
            address = CryptBitcoin.privatekeyToAddress(privatekey)
        logging.info(
            "----------------------------------------------------------------------"
        )
        logging.info("Site private key: %s" % privatekey)
        logging.info(
            "                  !!! ^ Save it now, required to modify the site ^ !!!"
        )
        logging.info("Site address:     %s" % address)
        logging.info(
            "----------------------------------------------------------------------"
        )

        while not config.batch and not use_master_seed:
            if input("? Have you secured your private key? (yes, no) > "
                     ).lower() == "yes":
                break
            else:
                logging.info(
                    "Please secure it now, you are going to need it to modify your site!"
                )

        logging.info("Creating directory structure...")
        from Site.Site import Site
        from Site import SiteManager
        SiteManager.site_manager.load()

        os.mkdir("%s/%s" % (config.data_dir, address))
        with open("%s/%s/index.html" % (config.data_dir, address), "w") as f:
            f.write("Hello %s!" % address)

        logging.info("Creating content.json...")
        site = Site(address)
        extend = {"postmessage_nonce_security": True}
        if use_master_seed:
            extend["address_index"] = address_index

        site.content_manager.sign(privatekey=privatekey, extend=extend)
        site.settings["own"] = True
        site.saveSettings()

        logging.info("Site created!")
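A hypothetical direct invocation, assuming the surrounding class is instantiated as actions (ZeroNet's CLI normally dispatches to these methods):

actions.siteCreate(use_master_seed=True)   # derive the key pair from the master seed in users.json
actions.siteCreate(use_master_seed=False)  # generate and print a standalone private key instead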
Example 9
def follow(connection):
    config = Configuration()
    site = Site()
    if connection not in site._connections:
        abort(404)
    if site._connections[connection]['from'] != site.current:
        abort(500)
    action = site.get_action_from_id(connection)
    action.do()
    return jsonify(gexf=etree.tostring(site.get_gexf()).decode('utf-8'), current_page=site.current)
Example 10
 def test_get_action(self):
     from Site.Site import Site
     from Main.Action import Action
     site_url = "http://%s.url/for/page" % uuid.uuid4()
     site = Site(site_url)
     url = str(uuid.uuid4())
     id = site.add_connection_to_current_page(Action.ActionType.CLICK, "[href='%s']" % url, 0)
     action = site.get_action({'connection': {'data': {'url': url}}, 'id': id})
     assert action._type == Action.ActionType.CLICK
     assert action.connection == id
Example 11
def node_details(node):
    config = Configuration()
    site = Site()
    if node == 'start':
        return jsonify(url='start', html='start', has_path=False, connections=site.get_actions_from_page(node))
    if node not in site._pages:
        abort(404)
    page = site._pages[node]
    has_path = site.get_actions_to(node) is not None
    return jsonify(url=page.url, html=page.html_source, has_path=has_path, connections=site.get_actions_from_page(node))
Example 12
 def test_adding_one_page(self):
     from Site.Site import Site
     site = Site('%s.url' % uuid.uuid4())
     assert len(site._pages) == 1
     url = str(uuid.uuid4())
     page = site.current_page("<html></html>", url)
     assert len(site._pages) == 2
     assert page._url == url
     assert page._interests == []
     assert page._calls == []
Example 13
 def test_adding_same_page_twice(self):
     from Site.Site import Site
     site = Site('%s.url' % uuid.uuid4())
     assert len(site._pages) == 1
     page = site.current_page("<html></html>", 'http://google.com/')
     assert len(site._pages) == 2
     call = str(uuid.uuid4())
     page.add_call(call)
     page2 = site.current_page("<html></html>", 'http://google.com/')
     assert len(site._pages) == 2
     assert page2._calls == [call]
Example 14
def start():
    starter = request.get_json()
    Configuration().browser = starter['browser']
    if starter['proxy'] == 'no proxy':
        Configuration().proxy_path = None
    else:
        Configuration().proxy_path = starter['proxy_path']
    site = Site(starter['name'])
    browser = Browser()
    browser.start(site)
    return jsonify(gexf=etree.tostring(site.get_gexf()).decode('utf-8'), current_page=site.current)
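A minimal sketch of exercising start() with Flask's test client; the /start route and the app object are assumptions, since the excerpt omits the routing:

import json

def call_start(app):
    client = app.test_client()
    payload = {'name': 'example-site', 'browser': 'Firefox',
               'proxy': 'no proxy', 'proxy_path': None}
    response = client.post('/start', data=json.dumps(payload),
                           content_type='application/json')
    body = response.get_json()
    return body['gexf'], body['current_page']  # GEXF graph and current page id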
Example 15
    def siteSign(self, address, privatekey=None, inner_path="content.json", publish=False, remove_missing_optional=False):
        from Site.Site import Site
        from Site import SiteManager
        from Debug import Debug
        SiteManager.site_manager.load()
        logging.info("Signing site: %s..." % address)
        site = Site(address, allow_create=False)

        if not privatekey:  # If no privatekey defined
            from User import UserManager
            user = UserManager.user_manager.get()
            if user:
                site_data = user.getSiteData(address)
                privatekey = site_data.get("privatekey")
            else:
                privatekey = None
            if not privatekey:
                # Not found in users.json, ask from console
                import getpass
                privatekey = getpass.getpass("Private key (input hidden):")
        try:
            succ = site.content_manager.sign(inner_path=inner_path, privatekey=privatekey, update_changed_files=True, remove_missing_optional=remove_missing_optional)
        except Exception as err:
            logging.error("Sign error: %s" % Debug.formatException(err))
            succ = False
        if succ and publish:
            self.sitePublish(address, inner_path=inner_path)
Example 16
    def siteVerify(self, address):
        import time
        from Site.Site import Site
        from Site import SiteManager
        SiteManager.site_manager.load()

        s = time.time()
        logging.info("Verifying site: %s..." % address)
        site = Site(address)
        bad_files = []

        for content_inner_path in site.content_manager.contents:
            file_start = time.time()
            logging.info("Verifying %s signature..." % content_inner_path)
            err = None
            try:
                file_correct = site.content_manager.verifyFile(
                    content_inner_path, site.storage.open(content_inner_path, "rb"), ignore_same=False
                )
            except Exception as verify_err:
                file_correct = False
                err = verify_err  # "as" targets are unbound after the except block, so keep a reference

            if file_correct is True:
                logging.info("[OK] %s (Done in %.3fs)" % (content_inner_path, time.time() - file_start))
            else:
                logging.error("[ERROR] %s: invalid file: %s!" % (content_inner_path, err))
                input("Continue?")
                bad_files.append(content_inner_path)  # append the path; += would extend the list with its characters

        logging.info("Verifying site files...")
        bad_files += site.storage.verifyFiles()["bad_files"]
        if not bad_files:
            logging.info("[OK] All file sha512sum matches! (%.3fs)" % (time.time() - s))
        else:
            logging.error("[ERROR] Error while verifying site files!")
Example 17
    def siteAnnounce(self, address):
        import time
        from Site.Site import Site
        from Site import SiteManager
        SiteManager.site_manager.load()

        logging.info("Opening a simple connection server")
        global file_server
        from File import FileServer
        file_server = FileServer("127.0.0.1", 1234)
        file_server.start()

        logging.info("Announcing site %s to tracker..." % address)
        site = Site(address)

        s = time.time()
        site.announce()
        print("Response time: %.3fs" % (time.time() - s))
        print(site.peers)
Example 18
def site(request):
    threads_before = [
        obj for obj in gc.get_objects() if isinstance(obj, gevent.Greenlet)
    ]
    # Reset ratelimit
    RateLimit.queue_db = {}
    RateLimit.called_db = {}

    site = Site("1TeSTvb4w2PWE81S2rEELgmX2GCCExQGT")

    # Always use original data
    assert "1TeSTvb4w2PWE81S2rEELgmX2GCCExQGT" in site.storage.getPath("")  # Make sure we don't delete everything
    shutil.rmtree(site.storage.getPath(""), True)
    shutil.copytree(site.storage.getPath("") + "-original", site.storage.getPath(""))

    # Add to site manager
    SiteManager.site_manager.get("1TeSTvb4w2PWE81S2rEELgmX2GCCExQGT")
    site.announce = mock.MagicMock(
        return_value=True)  # Don't try to find peers from the net

    def cleanup():
        site.storage.deleteFiles()
        site.content_manager.contents.db.deleteSite(site)
        del SiteManager.site_manager.sites["1TeSTvb4w2PWE81S2rEELgmX2GCCExQGT"]
        site.content_manager.contents.db.close()
        SiteManager.site_manager.sites.clear()
        db_path = "%s/content.db" % config.data_dir
        os.unlink(db_path)
        del ContentDb.content_dbs[db_path]
        gevent.killall([
            obj for obj in gc.get_objects()
            if isinstance(obj, gevent.Greenlet) and obj not in threads_before
        ])

    request.addfinalizer(cleanup)

    site = Site("1TeSTvb4w2PWE81S2rEELgmX2GCCExQGT")  # Create a new Site object to load the content.json files
    if not SiteManager.site_manager.sites:
        SiteManager.site_manager.sites = {}
    SiteManager.site_manager.sites["1TeSTvb4w2PWE81S2rEELgmX2GCCExQGT"] = site
    return site
Example 19
    def dbQuery(self, address, query):
        from Site.Site import Site
        from Site import SiteManager
        SiteManager.site_manager.load()

        import json
        site = Site(address)
        result = []
        for row in site.storage.query(query):
            result.append(dict(row))
        print(json.dumps(result, indent=4))
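A hypothetical call, again assuming an actions instance; the address and query are placeholders (site databases built from a dbschema conventionally expose a json table):

actions.dbQuery("1TeSTvb4w2PWE81S2rEELgmX2GCCExQGT",
                "SELECT * FROM json LIMIT 5")  # prints the matching rows as indented JSON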
Example 20
    def siteNeedFile(self, address, inner_path):
        from Site.Site import Site
        from Site import SiteManager
        SiteManager.site_manager.load()

        def checker():
            # Report how long each one-second sleep actually takes (greenlet switch delay)
            while True:
                s = time.time()
                time.sleep(1)
                print("Switch time:", time.time() - s)
        gevent.spawn(checker)

        logging.info("Opening a simple connection server")
        global file_server
        from File import FileServer
        file_server = FileServer("127.0.0.1", 1234)
        file_server_thread = gevent.spawn(file_server.start, check_sites=False)

        site = Site(address)
        site.announce()
        print(site.needFile(inner_path, update=True))
Example 21
def check_website(url):
    config = Configuration()
    site = Site(url)

    browser = Browser()
    browser.start(site)

    # Strange behaviour from BrowserMob Proxy: DNS doesn't always work
    browser.add_remap_urls([urlparse(url).hostname])

    browser.get(url)
    browser.study_state()
    actions = site.get_first_connection_unexplored()
    while actions is not None:
        logging.info('%s action(s) needed to reach this connection' % (len(actions)))
        for action in actions:
            action.do()
        browser.study_state()
        actions = site.get_first_connection_unexplored()
    site.show_graph()
    browser.stop()
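check_website() is the full exploration loop: load the site, record the current state, then repeatedly replay the shortest action path to the first unexplored connection until none remain. A hypothetical entry point:

if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    check_website('http://example.com/')  # placeholder URL; any reachable site works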
Example 22
 def test_basic(self):
     from Main.Configuration import Configuration
     from Site.Site import Site
     from Main.Browser import Browser
     from tests.DummyBrowser import DummyBrowser
     dummy = DummyBrowser(random.random())
     config = Configuration(['-b', 'Dummy'])
     config._browser = 'Dummy'
     site_name = str(uuid.uuid4())
     site = Site(site_name)
     browser = Browser(random.random())
     browser.start(site)
     url = "http://{0}/page".format(uuid.uuid4())
     browser.get(url)
     browser.study_state()
     assert len(dummy.actions) == 3
     action_n = 0
     assert dummy.actions[action_n] == {'action': 'get', 'target': url}
     action_n += 1
     assert dummy.actions[action_n]['action'] == 'find_elements_by_tag_name'
     assert dummy.actions[action_n]['target'] == 'input'
     action_n += 1
     assert dummy.actions[action_n]['action'] == 'find_elements_by_tag_name'
     assert dummy.actions[action_n]['target'] == 'a'
     assert len(site._pages) == 2
     assert len(site._connections) == dummy.actions[action_n]['nb'] + 1
     assert len([explored for explored in list(site._connections.values()) if explored['explored']]) == 1
     assert len([unexplored for unexplored in list(site._connections.values()) if not unexplored['explored']]) == dummy.actions[action_n]['nb']
     actions = site.get_first_connection_unexplored()
     assert len(actions) == 1
     actions[0].do()
     assert len(dummy.actions) == 5
     action_n += 1
     assert dummy.actions[action_n]['action'] == 'find_elements_by_css_selector'
     action_n += 1
     assert dummy.actions[action_n]['action'] == 'element.click'
     browser.stop()
     assert len(dummy.actions) == 6
     action_n += 1
     assert dummy.actions[action_n]['action'] == 'quit'
Example 23
    def siteDownload(self, address):
        from Site.Site import Site
        from Site import SiteManager
        SiteManager.site_manager.load()

        logging.info("Opening a simple connection server")
        global file_server
        from File import FileServer
        file_server = FileServer("127.0.0.1", 1234)
        file_server_thread = gevent.spawn(file_server.start, check_sites=False)

        site = Site(address)

        on_completed = gevent.event.AsyncResult()

        def onComplete(evt):
            evt.set(True)

        site.onComplete.once(lambda: onComplete(on_completed))
        print("Announcing...")
        site.announce()

        s = time.time()
        print("Downloading...")
        site.downloadContent("content.json", check_modifications=True)

        print("Downloaded in %.3fs" % (time.time()-s))
Example 24
 def test_with_browsermobproxy(self):
     from Main.Configuration import Configuration
     from Site.Site import Site
     from Main.Browser import Browser
     from tests.DummyBrowser import DummyBrowser
     dummy = DummyBrowser(random.random())
     config = Configuration(['-b', 'Dummy', '--env', '--proxy-path', '/home/vagrant/browsermob-proxy-2.0-beta-9/bin/'])
     assert config.proxy_path is not None
     config._browser = 'Dummy'
     site_name = str(uuid.uuid4())
     site = Site(site_name)
     browser = Browser(random.random())
     browser.start(site)
     url = "http://{0}/page".format(uuid.uuid4())
     browser.get(url)
     browser.add_remap_urls(['localhost'])
     browser.study_state()
     assert len(dummy.actions) == 3
     assert dummy.actions[0] == {'action': 'get', 'target': url}
     assert dummy.actions[1]['action'] == 'find_elements_by_tag_name'
     assert dummy.actions[1]['target'] == 'input'
     assert dummy.actions[2]['action'] == 'find_elements_by_tag_name'
     assert dummy.actions[2]['target'] == 'a'
     assert len(site._pages) == 2
     assert len(site._connections) == dummy.actions[2]['nb'] + 1
     assert len(site._connections) > 1
     assert len([explored for explored in list(site._connections.values()) if explored['explored']]) == 1
     actions = site.get_first_connection_unexplored()
     assert len(actions) == 1
     actions[0].do()
     assert len(dummy.actions) == 5
     assert dummy.actions[3]['action'] == 'find_elements_by_css_selector'
     assert dummy.actions[4]['action'] == 'element.click'
     browser.stop()
     assert len(dummy.actions) == 6
     assert dummy.actions[5]['action'] == 'quit'
Example 25
def site_temp(request):
    threads_before = [
        obj for obj in gc.get_objects() if isinstance(obj, gevent.Greenlet)
    ]
    with mock.patch("Config.config.data_dir", config.data_dir + "-temp"):
        site_temp = Site("1TeSTvb4w2PWE81S2rEELgmX2GCCExQGT")
        site_temp.announce = mock.MagicMock(
            return_value=True)  # Don't try to find peers from the net

    def cleanup():
        site_temp.storage.deleteFiles()
        site_temp.content_manager.contents.db.deleteSite(site_temp)
        site_temp.content_manager.contents.db.close()
        time.sleep(0.01)  # Wait for db close
        db_path = "%s-temp/content.db" % config.data_dir
        os.unlink(db_path)
        del ContentDb.content_dbs[db_path]
        gevent.killall([
            obj for obj in gc.get_objects()
            if isinstance(obj, gevent.Greenlet) and obj not in threads_before
        ])

    request.addfinalizer(cleanup)
    return site_temp
Example 26
 def test_update_current_page(self):
     from Site.Site import Site
     site = Site("http://%s.url/for/page" % uuid.uuid4())
     page1_id = str(uuid.uuid4())
     page1_content = str(uuid.uuid4())
     page2_id = str(uuid.uuid4())
     page2_content = str(uuid.uuid4())
     page3_content = str(uuid.uuid4())
     site._pages[page1_id] = page1_content
     site._pages[page2_id] = page2_content
     site._current = page1_id
     site.update_current_page(page3_content)
     assert site._pages[page1_id] == page3_content
     assert site._pages[page2_id] == page2_content
     assert site.get_current_page() == page3_content
Example 27
 def test_add_link(self):
     from Site.Site import Site
     url = "http://%s.url/for/page" % uuid.uuid4()
     site = Site(url)
     current_page = uuid.uuid4()
     site._current = current_page
     url = 'http://test.com/about.html'
     connection1 = {'from': current_page, 'type': Site().ConnectionTypes.LINK, 'explored': False, 'to': None, 'data': {'url': url}}
     site.add_link(url)
     assert len(site._connections) == 1
     connection_check = connection1
     connection_check['data'] = {'url': url, 'nb': 0, 'css': '[href="{0}"]'.format(url)}
     assert list(site._connections.values())[0] == connection_check
     site.add_link(url)
     assert len(site._connections) == 1
Example 28
def go_to_url():
    call = request.get_json()
    browser = Browser()
    site = Site()
    browser.get(call['url'])
    return jsonify(gexf=etree.tostring(site.get_gexf()).decode('utf-8'), current_page=site.current)
Example 29
 def create_site(self):
     from Site.Site import Site
     from Site.Page import Page
     from Main.Action import Action
     site_url = "http://%s.url/for/page" % uuid.uuid4()
     site = Site(site_url)
     node_start = str(uuid.uuid4())
     node_1 = str(uuid.uuid4())
     node_2 = str(uuid.uuid4())
     node_end = str(uuid.uuid4())
     connection_0_id = str(uuid.uuid4())
     connection_0_content = {'from': 'start', 'to': node_start, 'explored': True, 'type': site.ConnectionTypes.START, 'data': {'url': site_url}}
     connection_1_id = str(uuid.uuid4())
     connection_1_url = str(uuid.uuid4())
     connection_1_content = {'from': node_start, 'to': node_1, 'explored': True, 'type': site.ConnectionTypes.LINK, 'data': {'url': connection_1_url}}
     connection_2_id = str(uuid.uuid4())
     connection_2_url = str(uuid.uuid4())
     connection_2_content = {'from': node_1, 'to': node_2, 'explored': True, 'type': site.ConnectionTypes.LINK, 'data': {'url': connection_2_url}}
     connection_3_id = str(uuid.uuid4())
     connection_3_url = str(uuid.uuid4())
     connection_3_content = {'from': node_2, 'to': node_end, 'explored': True, 'type': site.ConnectionTypes.LINK, 'data': {'url': connection_3_url}}
     connection_4_id = str(uuid.uuid4())
     connection_4_url = str(uuid.uuid4())
     connection_4_content = {'from': node_2, 'to': node_1, 'explored': True, 'type': site.ConnectionTypes.LINK, 'data': {'url': connection_4_url}}
     connection_5_id = str(uuid.uuid4())
     connection_5_url = str(uuid.uuid4())
     connection_5_content = {'from': node_start, 'to': node_2, 'explored': True, 'type': site.ConnectionTypes.LINK, 'data': {'url': connection_5_url}}
     connection_6_id = str(uuid.uuid4())
     connection_6_url = str(uuid.uuid4())
     connection_6_content = {'from': node_end, 'to': None, 'explored': False, 'type': site.ConnectionTypes.LINK, 'data': {'url': connection_6_url}}
     site._connections[connection_0_id] = connection_0_content
     site._connections[connection_1_id] = connection_1_content
     site._connections[connection_2_id] = connection_2_content
     site._connections[connection_3_id] = connection_3_content
     site._connections[connection_4_id] = connection_4_content
     site._connections[connection_5_id] = connection_5_content
     site._connections[connection_6_id] = connection_6_content
     html_start = '<html><body>%s</body></html>' % uuid.uuid4()
     site._pages[node_start] = Page('node_start', html_start)
     html_1 = '<html><body>%s</body></html>' % uuid.uuid4()
     site._pages[node_1] = Page('node_1', html_1)
     html_2 = '<html><body>%s</body></html>' % uuid.uuid4()
     site._pages[node_2] = Page('node_2', html_2)
     html_end = '<html><body>%s</body></html>' % uuid.uuid4()
     site._pages[node_end] = Page('node_end', html_end)
     site._current = node_start
     return site
Example 30
 def test_on_path_finding(self):
     from Site.Site import Site
     from Site.Page import Page
     from Main.Action import Action
     site_url = "http://%s.url/for/page" % uuid.uuid4()
     site = Site(site_url)
     node_start = str(uuid.uuid4())
     node_1 = str(uuid.uuid4())
     node_2 = str(uuid.uuid4())
     node_end = str(uuid.uuid4())
     connection_0_id = str(uuid.uuid4())
     connection_0_content = {'from': 'start', 'to': node_start, 'explored': True, 'type': site.ConnectionTypes.START, 'data': {'url': site_url}}
     connection_1_id = str(uuid.uuid4())
     connection_1_data = {'css': str(uuid.uuid4()), 'nb': 0}
     connection_1_content = {'from': node_start, 'to': node_1, 'explored': True, 'type': site.ConnectionTypes.LINK, 'data': connection_1_data}
     connection_2_id = str(uuid.uuid4())
     connection_2_data = {'css': str(uuid.uuid4()), 'nb': 0}
     connection_2_content = {'from': node_1, 'to': node_2, 'explored': True, 'type': site.ConnectionTypes.LINK, 'data': connection_2_data}
     connection_3_id = str(uuid.uuid4())
     connection_3_data = {'css': str(uuid.uuid4()), 'nb': 0}
     connection_3_content = {'from': node_2, 'to': node_end, 'explored': True, 'type': site.ConnectionTypes.LINK, 'data': connection_3_data}
     connection_4_id = str(uuid.uuid4())
     connection_4_data = {'css': str(uuid.uuid4()), 'nb': 0}
     connection_4_content = {'from': node_2, 'to': node_1, 'explored': True, 'type': site.ConnectionTypes.LINK, 'data': connection_4_data}
     connection_5_id = str(uuid.uuid4())
     connection_5_data = {'css': str(uuid.uuid4()), 'nb': 0}
     connection_5_content = {'from': node_start, 'to': node_2, 'explored': True, 'type': site.ConnectionTypes.LINK, 'data': connection_5_data}
     connection_6_id = str(uuid.uuid4())
     connection_6_data = {'css': str(uuid.uuid4()), 'nb': 0}
     connection_6_content = {'from': node_end, 'to': None, 'explored': False, 'type': site.ConnectionTypes.LINK, 'data': connection_6_data}
     site._connections[connection_0_id] = connection_0_content
     site._connections[connection_1_id] = connection_1_content
     site._connections[connection_2_id] = connection_2_content
     site._connections[connection_3_id] = connection_3_content
     site._connections[connection_4_id] = connection_4_content
     site._connections[connection_5_id] = connection_5_content
     site._connections[connection_6_id] = connection_6_content
     html_start = '<html><body>%s</body></html>' % uuid.uuid4()
     site._pages[node_start] = Page('node_start', html_start)
     html_1 = '<html><body>%s</body></html>' % uuid.uuid4()
     site._pages[node_1] = Page('node_1', html_1)
     html_2 = '<html><body>%s</body></html>' % uuid.uuid4()
     site._pages[node_2] = Page('node_2', html_2)
     html_end = '<html><body>%s</body></html>' % uuid.uuid4()
     site._pages[node_end] = Page('node_end', html_end)
     site._current = node_start
     path = site.find_shortest_path(node_start, node_end)
     assert path[0] == {'connection': {'from': node_start, 'to': node_2, 'explored': True, 'type': site.ConnectionTypes.LINK, 'data': connection_5_data}, 'id': connection_5_id}
     assert path[1] == {'connection': {'from': node_2, 'to': node_end, 'explored': True, 'type': site.ConnectionTypes.LINK, 'data': connection_3_data}, 'id': connection_3_id}
     actions = site.get_actions_to(node_end)
     assert len(actions) == 2
     assert actions[0]._type == Action.ActionType.CLICK
     assert actions[0].connection == connection_5_id
     assert actions[1]._type == Action.ActionType.CLICK
     assert actions[1].connection == connection_3_id
     assert site.get_distance_to(connection_6_id) == 2
     actions = site.get_first_connection_unexplored()
     assert len(actions) == 3
     assert actions[0]._type == Action.ActionType.CLICK
     assert actions[0].connection == connection_5_id
     assert actions[1]._type == Action.ActionType.CLICK
     assert actions[1].connection == connection_3_id
     assert actions[2]._type == Action.ActionType.CLICK
     assert actions[2].connection == connection_6_id
     site._current = node_end
     actions = site.get_first_connection_unexplored()
     assert len(actions) == 1
     assert actions[0]._type == Action.ActionType.CLICK
     assert actions[0].connection == connection_6_id
     html = '<html><body><div>new_page</div></body></html>'
     page = site.current_page(html, '', connection_6_id)
     assert site._pages[site.get_uniq_id(html, '')] == page
     assert site._connections[connection_6_id]['explored'] is True
     assert site._connections[connection_6_id]['to'] == site.get_uniq_id(html, '')
     actions = site.get_first_connection_unexplored()
     assert actions is None
     gexf_site = site.get_gexf()
     assert gexf_site.xpath('//meta/creator')[0].text == "Pallas"
     assert gexf_site.xpath('//meta/description')[0].text == site_url
     assert len(gexf_site.xpath('//nodes/node')) == 6
     assert len(gexf_site.xpath('//edges/edge')) == 7
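The path assertions above imply that find_shortest_path() runs a breadth-first search over the _connections mapping and returns each hop as a {'connection': ..., 'id': ...} dict. A standalone sketch of such a search, as an illustration rather than the project's code:

from collections import deque

def shortest_path(connections, start, goal):
    # connections: {id: {'from': node, 'to': node_or_None, ...}}
    queue = deque([(start, [])])
    visited = {start}
    while queue:
        node, path = queue.popleft()
        if node == goal:
            return path  # list of {'connection': ..., 'id': ...} hops
        for conn_id, conn in connections.items():
            if conn['from'] == node and conn['to'] is not None and conn['to'] not in visited:
                visited.add(conn['to'])
                queue.append((conn['to'], path + [{'connection': conn, 'id': conn_id}]))
    return None  # goal unreachable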
Example 31
    def testFindOptional(self, file_server, site, site_temp):
        # Init source server
        site.connection_server = file_server
        file_server.sites[site.address] = site

        # Init full source server (has optional files)
        site_full = Site("1TeSTvb4w2PWE81S2rEELgmX2GCCExQGT")
        file_server_full = FileServer(file_server.ip, 1546)
        site_full.connection_server = file_server_full

        def listen():
            ConnectionServer.start(file_server_full)
            ConnectionServer.listen(file_server_full)

        gevent.spawn(listen)
        time.sleep(0.001)  # Port opening
        file_server_full.sites[site_full.address] = site_full  # Add site
        site_full.storage.verifyFiles(quick_check=True)  # Check optional files
        site_full_peer = site.addPeer(file_server.ip,
                                      1546)  # Add it to source server
        hashfield = site_full_peer.updateHashfield()  # Update hashfield
        assert len(site_full.content_manager.hashfield) == 8
        assert hashfield
        assert site_full.storage.isFile("data/optional.txt")
        assert site_full.storage.isFile(
            "data/users/1CjfbrbwtP8Y2QjPy12vpTATkUT7oSiPQ9/peanut-butter-jelly-time.gif"
        )
        assert len(site_full_peer.hashfield) == 8

        # Remove hashes from source server
        for hash in list(site.content_manager.hashfield):
            site.content_manager.hashfield.remove(hash)

        # Init client server
        site_temp.connection_server = ConnectionServer(file_server.ip, 1545)
        site_temp.addPeer(file_server.ip, 1544)  # Add source server

        # Download normal files
        site_temp.log.info("Start Downloading site")
        site_temp.download(blind_includes=True).join(timeout=5)

        # Download optional data/optional.txt
        optional_file_info = site_temp.content_manager.getFileInfo(
            "data/optional.txt")
        optional_file_info2 = site_temp.content_manager.getFileInfo(
            "data/users/1CjfbrbwtP8Y2QjPy12vpTATkUT7oSiPQ9/peanut-butter-jelly-time.gif"
        )
        assert not site_temp.storage.isFile("data/optional.txt")
        assert not site_temp.storage.isFile(
            "data/users/1CjfbrbwtP8Y2QjPy12vpTATkUT7oSiPQ9/peanut-butter-jelly-time.gif"
        )
        assert not site.content_manager.hashfield.hasHash(
            optional_file_info["sha512"]
        )  # Source server doesn't know it has the file
        assert not site.content_manager.hashfield.hasHash(
            optional_file_info2["sha512"]
        )  # Source server doesn't know it has the file
        assert site_full_peer.hashfield.hasHash(
            optional_file_info["sha512"]
        )  # Source full peer on source server has the file
        assert site_full_peer.hashfield.hasHash(
            optional_file_info2["sha512"]
        )  # Source full peer on source server has the file
        assert site_full.content_manager.hashfield.hasHash(
            optional_file_info["sha512"])  # Source full server has the file
        assert site_full.content_manager.hashfield.hasHash(
            optional_file_info2["sha512"]
        )  # Source full server has the file

        site_temp.log.info("Request optional files")
        with Spy.Spy(FileRequest, "route") as requests:
            # Request two files at the same time
            threads = []
            threads.append(
                site_temp.needFile("data/optional.txt", blocking=False))
            threads.append(
                site_temp.needFile(
                    "data/users/1CjfbrbwtP8Y2QjPy12vpTATkUT7oSiPQ9/peanut-butter-jelly-time.gif",
                    blocking=False))
            gevent.joinall(threads)

            assert len([
                request for request in requests if request[1] == "findHashIds"
            ]) == 1  # findHashIds should be called only once

        assert site_temp.storage.isFile("data/optional.txt")
        assert site_temp.storage.isFile(
            "data/users/1CjfbrbwtP8Y2QjPy12vpTATkUT7oSiPQ9/peanut-butter-jelly-time.gif"
        )

        assert site_temp.storage.deleteFiles()
        file_server_full.stop()
        for connection in file_server.connections:
            connection.close()
        site_full.content_manager.contents.db.close()