Esempio n. 1
    def node_two_selection(self, tree_view, path, column):
        ''' Event handler for list items in the node 2 grid '''
        # `path` and `column` are not read anywhere in the visible body.
        # Block event handlers so we don't fire as we update them
        # NOTE(review): no handler-blocking call is visible after the comment
        # above in this excerpt -- confirm against the full file.

        # Indices of the path returned as the first element of get_cursor()
        row_num = tree_view.get_cursor()[0].get_indices()
        # Collect the entries of self.node_lists[2] whose title equals the
        # first column of the selected model row.
        # NOTE(review): the list is not closed in this excerpt; the code below
        # treats `node` as a single object (node.title), so an index such as
        # [0] presumably followed -- TODO confirm.
        node = [
            x for x in self.node_lists[2]
            if x.title == tree_view.get_model()[row_num][0]

        self.logger.debug('Selected Node: ' + node.title)

        spider = Spider()

        # Only ask the spider to build the node when it has no children yet
        if node.children is None:
            node = spider.build_node(node.base_url, node.parent, 1)

        # NOTE(review): the argument list of this call is cut off in this
        # excerpt -- the remaining arguments are not visible.
        self.set_node_lists(self.node_lists[1], self.node_lists[2],



Esempio n. 2
    def node_zero_selection(self, tree_view, path, column):
        ''' Event handler for list items in the node 0 grid '''
        # NOTE(review): this excerpt appears to be missing lines: the body is
        # indented one level deeper than the docstring and ends in an `except`
        # with no visible `try:` -- confirm against the full file.
            # Block event handlers so we don't fire as we update them

            # Grab the currently selected node
            row_num = tree_view.get_cursor()[0].get_indices()

            # Find the node in self.node_lists[0] whose title matches the
            # first column of the selected model row
            # NOTE(review): the list is not closed in this excerpt; `node` is
            # used as a single object below, so an index such as [0]
            # presumably followed -- TODO confirm.
            node = [
                x for x in self.node_lists[0]
                if x.title == tree_view.get_model()[row_num][0]
            self.logger.debug('Selected Node: ' + node.title)
            spider = Spider()

            # Build our node out only if we don't have children
            if node.children is None:
                node = spider.build_node(node.base_url, node.parent, 1)

            # If this is the root node
            # NOTE(review): the body of this branch is not visible here.
            if node.parent is None:
            # If this node's parent is the root, we can't set its peers
            elif node.parent.parent is None:
                self.set_node_lists(node.parent, node.parent.children,

            # If this node's parent has peers (i.e. its parent has a parent with children)
            # We set the children to be the node0 list
                                    node.parent.children, node.children)


        except Exception as e:
Esempio n. 3
class TestSpider(unittest.TestCase):
    ''' Tests the Spider class and methods '''
    def setUp(self):
        ''' Give every test case its own freshly constructed Spider '''
        fresh_spider = Spider()
        self.spider = fresh_spider

    def test_get_page_calls_url_open(self, mock_open):
        ''' Validate that we are calling the module with the passed in URL'''
        # NOTE(review): only the docstring is visible in this excerpt -- the
        # decorator that would supply `mock_open`, the call under test and the
        # assertion are not shown. Confirm against the full file.

    @patch('spider.Spider.logger.error') # Remember the mock vars are in reverse order
    def test_get_page_raises_exception(self, mock_open,mock_logger):
        ''' Validate that get_page() logs and rethrows the exception'''
        # Make mock_open raise a URLError whenever it is called
        mock_open.side_effect = URLError('Test Exception')
        # NOTE(review): the signature takes two mocks but only one @patch is
        # visible, and no call to get_page() or assertion follows in this
        # excerpt -- confirm against the full file.


    def test_get_soup_calls_bs4_init(self, mock_soup):
        ''' Validate that we are calling the module with the passed in URL'''
        # The mocked soup callable returns None instead of a parsed document
        mock_soup.return_value = None

        # We want to validate that soup was called with 'test',
        # but we don't want the test to rely on the 'lxml' parser,
        # so we err on the side of a slightly fragile test.
        # NOTE(review): the call under test and the assertion these comments
        # describe are not visible in this excerpt -- confirm against the full file.

    def test_get_soup_raises_exception(self, mock_soup,mock_logger):
        ''' Validate that get_soup() logs and rethrows the exception'''
        # In reality this would be an HTMLParser.HTMLParseError or something,
        # but instead of including all those dependencies, let's just validate
        # that it passes the exception through.
        mock_soup.side_effect = ValueError('Test Exception')
        # NOTE(review): no call to get_soup() or assertion is visible after
        # this line in this excerpt -- confirm against the full file.


    def test_get_links_with_absolute_link_returns_page_links(self, mock_logger,mock_soup,mock_find_all, mock_parse):
        ''' With the external calls mocked, absolute links come back without the prefix added '''
        link_prefix = 'test'
        expected_links = ['page_one', 'page_two']

        # The mocked soup instance hands back one anchor-like dict per link
        anchors = []
        for href in expected_links:
            anchors.append({'href': href})
        soup_instance = mock_soup()
        soup_instance.find_all.return_value = anchors

        # Give the mocked parse result a truthy netloc, i.e. report an absolute link
        parse_result = mock_parse()
        parse_result.netloc = True

        found_links = self.spider.get_links('page', link_prefix)
        assert_that(found_links, is_(equal_to(expected_links)))

    def test_get_links_with_relative_link_returns_page_links(self, mock_logger,mock_soup,mock_find_all, mock_parse):
        ''' With the external calls mocked, relative links come back with the prefix prepended '''
        link_prefix = 'test'
        relative_links = ['page_one', 'page_two']

        # The mocked soup instance hands back one anchor-like dict per link
        anchors = []
        for href in relative_links:
            anchors.append({'href': href})
        soup_instance = mock_soup()
        soup_instance.find_all.return_value = anchors

        # Give the mocked parse result a falsy netloc, i.e. report a relative link
        parse_result = mock_parse()
        parse_result.netloc = False

        found_links = self.spider.get_links('page', link_prefix)
        expected_links = [link_prefix + href for href in relative_links]
        assert_that(found_links, is_(equal_to(expected_links)))

    def test_get_links_with_invalid_link_logs_exception(self, mock_logger,mock_soup,mock_find_all, mock_parse):
        ''' With the external calls mocked, a parse error is logged and not thrown '''
        link_prefix = 'test'
        raw_links = ['page_one', 'page_two']

        # The mocked soup instance hands back one anchor-like dict per link
        anchors = []
        for href in raw_links:
            anchors.append({'href': href})
        soup_instance = mock_soup()
        soup_instance.find_all.return_value = anchors

        # Every call to the mocked parser raises
        mock_parse.side_effect = ValueError('')

        found_links = self.spider.get_links('page', link_prefix)

        # All links fail, so nothing is returned ...
        no_links = []
        assert_that(found_links, is_(equal_to(no_links)))
        # ... and the failure must have reached the logger mock
        mock_logger.assert_called()

    def test_filter_links_filters_on_prefix(self):
        ''' Test that filter_links will filter out the invalid links '''
        wiki_prefix = 'abc'
        self.spider.wiki_prefix = wiki_prefix

        candidates = ['abc/wiki/1', 'abc/wiki/2', '3', 'any_other/domain/abc']
        kept_links = self.spider.filter_links(candidates)

        # Only the first two candidates are expected to survive the filter
        expected_links = ['abc/wiki/1', 'abc/wiki/2']
        assert_that(kept_links, is_(equal_to(expected_links)))

    def test_build_title_from_url_returns_title(self, mock_parse):
        ''' Mock urllib and ensure that we call it with the last part of the URL'''
        # NOTE(review): no call to build_title_from_url() is visible before
        # this assertion in this excerpt, so as shown the mock would never
        # have been called -- confirm against the full file.
        mock_parse.assert_called_with(' test name ')

    def test_build_title_from_url_with_invalid_url_raises_valueerror(self, mock_logger):
        ''' Ensure build_title_from_url() raises ValueError for this input '''
        bad_title = 'any other title really '
        # Wrap the call so the matcher can invoke it and inspect the exception
        deferred_call = calling(self.spider.build_title_from_url).with_args(bad_title)
        assert_that(deferred_call, raises(ValueError))
    def test_build_node_with_0_depth_returns_node(self):
        ''' Ensure build_node() at depth 0 returns a node equal to the expected WikiNode '''
        root_url = ''
        no_parent = None

        wanted = WikiNode(root_url, 'title', no_parent)
        built = self.spider.build_node(root_url, no_parent, 0)

        assert_that(built, is_(equal_to(wanted)))

    def test_build_node_with_1_depth_calls_build_node_on_children(self, mock_soup, mock_page, mock_links, mock_filter):
        ''' Ensure that we recursively call build_node on children and build the node correctly'''
        # NOTE(review): the `urls` dict literal is not closed in this excerpt;
        # the code below also reads urls['a'] and urls['b'], so those entries
        # presumably followed -- TODO confirm against the full file.
        urls = {'title':'',

        # Expected result: a node with two children attached
        expected_node = WikiNode(urls['title'],'title','parent')
        expected_child_a = WikiNode(urls['a'],'a',expected_node)
        # NOTE(review): child b gets None as its third argument while child a
        # gets expected_node -- confirm this asymmetry is intended.
        expected_child_b = WikiNode(urls['b'],'b',None)
        expected_node.children = [expected_child_a, expected_child_b]

        # The mocked filter returns the two child URLs, b before a
        mock_filter.return_value = [urls['b'], urls['a']]

        returned_node = self.spider.build_node(urls['title'], 'parent', 1)
        assert_that(returned_node, is_(equal_to(expected_node)))