コード例 #1
0
    def test_level_filter(self):
        '''Check ``LevelFilter`` verdicts for a range of levels.

        The same record is reused throughout; only its ``level`` and
        ``inline_level`` attributes are changed between assertions.
        '''
        def check(candidate_filter, expected):
            # Assert the filter's verdict on the shared record.
            verdict = candidate_filter.test(None, record)

            if expected:
                self.assertTrue(verdict)
            else:
                self.assertFalse(verdict)

        record = URLRecord()

        # A filter built with 0 accepts a record at level 4.
        record.level = 4
        level_filter = LevelFilter(0)
        check(level_filter, True)

        # Built with 5: level 5 is accepted, level 6 is rejected.
        level_filter = LevelFilter(5)
        for level, expected in ((5, True), (6, False)):
            record.level = level
            check(level_filter, expected)

        # With inline_level set to 1, levels 5 through 7 are accepted
        # and level 8 is rejected.
        level_filter = LevelFilter(5)
        record.inline_level = 1
        for level, expected in ((5, True), (6, True), (7, True), (8, False)):
            record.level = level
            check(level_filter, expected)

        # Built with 0 (record.level is still 8): inline levels 1 through 5
        # are accepted and inline level 6 is rejected.
        level_filter = LevelFilter(0)
        for inline_level in range(1, 6):
            record.inline_level = inline_level
            check(level_filter, True)
        record.inline_level = 6
        check(level_filter, False)

        record.level = 1

        # inline_max_depth=0 accepts even a very large inline level.
        level_filter = LevelFilter(0, inline_max_depth=0)
        record.inline_level = 1000
        check(level_filter, True)

        # inline_max_depth=1 accepts inline level 1 and rejects 2.
        level_filter = LevelFilter(5, inline_max_depth=1)
        for inline_level, expected in ((1, True), (2, False)):
            record.inline_level = inline_level
            check(level_filter, expected)
コード例 #2
0
    def test_recursive_filter_requisites(self):
        '''A record at level 0 with inline level 1 passes the filter.

        The filter under test is ``RecursiveFilter(page_requisites=True)``.
        '''
        url_filter = RecursiveFilter(page_requisites=True)

        record = URLRecord()
        record.inline_level = 1
        record.level = 0

        verdict = url_filter.test(None, record)
        self.assertTrue(verdict)
コード例 #3
0
ファイル: sqlmodel.py プロジェクト: Super-Rad/wpull
    def to_plain(self) -> URLRecord:
        '''Return a new ``URLRecord`` filled in from this object.

        Most attributes are copied unchanged. ``status`` is wrapped in
        ``Status`` and ``link_type`` is wrapped in ``LinkType`` when it is
        truthy, otherwise it is set to ``None``.
        '''
        plain = URLRecord()

        # Attributes that are copied over without any conversion.
        copied_as_is = (
            'url', 'parent_url', 'root_url', 'try_count', 'level',
            'inline_level', 'priority', 'post_data', 'status_code',
            'filename',
        )
        for attribute in copied_as_is:
            setattr(plain, attribute, getattr(self, attribute))

        # Attributes that are converted to their enumeration types.
        plain.status = Status(self.status)

        if self.link_type:
            plain.link_type = LinkType(self.link_type)
        else:
            plain.link_type = None

        return plain
コード例 #4
0
ファイル: sqlmodel.py プロジェクト: fakegit/ludios_wpull
    def to_plain(self) -> URLRecord:
        '''Build a ``URLRecord`` carrying the same values as this object.

        ``status`` is passed through ``Status``; ``link_type`` is passed
        through ``LinkType`` only when it is truthy and is ``None``
        otherwise. Every other attribute is copied unchanged.
        '''
        # (attribute name, value) pairs in the order they are assigned.
        values = (
            ('url', self.url),
            ('parent_url', self.parent_url),
            ('root_url', self.root_url),
            ('status', Status(self.status)),
            ('try_count', self.try_count),
            ('level', self.level),
            ('inline_level', self.inline_level),
            ('link_type',
             LinkType(self.link_type) if self.link_type else None),
            ('priority', self.priority),
            ('post_data', self.post_data),
            ('status_code', self.status_code),
            ('filename', self.filename),
        )

        plain_record = URLRecord()
        for name, value in values:
            setattr(plain_record, name, value)

        return plain_record
コード例 #5
0
    def child_url_record(self, url: str, inline: bool=False,
                         link_type: Optional[LinkType]=None,
                         post_data: Optional[str]=None,
                         level: Optional[int]=None):
        '''Return a child URLRecord.

        This function is useful for testing filters before adding to table.

        The child starts with status ``Status.todo`` and a try count of 0.
        Its level is ``level`` when given, otherwise the current record's
        level plus one. Its inline level is the current record's inline
        level (``None`` counts as 0) plus one when ``inline`` is true,
        otherwise 0. The root URL is inherited, falling back to the
        current record's own URL.
        '''
        parent = self.url_record

        if level is None:
            child_level = parent.level + 1
        else:
            child_level = level

        if inline:
            child_inline_level = (parent.inline_level or 0) + 1
        else:
            child_inline_level = 0

        child = URLRecord()
        child.url = url
        child.parent_url = parent.url
        child.root_url = parent.root_url or parent.url
        child.level = child_level
        child.inline_level = child_inline_level
        child.status = Status.todo
        child.try_count = 0
        child.link_type = link_type
        child.post_data = post_data

        return child
コード例 #6
0
    def test_parent_filter(self):
        '''Check ``ParentFilter`` verdicts against the record's root URL.

        URLs at or below the root URL's path are accepted, the directory
        above it is rejected, URLs on another host are accepted, and a
        record with inline level 1 is accepted for a URL above the root.
        '''
        record = URLRecord()
        url_filter = ParentFilter()

        def check(url, expected):
            # Assert the filter's verdict for ``url`` on the shared record.
            verdict = url_filter.test(URLInfo.parse(url), record)

            if expected:
                self.assertTrue(verdict)
            else:
                self.assertFalse(verdict)

        record.root_url = 'http://example.com/blog/topic2/'
        check('http://example.com/blog/topic2/', True)

        record.root_url = 'http://example.com/blog/topic1/'
        expectations = (
            ('http://example.com/blog/topic1/blah.html', True),
            ('https://example.com/blog/topic1/blah2.html', True),
            ('http://example.com/blog/', False),
            ('https://example.com/blog/', False),
            ('http://somewhere.com/', True),
            ('https://somewhere.com/', True),
        )
        for url, expected in expectations:
            check(url, expected)

        record.inline_level = 1
        check('http://example.com/styles.css', True)
コード例 #7
0
    def test_span_hosts_filter(self):
        '''Check ``SpanHostsFilter`` under each of its keyword options.

        Every filter is built from the hostname of
        ``http://example.com/blog/``; the options ``enabled``,
        ``page_requisites`` and ``linked_pages`` are exercised in turn.
        '''
        def build_filter(**options):
            # Filter seeded with the single example.com hostname.
            hostname = URLInfo.parse('http://example.com/blog/').hostname
            return SpanHostsFilter([hostname], **options)

        def build_record(url, **attributes):
            # Fresh record with ``url`` plus any extra attributes.
            new_record = URLRecord()
            new_record.url = url
            for name, value in attributes.items():
                setattr(new_record, name, value)
            return new_record

        def verdict(url):
            return url_filter.test(URLInfo.parse(url), record)

        same_host_page = 'http://example.com/blog/topic1/blah.html'
        other_host_page = 'http://hotdog.example/blog/topic1/blah.html'

        record = build_record('http://example.com')

        # enabled=False: only the listed host is accepted.
        url_filter = build_filter(enabled=False)
        self.assertTrue(verdict(same_host_page))
        self.assertFalse(verdict(other_host_page))

        # enabled=True: the other host is accepted as well.
        url_filter = build_filter(enabled=True)
        self.assertTrue(verdict(same_host_page))
        self.assertTrue(verdict(other_host_page))

        # page_requisites=True: an inline record on another host passes.
        url_filter = build_filter(page_requisites=True)
        record = build_record('http://1.example.com/', inline_level=1)
        self.assertTrue(verdict('http://1.example.com/'))

        # linked_pages=True: accepted when the parent URL is on the
        # listed host ...
        url_filter = build_filter(linked_pages=True)
        record = build_record(
            'http://1.example.com/',
            parent_url='http://example.com/blog/'
        )
        self.assertTrue(verdict('http://1.example.com/'))

        # ... and rejected when the parent URL is on the other host.
        record = build_record(
            'http://1.example.com/blah.html',
            parent_url='http://1.example.com/'
        )
        self.assertFalse(verdict('http://1.example.com/blah.html'))