def test_parent_filter(self):
    '''Exercise ``ParentFilter.test`` against several root URLs.

    The assertions cover: a URL equal to the root, URLs below the
    root (both schemes), URLs above the root on the same host, URLs on
    a different host, and a URL above the root once the record has a
    non-zero ``inline_level``.
    '''
    record = URLRecord()
    parent_filter = ParentFilter()

    def verdict(url):
        # Parse the URL and run it through the filter with the shared record.
        return parent_filter.test(URLInfo.parse(url), record)

    # A URL identical to the root is accepted.
    record.root_url = 'http://example.com/blog/topic2/'
    self.assertTrue(verdict('http://example.com/blog/topic2/'))

    record.root_url = 'http://example.com/blog/topic1/'

    # URLs underneath the root are accepted, whatever the scheme.
    below_root = (
        'http://example.com/blog/topic1/blah.html',
        'https://example.com/blog/topic1/blah2.html',
    )
    for url in below_root:
        self.assertTrue(verdict(url))

    # URLs above the root on the same host are rejected.
    above_root = (
        'http://example.com/blog/',
        'https://example.com/blog/',
    )
    for url in above_root:
        self.assertFalse(verdict(url))

    # URLs on a different host are accepted by this filter.
    other_host = (
        'http://somewhere.com/',
        'https://somewhere.com/',
    )
    for url in other_host:
        self.assertTrue(verdict(url))

    # With a non-zero inline level, a URL outside the root is accepted.
    record.inline_level = 1
    self.assertTrue(verdict('http://example.com/styles.css'))
def to_plain(self) -> URLRecord:
    '''Build a fresh ``URLRecord`` populated from this object's fields.

    ``status`` is wrapped in ``Status`` and a truthy ``link_type`` is
    wrapped in ``LinkType``; a falsy ``link_type`` becomes ``None``.
    All other fields are copied across unchanged.
    '''
    plain = URLRecord()

    # URL identity and ancestry.
    plain.url = self.url
    plain.parent_url = self.parent_url
    plain.root_url = self.root_url

    # Processing state.
    plain.status = Status(self.status)
    plain.try_count = self.try_count
    plain.level = self.level
    plain.inline_level = self.inline_level

    # Only convert the link type when one is actually set.
    if self.link_type:
        plain.link_type = LinkType(self.link_type)
    else:
        plain.link_type = None

    # Remaining fields are copied verbatim.
    plain.priority = self.priority
    plain.post_data = self.post_data
    plain.status_code = self.status_code
    plain.filename = self.filename

    return plain
def child_url_record(self, url: str, inline: bool=False,
                     link_type: Optional[LinkType]=None,
                     post_data: Optional[str]=None,
                     level: Optional[int]=None):
    '''Create a new URLRecord describing a child of ``self.url_record``.

    The record is only constructed and returned; nothing here stores it.
    This is useful for testing filters before adding to table.

    Args:
        url: URL of the child.
        inline: When true, the child's inline level is the parent's
            inline level (treated as 0 if falsy) plus one; otherwise 0.
        link_type: Stored on the child as given.
        post_data: Stored on the child as given.
        level: Explicit level for the child. When ``None``, the
            parent's level plus one is used.

    Returns:
        The new URLRecord with status ``Status.todo`` and a try count
        of zero.
    '''
    parent = self.url_record

    child = URLRecord()
    child.url = url
    child.status = Status.todo
    child.try_count = 0

    if level is None:
        child.level = parent.level + 1
    else:
        child.level = level

    # Fall back to the parent's own URL when it has no root recorded.
    root_url = parent.root_url
    if not root_url:
        root_url = parent.url
    child.root_url = root_url

    child.parent_url = parent.url

    if inline:
        child.inline_level = (parent.inline_level or 0) + 1
    else:
        child.inline_level = 0

    child.link_type = link_type
    child.post_data = post_data

    return child