def test_navstring_dump(self):
    """Run ``each`` over a div whose contents mix tags and a bare string.

    The div holds ``<a>1</a>``, the text ``2`` and ``<a>3</a>``.
    """
    soup = Soupy('<div><a>1</a>2<a>3</a></div>')

    # Every item of the contents, including the bare string, yields a text.
    texts = soup.find('div').contents.each(Q.text).val()
    assert texts == ['1', '2', '3']

    # Asking each item for the text of its first child falls back to '!'
    # for the middle item (presumably because the bare string has no
    # contents of its own).
    first_child_text = Q.contents[0].text.orelse('!')
    fallback_texts = soup.find('div').contents.each(first_child_text).val()
    assert fallback_texts == ['1', '!', '3']
def test_navstring_dump(self):
    """Run ``each`` over a div whose contents mix tags and a bare string.

    The div holds ``<a>1</a>``, the text ``2`` and ``<a>3</a>``.
    """
    soup = Soupy('<div><a>1</a>2<a>3</a></div>')

    # Every item of the contents, including the bare string, yields a text.
    plain_texts = soup.find('div').contents.each(Q.text).val()
    assert plain_texts == ['1', '2', '3']

    # Asking each item for the text of its first child falls back to '!'
    # for the middle item (presumably because the bare string has no
    # contents of its own).
    text_of_first_child = Q.contents[0].text.orelse('!')
    with_fallback = (
        soup.find('div').contents.each(text_of_first_child).val()
    )
    assert with_fallback == ['1', '!', '3']
def test_simple_dump(self):
    """Check the result shapes of ``dump`` on a collection of ``<a>`` tags.

    Keyword queries give one dict per item, positional queries give one
    tuple per item, and mixing both styles raises ``ValueError``.
    """
    soup = Soupy('<a>1</a><a>2</a><a>3</a>')

    as_dicts = soup.find_all('a').dump(a=Q.text).val()
    assert as_dicts == [{'a': digit} for digit in '123']

    as_tuples = soup.find_all('a').dump(Q.text).val()
    assert as_tuples == [(digit,) for digit in '123']

    # Positional and keyword queries cannot be combined in a single call.
    with pytest.raises(ValueError):
        soup.find('a').dump(Q.text, a=Q.text)
def test_simple_dump(self):
    """Check the result shapes of ``dump`` on a collection of ``<a>`` tags.

    Keyword queries give one dict per item, positional queries give one
    tuple per item, and mixing both styles raises ``ValueError``.
    """
    soup = Soupy('<a>1</a><a>2</a><a>3</a>')
    expected_texts = ['1', '2', '3']

    keyed = soup.find_all('a').dump(a=Q.text).val()
    assert keyed == [{'a': text} for text in expected_texts]

    positional = soup.find_all('a').dump(Q.text).val()
    assert positional == [(text,) for text in expected_texts]

    # Positional and keyword queries cannot be combined in a single call.
    with pytest.raises(ValueError):
        soup.find('a').dump(Q.text, a=Q.text)
def check_podcast(type, url):
    """Announce the newest episode found at *url* if it has changed.

    The page is downloaded and parsed, and the text of its first ``<h2>`` is
    taken as the latest episode name. When that name differs from
    ``videos[type]``, the episode (with the text of the element of class
    ``deck`` as its description) is announced on ``CHANNEL`` through ``bot``,
    the event is logged, and ``videos[type]`` is updated.

    Args:
        type: Key used to look the podcast up in ``videos`` and
            ``PODCAST_NAMES``. The name shadows the builtin but is kept so
            that existing keyword callers continue to work.
        url: Address of the page to scrape.

    Returns:
        True if a new episode was announced; False if the name is unchanged
        or if scraping/announcing failed (the failure is logged).

    Raises:
        Whatever ``urllib.urlopen`` or ``Soupy`` raise: as before, the
        download and parse happen outside the error-logging ``try``.
    """
    # No ``global`` statement is needed: the module-level names are only
    # read, and ``videos`` is mutated in place rather than rebound.
    response = urllib.urlopen(url)
    try:
        # Assumes Soupy/BeautifulSoup reads the whole response while it is
        # being constructed, so closing afterwards is safe.
        page = Soupy(response)
    finally:
        # Release the connection instead of leaving it to the collector.
        response.close()

    try:
        latestname = page.find("h2").text.val()
        if latestname == videos[type]:
            return False

        latestdesc = page.find(class_="deck").text.val().strip()
        bot.say(CHANNEL, "[New %s] %s - %s %s"
                % (PODCAST_NAMES[type], latestname, latestdesc, url))
        log.info("New %s: %s" % (PODCAST_NAMES[type], latestname))
        videos[type] = latestname
        return True
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt and SystemExit are no longer swallowed.
        log.error("Failed checking for latest %s at %s" % (type, url))
        return False
def check_podcast(type, url):
    """Announce the newest episode found at *url* if it has changed.

    The page is downloaded and parsed, and the text of its first ``<h2>`` is
    taken as the latest episode name. When that name differs from
    ``videos[type]``, the episode (with the text of the element of class
    ``deck`` as its description) is announced on ``CHANNEL`` through ``bot``,
    the event is logged, and ``videos[type]`` is updated.

    Args:
        type: Key used to look the podcast up in ``videos`` and
            ``PODCAST_NAMES``. The name shadows the builtin but is kept so
            that existing keyword callers continue to work.
        url: Address of the page to scrape.

    Returns:
        True if a new episode was announced; False if the name is unchanged
        or if scraping/announcing failed (the failure is logged).

    Raises:
        Whatever ``urllib.urlopen`` or ``Soupy`` raise: as before, the
        download and parse happen outside the error-logging ``try``.
    """
    # No ``global`` statement is needed: the module-level names are only
    # read, and ``videos`` is mutated in place rather than rebound.
    response = urllib.urlopen(url)
    try:
        # Assumes Soupy/BeautifulSoup reads the whole response while it is
        # being constructed, so closing afterwards is safe.
        page = Soupy(response)
    finally:
        # Release the connection instead of leaving it to the collector.
        response.close()

    try:
        latestname = page.find("h2").text.val()
        if latestname == videos[type]:
            return False

        latestdesc = page.find(class_="deck").text.val().strip()
        bot.say(
            CHANNEL,
            "[New %s] %s - %s %s"
            % (PODCAST_NAMES[type], latestname, latestdesc, url))
        log.info("New %s: %s" % (PODCAST_NAMES[type], latestname))
        videos[type] = latestname
        return True
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt and SystemExit are no longer swallowed.
        log.error("Failed checking for latest %s at %s" % (type, url))
        return False
def __init__(self, url, generator=correct_content_generator):
    """Download the page at *url* and hand its content items to the instance.

    Args:
        url: Address of the page; stored on ``self.url`` and passed to
            ``download``.
        generator: Callable that takes the parsed page and yields the
            items to process. It is replaced by ``only_p_generator`` when
            ``find_start_tag`` raises ``NameError`` for the page.
    """
    self.url = url
    self.tags = []
    self.img_url = []

    soup = Soupy(download(url))

    # Fall back to a placeholder when the page title is missing or empty.
    page_title = soup.find('title').text.val()
    self.title = page_title or 'Lorem Ipsum'
    self.safe_title = safe_chars(self.title)

    # find_start_tag is called only to see whether it raises; its return
    # value is not used.
    try:
        find_start_tag(soup)
    except NameError:
        generator = only_p_generator

    for element in generator(soup):
        self.retrieve_file(element)
class TestCollection(object):
    """Tests for ``Collection`` behaviour: indexing, mapping, filtering, zipping."""

    def setup_method(self, method):
        """Build a small document with three ``<a>`` tags for each test."""
        self.node = Soupy('<html><body><a>1</a><a>2</a><a>3</a></body></html>')

    def test_slice(self):
        """Slicing a collection gives a Collection matching the sliced DOM list."""
        node = self.node
        dom = node.val()
        assert isinstance(node.children[::2], Collection)
        assert node.children[::2].val() == list(dom.children)[::2]

    def test_slice_on_iterator(self):
        """Slicing also works when the collection wraps a generator."""
        c = Collection((Scalar(i) for i in range(5)))
        assert c[::2].val() == [0, 2, 4]

    def test_get_single(self):
        """Integer indexing returns the wrapper of the matching DOM child."""
        node = self.node.find('body')
        dom = node.val()
        assert node.children[1].val() == dom.contents[1]

    def test_get_single_on_iterator(self):
        """Integer indexing also works when the collection wraps a generator."""
        c = Collection((Scalar(i) for i in range(5)))
        assert c[2].val() == 2

    def test_map(self):
        """``map`` applies the function to the collection as a whole."""
        node = self.node
        assert node.find_all('a').map(len).val() == 3

    def test_first(self):
        """``first`` returns the first item of the collection."""
        node = self.node
        assert node.find_all('a').first().text.val() == '1'

    def test_first_empty(self):
        """``first`` on an empty result gives a NullNode."""
        node = self.node
        assert isinstance(node.find_all('x').first(), NullNode)

    def test_each(self):
        """``each`` applies one query to every item."""
        node = self.node
        result = node.find_all('a').each(Q.text.map(int)).val()
        assert result == [1, 2, 3]

    def test_multi_each(self):
        """``each`` with several queries yields one tuple per item."""
        node = self.node
        result = node.find_all('a').each(Q.text.map(int), Q.text).val()
        assert result == [(1, '1'), (2, '2'), (3, '3')]

    def test_filter(self):
        """``filter`` keeps the items that satisfy the predicate."""
        node = self.node
        result = node.find_all('a').filter(Q.text.map(int) > 1).val()
        assert len(result) == 2

    # This method used to be a second ``test_filter``, which rebound the name
    # in the class body so the real filter test above was never run.
    def test_exclude(self):
        """``exclude`` drops the items that satisfy the predicate."""
        node = self.node
        result = node.find_all('a').exclude(Q.text.map(int) > 1).val()
        assert len(result) == 1

    def test_filter_noarg(self):
        """``filter`` without a predicate keeps the truthy items."""
        node = self.node
        result = node.find_all('a').each(Q.text.map(int) > 1).filter().val()
        assert len(result) == 2

    def test_takewhile(self):
        """``takewhile`` keeps leading items while the predicate holds."""
        node = self.node
        result = node.find_all('a').takewhile(Q.text.map(int) < 2).val()
        assert len(result) == 1

    def test_dropwhile(self):
        """``dropwhile`` skips leading items while the predicate holds."""
        node = self.node
        result = node.find_all('a').dropwhile(Q.text.map(int) < 2).val()
        assert len(result) == 2

    def test_index_oob(self):
        """An out-of-range index gives a NullNode."""
        assert isinstance(Collection([])[5], NullNode)

    def test_bool(self):
        """A collection is truthy exactly when it has items."""
        assert Collection([Scalar(1)])
        assert not Collection([])

    def test_count(self):
        """``count`` reports the number of items, and 0 for a NullCollection."""
        assert Collection([]).count().val() == 0
        assert Collection([Scalar(1)]).count().val() == 1
        assert NullCollection().count().val() == 0

    def test_repr_unicode(self):
        """Printing a collection that holds non-ASCII text must not raise."""
        s = Collection([Soupy('<html>∂ƒ</html>')])
        print(s)
        print(repr(s))
        print(text_type(s))

    def test_zip(self):
        """``zip`` pairs items of two collections and rejects a length mismatch."""
        c1 = Collection(map(Scalar, [1, 2, 3]))
        c2 = c1.each(Q * 2)
        c3 = c1.zip(c2)
        assert c3.val() == [(1, 2), (2, 4), (3, 6)]

        with pytest.raises(ValueError):
            c1.zip([1, 2])

    def test_dictzip(self):
        """``dictzip`` accepts labels as a plain list or as a Collection."""
        c = Collection([Scalar(1), Scalar(2)])
        result = c.dictzip(['a', 'b'])
        expected = {'a': 1, 'b': 2}
        assert result.val() == expected

        lbls = Collection([Scalar('a'), Scalar('b')])
        assert c.dictzip(lbls).val() == expected

    def test_list(self):
        """Iterating a collection yields the wrappers it was built from."""
        items = list(map(Scalar, [1, 2]))
        assert list(Collection(items)) == items

    def test_typecheck(self):
        """ Collections must contain wrappers """
        with pytest.raises(TypeError):
            Collection([1])

    def test_all(self):
        """``any``/``all``/``none`` on mixed, all-true, all-false and empty input."""
        c = Collection(map(Scalar, [True, False]))
        assert c.any().val()
        assert not c.all().val()
        assert not c.none().val()

        c = Collection(map(Scalar, [True, True]))
        assert c.any().val()
        assert c.all().val()
        assert not c.none().val()

        c = Collection(map(Scalar, [False, False]))
        assert not c.any().val()
        assert not c.all().val()
        assert c.none().val()

        c = Collection([])
        assert not c.any().val()
        assert c.none().val()
        assert c.all().val()  # this is python's behavior for empty lists