예제 #1
0
    def test_navstring_dump(self):
        """Bare text children can be queried alongside tag children.

        ``Q.text`` yields the text of every child of the ``div``, including
        the loose ``2`` string, while ``Q.contents[0].text`` falls back to
        the ``orelse`` default for that string child.
        """
        soup = Soupy('<div><a>1</a>2<a>3</a></div>')

        plain_text = soup.find('div').contents.each(Q.text)
        assert plain_text.val() == ['1', '2', '3']

        # Deferred query: text of the first grandchild, or '!' when missing.
        first_child_text = Q.contents[0].text.orelse('!')
        with_default = soup.find('div').contents.each(first_child_text)
        assert with_default.val() == ['1', '!', '3']
예제 #2
0
    def test_navstring_dump(self):
        """Check ``each`` over ``contents`` that mix tags and a plain string.

        The string child ``2`` reports its own text for ``Q.text`` but
        produces the ``orelse`` fallback for ``Q.contents[0].text``.
        """
        markup = '<div><a>1</a>2<a>3</a></div>'
        document = Soupy(markup)

        texts = document.find('div').contents.each(Q.text).val()
        assert texts == ['1', '2', '3']

        # Build the fallback query once, then apply it to every child.
        nested_text_or_bang = Q.contents[0].text.orelse('!')
        fallbacks = document.find('div').contents.each(nested_text_or_bang)
        assert fallbacks.val() == ['1', '!', '3']
예제 #3
0
    def test_simple_dump(self):
        """``dump`` yields dicts for keyword queries, tuples for positional.

        Passing both positional and keyword queries in one call is expected
        to raise ``ValueError``.
        """
        soup = Soupy('<a>1</a><a>2</a><a>3</a>')

        as_dicts = soup.find_all('a').dump(a=Q.text)
        assert as_dicts.val() == [{'a': '1'}, {'a': '2'}, {'a': '3'}]

        as_tuples = soup.find_all('a').dump(Q.text)
        assert as_tuples.val() == [('1',), ('2',), ('3',)]

        with pytest.raises(ValueError):
            soup.find('a').dump(Q.text, a=Q.text)
예제 #4
0
    def test_simple_dump(self):
        """Exercise ``dump`` with keyword, positional and mixed queries.

        Keyword queries produce one dict per node, positional queries one
        tuple per node, and mixing the two raises ``ValueError``.
        """
        document = Soupy('<a>1</a><a>2</a><a>3</a>')

        keyed = document.find_all('a').dump(a=Q.text).val()
        expected_keyed = [{'a': '1'}, {'a': '2'}, {'a': '3'}]
        assert keyed == expected_keyed

        positional = document.find_all('a').dump(Q.text).val()
        expected_positional = [('1',), ('2',), ('3',)]
        assert positional == expected_positional

        with pytest.raises(ValueError):
            document.find('a').dump(Q.text, a=Q.text)
예제 #5
0
def check_podcast(type, url):
    """Announce the latest podcast episode found at ``url`` if it is new.

    The page is parsed with ``Soupy``; the text of the ``h2`` node is taken
    as the episode name and compared with ``videos[type]``.  When they
    differ, the episode is announced through ``bot.say`` on ``CHANNEL``,
    logged, and remembered in ``videos[type]``.

    Args:
        type: Key used to index ``videos`` and ``PODCAST_NAMES``.  The name
            shadows the builtin but is kept so existing callers still work.
        url: Address of the page to download and inspect.

    Returns:
        True if a new episode was announced and recorded; False if the
        episode name is unchanged or the page could not be processed.
    """
    # No ``global`` statement is needed: the module-level names are only
    # read or mutated in place (``videos[type] = ...``), never rebound.
    # The download stays outside the ``try`` so fetch errors still propagate
    # to the caller exactly as before.
    page = Soupy(urllib.urlopen(url))
    try:
        latestname = page.find("h2").text.val()
        if latestname == videos[type]:
            return False

        latestdesc = page.find(class_="deck").text.val().strip()
        bot.say(CHANNEL, "[New %s] %s - %s %s" % (PODCAST_NAMES[type], latestname, latestdesc, url))
        log.info("New %s: %s" % (PODCAST_NAMES[type], latestname))
        videos[type] = latestname
        return True
    except Exception:
        # Best-effort check: report the failure and carry on.  Catching
        # ``Exception`` (not a bare ``except``) lets KeyboardInterrupt and
        # SystemExit through so the process can still be stopped.
        log.error("Failed checking for latest %s at %s" % (type, url))
        return False
예제 #6
0
def check_podcast(type, url):
    """Check ``url`` for a podcast episode not yet recorded in ``videos``.

    The text of the page's ``h2`` node is treated as the newest episode
    name.  If it differs from ``videos[type]`` the episode is announced via
    ``bot.say`` on ``CHANNEL``, logged, and stored back into ``videos``.

    Args:
        type: Key into ``videos`` and ``PODCAST_NAMES``.  Shadows the
            builtin of the same name; kept for caller compatibility.
        url: Page to download and parse.

    Returns:
        True when a new episode was announced; False when nothing changed
        or an error occurred while reading the page.
    """
    # The former ``global`` declaration was unnecessary: none of the
    # module-level names is rebound here, and ``apikey`` was never used.
    # Downloading remains outside the ``try`` so network errors surface to
    # the caller as they did before.
    page = Soupy(urllib.urlopen(url))
    try:
        latestname = page.find("h2").text.val()
        if latestname == videos[type]:
            return False

        latestdesc = page.find(class_="deck").text.val().strip()
        bot.say(
            CHANNEL, "[New %s] %s - %s %s" %
            (PODCAST_NAMES[type], latestname, latestdesc, url))
        log.info("New %s: %s" % (PODCAST_NAMES[type], latestname))
        videos[type] = latestname
        return True
    except Exception:
        # A bare ``except`` would also trap KeyboardInterrupt/SystemExit;
        # only ordinary errors should be turned into a logged failure.
        log.error("Failed checking for latest %s at %s" % (type, url))
        return False
예제 #7
0
파일: pyn.py 프로젝트: flyingjam/epubmaker
    def __init__(self, url, generator=correct_content_generator):
        """Download ``url`` and process every tag produced for the page.

        Args:
            url: Address handed to ``download``; also stored on the instance.
            generator: Callable that takes the parsed page and yields tags.
                It is replaced by ``only_p_generator`` when
                ``find_start_tag`` raises ``NameError`` for the page.
        """
        self.url = url
        self.img_url = []
        self.tags = []

        page = Soupy(download(url))

        # Use a placeholder when the page title is empty or missing.
        page_title = page.find('title').text.val()
        self.title = page_title or 'Lorem Ipsum'
        self.safe_title = safe_chars(self.title)

        # Pick the tag source: the requested generator, unless no start tag
        # can be located in the page.
        tag_source = generator
        try:
            find_start_tag(page)
        except NameError:
            tag_source = only_p_generator

        for element in tag_source(page):
            self.retrieve_file(element)
예제 #8
0
class TestCollection(object):
    """Tests for ``Collection`` behavior: indexing, queries and reductions."""

    def setup_method(self, method):
        """Create the three-link document shared by the node-based tests."""
        self.node = Soupy('<html><body><a>1</a><a>2</a><a>3</a></body></html>')

    def test_slice(self):
        """Slicing returns a Collection matching the sliced DOM children."""
        node = self.node
        dom = node.val()

        assert isinstance(node.children[::2], Collection)
        assert node.children[::2].val() == list(dom.children)[::2]

    def test_slice_on_iterator(self):
        """Slicing works when the collection wraps a generator."""
        c = Collection((Scalar(i) for i in range(5)))
        assert c[::2].val() == [0, 2, 4]

    def test_get_single(self):
        """Integer indexing returns the item at that position."""
        node = self.node.find('body')
        dom = node.val()
        assert node.children[1].val() == dom.contents[1]

    def test_get_single_on_iterator(self):
        """Integer indexing works when the collection wraps a generator."""
        c = Collection((Scalar(i) for i in range(5)))
        assert c[2].val() == 2

    def test_map(self):
        """``map`` applies the function to the collection as a whole."""
        node = self.node
        assert node.find_all('a').map(len).val() == 3

    def test_first(self):
        """``first`` returns the first matching node."""
        node = self.node
        assert node.find_all('a').first().text.val() == '1'

    def test_first_empty(self):
        """``first`` on an empty result is a ``NullNode``."""
        node = self.node
        assert isinstance(node.find_all('x').first(), NullNode)

    def test_each(self):
        """``each`` evaluates the query against every item."""
        node = self.node
        result = node.find_all('a').each(Q.text.map(int)).val()
        assert result == [1, 2, 3]

    def test_multi_each(self):
        """``each`` with several queries yields one tuple per item."""
        node = self.node
        result = node.find_all('a').each(Q.text.map(int),
                                         Q.text).val()
        assert result == [(1, '1'), (2, '2'), (3, '3')]

    def test_filter(self):
        """``filter`` keeps the items for which the predicate holds."""
        node = self.node
        result = node.find_all('a').filter(Q.text.map(int) > 1).val()
        assert len(result) == 2

    # Previously also named ``test_filter``, which replaced the method above
    # in the class namespace so the ``filter`` test was never collected.
    def test_exclude(self):
        """``exclude`` drops the items for which the predicate holds."""
        node = self.node
        result = node.find_all('a').exclude(Q.text.map(int) > 1).val()
        assert len(result) == 1

    def test_filter_noarg(self):
        """``filter`` without a predicate keeps the items that are true."""
        node = self.node
        result = node.find_all('a').each(Q.text.map(int) > 1).filter().val()
        assert len(result) == 2

    def test_takewhile(self):
        """``takewhile`` keeps leading items while the predicate holds."""
        node = self.node
        result = node.find_all('a').takewhile(Q.text.map(int) < 2).val()
        assert len(result) == 1

    def test_dropwhile(self):
        """``dropwhile`` skips leading items while the predicate holds."""
        node = self.node

        result = node.find_all('a').dropwhile(Q.text.map(int) < 2).val()
        assert len(result) == 2

    def test_index_oob(self):
        """An out-of-range index yields a ``NullNode``."""
        assert isinstance(Collection([])[5], NullNode)

    def test_bool(self):
        """Non-empty collections are truthy; empty ones are falsy."""

        assert Collection([Scalar(1)])
        assert not Collection([])

    def test_count(self):
        """``count`` reports the number of items, zero for empty/null."""

        assert Collection([]).count().val() == 0
        assert Collection([Scalar(1)]).count().val() == 1
        assert NullCollection().count().val() == 0

    def test_repr_unicode(self):
        """Smoke test: printing non-ASCII content must not raise."""

        s = Collection([Soupy('<html>∂ƒ</html>')])
        print(s)
        print(repr(s))
        print(text_type(s))

    def test_zip(self):
        """``zip`` pairs items of two collections; ``[1, 2]`` is rejected."""

        c1 = Collection(map(Scalar, [1, 2, 3]))
        c2 = c1.each(Q * 2)
        c3 = c1.zip(c2)

        assert c3.val() == [(1, 2), (2, 4), (3, 6)]

        with pytest.raises(ValueError):
            c1.zip([1, 2])

    def test_dictzip(self):
        """``dictzip`` accepts keys as a plain list or as a Collection."""

        c = Collection([Scalar(1), Scalar(2)])
        result = c.dictzip(['a', 'b'])
        expected = {'a': 1, 'b': 2}

        assert result.val() == expected

        lbls = Collection([Scalar('a'), Scalar('b')])
        assert c.dictzip(lbls).val() == expected

    def test_list(self):
        """Iterating a collection yields the wrapped items unchanged."""

        items = list(map(Scalar, [1, 2]))
        assert list(Collection(items)) == items

    def test_typecheck(self):
        """ Collections must contain wrappers """
        with pytest.raises(TypeError):
            Collection([1])

    def test_all(self):
        """``any``/``all``/``none`` over mixed, true, false and empty input."""

        c = Collection(map(Scalar, [True, False]))
        assert c.any().val()
        assert not c.all().val()
        assert not c.none().val()

        c = Collection(map(Scalar, [True, True]))
        assert c.any().val()
        assert c.all().val()
        assert not c.none().val()

        c = Collection(map(Scalar, [False, False]))
        assert not c.any().val()
        assert not c.all().val()
        assert c.none().val()

        c = Collection([])
        assert not c.any().val()
        assert c.none().val()
        assert c.all().val()  # this is python's behavior for empty lists
예제 #9
0
class TestCollection(object):
    """Exercises ``Collection``: slicing, per-item queries and reductions."""

    def setup_method(self, method):
        """Build the document with three ``<a>`` tags used by the tests."""
        self.node = Soupy('<html><body><a>1</a><a>2</a><a>3</a></body></html>')

    def test_slice(self):
        """A slice is itself a Collection and mirrors the DOM children."""
        node = self.node
        dom = node.val()

        assert isinstance(node.children[::2], Collection)
        assert node.children[::2].val() == list(dom.children)[::2]

    def test_slice_on_iterator(self):
        """A generator-backed collection can be sliced."""
        c = Collection((Scalar(i) for i in range(5)))
        assert c[::2].val() == [0, 2, 4]

    def test_get_single(self):
        """Indexing with an integer returns the matching child."""
        node = self.node.find('body')
        dom = node.val()
        assert node.children[1].val() == dom.contents[1]

    def test_get_single_on_iterator(self):
        """A generator-backed collection can be indexed."""
        c = Collection((Scalar(i) for i in range(5)))
        assert c[2].val() == 2

    def test_map(self):
        """``map`` receives the whole collection, here measured by ``len``."""
        node = self.node
        assert node.find_all('a').map(len).val() == 3

    def test_first(self):
        """``first`` gives the first item of the result."""
        node = self.node
        assert node.find_all('a').first().text.val() == '1'

    def test_first_empty(self):
        """``first`` gives a ``NullNode`` when nothing matched."""
        node = self.node
        assert isinstance(node.find_all('x').first(), NullNode)

    def test_each(self):
        """``each`` maps a single query over the items."""
        node = self.node
        result = node.find_all('a').each(Q.text.map(int)).val()
        assert result == [1, 2, 3]

    def test_multi_each(self):
        """``each`` with two queries produces a pair for every item."""
        node = self.node
        result = node.find_all('a').each(Q.text.map(int), Q.text).val()
        assert result == [(1, '1'), (2, '2'), (3, '3')]

    def test_filter(self):
        """``filter`` retains items matching the predicate."""
        node = self.node
        result = node.find_all('a').filter(Q.text.map(int) > 1).val()
        assert len(result) == 2

    # This test used to share the name ``test_filter`` with the one above;
    # the later definition won, so the ``filter`` test silently never ran.
    def test_exclude(self):
        """``exclude`` removes items matching the predicate."""
        node = self.node
        result = node.find_all('a').exclude(Q.text.map(int) > 1).val()
        assert len(result) == 1

    def test_filter_noarg(self):
        """Argument-less ``filter`` retains the items that are true."""
        node = self.node
        result = node.find_all('a').each(Q.text.map(int) > 1).filter().val()
        assert len(result) == 2

    def test_takewhile(self):
        """``takewhile`` stops at the first item failing the predicate."""
        node = self.node
        result = node.find_all('a').takewhile(Q.text.map(int) < 2).val()
        assert len(result) == 1

    def test_dropwhile(self):
        """``dropwhile`` starts at the first item failing the predicate."""
        node = self.node

        result = node.find_all('a').dropwhile(Q.text.map(int) < 2).val()
        assert len(result) == 2

    def test_index_oob(self):
        """Indexing past the end returns a ``NullNode``."""
        assert isinstance(Collection([])[5], NullNode)

    def test_bool(self):
        """Truthiness follows emptiness."""

        assert Collection([Scalar(1)])
        assert not Collection([])

    def test_count(self):
        """``count`` is zero for empty and null collections, else the size."""

        assert Collection([]).count().val() == 0
        assert Collection([Scalar(1)]).count().val() == 1
        assert NullCollection().count().val() == 0

    def test_repr_unicode(self):
        """Smoke test: string conversions of non-ASCII content do not raise."""

        s = Collection([Soupy('<html>∂ƒ</html>')])
        print(s)
        print(repr(s))
        print(text_type(s))

    def test_zip(self):
        """``zip`` combines collections pairwise; ``[1, 2]`` is rejected."""

        c1 = Collection(map(Scalar, [1, 2, 3]))
        c2 = c1.each(Q * 2)
        c3 = c1.zip(c2)

        assert c3.val() == [(1, 2), (2, 4), (3, 6)]

        with pytest.raises(ValueError):
            c1.zip([1, 2])

    def test_dictzip(self):
        """``dictzip`` builds a dict from list keys or Collection keys."""

        c = Collection([Scalar(1), Scalar(2)])
        result = c.dictzip(['a', 'b'])
        expected = {'a': 1, 'b': 2}

        assert result.val() == expected

        lbls = Collection([Scalar('a'), Scalar('b')])
        assert c.dictzip(lbls).val() == expected

    def test_list(self):
        """``list()`` over a collection returns the original wrappers."""

        items = list(map(Scalar, [1, 2]))
        assert list(Collection(items)) == items

    def test_typecheck(self):
        """ Collections must contain wrappers """
        with pytest.raises(TypeError):
            Collection([1])

    def test_all(self):
        """Check ``any``/``all``/``none`` for each combination of values."""

        c = Collection(map(Scalar, [True, False]))
        assert c.any().val()
        assert not c.all().val()
        assert not c.none().val()

        c = Collection(map(Scalar, [True, True]))
        assert c.any().val()
        assert c.all().val()
        assert not c.none().val()

        c = Collection(map(Scalar, [False, False]))
        assert not c.any().val()
        assert not c.all().val()
        assert c.none().val()

        c = Collection([])
        assert not c.any().val()
        assert c.none().val()
        assert c.all().val()  # this is python's behavior for empty lists