Example #1
0
    def test_empty_scalars_return_nullnode(self):
        """Missing siblings and a missing parent are wrapped as NullNode."""
        anchor = Soupy('<a></a>').find('a')
        # Check each sibling direction in turn, next first.
        for relation in ('next_sibling', 'previous_sibling'):
            assert isinstance(getattr(anchor, relation), NullNode)

        root = Soupy('<a></a>')
        assert isinstance(root.parent, NullNode)
Example #2
0
    def test_failed_search(self):
        """Dumping a query without a fallback raises NullValueError."""
        soup = Soupy('<a><b>1</b></a><a>2</a>')

        # The second <a> has no <b> child for the query to find.
        with pytest.raises(NullValueError):
            soup.find_all('a').dump(a=Q.find('b').text)
Example #3
0
    def test_orelse(self):
        """orelse() supplies the default when the query finds nothing."""
        soup = Soupy('<a><b>1</b></a><a>2</a>')
        b_as_int = Q.find('b').text.map(int).orelse(0)

        rows = soup.find_all('a').dump(a=b_as_int).val()

        assert rows == [{'a': 1}, {'a': 0}]
Example #4
0
    def test_orelse(self):
        """A query ending in orelse(0) dumps 0 for the <a> without a <b>."""
        document = Soupy('<a><b>1</b></a><a>2</a>')

        dumped = document.find_all('a').dump(
            a=Q.find('b').text.map(int).orelse(0),
        )
        expected = [{'a': 1}, {'a': 0}]

        assert dumped.val() == expected
Example #5
0
    def test_multi_dump(self):
        """dump() with two keyword queries builds one two-key dict per node."""
        soup = Soupy('<a val="1">1</a><a>2</a><a val="3">3</a>')
        text_query = Q.text
        val_query = Q.attrs.get('val')

        rows = soup.find_all('a').dump(a=text_query, b=val_query).val()

        expected = [
            {'a': '1', 'b': '1'},
            {'a': '2', 'b': None},  # middle tag carries no val attribute
            {'a': '3', 'b': '3'},
        ]
        assert rows == expected
Example #6
0
    def test_navstring_dump(self):
        """each() can be applied across tags and bare strings alike."""
        soup = Soupy('<div><a>1</a>2<a>3</a></div>')

        texts = soup.find('div').contents.each(Q.text).val()
        assert texts == ['1', '2', '3']

        # The middle item falls back to '!' via orelse().
        first_child_text = Q.contents[0].text.orelse('!')
        texts = soup.find('div').contents.each(first_child_text).val()
        assert texts == ['1', '!', '3']
Example #7
0
    def test_navstring_dump(self):
        """Queries mapped over mixed contents handle the bare string too."""
        document = Soupy('<div><a>1</a>2<a>3</a></div>')

        plain = document.find('div').contents.each(Q.text)
        assert plain.val() == ['1', '2', '3']

        # Expected result shows '!' standing in for the middle item.
        with_default = document.find('div').contents.each(
            Q.contents[0].text.orelse('!'))
        assert with_default.val() == ['1', '!', '3']
Example #8
0
    def test_simple_dump(self):
        """dump() gives dicts for keyword queries, tuples for positional."""
        soup = Soupy('<a>1</a><a>2</a><a>3</a>')

        by_name = soup.find_all('a').dump(a=Q.text).val()
        assert by_name == [{'a': '1'}, {'a': '2'}, {'a': '3'}]

        by_position = soup.find_all('a').dump(Q.text).val()
        assert by_position == [('1',), ('2',), ('3',)]

        # Mixing positional and keyword queries is expected to be rejected.
        with pytest.raises(ValueError):
            soup.find('a').dump(Q.text, a=Q.text)
Example #9
0
 def test_find_multi_methods(self, method):
     """A multi-result finder on the wrapper agrees with the wrapped node."""
     markup = """
         <div>
            <div></div>
            <b><div></div></b>
            <div></div>
        </div>
        """
     wrapped = Soupy(markup).find('b')
     raw = wrapped.val()

     expected = getattr(raw, method)('div')
     # Guard against a vacuous comparison of two empty results.
     assert expected

     actual = getattr(wrapped, method)('div').val()
     assert actual == expected
Example #10
0
 def test_find_multi_methods(self, method):
     """Compare ``method`` called on the wrapper with the same call on its value."""
     b_node = Soupy("""
         <div>
            <div></div>
            <b><div></div></b>
            <div></div>
        </div>
        """).find('b')
     underlying = b_node.val()

     reference = getattr(underlying, method)('div')
     assert reference  # the reference lookup must find something

     wrapped_result = getattr(b_node, method)('div')
     assert wrapped_result.val() == reference
Example #11
0
    def test_multi_dump(self):
        """Two keyword queries passed to dump() produce two-key dicts."""
        document = Soupy('<a val="1">1</a><a>2</a><a val="3">3</a>')

        dumped = document.find_all('a').dump(
            a=Q.text,
            b=Q.attrs.get('val'),
        )

        first = {'a': '1', 'b': '1'}
        second = {'a': '2', 'b': None}
        third = {'a': '3', 'b': '3'}
        assert dumped.val() == [first, second, third]
Example #12
0
 def get_chart(self, chart_name):
     """Fetch the named chart and return its entries as a JSON string.

     The lower-cased ``chart_name`` is looked up in
     ``self.chart_titles_dict``; the resulting number is appended to
     ``self.base_url`` and that page is downloaded.  Entries are collected
     from the links found inside the ``tr`` rows of class ``latc_song``.
     ``self.chart_list``, ``self.chart_name`` and ``self.url`` are
     overwritten as a side effect.

     Each entry is a tuple of ``(key, value)`` pairs for ``position``,
     ``title`` and ``artist``; the list is serialised with
     ``json.dumps(..., indent=3)``.

     Raises:
         ValueError: if the lower-cased ``chart_name`` is not a key of
             ``self.chart_titles_dict``.
     """
     self.chart_list = []
     self.chart_name = chart_name.lower()
     try:
         number = self.chart_titles_dict[self.chart_name]
     except KeyError:
         raise ValueError('That chart does not exist')
     self.url = self.base_url + str(number)
     raw = requests.get(self.url)
     soup = Soupy(raw.text)
     tr_container = soup.find_all('tr', {'class': 'latc_song'})
     pos = 0
     # Indexes of the title and artist links within a cell's link list.
     song_title_constant = 2
     song_artist_constant = 3
     for table_row in tr_container:
         children = table_row.children
         # Type of the link lookup on the row's first cell; any cell whose
         # lookup has this same type is skipped below.
         null_container_holder = type(
             children[0].find('table').find_all('a'))
         for child in children:
             links = child.find('table').find_all('a')
             if type(links) is null_container_holder:
                 continue
             try:
                 # Counted before extraction, so a cell that fails with
                 # NullValueError still uses up a position number.
                 pos = pos + 1
                 song_title = links[
                     song_title_constant].contents.first().val().string
                 song_artist = links[
                     song_artist_constant].contents.first().val().string
                 self.chart_list.append(
                     (('position', pos), ('title', song_title),
                      ('artist', song_artist)))
             except NullValueError:
                 # The old ``except NullValueError, NameError:`` form caught
                 # only NullValueError and rebound the name ``NameError``;
                 # catch the same exception without clobbering the builtin.
                 print('\n')
             except NameError:
                 # Fallback: read title/artist one link earlier.
                 song_title = links[song_title_constant -
                                    1].contents.first().val().string
                 song_artist = links[song_artist_constant -
                                     1].contents.first().val().string
                 self.chart_list.append(
                     (('position', pos), ('title', song_title),
                      ('artist', song_artist)))
     return json.dumps(self.chart_list, indent=3)
Example #13
0
    def test_repr_unicode(self):
        """str, repr and text conversion all cope with non-ASCII markup."""
        wrapped = Soupy('<html>∂ƒ</html>')

        print(wrapped)
        print(repr(wrapped))
        if not PY3:
            # On Python 2 the repr has to survive an ASCII encode.
            assert repr(wrapped).encode('ascii')
        print(text_type(wrapped))
Example #14
0
def check_podcast(type, url):
    """Announce a new episode on the page at ``url`` if its title changed.

    The text of the page's first ``h2`` is compared with ``videos[type]``.
    When they differ, the text of the element with class ``deck`` is used
    as the description, a message is sent through ``bot.say`` to
    ``CHANNEL``, the event is logged, and ``videos[type]`` is updated.

    Args:
        type: key into ``videos`` and ``PODCAST_NAMES``.
        url: address of the page to fetch and parse.

    Returns:
        True if a new title was announced; False if the title is unchanged
        or if anything failed while inspecting the page.
    """
    # ``videos`` is only mutated in place, so no ``global`` statement is
    # needed.  NOTE(review): ``urllib.urlopen`` is the Python 2 API.
    page = Soupy(urllib.urlopen(url))
    try:
        latestname = page.find("h2").text.val()
        if latestname == videos[type]:
            return False
        latestdesc = page.find(class_="deck").text.val().strip()
        bot.say(CHANNEL, "[New %s] %s - %s %s" % (PODCAST_NAMES[type], latestname, latestdesc, url))
        log.info("New %s: %s" % (PODCAST_NAMES[type], latestname))
        videos[type] = latestname
        return True
    except Exception:
        # Best-effort check: report and carry on, but let KeyboardInterrupt
        # and SystemExit propagate instead of swallowing them.
        log.error("Failed checking for latest %s at %s" % (type, url))
        return False
Example #15
0
    def __init__(self, url, generator=correct_content_generator):
        """Download ``url``, record its title and process its tags.

        ``generator`` is called with the parsed page and each tag it yields
        is handed to ``self.retrieve_file``.  If ``find_start_tag`` raises
        NameError for the page, ``only_p_generator`` is used instead.
        """
        self.url, self.tags, self.img_url = url, [], []

        page = Soupy(download(url))

        title = page.find('title').text.val()
        if not title:
            # Falsy title: fall back to the placeholder.
            title = 'Lorem Ipsum'
        self.title = title
        self.safe_title = safe_chars(self.title)

        try:
            find_start_tag(page)
        except NameError:
            generator = only_p_generator

        for tag in generator(page):
            self.retrieve_file(tag)
Example #16
0
def check_podcast(type, url):
    """Check the page at ``url`` for a new episode and announce it.

    The first ``h2`` on the page supplies the latest title.  If it differs
    from ``videos[type]``, the ``deck``-class element's text is taken as
    the description, ``bot.say`` posts a message to ``CHANNEL``, the event
    is logged, and ``videos[type]`` is set to the new title.

    Args:
        type: key into ``videos`` and ``PODCAST_NAMES``.
        url: address of the page to fetch and parse.

    Returns:
        True when a new title was announced, otherwise False (including
        when inspecting the page raised an exception).
    """
    # No ``global`` statement required: ``videos`` is mutated, never
    # rebound.  NOTE(review): ``urllib.urlopen`` only exists on Python 2.
    page = Soupy(urllib.urlopen(url))
    try:
        latestname = page.find("h2").text.val()
        if latestname == videos[type]:
            return False
        latestdesc = page.find(class_="deck").text.val().strip()
        bot.say(
            CHANNEL, "[New %s] %s - %s %s" %
            (PODCAST_NAMES[type], latestname, latestdesc, url))
        log.info("New %s: %s" % (PODCAST_NAMES[type], latestname))
        videos[type] = latestname
        return True
    except Exception:
        # Deliberately best-effort, but a bare ``except:`` would also trap
        # KeyboardInterrupt and SystemExit; those now propagate.
        log.error("Failed checking for latest %s at %s" % (type, url))
        return False
Example #17
0
 def get_chart(self, chart_name):
     """Download the named chart and return its entries as a JSON string.

     ``chart_name`` is lower-cased and looked up in
     ``self.chart_titles_dict``; the number found is appended to
     ``self.base_url`` to form the page address.  Entries come from the
     links inside the ``tr`` rows of class ``latc_song``.  The attributes
     ``self.chart_list``, ``self.chart_name`` and ``self.url`` are
     overwritten on every call.

     Each entry is a tuple of ``(key, value)`` pairs for ``position``,
     ``title`` and ``artist``; the list is serialised with
     ``json.dumps(..., indent=3)``.

     Raises:
         ValueError: if the lower-cased ``chart_name`` is not a key of
             ``self.chart_titles_dict``.
     """
     self.chart_list = []
     self.chart_name = chart_name.lower()
     try:
         number = self.chart_titles_dict[self.chart_name]
     except KeyError:
         raise ValueError('That chart does not exist')
     self.url = self.base_url + str(number)
     raw = requests.get(self.url)
     soup = Soupy(raw.text)
     tr_container = soup.find_all('tr', {'class': 'latc_song'})
     pos = 0
     # Positions of the title and artist links within a cell's link list.
     song_title_constant = 2
     song_artist_constant = 3
     for table_row in tr_container:
         children = table_row.children
         # Type of the link lookup on the row's first cell; cells whose
         # lookup has the same type are skipped.
         null_container_holder = type(children[0].find('table').find_all('a'))
         for child in children:
             links = child.find('table').find_all('a')
             if type(links) is null_container_holder:
                 continue
             try:
                 # Incremented before extraction, so a NullValueError
                 # below still consumes a position number.
                 pos = pos + 1
                 song_title = links[song_title_constant].contents.first().val().string
                 song_artist = links[song_artist_constant].contents.first().val().string
                 self.chart_list.append((('position', pos), ('title', song_title), ('artist', song_artist)))
             except NullValueError:
                 # ``except NullValueError, NameError:`` (Python 2 only)
                 # caught just NullValueError and overwrote the name
                 # ``NameError``; this catches the same exception cleanly.
                 print('\n')
             except NameError:
                 # Fallback: take title/artist from one link earlier.
                 song_title = links[song_title_constant - 1].contents.first().val().string
                 song_artist = links[song_artist_constant - 1].contents.first().val().string
                 self.chart_list.append((('position', pos), ('title', song_title), ('artist', song_artist)))
     return json.dumps(self.chart_list, indent=3)
Example #18
0
    def test_simple_dump(self):
        """Keyword dump gives dicts, positional dump gives 1-tuples."""
        document = Soupy('<a>1</a><a>2</a><a>3</a>')

        keyword_rows = document.find_all('a').dump(a=Q.text)
        assert keyword_rows.val() == [{'a': '1'}, {'a': '2'}, {'a': '3'}]

        positional_rows = document.find_all('a').dump(Q.text)
        assert positional_rows.val() == [('1',), ('2',), ('3',)]

        # Positional and keyword queries together should raise ValueError.
        with pytest.raises(ValueError):
            document.find('a').dump(Q.text, a=Q.text)
Example #19
0
    def test_dump_with_map(self):
        """A map(int) step in the query converts the dumped text to ints."""
        soup = Soupy('<a>1</a><a>2</a><a>3</a>')
        as_int = Q.text.map(int)

        rows = soup.find_all('a').dump(a=as_int).val()

        assert rows == [{'a': 1}, {'a': 2}, {'a': 3}]
Example #20
0
    def test_dump_with_getitem(self):
        """Queries may index into attrs with square brackets."""
        soup = Soupy('<a val="1">1</a>')
        val_attr = Q.attrs["val"]

        rows = soup.find_all('a').dump(a=val_attr).val()

        assert rows == [{'a': "1"}]
Example #21
0
    def test_simple_dump(self):
        """dump(a=Q.text) produces one {'a': text} dict per anchor."""
        soup = Soupy('<a>1</a><a>2</a><a>3</a>')

        anchors = soup.find_all('a')
        rows = anchors.dump(a=Q.text).val()

        expected = [{'a': '1'}, {'a': '2'}, {'a': '3'}]
        assert rows == expected
Example #22
0
 def setup_method(self, method):
     """Store a freshly parsed three-anchor document on ``self.node``."""
     markup = '<html><body><a>1</a><a>2</a><a>3</a></body></html>'
     self.node = Soupy(markup)
Example #23
0
    def test_prettify(self):
        """prettify() on the wrapper equals prettify() on the wrapped value."""
        wrapped = Soupy('<html>∂ƒ</html>')

        from_wrapper = wrapped.prettify()
        from_value = wrapped.val().prettify()

        assert from_wrapper == from_value
Example #24
0
 def test_call(self):
     """Calling the wrapper matches calling the wrapped value."""
     wrapped = Soupy('<a class="test">test</a>')
     via_wrapper = wrapped('a').val()
     via_value = wrapped.val()('a')
     assert via_wrapper == via_value
Example #25
0
 def test_iter(self):
     """Iterating the wrapper yields wrappers around the value's items."""
     wrapped = Soupy('<a class="test">test</a>')
     pairs = zip(wrapped, wrapped.val())
     for item, raw in pairs:
         assert item.val() == raw
Example #26
0
    def test_dump_with_map(self):
        """Dumped text passes through int when the query ends in map(int)."""
        document = Soupy('<a>1</a><a>2</a><a>3</a>')

        dumped = document.find_all('a').dump(
            a=Q.text.map(int),
        )
        expected = [{'a': 1}, {'a': 2}, {'a': 3}]
        assert dumped.val() == expected
Example #27
0
    def test_dump_with_getitem(self):
        """Item access on Q.attrs is usable inside a dump() query."""
        document = Soupy('<a val="1">1</a>')

        dumped = document.find_all('a').dump(
            a=Q.attrs["val"],
        )
        expected = [{'a': "1"}]
        assert dumped.val() == expected
Example #28
0
    def test_dump_with_method(self):
        """A find() that misses falls back to orelse('') in every row."""
        document = Soupy('<a>1</a><a>2</a><a>3</a>')
        b_or_blank = Q.find('b').orelse('')

        dumped = document.find_all('a').dump(a=b_or_blank)

        expected = [{'a': ''}, {'a': ''}, {'a': ''}]
        assert dumped.val() == expected
Example #29
0
    def test_simple_dump(self):
        """Each anchor's text is dumped under the key 'a'."""
        document = Soupy('<a>1</a><a>2</a><a>3</a>')

        dumped = document.find_all('a').dump(
            a=Q.text,
        )
        expected = [{'a': '1'}, {'a': '2'}, {'a': '3'}]
        assert dumped.val() == expected
Example #30
0
 def test_scalar_properties(self, attr):
     """Property ``attr`` on the wrapper unwraps to the value's ``attr``."""
     wrapped = Soupy('<a class="foo"><b><c>test</c></b></a>').find('c')
     raw = wrapped.val()
     from_wrapper = getattr(wrapped, attr).val()
     assert from_wrapper == getattr(raw, attr)
Example #31
0
 def test_collection_properties(self, attr):
     """Collection property ``attr`` lists the same items as on the value."""
     wrapped = Soupy('<a class="foo"><b><c>test</c></b></a>').find('b')
     raw = wrapped.val()
     from_wrapper = list(getattr(wrapped, attr).val())
     from_value = list(getattr(raw, attr))
     assert from_wrapper == from_value
Example #32
0
def return_all_links(url):
    """Return the ``a`` tags of the page at ``url`` accepted by is_tag_not_anchor."""
    page = Soupy(download(url))
    kept = []
    for link in page.find_all('a'):
        if is_tag_not_anchor(link):
            kept.append(link)
    return kept
Example #33
0
    def test_repr_unicode(self):
        """Printing a Collection holding non-ASCII markup must not raise."""
        members = [Soupy('<html>∂ƒ</html>')]
        collection = Collection(members)

        print(collection)
        print(repr(collection))
        print(text_type(collection))
Example #34
0
    def test_dump_with_multi_map(self):
        """Chained map() steps are applied in order inside dump()."""
        soup = Soupy('<a>1</a><a>2</a><a>3</a>')
        doubled_int = Q.text.map(int).map(lambda x: x * 2)

        rows = soup.find_all('a').dump(a=doubled_int).val()

        assert rows == [{'a': 2}, {'a': 4}, {'a': 6}]
Example #35
0
 def test_iter(self):
     """Each item from iterating the wrapper unwraps to the value's item."""
     document = Soupy('<a class="test">test</a>')
     underlying = document.val()
     for wrapped_item, raw_item in zip(document, underlying):
         unwrapped = wrapped_item.val()
         assert unwrapped == raw_item
Example #36
0
class TestCollection(object):
    """Tests for the Collection wrapper and its NullCollection counterpart."""

    def setup_method(self, method):
        """Parse a small three-anchor document into ``self.node``."""
        self.node = Soupy('<html><body><a>1</a><a>2</a><a>3</a></body></html>')

    def test_slice(self):
        """Slicing a collection yields another Collection of the same items."""
        node = self.node
        dom = node.val()

        assert isinstance(node.children[::2], Collection)
        assert node.children[::2].val() == list(dom.children)[::2]

    def test_slice_on_iterator(self):
        """Slicing also works when the collection is built from a generator."""
        c = Collection((Scalar(i) for i in range(5)))
        assert c[::2].val() == [0, 2, 4]

    def test_get_single(self):
        """Integer indexing returns the matching child."""
        node = self.node.find('body')
        dom = node.val()
        assert node.children[1].val() == dom.contents[1]

    def test_get_single_on_iterator(self):
        """Integer indexing works on a generator-backed collection."""
        c = Collection((Scalar(i) for i in range(5)))
        assert c[2].val() == 2

    def test_map(self):
        """map() applies the function to the collection as a whole."""
        node = self.node
        assert node.find_all('a').map(len).val() == 3

    def test_first(self):
        """first() returns the first element."""
        node = self.node
        assert node.find_all('a').first().text.val() == '1'

    def test_first_empty(self):
        """first() on an empty result is a NullNode."""
        node = self.node
        assert isinstance(node.find_all('x').first(), NullNode)

    def test_each(self):
        """each() applies a query to every element."""
        node = self.node
        result = node.find_all('a').each(Q.text.map(int)).val()
        assert result == [1, 2, 3]

    def test_multi_each(self):
        """each() with several queries yields one tuple per element."""
        node = self.node
        result = node.find_all('a').each(Q.text.map(int), Q.text).val()
        assert result == [(1, '1'), (2, '2'), (3, '3')]

    def test_filter(self):
        """filter() keeps the elements matching the predicate."""
        node = self.node
        result = node.find_all('a').filter(Q.text.map(int) > 1).val()
        assert len(result) == 2

    # Formerly a second ``test_filter``; the duplicate name replaced the
    # method above, so the filter() test was never collected or run.
    def test_exclude(self):
        """exclude() drops the elements matching the predicate."""
        node = self.node
        result = node.find_all('a').exclude(Q.text.map(int) > 1).val()
        assert len(result) == 1

    def test_filter_noarg(self):
        """filter() with no argument keeps the truthy elements."""
        node = self.node
        result = node.find_all('a').each(Q.text.map(int) > 1).filter().val()
        assert len(result) == 2

    def test_takewhile(self):
        """takewhile() keeps the leading run that satisfies the predicate."""
        node = self.node
        result = node.find_all('a').takewhile(Q.text.map(int) < 2).val()
        assert len(result) == 1

    def test_dropwhile(self):
        """dropwhile() discards the leading run that satisfies the predicate."""
        node = self.node

        result = node.find_all('a').dropwhile(Q.text.map(int) < 2).val()
        assert len(result) == 2

    def test_index_oob(self):
        """An out-of-range index gives a NullNode rather than raising."""
        assert isinstance(Collection([])[5], NullNode)

    def test_bool(self):
        """A collection is truthy exactly when it has elements."""

        assert Collection([Scalar(1)])
        assert not Collection([])

    def test_count(self):
        """count() reports the number of elements, 0 for NullCollection."""

        assert Collection([]).count().val() == 0
        assert Collection([Scalar(1)]).count().val() == 1
        assert NullCollection().count().val() == 0

    def test_repr_unicode(self):
        """Printing a collection holding non-ASCII markup must not raise."""

        s = Collection([Soupy('<html>∂ƒ</html>')])
        print(s)
        print(repr(s))
        print(text_type(s))

    def test_zip(self):
        """zip() pairs two collections and rejects a plain list of ints."""

        c1 = Collection(map(Scalar, [1, 2, 3]))
        c2 = c1.each(Q * 2)
        c3 = c1.zip(c2)

        assert c3.val() == [(1, 2), (2, 4), (3, 6)]

        with pytest.raises(ValueError):
            c1.zip([1, 2])

    def test_dictzip(self):
        """dictzip() accepts labels as a plain list or as a Collection."""

        c = Collection([Scalar(1), Scalar(2)])
        result = c.dictzip(['a', 'b'])
        expected = {'a': 1, 'b': 2}

        assert result.val() == expected

        lbls = Collection([Scalar('a'), Scalar('b')])
        assert c.dictzip(lbls).val() == expected

    def test_list(self):
        """list() over a collection returns the wrapped items themselves."""

        items = list(map(Scalar, [1, 2]))
        assert list(Collection(items)) == items

    def test_typecheck(self):
        """ Collections must contain wrappers """
        with pytest.raises(TypeError):
            Collection([1])

    def test_all(self):
        """any(), all() and none() for mixed, true, false and empty input."""

        c = Collection(map(Scalar, [True, False]))
        assert c.any().val()
        assert not c.all().val()
        assert not c.none().val()

        c = Collection(map(Scalar, [True, True]))
        assert c.any().val()
        assert c.all().val()
        assert not c.none().val()

        c = Collection(map(Scalar, [False, False]))
        assert not c.any().val()
        assert not c.all().val()
        assert c.none().val()

        c = Collection([])
        assert not c.any().val()
        assert c.none().val()
        assert c.all().val()  # this is python's behavior for empty lists
Example #37
0
 def test_nonnull_returns_self(self):
     """nonnull() on a Soupy wrapper compares equal to the wrapper."""
     wrapped = Soupy('')
     checked = wrapped.nonnull()
     assert checked == wrapped
Example #38
0
    def test_failed_search(self):
        """A dump query with no fallback raises NullValueError on a miss."""
        document = Soupy('<a><b>1</b></a><a>2</a>')

        with pytest.raises(NullValueError):
            # Only the first <a> holds a <b>.
            anchors = document.find_all('a')
            anchors.dump(a=Q.find('b').text)
Example #39
0
class TestCollection(object):
    """Tests covering Collection behaviour (plus NullCollection.count)."""

    def setup_method(self, method):
        """Store a parsed three-anchor document on ``self.node``."""
        self.node = Soupy('<html><body><a>1</a><a>2</a><a>3</a></body></html>')

    def test_slice(self):
        """A slice of a collection is itself a Collection."""
        node = self.node
        dom = node.val()

        assert isinstance(node.children[::2], Collection)
        assert node.children[::2].val() == list(dom.children)[::2]

    def test_slice_on_iterator(self):
        """A generator-backed collection can be sliced."""
        c = Collection((Scalar(i) for i in range(5)))
        assert c[::2].val() == [0, 2, 4]

    def test_get_single(self):
        """Indexing with an int returns that child."""
        node = self.node.find('body')
        dom = node.val()
        assert node.children[1].val() == dom.contents[1]

    def test_get_single_on_iterator(self):
        """A generator-backed collection can be indexed."""
        c = Collection((Scalar(i) for i in range(5)))
        assert c[2].val() == 2

    def test_map(self):
        """map() hands the whole collection to the function."""
        node = self.node
        assert node.find_all('a').map(len).val() == 3

    def test_first(self):
        """first() gives the leading element."""
        node = self.node
        assert node.find_all('a').first().text.val() == '1'

    def test_first_empty(self):
        """first() on an empty search result is a NullNode."""
        node = self.node
        assert isinstance(node.find_all('x').first(), NullNode)

    def test_each(self):
        """each() evaluates the query once per element."""
        node = self.node
        result = node.find_all('a').each(Q.text.map(int)).val()
        assert result == [1, 2, 3]

    def test_multi_each(self):
        """each() with two queries returns a pair per element."""
        node = self.node
        result = node.find_all('a').each(Q.text.map(int),
                                         Q.text).val()
        assert result == [(1, '1'), (2, '2'), (3, '3')]

    def test_filter(self):
        """filter() retains elements for which the predicate holds."""
        node = self.node
        result = node.find_all('a').filter(Q.text.map(int) > 1).val()
        assert len(result) == 2

    # This was a second method named ``test_filter``; it overrode the one
    # above, so filter() itself went untested.
    def test_exclude(self):
        """exclude() removes elements for which the predicate holds."""
        node = self.node
        result = node.find_all('a').exclude(Q.text.map(int) > 1).val()
        assert len(result) == 1

    def test_filter_noarg(self):
        """Argument-less filter() keeps the truthy elements."""
        node = self.node
        result = node.find_all('a').each(Q.text.map(int) > 1).filter().val()
        assert len(result) == 2

    def test_takewhile(self):
        """takewhile() stops at the first element failing the predicate."""
        node = self.node
        result = node.find_all('a').takewhile(Q.text.map(int) < 2).val()
        assert len(result) == 1

    def test_dropwhile(self):
        """dropwhile() skips elements until the predicate first fails."""
        node = self.node

        result = node.find_all('a').dropwhile(Q.text.map(int) < 2).val()
        assert len(result) == 2

    def test_index_oob(self):
        """Indexing past the end returns a NullNode."""
        assert isinstance(Collection([])[5], NullNode)

    def test_bool(self):
        """Non-empty collections are truthy, empty ones falsy."""

        assert Collection([Scalar(1)])
        assert not Collection([])

    def test_count(self):
        """count() wraps the element count; NullCollection counts as 0."""

        assert Collection([]).count().val() == 0
        assert Collection([Scalar(1)]).count().val() == 1
        assert NullCollection().count().val() == 0

    def test_repr_unicode(self):
        """str, repr and text conversion cope with non-ASCII contents."""

        s = Collection([Soupy('<html>∂ƒ</html>')])
        print(s)
        print(repr(s))
        print(text_type(s))

    def test_zip(self):
        """zip() pairs up two collections; a list of ints raises ValueError."""

        c1 = Collection(map(Scalar, [1, 2, 3]))
        c2 = c1.each(Q * 2)
        c3 = c1.zip(c2)

        assert c3.val() == [(1, 2), (2, 4), (3, 6)]

        with pytest.raises(ValueError):
            c1.zip([1, 2])

    def test_dictzip(self):
        """dictzip() builds a dict from labels given as list or Collection."""

        c = Collection([Scalar(1), Scalar(2)])
        result = c.dictzip(['a', 'b'])
        expected = {'a': 1, 'b': 2}

        assert result.val() == expected

        lbls = Collection([Scalar('a'), Scalar('b')])
        assert c.dictzip(lbls).val() == expected

    def test_list(self):
        """Iterating a collection yields the original wrapped items."""

        items = list(map(Scalar, [1, 2]))
        assert list(Collection(items)) == items

    def test_typecheck(self):
        """ Collections must contain wrappers """
        with pytest.raises(TypeError):
            Collection([1])

    def test_all(self):
        """Check any(), all() and none() on mixed, true, false and empty data."""

        c = Collection(map(Scalar, [True, False]))
        assert c.any().val()
        assert not c.all().val()
        assert not c.none().val()

        c = Collection(map(Scalar, [True, True]))
        assert c.any().val()
        assert c.all().val()
        assert not c.none().val()

        c = Collection(map(Scalar, [False, False]))
        assert not c.any().val()
        assert not c.all().val()
        assert c.none().val()

        c = Collection([])
        assert not c.any().val()
        assert c.none().val()
        assert c.all().val()  # this is python's behavior for empty lists
Example #40
0
    def test_prettify(self):
        """The wrapper's prettify() output matches the wrapped value's."""
        document = Soupy('<html>∂ƒ</html>')
        pretty = document.prettify()
        assert pretty == document.val().prettify()
Example #41
0
 def test_collection_properties(self, attr):
     """``attr`` on the wrapper and on its value list out identically."""
     b_node = Soupy('<a class="foo"><b><c>test</c></b></a>').find('b')
     underlying = b_node.val()
     wrapped_items = list(getattr(b_node, attr).val())
     assert wrapped_items == list(getattr(underlying, attr))
Example #42
0
    def test_either_fallback(self):
        """either() yields a Null when both alternatives fail."""
        soup = Soupy('<a><b>1</b></a><a>2</a>')
        # Neither query can match: the markup has no <d>.
        first_try = Q.find('d').text
        second_try = Q.find('d').text

        outcome = soup.apply(either(first_try, second_try))

        assert isinstance(outcome, Null)
Example #43
0
    def test_dump_with_method(self):
        """Method calls inside a query work, with orelse('') as fallback."""
        soup = Soupy('<a>1</a><a>2</a><a>3</a>')

        anchors = soup.find_all('a')
        rows = anchors.dump(a=Q.find('b').orelse('')).val()

        blank = {'a': ''}
        assert rows == [blank, blank, blank]
Example #44
0
 def test_nonnull_returns_self(self):
     """The result of nonnull() equals the wrapper it was called on."""
     document = Soupy('')
     result = document.nonnull()
     assert result == document
Example #45
0
 def test_scalar_properties(self, attr):
     """``attr`` read through the wrapper equals ``attr`` on the value."""
     c_node = Soupy('<a class="foo"><b><c>test</c></b></a>').find('c')
     underlying = c_node.val()
     wrapped_attr = getattr(c_node, attr)
     assert wrapped_attr.val() == getattr(underlying, attr)
Example #46
0
 def test_find_multi_fail(self, method):
     """A multi-result finder with no matches has length zero."""
     wrapped = Soupy('<a class="test">test</a>')
     finder = getattr(wrapped, method)
     matches = finder('b')
     assert len(matches) == 0
Example #47
0
    def test_dump_with_multi_map(self):
        """Two map() steps in a row convert to int and then double."""
        document = Soupy('<a>1</a><a>2</a><a>3</a>')

        dumped = document.find_all('a').dump(
            a=Q.text.map(int).map(lambda x: x * 2),
        )
        expected = [{'a': 2}, {'a': 4}, {'a': 6}]
        assert dumped.val() == expected
Example #48
0
 def test_call(self):
     """Wrapper call syntax gives the same result as the value's call."""
     document = Soupy('<a class="test">test</a>')
     wrapped_result = document('a')
     assert wrapped_result.val() == document.val()('a')
Example #49
0
 def test_node_properties(self, attr):
     """Node property ``attr`` unwraps to the same attribute on the value."""
     wrapped = Soupy('<b><d></d><c>test</c><d></d></b>').find('c')
     raw = wrapped.val()
     from_wrapper = getattr(wrapped, attr).val()
     assert from_wrapper == getattr(raw, attr)
Example #50
0
 def setup_method(self, method):
     """Parse the shared three-anchor markup and keep it as ``self.node``."""
     document = Soupy('<html><body><a>1</a><a>2</a><a>3</a></body></html>')
     self.node = document
Example #51
0
    def test_either(self):
        """either() falls through to the second query when the first fails."""
        soup = Soupy('<a><b>1</b></a><a>2</a>')
        missing = Q.find('c').text
        present = Q.find('b').text

        outcome = soup.apply(either(missing, present))

        assert outcome.val() == '1'
Example #52
0
    def test_either(self):
        """The second alternative supplies the value when the first misses."""
        document = Soupy('<a><b>1</b></a><a>2</a>')

        # No <c> exists, so the <b> query is the one that produces '1'.
        chooser = either(Q.find('c').text, Q.find('b').text)
        chosen = document.apply(chooser).val()

        assert chosen == '1'
Example #53
0
 def test_node_properties(self, attr):
     """``attr`` via the wrapper matches ``attr`` read from the value."""
     c_node = Soupy('<b><d></d><c>test</c><d></d></b>').find('c')
     underlying = c_node.val()
     wrapped_attr = getattr(c_node, attr)
     assert wrapped_attr.val() == getattr(underlying, attr)
Example #54
0
    def test_either_fallback(self):
        """When neither alternative matches, apply() returns a Null."""
        document = Soupy('<a><b>1</b></a><a>2</a>')

        # Both alternatives look for a <d>, which the markup lacks.
        chooser = either(Q.find('d').text, Q.find('d').text)
        result = document.apply(chooser)

        assert isinstance(result, Null)
Example #55
0
    def test_repr_unicode(self):
        """Printing a wrapper around non-ASCII markup must not raise."""
        wrapped = Soupy('<html>∂ƒ</html>')

        print(wrapped)
        print(repr(wrapped))
        print(text_type(wrapped))
Example #56
0
 def test_find_single_fail(self, method):
     """A single-result finder that misses returns a NullNode."""
     wrapped = Soupy('<a class="test">test</a>')
     finder = getattr(wrapped, method)
     outcome = finder('b')
     assert isinstance(outcome, NullNode)