Ejemplos de _stripper en Python, ejemplos de helpers._stripper en Python

Ejemplo n.º 1

0

Mostrar archivo

Archivo: wishlist.py Proyecto: tonylaw7/Amazon-Wishlist

    def authors(self):
        """Return the byline (author, co-writer or manufacturer) of every item.

        Each ``div`` with class ``pTitle`` on the page is inspected. When its
        serialized markup contains a ``span``, every text node found under the
        ``small itemByline`` span is cleaned up (``~`` removed, a leading
        "by"-style preposition dropped) and appended to the result. When it
        contains no ``span`` an empty string is appended as a placeholder.
        Entries equal to ``DVD`` or ``VHS`` are discarded at the end.

        >>> authors = wishlist.authors()
        """
        ret = []
        # Leading prepositions that introduce a byline and are stripped off.
        prefixes = ('de ', 'di ', 'by ', 'von ')
        for title_div in self.page.xpath("//div[@class='pTitle']"):
            subtree = helpers.tostring(title_div,
                                       encoding='unicode',
                                       method='html',
                                       pretty_print=True)
            if 'span' not in subtree:
                # u'' (not ur'') is valid on both Python 2 and Python 3.3+.
                ret.append(u'')
                continue

            # Re-parse the serialized fragment so the absolute XPath below
            # only sees this item's markup, not the whole page.
            parser = helpers.etree.HTMLParser()
            div = helpers.etree.fromstring(subtree, parser)
            for byline in div.xpath("//span[@class='small itemByline']//text()"):
                byline = byline.replace('~', '').strip()
                for prefix in prefixes:
                    if byline.startswith(prefix):
                        # Cut exactly the matched prefix instead of a fixed
                        # three characters ('von ' is four characters long).
                        byline = byline[len(prefix):].strip()
                        break
                ret.append(helpers._stripper(byline))

        # Media-format labels are picked up alongside bylines; drop them.
        dirt = ('DVD', 'VHS')
        return [entry for entry in ret if entry not in dirt]

Ejemplo n.º 2

0

Mostrar archivo

Archivo: profile.py Proyecto: tonylaw7/Amazon-Wishlist

    def wishlists_details(self):
        """
        Collect the code and the size of every public wishlist of a profile.

        Returns a 2-tuple of lists. The first list holds the ``id``
        attributes found under the ``regListpublicBlock`` div with the
        ``regListsList`` marker removed; the second holds the text of the
        first ``span`` of each entry (presumably the item count -- verify
        against the page markup).

        >>> details = person.wishlists_details()
        """
        raw_ids = self.page.xpath("//div[@id='profile']/div[@id='regListpublicBlock']/div/@id")
        codes = [
            helpers._stripper(raw_id.replace('regListsList', ''))
            for raw_id in raw_ids
        ]

        raw_sizes = self.page.xpath("//div[@id='profile']/div[@id='regListpublicBlock']/div/div/span[1]//text()")
        sizes = [helpers._stripper(raw_size) for raw_size in raw_sizes]

        return codes, sizes

Ejemplo n.º 3

0

Mostrar archivo

Archivo: profile.py Proyecto: tonylaw7/Amazon-Wishlist

    def basic_info(self):
        """
        Return the wishlist owner's name and, when present, a picture address.

        The result is a list: first every text node of the
        ``profile-name-Field`` cell, then (only if the profile shows an
        image) the image ``src`` with its second-to-last dot-separated
        component removed.

        >>> info = person.basic_info()
        """
        # wishlists are supposed to show a first name, so it's safe to assume it will never be null
        info = [
            helpers._stripper(text)
            for text in self.page.xpath("//td[@id='profile-name-Field']//text()")
        ]

        sources = self.page.xpath("//div[@id='profile']/div/img/@src")
        if not sources:
            return info

        # Drop the component right before the file extension
        # (presumably a size/variant marker -- TODO confirm).
        pieces = sources[0].split('.')
        address = '.'.join(pieces[:-2]) + '.' + pieces[-1]
        info.append(helpers._stripper(address))
        return info

Ejemplo n.º 4

0

Mostrar archivo

Archivo: wishlist.py Proyecto: tonylaw7/Amazon-Wishlist

    def titles(self):
        """
        Return the title of every item, including very long ones
        (like academic books or journals).

        Titles are the text nodes under the ``strong`` element of each
        ``pTitle`` div, passed through ``helpers._stripper``.

        >>> titles = wishlist.titles()
        """
        text_nodes = self.page.xpath("//div[@class='pTitle']/strong//text()")
        return [helpers._stripper(text) for text in text_nodes]

Ejemplo n.º 5

0

Mostrar archivo

Archivo: wishlist.py Proyecto: tonylaw7/Amazon-Wishlist

    def covers(self):
        """Return the picture address of every item (e.g. book covers, album art).

        Each ``src`` found under a ``pImage`` div has its second-to-last
        dot-separated component removed before being stripped and collected.

        >>> covers = wishlist.covers()
        """
        sources = self.page.xpath(
            "//div/form/table/tbody[*]/tr[*]/td[*]/div[@class='pImage']/img/@src"
        )
        # Work on the dot-separated pieces of every address.
        split_sources = (src.split('.') for src in sources)
        return [
            helpers._stripper('.'.join(pieces[:-2]) + '.' + pieces[-1])
            for pieces in split_sources
        ]

Ejemplo n.º 6

0

Mostrar archivo

Archivo: wishlist.py Proyecto: tonylaw7/Amazon-Wishlist

    def via(self):
        """
        Return the sorted, de-duplicated original web pages from which wished
        items were pulled, only for Universal items not sold by Amazon directly.

        >>> via = wishlist.via()
        """
        text_nodes = self.page.xpath(
            "//div/form/table/tbody[*]/tr[*]/td[*]/strong[2]//text()")
        # A set removes duplicates; sorted() turns it back into an ordered list.
        unique_sources = set(helpers._stripper(text) for text in text_nodes)
        return sorted(unique_sources)

Ejemplo n.º 7

0

Mostrar archivo

Archivo: wishlist.py Proyecto: tonylaw7/Amazon-Wishlist

    def ideas(self):
        """Return the titles of "idea" entries, i.e. reminders to shop for later.

        Titles and prices are paired positionally; a title is kept when its
        price column equals one of the known "Idea" labels below.

        >>> ideas = wishlist.ideas()
        """
        # The original literals were raw unicode strings (ur'...'), in which
        # ``\\x..`` escapes are NOT interpreted, so the non-ASCII labels held
        # literal backslashes and could never match page text (and ``ur`` is
        # a syntax error on Python 3). Real characters are used instead.
        # NOTE(review): the language attribution of each label is inferred
        # from its spelling -- confirm against the actual stores.
        idea_labels = (
            u'Idea',
            u'Idee',
            u'Id\xe9e',         # "Idee" with e-acute
            u'\u89c2\u5ff5',    # the two-character CJK label
            u'Id\xe9ia',        # "Ideia" with e-acute
            # Same labels as they appear when UTF-8 bytes were decoded as
            # Latin-1; kept in case the page is parsed without the right
            # encoding -- TODO confirm which form prices() really yields.
            u'Id\xc3\xa9e',
            u'\xe8\xa7\x82\xe5\xbf\xb5',
            u'Id\xc3\xa9ia',
        )
        return [
            helpers._stripper(title)
            for title, price in zip(self.titles(), self.prices())
            if price in idea_labels
        ]

Ejemplo n.º 8

0

Mostrar archivo

Archivo: search.py Proyecto: tonylaw7/Amazon-Wishlist

    def list(self):
        """
        Return a list of ``(username, wishlist_id)`` tuples for all matches.

        The wishlist ID is the main one of each user, with which you can
        get secondary lists via the Wishlist() class. An empty list is
        returned when the search produced no usable result, so the return
        value can always be iterated.

        >>> wishlists = search.list()
        >>> for row in wishlists:
        >>>     print(row)
        """
        # before pipe, page with usernames; after, single exact matches
        wishlists = self.page.xpath(
            "//td/span/a//@href | //div[@id='sortbarDisplay']/form//@action")
        names = self.page.xpath(
            "//td/span/a//text() | //h1[@class='visitor']//text()")
        names = [helpers._stripper(n) for n in names]

        # The wishlist code is the fourth '/'-separated piece of the address.
        codes = [helpers._stripper(code.split('/')[3]) for code in wishlists]

        # FIXME: hack not to return empty search results,
        # whose only anchor text is not english
        if 'tg' in codes:
            # Previously fell off the end and returned None, which broke
            # callers iterating the result as the docstring shows.
            return []

        # Build a real list explicitly: zip() is lazy on Python 3, and a
        # comprehension avoids any confusion with this method's own name.
        return [(name, code) for name, code in zip(names, codes)]

Ejemplo n.º 9

0

Mostrar archivo

Archivo: wishlist.py Proyecto: tonylaw7/Amazon-Wishlist

    def urls(self):
        """Return the detail-page address of every item in the wishlist.

        Every ``name`` attribute under an ``itemWrapper`` tbody that contains
        ``item`` contributes one entry: an address built from the store
        domain and the fourth dot-separated piece of the attribute, or an
        empty string when that piece is empty.

        >>> urls = wishlist.urls()
        """
        addresses = []
        for name in self.page.xpath("//tbody[@class='itemWrapper']//@name"):
            if 'item' not in name:
                continue
            code = name.split('.')[3]
            # Only build the address (and read self.domain) when there is a code.
            address = ('http://www.amazon' + self.domain + '/dp/' + code) if code else ''
            addresses.append(helpers._stripper(address))
        return addresses

Ejemplo n.º 10

0

Mostrar archivo

Archivo: wishlist.py Proyecto: tonylaw7/Amazon-Wishlist

    def prices(self):
        """Return the cleaned-up price tag of every item in a wishlist.

        Each matching price node is serialized to text, has the
        store-specific currency marker removed and its separators
        normalized: for EUR and BRL stores ``.`` is dropped and ``,``
        becomes ``.``; for every other store ``,`` is dropped. Nodes whose
        text contains ``At`` are skipped, and text containing ``Click`` is
        replaced by an empty string.

        >>> prices = wishlist.prices()
        """
        nodes = self.page.xpath(
            "//td[@class='pPrice'][not(text()) and not(strong)] | //td[@class='pPrice']/strong[3] | //td[@class='pPrice']/strong[1] | //td[@class='Price']/span/strong//text()"
        )

        # cleanups, every store has different price tag extras
        currency = self.currency
        if 'EUR' in currency:
            dust = 'EUR'
        elif 'CDN' in currency:
            dust = 'CDN%s' % self.symbol
        elif 'INR' in currency:
            dust = 'Rs. '
        elif 'CNY' in currency:
            dust = u'\xa5'
        elif 'BRL' in currency:
            dust = 'R%s ' % self.symbol
        elif 'JPY' in currency:
            dust = u'\x81\x8f'
        else:
            dust = self.symbol

        # These stores write 1.234,56 rather than 1,234.56.
        decimal_comma = 'EUR' in currency or 'BRL' in currency

        cleaned = []
        for node in nodes:
            text = helpers.tostring(node,
                                    encoding='unicode',
                                    method='text',
                                    pretty_print=True).strip()
            if 'At' in text:
                continue
            # TODO: how would it work out for non-english stores? quite a huge bug ahead...
            if 'Click' in text:
                text = ''
            text = text.replace(dust, '')
            if decimal_comma:
                text = text.replace('.', '').replace(',', '.')
            else:
                text = text.replace(',', '')
            cleaned.append(helpers._stripper(text))
        return cleaned