Example #1
0
    def parse_product(self, response):
        """Parse a response as a product page, or follow its links.

        Every CSS selector in ``self.selectors`` is applied to the parsed
        document and each match is stored on a new ``Product`` under the
        selector's key.  If both ``name`` and ``number`` were found, the
        page is treated as a product page: its URL, pricing and discount
        code are added and the item is yielded.  Otherwise a ``Request`` is
        yielded for every link returned by ``self.link_extractor``.
        """
        soup = BeautifulSoup(response.body, 'lxml')

        p = Product()

        for element, path in self.selectors.viewitems():
            node = soup.select_one(path)

            if node is None:
                continue
            if element == 'image':
                # The matched node may lack a ``src`` attribute; skip the
                # image instead of letting a KeyError abort the whole page.
                src = node.get('src')
                if src is not None:
                    p[element] = url_fix(urljoin(response.url, src))
            else:
                p[element] = text(node)

        if 'name' in p and 'number' in p:
            p['url'] = response.url
            p['pricing'], p['discountcode'] = get_prices(soup)
            # Release the parse tree before handing the item downstream
            soup.decompose()
            yield p
        else:
            # Only follow links on non-product pages
            soup.decompose()
            for link in self.link_extractor.extract_links(response):
                yield Request(url=link.url)
Example #2
0
def get_discount_code(tags, depth=3):
    """Find a discount code by expanding the search outwards.

    The text of each tag is searched with ``discount_re``.  When a tag does
    not match, its parent is searched instead, climbing at most ``depth``
    levels before moving on to the next tag.

    Returns the first captured group of the first match, ASCII-encoded with
    unencodable characters dropped, or ``None`` when nothing matches.
    """
    for tag in tags:
        matches = discount_re.search(text(tag))

        if matches:
            return matches.group(1).encode('ascii', errors='ignore')
        elif tag.parent and depth:
            code = get_discount_code([tag.parent], depth - 1)
            if code:
                # The recursive call has already encoded the code; encoding
                # it a second time is redundant (and fails on bytes).
                return code
Example #3
0
def extract_price_info(soup):
    """Extract price tables and a discount code from a parsed document.

    Price cells are located with the ``row_price`` filter, falling back to
    ``column_price`` when nothing matches; in the fallback case the
    resulting matrices are transposed.  Each enclosing ``<table>`` is
    converted to a matrix holding ``text()`` of every child of every
    ``<tr>``.

    Returns a ``(matrices, discount_code)`` tuple.  The located tables are
    passed to ``decompose_all`` before returning.
    """
    # Find price tables by searching upwards from price cells
    price_cells = soup.find_all(row_price)
    transpose = False

    if not price_cells:
        price_cells = soup.find_all(column_price)
        transpose = True

    # Remove duplicates while preserving order.  Cells that are not inside a
    # table are skipped, and a table is kept only once even when cells of
    # other tables appear between its own cells.
    dom_tables = []
    for cell in price_cells:
        table = cell.find_parent('table')
        if table is not None and table not in dom_tables:
            dom_tables.append(table)

    # Convert dom tables to matrices and transpose if necessary
    matrices = [[[text(c) for c in r] for r in t('tr')] for t in dom_tables]

    if transpose:
        matrices = [list(izip_longest(*m)) for m in matrices]

    # Look for the discount code while the tables are still intact
    discount_code = get_discount_code(dom_tables)
    decompose_all(dom_tables)
    return (matrices, discount_code)
Example #4
0
def quant_str(string):
    """Return the quantity found in ``string`` as an ASCII string.

    ``quantity_re`` is searched in ``text(string)``.  Commas and plus signs
    are removed from the first captured group, which is then ASCII-encoded
    with unencodable characters dropped.  Returns ``None`` when there is no
    match.
    """
    matches = quantity_re.search(text(string))
    if matches:
        # Raw-string pattern avoids the invalid ``\+`` escape.  Stripping
        # before encoding yields the same result and keeps the pattern and
        # its subject the same string type.
        quantity = re.sub(r'[,+]', '', matches.group(1))
        return quantity.encode('ascii', errors='ignore')
Example #5
0
def price_str(string):
    """Return the price found in ``string`` as an ASCII string.

    ``price_re`` is searched in ``text(string)``; the first captured group
    is ASCII-encoded with unencodable characters dropped.  Returns ``None``
    when there is no match.
    """
    found = price_re.search(text(string))
    if not found:
        return None

    raw_price = found.group(1)
    return raw_price.encode('ascii', errors='ignore')