Example #1
0
    def parseData(self, content, url):
        """Extract phone listings from a thegioididong product page.

        Args:
            content: Parsed page object; it is searched with ``find`` for
                the ``ul`` element whose class is ``homeproduct``.
            url: Address of the page. Used as the base for resolving each
                product link and echoed in the progress output.

        Returns:
            A list of ``PhoneData`` objects, one per product entry that
            could be parsed. Entries that fail are reported on stdout and
            skipped.

        Raises:
            NoProductFoundException: If the product list is missing from
                the page or holds no ``li`` entries.
        """
        listMobile = []
        listProduct = content.find('ul', attrs={'class': 'homeproduct'})
        # find() yields None when the container is absent; report that as
        # "no products" instead of failing with AttributeError on findAll.
        if listProduct is None:
            raise NoProductFoundException
        entries = listProduct.findAll('li')
        if not entries:
            raise NoProductFoundException
        for entry in entries:
            link = entry.find('a', href=True)
            if link is None:
                # No anchor with an href in this entry, so there is nothing
                # to parse (presumably a non-product item — TODO confirm).
                continue
            # Best effort per entry: one malformed product must not abort
            # the rest of the page.
            try:
                image_html = ScrapEngine.hideInvalidTag(
                    link.find('img'), ['strike'])
                name_html = ScrapEngine.hideInvalidTag(
                    link.find('h3'), ['strike'])
                price_html = ScrapEngine.hideInvalidTag(
                    link.find('div', attrs={'class': 'price'}),
                    ['strike', 'span'])
                # Prefer "src"; fall back to "data-original", then "NA".
                image_attrs = image_html.attrs
                image_src = image_attrs.get(
                    'src', image_attrs.get('data-original', "NA"))
                name = ScrapEngine.processString(name_html.getText(),
                                                 self.ignoreTerm)
                price = ScrapEngine.processString(price_html.getText(),
                                                  self.ignoreTerm)
                href = urljoin(url, link['href'])
                try:
                    listMobile.append(
                        PhoneData(brand=name,
                                  model="",
                                  price=price,
                                  vendor="thegioididong",
                                  info={
                                      "url": href,
                                      "img": image_src
                                  }))
                except PhoneDataInvalidException as error:
                    print("Unable to parse: " + name + ": " + price +
                          ". Error:" + str(error))
            except Exception as e:
                print("Error: " + str(e))
        print("Done with: " + url)
        print("Found {} items".format(str(len(listMobile))))
        return listMobile
Example #2
0
    def parseData(self, content, url):
        """Extract phone listings from a hoanghaMobile product page.

        Args:
            content: Parsed page object; it is searched with ``find`` for
                the ``div`` element whose class is ``product-list``.
            url: Address of the page. Used as the base for resolving each
                product link and echoed in the progress output.

        Returns:
            A list of ``PhoneData`` objects, one per ``list-item`` entry
            that could be parsed. Entries that fail are reported on stdout
            and skipped.

        Raises:
            NoProductFoundException: If the product list is missing from
                the page or holds no ``list-item`` entries.
        """
        listMobile = []
        listProduct = content.find('div', attrs={'class': 'product-list'})
        # find() yields None when the container is absent; report that as
        # "no products" instead of failing with AttributeError on findAll.
        if listProduct is None:
            raise NoProductFoundException
        allProducts = listProduct.findAll('div', attrs={'class': 'list-item'})
        if not allProducts:
            raise NoProductFoundException
        for item in allProducts:
            # Best effort per entry: the tag clean-up sits inside the try as
            # well, so an item lacking an image/name/price element is
            # skipped instead of aborting the whole page.
            try:
                image_html = ScrapEngine.hideInvalidTag(
                    item.find('img'), ['strike'])
                name_html = ScrapEngine.hideInvalidTag(
                    item.find('div', attrs={'class': 'product-name'}),
                    ['strike'])
                price_html = ScrapEngine.hideInvalidTag(
                    item.find('div', attrs={'class': 'product-price'}),
                    ['strike'])
                image_src = image_html['src']
                name = ScrapEngine.processString(name_html.getText(),
                                                 self.ignoreTerm)
                price = ScrapEngine.processString(price_html.getText(),
                                                  self.ignoreTerm)
                # The product link is the anchor inside the name element.
                link = name_html.find('a', href=True)
                href = urljoin(url, link['href'])
                try:
                    listMobile.append(
                        PhoneData(brand=name,
                                  model="",
                                  price=price,
                                  vendor="hoanghaMobile",
                                  info={
                                      "url": href,
                                      "img": image_src
                                  }))
                except PhoneDataInvalidException as error:
                    print("Unable to parse: " + name + ": " + price +
                          ". Error:" + str(error))
            except Exception as e:
                print("Error: " + str(e))
        print("Done with: " + url)
        return listMobile
Example #3
0
    def test_hideTag(self):
        """Check that ScrapEngine.hideInvalidTag strips the listed tags.

        First runs against the testWebsite.html fixture and expects no
        ``a`` or ``li`` elements to remain while ``div`` elements survive;
        then runs against an inline price fragment and expects its
        ``strike`` element to be gone.
        """
        tag = ['a', 'li', 'strike']
        url = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                           os.pardir, "testdata", "testWebsite.html")
        # Context manager so the fixture file handle is closed even when
        # parsing or an assertion below fails.
        with open(url, encoding='utf8') as content:
            soup = BeautifulSoup(content.read(), features="html.parser")
        result = ScrapEngine.hideInvalidTag(soup, tag)
        self.assertEqual(0, len(result.findAll('a')))
        self.assertEqual(0, len(result.findAll('li')))
        # Tags that were not listed must be left in place.
        self.assertNotEqual(0, len(result.findAll('div')))

        temp = "<div class=\"product-price\"><strike>33.990.000 ₫</strike> 29.890.000 ₫</div>"
        soup = BeautifulSoup(temp, features="html.parser")
        # Sanity check: the fragment has exactly one <strike> before hiding.
        self.assertEqual(1, len(soup.findAll('strike')))
        result = ScrapEngine.hideInvalidTag(soup, tag)
        self.assertEqual(0, len(result.findAll('strike')))