コード例 #1
0
    def parseData(self, content, url):
        """Extract phone listings from a thegioididong product page.

        Args:
            content: Parsed page exposing ``find``/``findAll`` (presumably a
                BeautifulSoup document -- TODO confirm against the caller).
            url: Address of the page; used to resolve relative product links
                and echoed in the progress output.

        Returns:
            A list of ``PhoneData`` objects, one per product that could be
            parsed. Products that fail to parse are reported on stdout and
            skipped.

        Raises:
            NoProductFoundException: If the ``ul`` element with class
                ``homeproduct`` is missing or contains no ``li`` entries.
        """
        listMobile = []
        listProduct = content.find('ul', attrs={'class': 'homeproduct'})
        # A page without the product container has no products; signal it the
        # same way as an empty list instead of failing on ``None.findAll``.
        if listProduct is None:
            raise NoProductFoundException
        # An ``li`` without a link yields ``None`` here; the per-item handler
        # below reports and skips it.
        allProducts = [
            item.find('a', href=True) for item in listProduct.findAll('li')
        ]
        if not allProducts:
            raise NoProductFoundException
        for a in allProducts:
            try:
                image_html = ScrapEngine.hideInvalidTag(
                    a.find('img'), ['strike'])
                name_html = ScrapEngine.hideInvalidTag(a.find('h3'),
                                                       ['strike'])
                price_html = ScrapEngine.hideInvalidTag(
                    a.find('div', attrs={'class': 'price'}),
                    ['strike', 'span'])
                # Prefer the plain ``src``; fall back to ``data-original``
                # and finally to the "NA" placeholder.
                image_src = "NA"
                if 'src' in image_html.attrs:
                    image_src = image_html['src']
                elif 'data-original' in image_html.attrs:
                    image_src = image_html['data-original']
                name = ScrapEngine.processString(name_html.getText(),
                                                 self.ignoreTerm)
                price = ScrapEngine.processString(price_html.getText(),
                                                  self.ignoreTerm)
                href = urljoin(url, a['href'])
                try:
                    # The whole cleaned name is stored as the brand; the
                    # model is left empty.
                    listMobile.append(
                        PhoneData(brand=name,
                                  model="",
                                  price=price,
                                  vendor="thegioididong",
                                  info={
                                      "url": href,
                                      "img": image_src
                                  }))
                except PhoneDataInvalidException as error:
                    print("Unable to parse: " + name + ": " + price +
                          ". Error:" + str(error))
            except Exception as e:
                # Best effort: one malformed product must not abort the page.
                print("Error: " + str(e))
        print("Done with: " + url)
        print("Found {} items".format(len(listMobile)))
        return listMobile
コード例 #2
0
    def parseData(self, content, url):
        """Extract phone listings from a hoanghaMobile product page.

        Args:
            content: Parsed page exposing ``find``/``findAll`` (presumably a
                BeautifulSoup document -- TODO confirm against the caller).
            url: Address of the page; used to resolve relative product links
                and echoed in the progress output.

        Returns:
            A list of ``PhoneData`` objects, one per product that could be
            parsed. Products that fail to parse are reported on stdout and
            skipped.

        Raises:
            NoProductFoundException: If the ``div`` with class
                ``product-list`` is missing or contains no ``div`` with
                class ``list-item``.
        """
        listMobile = []
        listProduct = content.find('div', attrs={'class': 'product-list'})
        # A page without the product container has no products; signal it the
        # same way as an empty list instead of failing on ``None.findAll``.
        if listProduct is None:
            raise NoProductFoundException
        allProducts = listProduct.findAll('div', attrs={'class': 'list-item'})
        if not allProducts:
            raise NoProductFoundException
        for a in allProducts:
            try:
                # Tag lookups live inside the ``try`` so that a single
                # malformed item is reported and skipped rather than
                # aborting the whole page.
                image_html = ScrapEngine.hideInvalidTag(
                    a.find('img'), ['strike'])
                name_html = ScrapEngine.hideInvalidTag(
                    a.find('div', attrs={'class': 'product-name'}),
                    ['strike'])
                price_html = ScrapEngine.hideInvalidTag(
                    a.find('div', attrs={'class': 'product-price'}),
                    ['strike'])
                image_src = image_html['src']
                name = ScrapEngine.processString(name_html.getText(),
                                                 self.ignoreTerm)
                price = ScrapEngine.processString(price_html.getText(),
                                                  self.ignoreTerm)
                # The product link is the anchor inside the name element.
                link = name_html.find('a', href=True)
                href = urljoin(url, link['href'])
                try:
                    # The whole cleaned name is stored as the brand; the
                    # model is left empty.
                    listMobile.append(
                        PhoneData(brand=name,
                                  model="",
                                  price=price,
                                  vendor="hoanghaMobile",
                                  info={
                                      "url": href,
                                      "img": image_src
                                  }))
                except PhoneDataInvalidException as error:
                    print("Unable to parse: " + name + ": " + price +
                          ". Error:" + str(error))
            except Exception as e:
                # Best effort: one malformed product must not abort the page.
                print("Error: " + str(e))
        print("Done with: " + url)
        return listMobile
コード例 #3
0
 def test_processString(self):
     """Check processString on padded text containing ignored symbols."""
     # One ignore term per character, in the same order as a literal list.
     ignored_symbols = list("!@#$%^&*)(")
     raw_text = "     hello world 213 #$%@*^)@    "
     cleaned = ScrapEngine.processString(raw_text, ignored_symbols)
     self.assertEqual("hello world 213", cleaned)