def extract_product(self):
    """Scrape every review page of the product and record its name.

    Starting at the product's review tab on ceneo.pl, each page is
    downloaded and parsed. Every ``div.js_product-review`` element is
    converted to an ``Opinion`` and appended to ``self.opinions``, then the
    ``a.pagination__next`` link is followed until a page has none.

    While ``self.product_name`` is still unknown (``None`` or the ``"none"``
    placeholder), each downloaded page is searched for the product heading.
    The placeholder ``"none"`` is stored when no heading matches.
    """
    # Candidate values for the <h1> class attribute, tried in order. They
    # differ only by the "long-name" class.
    # NOTE(review): a multi-class string passed as ``class_`` is presumably
    # matched against the whole attribute value, so both spellings are
    # needed - confirm against the BeautifulSoup documentation.
    name_heading_classes = (
        "product-top__product-info__name long-name js_product-h1-link "
        "js_product-force-scroll js_searchInGoogleTooltip default-cursor",
        "product-top__product-info__name js_product-h1-link "
        "js_product-force-scroll js_searchInGoogleTooltip default-cursor",
    )

    next_page = "https://www.ceneo.pl/{}#tab=reviews".format(
        self.product_id)
    while next_page:
        respons = requests.get(next_page)
        page_dom = BeautifulSoup(respons.text, "html.parser")

        for opinion in page_dom.select("div.js_product-review"):
            self.opinions.append(
                Opinion().extract_opinion(opinion).transform_opinion())

        try:
            # pop() takes the last matching link and raises IndexError when
            # the page has no "next" link, which ends the crawl.
            next_page = "https://www.ceneo.pl" + \
                page_dom.select("a.pagination__next").pop()["href"]
        except IndexError:
            next_page = None
        print(next_page)

        # "none" is the placeholder stored by an earlier failed lookup, so
        # it is treated the same as a name that was never set.
        if self.product_name is None or self.product_name == "none":
            self.product_name = "none"
            for heading_class in name_heading_classes:
                heading = page_dom.find("h1", class_=heading_class)
                if heading is not None:
                    # Drop the first character of the heading text, as the
                    # original code did (presumably a leading newline).
                    self.product_name = heading.text[1:]
                    break
def importProduct(self):
    """Populate this product from its previously saved JSON file.

    Reads ``app/products/<productId>.json``, copies the stored
    ``productName`` onto the instance, and appends one ``Opinion`` per
    stored entry to ``self.opinions``. Each entry is passed to ``Opinion``
    as keyword arguments.
    """
    json_path = f"app/products/{self.productId}.json"
    with open(json_path, "r", encoding="UTF-8") as source:
        stored = json.load(source)

    self.productName = stored['productName']
    self.opinions.extend(Opinion(**fields) for fields in stored['opinions'])
def read_from_json(self):
    """Load the product's name and opinions from its saved JSON file.

    Prints ``self.product_id``, reads ``app/products/<product_id>.json``,
    stores its ``product_name`` on the instance, and appends one ``Opinion``
    per entry of its ``opinions`` list to ``self.opinions``. Each entry is
    passed to ``Opinion`` as keyword arguments.

    Raises whatever ``open`` / ``json.load`` raise for a missing or
    malformed file, and ``KeyError`` when an expected key is absent.
    """
    print(self.product_id)
    # The ``with`` block closes the file on exit, so no explicit close()
    # call is needed.
    with open(f"app/products/{self.product_id}.json", "r",
              encoding="UTF-8") as fp:
        prod = json.load(fp)

    self.product_name = prod['product_name']
    self.opinions.extend(Opinion(**opinion) for opinion in prod['opinions'])
def extract_product(self):
    """Collect the opinions from every review page of the product.

    Begins at the product's review tab on ceneo.pl. On each page, every
    ``div.js_product-review`` element is converted to an ``Opinion`` and
    appended to ``self.opinions``. The crawl then moves to the target of the
    page's ``a.pagination__next`` link and stops when a page has no such
    link. The next URL (or ``None``) is printed after each page.
    """
    site_root = "https://www.ceneo.pl"
    page_url = f"{site_root}/{self.product_id}#tab=reviews"

    while page_url:
        html = requests.get(page_url).text
        soup = BeautifulSoup(html, "html.parser")

        for review_node in soup.select("div.js_product-review"):
            parsed = Opinion().extract_opinion(review_node).transform_opinion()
            self.opinions.append(parsed)

        next_links = soup.select("a.pagination__next")
        if next_links:
            # When several links match, the last one is followed.
            page_url = site_root + next_links[-1]["href"]
        else:
            page_url = None
        print(page_url)
def extractProduct(self):
    """Collect the opinions from the product's numbered review pages.

    The review tab is fetched first. Subsequent pages are requested as
    ``/<productId>/opinie-<n>`` for n = 2, 3, ... with redirects disabled.
    On each page, every ``div.js_product-review`` element is converted to an
    ``Opinion`` and appended to ``self.opinions``. The crawl ends at the
    first follow-up request whose status code is not 200.
    """
    product_url = f"https://www.ceneo.pl/{self.productId}"
    reply = requests.get(f"{product_url}#tab=reviews")
    next_page_no = 2

    # Nothing is parsed when the initial response is falsy.
    # NOTE(review): presumably that means an error status - confirm against
    # the requests documentation for Response truthiness.
    while reply:
        soup = BeautifulSoup(reply.text, 'html.parser')
        for review_node in soup.select("div.js_product-review"):
            self.opinions.append(
                Opinion().extractOpinion(review_node).transformOpinion())

        # Redirects are not followed, so a redirect response has a non-200
        # status and ends the crawl below.
        reply = requests.get(
            f"{product_url}/opinie-{next_page_no}", allow_redirects=False)
        if reply.status_code != 200:
            break
        next_page_no += 1