Code example #1
 def extract_product(self):
     url_prefix = "https://www.ceneo.pl"
     url_postfix = "#tab=reviews"
     url = url_prefix + "/" + self.product_id + url_postfix
     page_respons = requests.get(url)
     page_tree = BeautifulSoup(page_respons.text, 'html.parser')
     self.name = extract_element(page_tree, "h1", "product-name")
     try:
         opinions_count = int(
             extract_element(page_tree, "a", "product-reviews-link",
                             "span"))
     except AttributeError:
         opinions_count = 0
     if opinions_count > 0:
         while url:
             page_respons = requests.get(url)
             page_tree = BeautifulSoup(page_respons.text, 'html.parser')
             opinions = page_tree.find_all("div", "js_product-review")
             for opinion in opinions:
                 op = Opinion()
                 op.extract_opinion(opinion)
                 op.transform_opinion()
                 self.opinions.append(op)
             try:
                 url = url_prefix + page_tree.find(
                     "a", "pagination__next")["href"]
             except TypeError:
                 url = None
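
Every example on this page calls an extract_element helper that is defined elsewhere in models.py. Based on how it is used here, a minimal sketch of that helper might look like the following (the signature and body are assumptions, not the project's actual code):

    # Hypothetical sketch of extract_element; the real implementation in
    # CeneoScraper's models.py may differ.
    def extract_element(ancestor, tag, css_class, child=None):
        node = ancestor.find(tag, css_class)
        if child is not None:
            # e.g. the <span> inside the product-reviews-link anchor
            node = node.find(child)
        # raises AttributeError when the element is missing, which the
        # calling code handles with try/except blocks
        return node.get_text().strip()
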
Code example #2
    def extract_product(self):
        url_prefix = "https://www.ceneo.pl"
        url_postfix = "#tab=reviews"
        url = url_prefix + "/" + self.product_id + url_postfix
        page_respons = requests.get(url)
        page_tree = BeautifulSoup(page_respons.text, 'html.parser')
        self.name = extract_element(page_tree, "h1", "product-name")
        try:
            opinions_count = int(
                extract_element(page_tree, "a", "product-reviews-link",
                                "span"))
        except AttributeError:
            opinions_count = 0
        if opinions_count > 0:
            while url:
                # download the HTML of the page at the given URL
                page_respons = requests.get(url)
                page_tree = BeautifulSoup(page_respons.text, 'html.parser')

                # extract from the page's HTML the fragments corresponding to individual opinions
                opinions = page_tree.find_all("div", "js_product-review")

                # extract the components of each individual opinion
                for opinion in opinions:
                    op = Opinion()
                    op.extract_opinion(opinion)
                    op.transform_opinion()
                    self.opinions.append(op)

                try:
                    url = url_prefix + page_tree.find(
                        "a", "pagination__next")["href"]
                except TypeError:
                    url = None
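
In each extract_product variant, the pagination loop ends when page_tree.find("a", "pagination__next") returns None on the last page of reviews: subscripting None with ["href"] raises TypeError, which is caught and sets url to None. A minimal stand-alone illustration of that idiom:

    next_link = None                      # what find() returns on the last page
    try:
        url = "https://www.ceneo.pl" + next_link["href"]
    except TypeError:                     # 'NoneType' object is not subscriptable
        url = None
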
Code example #3
File: models.py Project: twarogm/CeneoScraper
    def extract_product(self):
        url_prefix = 'https://www.ceneo.pl'
        url_postfix = '#tab=reviews'
        url = url_prefix + '/' + self.product_id + url_postfix
        page_respons = requests.get(url)
        page_tree = BeautifulSoup(page_respons.text, 'html.parser')
        self.name = extract_element(page_tree, 'h1', 'product-name')
        try:
            opinions_count = int(
                extract_element(page_tree, 'a', 'product-reviews-link',
                                'span'))
        except AttributeError:
            opinions_count = 0
        if opinions_count > 0:
            while url:
                # download the HTML of the page at the given URL
                page_respons = requests.get(url)
                page_tree = BeautifulSoup(page_respons.text, 'html.parser')

                # extract from the page's HTML the fragments corresponding to individual opinions
                opinions = page_tree.find_all('div', 'js_product-review')

                # extract the components of a single opinion
                for opinion in opinions:
                    op = Opinion()
                    op.extract_opinion(opinion)
                    op.transform_opinion()
                    self.opinions.append(op)
                try:
                    url = url_prefix + page_tree.find(
                        'a', 'pagination__next')['href']
                except TypeError:
                    url = None
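
Each extract_product variant assumes that requests and BeautifulSoup are imported at module level; a likely header for models.py (the exact import list used by the project is an assumption):

    import requests                   # fetches each review page over HTTP
    from bs4 import BeautifulSoup     # parses the returned HTML into page_tree
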
Code example #4
    def extract_opinion(self, opinion):
        # simple fields: each entry maps an attribute name to extract_element() arguments
        for key, args in self.tags.items():
            setattr(self, key, extract_element(opinion, *args))

        self.opinion_id = int(opinion["data-entry-id"])
        # advantages: text of the items following the "positives" title block
        try:
            self.pros = ", ".join(
                pros.get_text().strip() for pros in opinion.find(
                    "div", "review-feature__title--positives"
                ).find_next_siblings("div", "review-feature__item"))
        except AttributeError:
            self.pros = None
        # disadvantages: text of the items following the "negatives" title block
        try:
            self.cons = ", ".join(
                cons.get_text().strip() for cons in opinion.find(
                    "div", "review-feature__title--negatives"
                ).find_next_siblings("div", "review-feature__item"))
        except AttributeError:
            self.cons = None

        # the first <time> tag holds the review date; an optional second one holds the purchase date
        dates = opinion.find("span", "user-post__published").find_all("time")
        self.review_date = dates.pop(0)["datetime"]
        try:
            self.purchase_date = dates.pop(0)["datetime"]
        except IndexError:
            self.purchase_date = None
Code example #5
File: models.py Project: twarogm/CeneoScraper
 def extract_opinion(self, opinion):
     for key, args in self.tags.items():
         setattr(self, key, extract_element(opinion, *args))
     self.opinion_id = int(opinion['data-entry-id'])
     try:
         self.pros = ', '.join(
             pros.get_text().strip() for pros in opinion.find(
                 'div', 'review-feature__title--positives'
             ).find_next_siblings('div', 'review-feature__item'))
     except AttributeError:
         self.pros = None
     try:
         self.cons = ', '.join(
             cons.get_text().strip() for cons in opinion.find(
                 'div', 'review-feature__title--negatives'
             ).find_next_siblings('div', 'review-feature__item'))
     except AttributeError:
         self.cons = None
     dates = opinion.find('span', 'user-post__published').find_all('time')
     self.review_date = dates.pop(0)['datetime']
     try:
         self.purchase_date = dates.pop(0)['datetime']
     except IndexError:
         self.purchase_date = None
Code example #6
 def extract_opinion(self, opinion):
     for key, args in self.selectors.items():
         setattr(self, key, extract_element(opinion, *args))
     self.opinion_id = opinion["data-entry-id"]
     return self
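
The loops over self.tags (examples #4 and #5) and self.selectors (example #6) expect a mapping from attribute names to positional arguments for extract_element. The actual mapping is defined elsewhere in the project; a hypothetical shape, with illustrative keys and CSS classes only:

    # Illustrative only: the attribute names and selectors below are assumptions,
    # not taken from the CeneoScraper project.
    tags = {
        "author":  ("span", "user-post__author-name"),
        "stars":   ("span", "user-post__score-count"),
        "content": ("div",  "user-post__text"),
    }
    # extract_opinion then sets each attribute with:
    #     setattr(self, key, extract_element(opinion, *args))
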