コード例 #1
0
    def parse_contents(self, response):
        """Collect the review fields from ``response`` and print the item.

        Title, author, score, date and the closing paragraphs are read
        with XPath queries; the platform is always ``'Android'``.  The
        populated item is printed, nothing is returned.
        """
        item = ReviewrItem()

        def first_match(query):
            # Indexing with [0] raises IndexError when the query is empty.
            return response.xpath(query).extract()[0]

        page_url = response.url
        headline = first_match('//h1[@class="title"]/text()')
        # The game name is the headline minus either spelling of "review".
        game = headline.replace('Review', '').replace('review', '')
        writer = first_match('//a[@rel="author"]/text()')
        # The last matching text node is the one parsed as the score.
        score_texts = response.xpath('//div[@class="score"]/text()').extract()
        rating = float(score_texts[-1])

        date_text = first_match('//p[@class="date"]/span/text()')
        published = datetime.strptime(date_text.strip(), '%b %d, %Y').date()

        # Keep only the final three paragraphs and reduce them to text.
        paragraphs = response.xpath(
            '//div[@class="entry-content"]/p').extract()
        closing_html = ''.join(paragraphs[-3:]).encode('utf-8')
        verdict = BeautifulSoup(closing_html, 'html.parser').get_text().strip()

        # Both score fields carry the same value.
        fields = (
            ('title', headline),
            ('date', published),
            ('game', game),
            ('platform', 'Android'),
            ('author', writer),
            ('url', page_url),
            ('score_orig', rating),
            ('score_critic', rating),
            ('conclusion', verdict),
        )
        for key, value in fields:
            item[key] = value

        print(item)
コード例 #2
0
    def parse_contents(self, response):
        """Read the review fields from the page's meta tags and print them.

        Most values come from ``<meta>`` ``content`` attributes; the
        platform is whatever ``self.check_platform`` returns for the
        ``og:url`` value.  Nothing is returned.
        """
        item = ReviewrItem()

        def first_match(query):
            # Indexing with [0] raises IndexError when the query is empty.
            return response.xpath(query).extract()[0]

        # Only the first ten characters of the date value are kept.
        published = first_match('//meta[@name="sailthru.date"]/@content')[:10]
        writer = first_match('//a[@rel="author"]/text()')
        headline = first_match('//meta[@property="og:title"]/@content')
        game = headline.replace(" Review", "")
        summary = first_match('//meta[@property="og:description"]/@content')
        page_url = first_match('//meta[@property="og:url"]/@content')
        platform = self.check_platform(page_url)

        # The score is cut out of the fourth whitespace-separated token of
        # the tags value: its first three characters, with '-' swapped
        # for '.'.  It stays a string.
        tags = first_match('//meta[@name="sailthru.tags"]/@content')
        score_token = tags.split()[3]
        rating = score_token[:3].replace('-', '.')

        # Both score fields carry the same value.
        fields = (
            ('title', headline),
            ('date', published),
            ('game', game),
            ('platform', platform),
            ('author', writer),
            ('url', page_url),
            ('score_orig', rating),
            ('score_critic', rating),
            ('conclusion', summary),
        )
        for key, value in fields:
            item[key] = value

        print(item)
コード例 #3
0
	def parse_contents(self, response):
		"""Extract the review fields from ``response`` and print the item.

		Score, title, game name, author and date are read with XPath
		queries; the conclusion is the text content of the "details"
		block.  The platform is taken from the lower-cased text of the
		first two "speakable-content" paragraphs: whatever sits between
		"platform" and "version reviewed", or the whole text when that
		pattern is absent.  Nothing is returned.

		Raises:
			IndexError: when a required XPath query matches nothing.
			ValueError: when the score text is not a valid float.
		"""
		item = ReviewrItem()

		score = float(response.xpath('//h3[@class="letter-grade"]/text()').extract()[0])
		url = response.url
		title = response.xpath('//h1/text()').extract()[0]
		game_name = response.xpath('//h3[@class="actor-name"]/text()').extract()[0]
		author = response.xpath('//a[@class="auth-name"]/text()').extract()[0].strip()
		date = response.xpath('//time/@datetime').extract()[0]

		# Strip the markup from the details block to get plain text.
		raw_conclusion = response.xpath('//div[@class="details"]').extract()[0]
		soup_con = BeautifulSoup(raw_conclusion, 'html.parser')
		conclusion = soup_con.get_text().strip()

		# Only the first two matching paragraphs are searched.
		raw_pl = ', '.join(response.xpath('//p[@class="speakable-content"]').extract()[:2])
		soup_pl = BeautifulSoup(raw_pl, 'html.parser')
		pl_string = soup_pl.get_text().strip().lower()
		find_pl = re.findall(r'platform(.*?)version reviewed', pl_string)
		if not find_pl:
			# No marker text found: fall back to the whole paragraph text.
			find_pl = [pl_string]
		join_pl = ''.join(find_pl)
		# NOTE(review): the original right-hand side was an elided
		# placeholder, which left this line a syntax error.  The extracted
		# text is used as-is (whitespace-trimmed); confirm whether a
		# platform-normalising helper was intended here.
		platform = join_pl.strip()

		item['title'] = title
		item['date'] = date
		item['game'] = game_name
		item['platform'] = platform
		item['author'] = author
		item['url'] = url
		# Both score fields carry the same value.
		item['score_orig'] = score
		item['score_critic'] = score
		item['conclusion'] = conclusion

		print(item)
コード例 #4
0
    def parse_contents(self, response):
        """Gather the review fields from ``response`` and print the item.

        The date comes from the second ``application/ld+json`` script
        found under a ``div``; the remaining values are read with XPath
        queries.  The platform is whatever ``self.check_platform``
        returns for the platform heading text.  Nothing is returned.
        """
        item = ReviewrItem()

        def first_match(query):
            # Indexing with [0] raises IndexError when the query is empty.
            return response.xpath(query).extract()[0]

        page_url = response.url
        # Index 1: the second matching script block is the one parsed.
        ld_blocks = response.xpath(
            '//div/script[@type="application/ld+json"]/text()').extract()
        metadata = json.loads(ld_blocks[1])
        published = metadata['datePublished']
        summary = first_match(
            '//p[@class="s16 b c3 lh27 fftext mar_rl4"]/text()')
        game = first_match('//div[@class="dtc vab oh"]/a/strong/text()')
        headline = first_match('//title/text()').strip()
        writer = first_match('//a[@rel="author"]/text()')

        platform_heading = first_match('//h2[@class="s18 as14_600 n"]/text()')
        platform = self.check_platform(platform_heading)

        # The score text uses a decimal comma; swap it before parsing.
        score_text = first_match(
            '//div[@id="val_ana_3"]/div[2]/span/text()').strip()
        rating = float(score_text.replace(',', '.'))

        # Both score fields carry the same value.
        fields = (
            ('title', headline),
            ('date', published),
            ('game', game),
            ('platform', platform),
            ('author', writer),
            ('url', page_url),
            ('score_orig', rating),
            ('score_critic', rating),
            ('conclusion', summary),
        )
        for key, value in fields:
            item[key] = value

        print(item)
コード例 #5
0
    def parse_contents(self, response):
        """Fill a review item from the page's JSON-LD data and print it.

        Date, author, score, game name and platform are read from the
        second ``application/ld+json`` script; title, URL and the
        conclusion come from XPath queries.  Nothing is returned.
        """
        item = ReviewrItem()

        def first_match(query):
            # Indexing with [0] raises IndexError when the query is empty.
            return response.xpath(query).extract()[0]

        # Index 1: the second matching script block is the one parsed,
        # with its newlines removed beforehand.
        ld_blocks = response.xpath(
            '//script[@type="application/ld+json"]/text()').extract()
        review = json.loads(ld_blocks[1].replace('\n', ''))

        headline = first_match(
            '//h2[@class="text-big text-dark text-with-subtitle"]/text()')
        page_url = first_match('//meta[@property="og:url"]/@content')

        published = review['datePublished']
        writer = review['author']['name']
        rating = review['reviewRating']['ratingValue']
        reviewed = review['itemReviewed']
        game = reviewed['name']
        platform = reviewed['operatingSystem']

        # Reduce the description block to plain text.
        description_html = first_match('//div[@itemprop="description"]')
        description_bytes = ''.join(description_html).encode('utf-8')
        summary = BeautifulSoup(
            description_bytes, 'html.parser').get_text().strip()

        # Both score fields carry the same value.
        fields = (
            ('title', headline),
            ('date', published),
            ('game', game),
            ('platform', platform),
            ('author', writer),
            ('url', page_url),
            ('score_orig', rating),
            ('score_critic', rating),
            ('conclusion', summary),
        )
        for key, value in fields:
            item[key] = value

        print(item)
コード例 #6
0
	def parse_contents(self, response):
		"""Extract the review fields from ``response`` and print the item.

		Title, date, score, author and platform links are read with XPath
		queries.  The game name is the title with the leading
		"Analisis de " phrase (spelled with an accented "a") removed.  The
		conclusion is always ``None``.  Nothing is returned.

		Raises:
			IndexError: when the title, date or score query matches
				nothing, or when neither author query matches.
		"""
		item = ReviewrItem()

		url = response.url
		title = response.xpath('//h2[@itemprop="name"]/text()').extract()[0]
		# Only the first ten characters of the date value are kept.
		date = response.xpath('//meta[@itemprop="datePublished"]/@content').extract()[0][0:10]
		score = response.xpath('//span[@itemprop="ratingValue"]/text()').extract()[0]
		# Remove the prefix directly.  The previous ASCII round-trip also
		# dropped every other non-ASCII character from the game name.
		game_name = title.replace(u'An\u00e1lisis de ', '')

		# Fall back to the plain author span when the itemprop markup is
		# missing.
		author_raw = response.xpath('//span[@itemprop="author"]/span/text()').extract()
		if not author_raw:
			author_raw = response.xpath('//span[@class="author"]/text()').extract()
		author = author_raw[0]

		platform_raw = response.xpath('//div[@class="header"]/p/a/text()').extract()
		# NOTE(review): the original right-hand side was an elided
		# placeholder, which left this line a syntax error.  The link
		# texts are joined into one comma-separated string; confirm
		# whether a platform-normalising helper was intended here.
		platform = ', '.join(name.strip() for name in platform_raw if name.strip())

		item['title'] = title
		item['date'] = date
		item['game'] = game_name
		item['platform'] = platform
		item['author'] = author
		item['url'] = url
		# Both score fields carry the same value.
		item['score_orig'] = score
		item['score_critic'] = score
		item['conclusion'] = None

		print(item)
コード例 #7
0
    def parse_contents(self, response):
        """Print a review item for each JSON-LD block holding a rating.

        Every ``application/ld+json`` script whose text contains
        ``'reviewRating'`` is parsed; its rating is stored as
        ``score_orig`` and, doubled, as ``score_critic``.  The other
        fields are read with XPath queries.  Nothing is returned.
        """
        item = ReviewrItem()

        def first_match(query):
            # Indexing with [0] raises IndexError when the query is empty.
            return response.xpath(query).extract()[0]

        ld_scripts = response.xpath(
            '//script[@type="application/ld+json"]/text()').extract()
        for script_text in ld_scripts:
            # Skip script blocks that do not mention a rating.
            if 'reviewRating' not in script_text:
                continue

            payload = json.loads(script_text)
            rating = float(payload['reviewRating']['ratingValue'])
            doubled = rating * 2.0
            page_url = response.url
            # The stripped headline serves as both title and game name.
            headline = first_match('//h1/text()').strip()
            writer = first_match(
                '//div[@class="pageheader_byline"]/address/a/text()')
            published = first_match(
                '//meta[@property="article:published_time"]/@content')

            # All tooltip values are joined into one string.
            tooltips = response.xpath(
                '//div[@class="categories_display"]/span/@tooltip').extract()
            platform = ', '.join(tooltips)

            # Reduce the first verdict paragraph to plain text.
            verdict_html = first_match(
                '//div[@class="review_box our_verdict"]/p')
            verdict = BeautifulSoup(
                verdict_html, 'html.parser').get_text().strip()

            fields = (
                ('title', headline),
                ('date', published),
                ('game', headline),
                ('platform', platform),
                ('author', writer),
                ('url', page_url),
                ('score_orig', rating),
                ('score_critic', doubled),
                ('conclusion', verdict),
                ('pub_id', 188),
                ('assign_to', 2),
                ('content', None),
            )
            for key, value in fields:
                item[key] = value

            print(item)
コード例 #8
0
    def parse_contents(self, response):
        """Collect the review fields from ``response`` and print the item.

        The platform is always ``'ios'``.  The date text is converted by
        ``self.change_date``.  The score is counted from the rating
        image paths: each ``/star.png`` adds one and each
        ``/halfstar.png`` adds a half; ``score_critic`` is that sum
        doubled.  Nothing is returned.
        """
        item = ReviewrItem()

        headline = response.xpath('//h1/a/text()').extract()[0]
        writer = response.xpath(
            '//div[@class="postedby"]/a/text()').extract()[0]
        # The game name is the headline minus either spelling of "review".
        game = headline.replace('review', '').replace('Review', '')
        page_url = response.url

        # The last paragraph of the body is reduced to plain text.
        body_paragraphs = response.xpath(
            '//div[@class="body clearfloat"]/p').extract()
        closing_html = ''.join(body_paragraphs[-1])
        verdict = BeautifulSoup(closing_html, 'html.parser').get_text().strip()

        # The last text node of the byline is passed on as the date text.
        byline_texts = response.xpath(
            '//div[@class="postedby"]/text()').extract()
        published = self.change_date(byline_texts[-1])

        icon_paths = ''.join(
            response.xpath('//span[@class="rating"]/img/@src').extract())
        full_stars = float(icon_paths.count('/star.png'))
        half_stars = float(icon_paths.count('/halfstar.png') / 2.0)
        stars = full_stars + half_stars
        doubled = (full_stars + half_stars) * 2.0

        fields = (
            ('title', headline),
            ('date', published),
            ('game', game),
            ('platform', 'ios'),
            ('author', writer),
            ('url', page_url),
            ('score_orig', stars),
            ('score_critic', doubled),
            ('conclusion', verdict),
        )
        for key, value in fields:
            item[key] = value

        print(item)
コード例 #9
0
	def parse_contents(self, response):
		"""Collect the review fields from ``response`` and print the item.

		The platform is always ``'android'`` and the headline serves as
		both title and game name.  The final score is stored as
		``score_orig`` and, doubled, as ``score_critic``.  Nothing is
		returned.
		"""
		item = ReviewrItem()

		def first_match(query):
			# Indexing with [0] raises IndexError when the query is empty.
			return response.xpath(query).extract()[0]

		page_url = response.url
		headline = first_match('//h1/text()')
		writer = first_match('//div[@class="td-post-author-name"]/a/text()')
		published = first_match('//span[@class="td-post-date"]/time/@datetime')
		summary = first_match('//div[@class="td-review-summary-content"]/text()')
		rating = float(first_match('//div[@class="td-review-final-score"]/text()'))
		doubled = rating * 2.0

		fields = (
			('title', headline),
			('date', published),
			('game', headline),
			('platform', 'android'),
			('author', writer),
			('url', page_url),
			('score_orig', rating),
			('score_critic', doubled),
			('conclusion', summary),
		)
		for key, value in fields:
			item[key] = value

		print(item)
コード例 #10
0
    def parse_contents(self, response):
        """Collect the review fields from ``response`` and print the item.

        The platform is always ``'ios'``.  The date text is parsed with
        the ``'%B %d, %Y'`` format and kept as a ``datetime``; the score
        is kept as the extracted string.  Nothing is returned.
        """
        item = ReviewrItem()

        def first_match(query):
            # Indexing with [0] raises IndexError when the query is empty.
            return response.xpath(query).extract()[0]

        headline = first_match('//title/text()')
        writer = first_match('//a[@class="underline aa_text--bold"]/text()')
        date_text = first_match(
            '//div[@class="aa_text--center aa_opacity--05 aa_margin-t--5"]/text()')
        published = datetime.strptime(date_text, '%B %d, %Y')

        rating = first_match('//div[@class="r_c_rt_t_a"]/text()')
        # The last matching image's alt text is used as the game name.
        alt_texts = response.xpath(
            '//div[@class="r_c_rf-app_img aa_position--relative"]/img/@alt'
        ).extract()
        game = alt_texts[-1]
        page_url = response.url

        # Every matching paragraph is joined and reduced to plain text.
        paragraphs = response.xpath(
            '//section[@id="top"]/div[5]/div[2]/div/p').extract()
        joined_html = ''.join(paragraphs).encode('utf-8')
        verdict = BeautifulSoup(joined_html, 'html.parser').get_text().strip()

        # Both score fields carry the same value.
        fields = (
            ('title', headline),
            ('date', published),
            ('game', game),
            ('platform', 'ios'),
            ('author', writer),
            ('url', page_url),
            ('score_orig', rating),
            ('score_critic', rating),
            ('conclusion', verdict),
        )
        for key, value in fields:
            item[key] = value

        print(item)