Python TAutil Examples

Programming Language: Python

Class/Type: TAutil

Examples at hotexamples.com: 13

Python TAutil - 13 examples found. These are the top-rated real-world Python examples of TAutil extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

strip_comma(5)

next_page(2)

open_page(2)

process_text(2)

wrap_tripadvisor(2)

fetch_ajax_title_content(1)

Example #1

Show file

File: ReviewParser.py Project: davduran/TripAdvisorCrawler

	def badges(self):
		"""
			Read the reviewer's badge information from self.reviewer_tag.

			The 'totalReviewBadge' div supplies the reviewer title and the
			'badgeText' spans holding the counts; the 'helpfulVotesBadge'
			div supplies the number of helpful votes.

			Returns the tuple
			(reviewerTitle, review_count, hotel_review_count, helpful_count).
			The title falls back to '' and each count to -1 when the lookup
			raises AttributeError (or IndexError for the counts).
		"""
		def badge_count(spans, position):
			# Missing span (IndexError) or unusable node (AttributeError) -> -1.
			try:
				return TAutil.strip_comma(spans[position].string)
			except (AttributeError, IndexError):
				return -1

		title, total_reviews, hotel_reviews = '', -1, -1
		try:
			badge = self.reviewer_tag.find('div', class_='totalReviewBadge')
			try:
				title = str(badge.find('div', class_='reviewerTitle').string)
			except AttributeError:
				title = ''
			spans = badge.find_all('span', class_='badgeText')
		except AttributeError:
			# The badge block itself is unusable: report the defaults.
			title = ''
		else:
			total_reviews = badge_count(spans, 0)
			hotel_reviews = badge_count(spans, 1)

		try:
			vote_span = self.reviewer_tag.select('div.helpfulVotesBadge span.badgeText')[0]
			helpful_count = TAutil.strip_comma(str(vote_span.string))
		except (AttributeError, IndexError):
			helpful_count = -1

		return title, total_reviews, hotel_reviews, helpful_count

Example #2

Show file

File: HotelParser.py Project: davduran/TripAdvisorCrawler

    def parse_init(self, hotel_url, is_hotel_page):
        """
        Load a hotel page and keep only its review section in self.soup.

        The page at hotel_url is fetched with TAutil.open_page and parsed with
        a strainer limited to the element whose id is 'REVIEWS'. The raw html
        is stored in self.hotel_html and the url in self.hotel_ori_url.

        When is_hotel_page is true, the link of the first 'quote' div is
        followed and self.soup is rebuilt from that page (the original note
        says this expands the review entries).
        """
        only_reviews = SoupStrainer(id='REVIEWS')
        self.hotel_html = TAutil.open_page(hotel_url)
        self.hotel_ori_url = hotel_url
        self.soup = BeautifulSoup(self.hotel_html, parse_only=only_reviews)

        if not is_hotel_page:
            return

        # Follow the first review title's link and re-parse from there.
        first_quote = self.soup.find('div', class_='quote')
        review_href = str(first_quote.a['href'])
        review_url = TAutil.wrap_tripadvisor(review_href)
        review_html = TAutil.open_page(review_url)
        self.soup = BeautifulSoup(review_html, parse_only=only_reviews)

Example #3

Show file

File: CityParser.py Project: davduran/TripAdvisorCrawler

	def parse_init(self, city_page_url):
		"""
		Fetch city_page_url with TAutil.open_page and store in self.soup a
		soup restricted to the element whose id is "ACCOM_OVERVIEW".
		"""
		self.soup = BeautifulSoup(
			TAutil.open_page(city_page_url),
			parse_only=SoupStrainer(id="ACCOM_OVERVIEW"),
		)

Example #4

Show file

File: ReviewParser.py Project: davduran/TripAdvisorCrawler

	def review_content(self):
		"""
		Return the review body after TAutil.process_text.

		The text comes from self.tc['body'] when self.tc is truthy; otherwise
		the strings of the first paragraph in the 'entry' div are joined
		with single spaces.
		"""
		if not self.tc:
			paragraph = self.reviewer_tag.find('div', class_='entry').p
			return TAutil.process_text(' '.join(paragraph.strings))
		return TAutil.process_text(self.tc['body'])

Example #5

Show file

File: CityParser.py Project: davduran/TripAdvisorCrawler

	def large_reviews(self, hotel):
		"""
		Tell whether the hotel's review count is not below self.min_reviews.

		The count is read from the link inside the 'more' span and converted
		with TAutil.strip_comma. Returns False when that lookup raises
		AttributeError (for example when the span or link is missing).
		"""
		try:
			more_link = hotel.find('span', class_='more').a
			count_text = str(more_link.string).strip()
			review_total = TAutil.strip_comma(count_text)
		except AttributeError:
			return False
		return not (review_total < self.min_reviews)

Example #6

Show file

File: HotelParser.py Project: davduran/TripAdvisorCrawler

 def hotel_reviews(self):
     """
     Return all reviews of the hotel, collecting them on first use.

     If self.reviews is already truthy it is returned as is. Otherwise the
     reviews of the current page are gathered with self.get_review(), and
     the following pages are loaded through self.parse_init until
     TAutil.next_page raises TypeError.
     """
     if self.reviews:
         return self.reviews

     self.reviews = []
     more_pages = True
     while more_pages:
         page_reviews = self.get_review()
         self.reviews.extend(page_reviews)
         try:
             following_url = TAutil.next_page(self.soup)
         except TypeError:
             # TypeError from next_page ends the pagination loop.
             more_pages = False
         else:
             self.parse_init(following_url, False)
     return self.reviews

Example #7

Show file

File: CityParser.py Project: davduran/TripAdvisorCrawler

	def hotel_urls(self):
		"""
		Return the urls of the hotels that pass self.large_reviews.

		If self.urls is already truthy it is returned as is. Otherwise every
		element whose id matches 'hotel_\\d*' is checked on each page, and
		the following pages are loaded through self.parse_init until
		TAutil.next_page raises TypeError.
		"""
		if self.urls:
			return self.urls

		self.urls = []
		more_pages = True
		while more_pages:
			candidates = self.soup.find_all(id=re.compile(r'hotel_\d*'))
			page_urls = []
			for hotel in candidates:
				if self.large_reviews(hotel):
					page_urls.append(self.find_hotel_url(hotel))
			self.urls.extend(page_urls)
			try:
				following_url = TAutil.next_page(self.soup)
			except TypeError:
				# TypeError from next_page ends the pagination loop.
				more_pages = False
			else:
				self.parse_init(following_url)
		return self.urls

Example #8

Show file

File: HotelParser.py Project: davduran/TripAdvisorCrawler

    def get_review(self):
        """
        Pair every review element of the current page with its ajax data.

        Returns:
            A list of (review_tag, title_content) tuples, one per element of
            class 'review' in self.soup. title_content is None when
            self.check_ajax() is false; otherwise it is the matching item
            returned by TAutil.fetch_ajax_title_content for the review ids.
        """
        temp_reviews = self.soup.find_all(class_='review')

        if not self.check_ajax():
            # No ajax content needed: every review is paired with None.
            return [(review, None) for review in temp_reviews]

        # Diagnostic output kept from the original implementation.
        print(self.soup.find_all('div', class_='quote'))

        review_ids = [ReviewParser.ReviewParser(x).review_id for x in temp_reviews]
        print(review_ids)

        tc = TAutil.fetch_ajax_title_content(review_ids)
        # NOTE(review): zip stops at the shorter sequence, so reviews are
        # dropped silently if fewer items come back than ids were sent.
        return list(zip(temp_reviews, tc))

Example #9

Show file

File: HotelParser.py Project: davduran/TripAdvisorCrawler

 def trip_type(self):
     """
     Return the values found under 'div.trip_type div.value', each
     converted with TAutil.strip_comma.

     When the selector matches nothing, a list of four -1 is returned.
     """
     counts = []
     for cell in self.soup.select('div.trip_type div.value'):
         counts.append(TAutil.strip_comma(cell.string))
     return counts if counts else [-1] * 4

Example #10

Show file

File: HotelParser.py Project: davduran/TripAdvisorCrawler

 def rating_count(self):
     """
     Return the values of the 'compositeCount' spans selected by
     'div.col2of2.composite span.compositeCount', each converted with
     TAutil.strip_comma. The list is empty when nothing matches.
     """
     totals = []
     count_spans = self.soup.select('div.col2of2.composite span.compositeCount')
     for span in count_spans:
         totals.append(TAutil.strip_comma(span.string))
     return totals

Example #11

Show file

File: CityParser.py Project: davduran/TripAdvisorCrawler

	def find_hotel_url(self, hotel):
		"""
		Return the hotel's url: the 'href' of the element with class
		'property_title', passed through TAutil.wrap_tripadvisor.
		"""
		title_link = hotel.find(class_='property_title')
		href_text = str(title_link['href'])
		return TAutil.wrap_tripadvisor(href_text)

Example #12

Show file

File: ReviewParser.py Project: davduran/TripAdvisorCrawler

	def review_title(self):
		"""
		Return the review title after TAutil.process_text.

		The title comes from self.tc['name'] when self.tc is truthy;
		otherwise from the string of the 'quote' div in self.reviewer_tag.
		"""
		if not self.tc:
			quote_div = self.reviewer_tag.find('div', class_='quote')
			return TAutil.process_text(str(quote_div.string))
		return TAutil.process_text(self.tc['name'])

Example #13

Show file

File: ReviewParser.py Project: davduran/TripAdvisorCrawler

	def numHlp(self):
		try:
			return TAutil.strip_comma(self.reviewer_tag.find('span', class_='numHlpIn').string)
		except AttributeError:
			return 0