def getUserReviews(self, user):
    """Fetch all 'read'-shelf reviews for *user* from Goodreads.

    Scrapes the user's review-list page and, when the list is paginated,
    up to 4 additional pages (pages 2..5) via ``self.getPageReviews``,
    aggregating the parsed reviews, books and authors.

    Args:
        user: a user record exposing ``gid`` (Goodreads numeric id) and ``id``.

    Returns:
        A ``(payload, flag)`` tuple where ``payload`` is either ``None``
        (request failed or profile is private) or a dict with keys
        ``'reviews'``, ``'books'`` and ``'authors'``; ``flag`` is always
        ``False`` (kept for caller compatibility).
    """
    logging.info('Get user reviews for %d id: %s:', user.gid, user.id)
    cookies = dict(session_id2='', p='', u='')
    # NOTE(review): Goodreads appears to ignore the name slug after the
    # numeric id, so the hard-coded '-stephanie' suffix is presumably
    # harmless — confirm before changing.
    url = ('https://www.goodreads.com/review/list/' + str(user.gid) +
           '-stephanie?order=d&per_page=100&shelf=read&sort=rating'
           '&utf8=✓&view=reviews')
    r = requests.get(url, cookies=cookies)
    if r.status_code != 200:
        # Lazy %-args instead of eager string interpolation.
        logging.warning('Invalid request: %s', r.status_code)
        return (None, False)

    soup = BeautifulSoup(r.content, 'html.parser')
    if soup.find('div', attrs={'id': 'privateProfile'}):
        logging.warning('User %d is private', user.gid)
        return (None, False)

    divPagination = soup.find('div', attrs={'id': 'reviewPagination'})
    if divPagination is None:
        # Single page of reviews: parse and return directly.
        reviews, books, authors = parseUserReviews(
            self, r.content, users_to_user(user))
        return ({'reviews': reviews, 'books': books,
                 'authors': authors}, False)

    # Collect numbered page links, skipping the next/previous controls.
    pages = []
    for a in divPagination.findAll('a'):
        class_a = a.get('class')
        if class_a is None or class_a[0] not in ('next_page', 'previous_page'):
            pages.append({'num': int(a.text), 'link': a.get('href')})

    all_reviews = []
    all_books = []
    all_authors = []
    reviews, books, authors = parseUserReviews(
        self, r.content, users_to_user(user))
    all_reviews.append(reviews)
    all_books.append(books)
    all_authors.append(authors)

    if len(pages) > 1:
        # Cap the scrape at pages 2..5 (at most 5 pages total).
        total_pages_count = min(pages[-1]['num'] + 1, 6)
        for p in range(2, total_pages_count):
            reviews, books, authors = self.getPageReviews(
                users_to_user(user), p, total_pages_count)
            all_reviews.append(reviews)
            all_books.append(books)
            all_authors.append(authors)

    # Flatten the per-page lists into single lists.
    all_reviews = [val for sublist in all_reviews for val in sublist]
    all_books = [val for sublist in all_books for val in sublist]
    all_authors = [val for sublist in all_authors for val in sublist]
    logging.info('\nFinish getting %d reviews %d books %d authors',
                 len(all_reviews), len(all_books), len(all_authors))
    return ({'reviews': all_reviews, 'books': all_books,
             'authors': all_authors}, False)
def getPageReviews(self, user, page, total):
    """Fetch and parse one page of a user's Goodreads review list.

    Args:
        user: user record exposing ``gid`` (Goodreads numeric id).
        page: 1-based page number to fetch.
        total: total page count (used only for progress logging).

    Returns:
        The ``(reviews, books, authors)`` tuple from ``parseUserReviews``;
        three empty lists on a non-200 response or any exception.
    """
    # Lazy %-args; renders the same message as the old concatenation.
    logging.info('get review for user %s page %s of %s', user.gid, page, total)
    cookies = dict(session_id2='', p='', u='-6IY_')
    url = ('https://www.goodreads.com/review/list/' + str(user.gid) +
           '-stephanie?order=d&page=' + str(page) +
           '&per_page=100&sort=rating&shelf=read&utf8=✓&view=reviews')
    try:
        r = requests.get(url, cookies=cookies)
        if r.status_code != 200:
            logging.warning('Invalid request: %s', r.status_code)
            return ([], [], [])
        return parseUserReviews(self, r.content, user)
    except Exception:
        # Keep the traceback instead of discarding the exception detail.
        logging.exception('Exception on page reviews')
        return ([], [], [])
def getPageReviews(self, user, page, total):
    """Fetch and parse one page of a user's Goodreads review list.

    Args:
        user: user record exposing ``gid`` (Goodreads numeric id).
        page: 1-based page number to fetch.
        total: total page count (used only for progress logging).

    Returns:
        The ``(reviews, books, authors)`` tuple from ``parseUserReviews``;
        three empty lists on a non-200 response or any exception.
    """
    # Lazy %-args; renders the same message as the old concatenation.
    logging.info('get review for user %s page %s of %s', user.gid, page, total)
    cookies = dict(session_id2='', p='', u='-6IY_')
    url = ('https://www.goodreads.com/review/list/' + str(user.gid) +
           '-stephanie?order=d&page=' + str(page) +
           '&per_page=100&sort=rating&shelf=read&utf8=✓&view=reviews')
    try:
        r = requests.get(url, cookies=cookies)
        if r.status_code != 200:
            logging.warning('Invalid request: %s', r.status_code)
            return ([], [], [])
        return parseUserReviews(self, r.content, user)
    except Exception:
        # Keep the traceback instead of discarding the exception detail.
        logging.exception('Exception on page reviews')
        return ([], [], [])
def getUserReviews(self, user):
    """Fetch all 'read'-shelf reviews for *user* from Goodreads.

    Scrapes the user's review-list page and, when the list is paginated,
    up to 4 additional pages (pages 2..5) via ``self.getPageReviews``,
    aggregating the parsed reviews, books and authors.

    Args:
        user: a user record exposing ``gid`` (Goodreads numeric id) and ``id``.

    Returns:
        A ``(payload, flag)`` tuple where ``payload`` is either ``None``
        (request failed or profile is private) or a dict with keys
        ``'reviews'``, ``'books'`` and ``'authors'``; ``flag`` is always
        ``False`` (kept for caller compatibility).
    """
    logging.info('Get user reviews for %d id: %s:', user.gid, user.id)
    cookies = dict(session_id2='', p='', u='')
    # NOTE(review): Goodreads appears to ignore the name slug after the
    # numeric id, so the hard-coded '-stephanie' suffix is presumably
    # harmless — confirm before changing.
    url = ('https://www.goodreads.com/review/list/' + str(user.gid) +
           '-stephanie?order=d&per_page=100&shelf=read&sort=rating'
           '&utf8=✓&view=reviews')
    r = requests.get(url, cookies=cookies)
    if r.status_code != 200:
        # Lazy %-args instead of eager string interpolation.
        logging.warning('Invalid request: %s', r.status_code)
        return (None, False)

    soup = BeautifulSoup(r.content, 'html.parser')
    if soup.find('div', attrs={'id': 'privateProfile'}):
        logging.warning('User %d is private', user.gid)
        return (None, False)

    divPagination = soup.find('div', attrs={'id': 'reviewPagination'})
    if divPagination is None:
        # Single page of reviews: parse and return directly.
        reviews, books, authors = parseUserReviews(
            self, r.content, users_to_user(user))
        return ({'reviews': reviews, 'books': books,
                 'authors': authors}, False)

    # Collect numbered page links, skipping the next/previous controls.
    pages = []
    for a in divPagination.findAll('a'):
        class_a = a.get('class')
        if class_a is None or class_a[0] not in ('next_page', 'previous_page'):
            pages.append({'num': int(a.text), 'link': a.get('href')})

    all_reviews = []
    all_books = []
    all_authors = []
    reviews, books, authors = parseUserReviews(
        self, r.content, users_to_user(user))
    all_reviews.append(reviews)
    all_books.append(books)
    all_authors.append(authors)

    if len(pages) > 1:
        # Cap the scrape at pages 2..5 (at most 5 pages total).
        total_pages_count = min(pages[-1]['num'] + 1, 6)
        for p in range(2, total_pages_count):
            reviews, books, authors = self.getPageReviews(
                users_to_user(user), p, total_pages_count)
            all_reviews.append(reviews)
            all_books.append(books)
            all_authors.append(authors)

    # Flatten the per-page lists into single lists.
    all_reviews = [val for sublist in all_reviews for val in sublist]
    all_books = [val for sublist in all_books for val in sublist]
    all_authors = [val for sublist in all_authors for val in sublist]
    logging.info('\nFinish getting %d reviews %d books %d authors',
                 len(all_reviews), len(all_books), len(all_authors))
    return ({'reviews': all_reviews, 'books': all_books,
             'authors': all_authors}, False)