def collectCountryBrewers(url_base, countryurl, countryname, proxies):
    """Download one country page and gather its brewer listing.

    Returns ``(countryurl, countryname, brewers)`` where ``brewers`` maps
    each absolute brewer URL to a tuple of row texts, so the caller (an
    async result callback) can match the result back to its request.
    """
    brewers = OrderedDict()
    page = None
    try:
        response = requests.get(countryurl, proxies=proxies)
    except Exception as err:
        print('Error (BeerBrewers) request')
        print(' ' * 4, countryurl.encode())
        print(' ' * 4, err)
    else:
        try:
            page = BeautifulSoup(response.content, LXML)
        except Exception as err:
            print('Error (BeerBrewers) soup:', countryurl.encode(), err)
    if page is not None:
        brewer_href = re.compile('/brewers/[\W\w\d]+/[\d]+/')
        for anchor in page.find_all(TAG_A, href=brewer_href):
            # Resolve the brewer link and display name; skip anchors that
            # cannot be read.
            try:
                brewer_url = url_base + anchor[ATTRIB_HREF]
                brewer_name = unicode2ascii(anchor.text)
            except Exception as err:
                print('Error (BeerBrewers) iterator name:', err)
                continue
            # Collect the cells of the surrounding table row; on failure
            # fall back to five empty columns.
            try:
                cells = [unicode2ascii(td.text) for td in anchor.parent.parent]
                full_row = [brewer_name] + cells
            except Exception as err:
                print('Error (BeerBrewers) iterator row:', err)
                full_row = [brewer_name] + [str()] * 5
            brewers[brewer_url] = tuple(full_row)
    return countryurl, countryname, brewers
def collectBeerData(beerurl, beername, image_basepath, proxies):
    """Scrape a single beer page: description, info table and images.

    Parameters:
        beerurl: absolute URL of the beer page.
        beername: display name of the beer (passed through to the result).
        image_basepath: directory where beer images are stored.
        proxies: proxy mapping handed to ``requests.get``.

    Returns ``(beerurl, beername, beerData)``; ``beerData`` holds the short
    description, the "more info" text, the data-sheet table rows and the
    ``(image_url, image_path)`` pairs saved to disk during this call.
    """
    beerData = OrderedDict()
    beerData[BeerGiumGrabber.STR_SHORT_DESCRIPTION] = str()
    beerData[BeerGiumGrabber.STR_MORE_INFO] = str()
    beerData[BeerGiumGrabber.STR_TABLE_INFO] = []
    beerData[BeerGiumGrabber.STR_BEER_IMAGES] = []
    try:
        req = requests.get(beerurl, proxies=proxies)
    except Exception as err:
        print('Error (BeerPages) request:')
        print(' ' * 4, beerurl.encode())
        print(' ' * 4, err)
    else:
        try:
            soup = BeautifulSoup(req.content, LXML)
        except Exception as err:
            print('Error (BeerPages) soup:')
            print(' ' * 4, beerurl.encode())
            print(' ' * 4, err)
        else:
            # Image links sit on the zoomable anchors.
            images = []
            for a in soup.find_all(TAG_A, {'class': 'jqzoom'}):
                basename = os.path.basename(a[ATTRIB_HREF])
                image_path = os.path.join(image_basepath, basename)
                images.append((a[ATTRIB_HREF], image_path))
            # Only the first matching div is wanted for each text field.
            for div in soup.find_all(TAG_DIV,
                                     {'id': 'short_description_content'}):
                beerData[BeerGiumGrabber.STR_SHORT_DESCRIPTION] = \
                    unicode2ascii(div.text)
                break
            for div in soup.find_all(TAG_DIV, {'id': 'more_info_sheets'}):
                beerData[BeerGiumGrabber.STR_MORE_INFO] = unicode2ascii(div.text)
                break
            # First data-sheet table only; one list of cell texts per row.
            for table in soup.find_all(
                    TAG_TABLE, {'class': 'table-data-sheet table-bordered'}):
                for tr in table:
                    r = [unicode2ascii(td.text) for td in tr]
                    beerData[BeerGiumGrabber.STR_TABLE_INFO].append(r)
                break
            # Download each image not cached on disk yet.
            # NOTE(review): images already present on disk are NOT re-added
            # to STR_BEER_IMAGES -- presumably intentional; confirm against
            # the consumer of this field.
            for image_url, image_path in images:
                if not os.path.exists(image_path):
                    try:
                        req = requests.get(image_url, proxies=proxies)
                    except Exception as err:
                        print('Error (BeerPages) request:')
                        print(' ' * 4, image_url.encode())
                        print(' ' * 4, err)
                    else:
                        if len(req.content) > 0:
                            try:
                                # BUGFIX: context manager closes the file
                                # handle (was a bare open().write() leak).
                                with open(image_path,
                                          ReadWriteFile.WRITE_BINARY_NEW) as fh:
                                    fh.write(req.content)
                            except Exception as err:
                                print('Error (BeerPages) BeerImage save:')
                                print(' ' * 4, err)
                            else:
                                beerData[
                                    BeerGiumGrabber.STR_BEER_IMAGES].append(
                                        (image_url, image_path))
    return (beerurl, beername, beerData)
def beerRows(self, print_=False):
    """Flatten the scraped beer data into (FIELDS, ROWS) for export.

    FIELDS is the header: name, type, the fixed INFO labels, a free-text
    Description column and finally the image column.  Each ROW carries the
    corresponding values for one beer plus a list of local image paths.
    When ``print_`` is true the header and every row are also echoed
    tab-separated to stdout.
    """
    FIELDS = ['name', 'type']
    ROWS = []
    # Fixed label -> default ('') pairs; OrderedDict(INFO) below keeps this
    # column order stable for every row.
    INFO = [('Accessory:', str()), ('Alcohol:', str()), ('Awards:', str()),
            ('Brewery:', str()), ('Color:', str()), ('Country:', str()),
            ('Hop:', str()), ('IBU:', str()), ('Malt:', str()),
            ('Plato:', str()), ('Rating:', str()), ('Recipes:', str()),
            ('Served:', str()), ('Type:', str())]
    FIELDS.extend([v for v, _ in INFO])
    FIELDS.append('Description')
    if print_:
        print(chr(9).join(FIELDS))  # chr(9) == tab
    FIELDS.append(self.STR_BEER_IMAGES)
    for _, (beer_type, beers) in self.beerTypes.items():
        for beer_url, beer_name in beers.items():
            row = [beer_name, beer_type]
            beerData = self.beerData[(beer_url, beer_name)]
            info = beerData[self.STR_TABLE_INFO]
            for i, r in enumerate(info):
                if i == 0:
                    # First table row: alternating 'Label:'/value cells;
                    # the leading cell is dropped.
                    _, *r = r
                    d = OrderedDict(INFO)
                    ir = len(r)
                    for iv, v in enumerate(r, 1):
                        if v.endswith(':'):
                            if iv < ir:
                                # Take the next cell as the value unless it
                                # is itself another label.
                                if (not r[iv].endswith(':')):
                                    # NOTE(review): a label missing from
                                    # INFO would ADD a key here and
                                    # desynchronise row from FIELDS --
                                    # confirm labels always come from INFO.
                                    d[v] = unicode2ascii(r[iv])
                    row.extend(d.values())
                elif i == 1:
                    # Second table row: free-text description lines,
                    # joined with newlines (chr(10)).
                    row.append(unicode2ascii(chr(10).join(r)))
                else:
                    print('pass row:', r)
            # Pad rows whose table had fewer than two rows so the row
            # still lines up with FIELDS.
            if len(info) == 0:
                d = OrderedDict(INFO)
                row.extend(d.values())
                row.append(str())
            elif len(info) == 1:
                row.append(str())
            if print_:
                print(chr(9).join(map(str, row)))
            images = [
                image_path
                for _, image_path in beerData[self.STR_BEER_IMAGES]
            ]
            if not images:
                # Beers without any stored image are logged for review.
                print(beer_name, beer_type, beer_url)
            row.append(images)
            ROWS.append(row)
    return FIELDS, ROWS
def runBeers(self):
    """Populate self.beerTypes / self.beerPages, from cache or the site.

    With update_all unset, the pickled cache is used when present and the
    method otherwise does nothing; with update_all set, the type index and
    every per-type listing are re-scraped and re-pickled.
    """
    if (not self.update_all) and os.path.exists(self.beerPath):
        with open(self.beerPath, ReadWriteFile.READ_BINARY) as fh:
            self.beerTypes.update(pickle.load(fh))
            self.beerPages.update(pickle.load(fh))
        print('Beers:', len(self.beerPages))
        return
    if (not self.update_all):
        return
    # First pass: discover the beer-type index pages.
    try:
        response = requests.get(self._url_beer_types, proxies=self.proxies)
    except Exception as err:
        print('Error (Beers) request')
        print(' ' * 4, self._url_beer_types.encode())
        print(' ' * 4, err)
    else:
        try:
            index_page = BeautifulSoup(response.content, LXML)
        except Exception as err:
            print('Error (Beers) soup:', err)
        else:
            for anchor in index_page.find_all(TAG_A, {'class': 'footer'}):
                type_name = unicode2ascii(anchor.text)
                type_url = chr(47).join([self._url_base, anchor[ATTRIB_HREF]])
                self.beerTypes[type_url] = [type_name, OrderedDict()]
    # Second pass: pull the beer listing of every discovered type.
    for type_url, (name, beers) in self.beerTypes.items():
        try:
            response = requests.get(type_url, proxies=self.proxies)
        except Exception as err:
            print('Error (Beers) request')
            print(' ' * 4, type_url.encode())
            print(' ' * 4, err)
        else:
            try:
                listing = BeautifulSoup(response.content, LXML)
            except Exception as err:
                print('Error (Beers) soup:', err)
            else:
                for anchor in listing.find_all(TAG_A, {'class': 'beer'}):
                    beer_name = unicode2ascii(anchor.text)
                    beer_url = chr(47).join(
                        [self._url_base, anchor[ATTRIB_HREF]])
                    # Only anchors carrying a UBID are real beer pages.
                    if 'UBID=' in beer_url:
                        beers[beer_url] = beer_name
                        self.beerPages[beer_url] = beer_name
    print('Beers:', len(self.beerPages))
    with open(self.beerPath, ReadWriteFile.WRITE_BINARY_NEW) as fh:
        pickle.dump(self.beerTypes, fh)
        pickle.dump(self.beerPages, fh)
def runCountries(self):
    """Load the cached country list or scrape the country directory page.

    Each discovered country is appended to ``self.countryPages`` as an
    ``(url, name, brewer_count)`` tuple and the result is pickled.
    """
    if (not self.update_all) and os.path.exists(self.countryPath):
        with open(self.countryPath, ReadWriteFile.READ_BINARY) as fh:
            self.countryPages.extend(pickle.load(fh))
        print('Countries:', len(self.countryPages))
        return
    if (not self.update_all):
        return
    try:
        req = requests.get(self._url_country, proxies=self.proxies)
    except Exception as err:
        print('Error (Countries) request')
        print(' ' * 4, self._url_country.encode())
        print(' ' * 4, err)
    else:
        try:
            soup = BeautifulSoup(req.content, LXML)
        except Exception as err:
            print('Error (Countries) soup:', err)
        else:
            # BUGFIX: raw strings -- '\W'/'\d' are regex escapes, not valid
            # string escapes (non-raw forms warn on modern Python); the
            # pattern bytes are unchanged.
            for a in soup.find_all(
                    TAG_A, href=re.compile(r'/place/directory/0/[\W\w\d]+/')):
                name = unicode2ascii(a.text)
                url = self._url_base + a[ATTRIB_HREF]
                # Anchor text looks like 'Name (123)': split off the count.
                num, = re.findall(r'(\d+)', name)
                name = name.replace('(%s)' % num, str())
                name = name.strip()
                self.countryPages.append((url, name, int(num)))
    print('Countries:', len(self.countryPages))
    with open(self.countryPath, ReadWriteFile.WRITE_BINARY_NEW) as fh:
        pickle.dump(self.countryPages, fh)
def collectBeers(url_base, breweryurl, breweryname, proxies):
    """Collect the beer listing and summary text blocks of one brewery page.

    Returns ``(breweryurl, breweryname, beerUrls, brewerData)``: beerUrls
    maps (beer_url, beer_name) to the row of cell texts, brewerData maps
    the bold section labels to their raw section bytes.
    """
    beerUrls = OrderedDict()
    brewerData = OrderedDict()
    url = breweryurl + '?view=beers&show=all'
    page = None
    try:
        response = requests.get(url, proxies=proxies)
    except Exception as err:
        print('Error (Beers) request:')
        print(' ' * 4, breweryurl.encode())
        print(' ' * 4, err)
    else:
        try:
            page = BeautifulSoup(response.content, LXML)
        except Exception as err:
            print('Error (Beers) soup:')
            print(' ' * 4, breweryurl.encode())
            print(' ' * 4, err)
    if page is not None:
        for anchor in page.find_all(
                TAG_A, href=re.compile('/beer/profile/[\d]+/[\d]+')):
            try:
                beer_url = url_base + anchor[ATTRIB_HREF]
                cells = [unicode2ascii(td.text) for td in anchor.parent.parent]
                beer_name = cells[0]
            except Exception as err:
                print('Error (Beers) profile:')
                print(' ' * 4, err)
            else:
                beerUrls[(beer_url, beer_name)] = cells
        for bold in page.find_all(TAG_B):
            label = unicode2ascii(bold.text)
            # NOTE(review): lstrip() strips a character *set*, not a prefix;
            # kept as-is because downstream parsing expects this exact text.
            if label in ('BEER AVG', 'PLACE INFO', 'BEER STATS'):
                brewerData[label] = bold.parent.text.lstrip(label).encode()
    return (breweryurl, breweryname, beerUrls, brewerData)
def collectBeers(url_base, breweryurl, breweryname, proxies):
    """Scrape the beer rows of one brewery page (one <tr> per beer).

    Returns ``(breweryurl, breweryname, beerUrls)`` where beerUrls maps the
    absolute beer URL to a tuple of the row's cell texts.
    """
    beerUrls = OrderedDict()
    soup = None
    req = None
    try:
        req = requests.get(breweryurl, proxies=proxies)
    except Exception as err:
        print('Error (BeerBrewers) request')
        print(' ' * 4, breweryurl.encode())
        print(' ' * 4, err)
    else:
        try:
            soup = BeautifulSoup(req.content, LXML)
        except Exception as err:
            print('Error (BeerBrewers) soup:', breweryurl.encode(), err)
    if soup is not None:
        for table_row in soup.find_all(TAG_TR, {'valign': 'middle'}):
            # Name and link come from the first cell's anchor; skip rows
            # without one.
            try:
                beer_name = unicode2ascii(table_row.td.a.text)
                beer_url = url_base + table_row.td.a[ATTRIB_HREF]
            except Exception as err:
                print('Error (BeerBrewers) iterator beer name:', err)
                continue
            # Remaining cells (text nodes included); fall back to five
            # empty columns on failure.
            try:
                cells = [
                    unicode2ascii(td) if isinstance(td, str)
                    else unicode2ascii(td.text)
                    for td in table_row
                ]
                row = [beer_name] + cells[1:]
            except Exception as err:
                print('Error (BeerBrewers) iterator row:', err)
                row = [beer_name] + [str()] * 5
            beerUrls[beer_url] = tuple(row)
    return (breweryurl, breweryname, beerUrls)
def runBelgiumBeers(self):
    """Load cached Belgian beers or scrape the paginated catalogue.

    Fills ``self.beerPages`` with href -> [name, manufacturer, price] and
    pickles the result.  Page 1 also reveals the pagination links used to
    derive the total page count.
    """

    def parse_products(soup):
        # One product per 'product-name' anchor; manufacturer and price
        # live in sibling elements of the anchor's grandparent.
        for a in soup.find_all(TAG_A, {'class': 'product-name'}):
            name = unicode2ascii(a.text)
            self.beerPages[a[ATTRIB_HREF]] = [name, str(), str()]
            for p in a.parent.parent.find_all(
                    TAG_P, {'class': 'pro_list_manufacturer'}):
                self.beerPages[a[ATTRIB_HREF]][1] = unicode2ascii(p.text)
            for span in a.parent.parent.find_all(
                    TAG_SPAN, {'itemprop': 'price'}):
                self.beerPages[a[ATTRIB_HREF]][2] = unicode2ascii(span.text)

    if (not self.update_all) and os.path.exists(self.beerPath):
        with open(self.beerPath, ReadWriteFile.READ_BINARY) as fh:
            self.beerPages.update(pickle.load(fh))
        print('Beers:', len(self.beerPages))
        return
    if (not self.update_all):
        return
    numPages = 0
    try:
        req = requests.get(self._url_beer_belgium, proxies=self.proxies)
    except Exception as err:
        print('Error (Beers) request')
        # BUGFIX: report the URL actually requested (was self._url_country).
        print(' ' * 4, self._url_beer_belgium.encode())
        print(' ' * 4, err)
    else:
        try:
            soup = BeautifulSoup(req.content, LXML)
        except Exception as err:
            print('Error (Beers) soup:', err)
        else:
            pages = []
            for a in soup.find_all(
                    TAG_A, href=re.compile(r'/en/360-belgium\?p=[\d]+')):
                # chr(61) == '='; the page number follows it.
                pages.append(int(a[ATTRIB_HREF].split(chr(61), 1)[1]))
            # BUGFIX: max() on an empty list raised ValueError when no
            # pagination links exist; a single page is then assumed.
            if pages:
                numPages = max(pages)
            parse_products(soup)
    print(numPages, len(self.beerPages))
    for pageId in range(2, numPages + 1):
        url = '{}/en/360-belgium?p={}'.format(self._url_base, pageId)
        try:
            req = requests.get(url, proxies=self.proxies)
        except Exception as err:
            print('Error (Beers) request')
            # BUGFIX: report the page URL, not self._url_country.
            print(' ' * 4, url.encode())
            print(' ' * 4, err)
        else:
            try:
                soup = BeautifulSoup(req.content, LXML)
            except Exception as err:
                print('Error (Beers) soup:', err)
            else:
                parse_products(soup)
    print('Beers:', len(self.beerPages))
    with open(self.beerPath, ReadWriteFile.WRITE_BINARY_NEW) as fh:
        pickle.dump(self.beerPages, fh)
def runCountryBrewers(self):
    """Collect brewery listings per country, in parallel worker processes.

    Results arrive through ``resultCountryBrewers``; the module-level
    TASK_TOTAL / TASK_FINISHED counters report progress while waiting for
    the pool to drain.  The final mapping is pickled to
    ``self.countryBreweryPath``.
    """
    global TASK_FINISHED
    global TASK_TOTAL
    TASK_FINISHED = 0
    TASK_TOTAL = 0
    # BUGFIX: honour update_all like the sibling run*() methods -- the
    # cache used to short-circuit even when a full refresh was requested.
    if (not self.update_all) and os.path.exists(self.countryBreweryPath):
        with open(self.countryBreweryPath, ReadWriteFile.READ_BINARY) as fh:
            self.countryBreweryPages.update(pickle.load(fh))
        print(
            'Countries-Breweries:',
            sum(
                len(self.countryBreweryPages[c])
                for c in self.countryBreweryPages))
        return
    if not self.update_all:
        return
    pages = OrderedDict()
    try:
        req = requests.get(self._url_country_brewers, proxies=self.proxies)
    except Exception as err:
        print('Error (BeerBrewers) request')
        print(' ' * 4, self._url_country_brewers.encode())
        print(' ' * 4, err)
    else:
        try:
            soup = BeautifulSoup(req.content, LXML)
        except Exception as err:
            print('Error (BeerBrewers) soup:', err)
        else:
            # Raw string for the regex (valid regex escapes, invalid
            # string escapes otherwise).
            for a in soup.find_all(
                    TAG_A, href=re.compile(r'/breweries/[\W\w\d]+/[\d]+/')):
                url_ = self._url_base + a[ATTRIB_HREF]
                name = unicode2ascii(a.text)
                pages[url_] = name
    print('Countries:', len(pages))
    progress = len(pages)
    pool = Pool(processes=self.THREAD_NUMBER)
    for step, country_url in enumerate(pages, 1):
        country_name = pages[country_url]
        print('run page', step, '-', progress, ':', country_name,
              country_url.encode())
        pool.apply_async(
            collectCountryBrewers,
            args=(self._url_base, country_url, country_name, self.proxies),
            callback=resultCountryBrewers)
        TASK_TOTAL += 1
    pool.close()
    print('Total tasks:', TASK_TOTAL)
    # Poll until every async task has reported back via the callback.
    while ((TASK_TOTAL != TASK_FINISHED)):
        print('executed:', TASK_FINISHED, 'from:', TASK_TOTAL,
              'progress:', TASK_FINISHED / TASK_TOTAL)
        sleep(self.PROGRESS_UPDATE_TIME)
    pool.join()
    print(
        'Countries-Breweries:',
        sum(
            len(self.countryBreweryPages[c])
            for c in self.countryBreweryPages))
    with open(self.countryBreweryPath, ReadWriteFile.WRITE_BINARY_NEW) as fh:
        pickle.dump(self.countryBreweryPages, fh)
def collectBeerPages(beerurl, beername, image_basepath, proxies):
    """Scrape one beer page into a fixed 9-column row.

    Row layout: [name, url, image_basepath, image_url, image_path, info,
    fallback_image_url, image_size_bytes, style].  Returns
    ``(beerurl, beername, row)``.
    """
    row = [beername, beerurl, image_basepath, None, None, None, None, 0, None]
    try:
        req = requests.get(beerurl, proxies=proxies)
    except Exception as err:
        print('Error (BeerImage) request:')
        print(' ' * 4, beerurl.encode())
        print(' ' * 4, err)
    else:
        try:
            soup = BeautifulSoup(req.content, LXML)
        except Exception as err:
            print('Error (BeerImage) soup:')
            print(' ' * 4, beerurl.encode())
            print(' ' * 4, err)
        else:
            image_url = None
            image_path = None
            image_basename = None
            info = None
            # Beer style: first style link that is not the generic header.
            for a in soup.find_all(
                    TAG_A, href=re.compile(r'/beerstyles/[\w\W\d]+/[\d]+')):
                row[8] = unicode2ascii(a.text)
                if row[8] != 'Top By Style':
                    break
            # Last itemprop=image wins (matches original behaviour).
            for img in soup.find_all(TAG_IMG, {'itemprop': 'image'}):
                image_url = img[ATTRIB_SRC]
                image_basename = os.path.basename(image_url)
                image_path = os.path.join(image_basepath, image_basename)
            # Info text: first <small> with ratings, or any long one.
            for small in soup.find_all(TAG_SMALL):
                data = unicode2ascii(small.text)
                if 'RATINGS:' in data:
                    info = data
                    break
                elif len(small) > 8:
                    info = data
                    break
            if info is None:
                # Aliased beers: keep the redirect notice as info.
                for a in soup.find_all(
                        'a', href=re.compile(r'/beer/[\W\w\d]+/[\d]+/')):
                    if b'Proceed to the aliased beer' in a.parent.text.encode():
                        info = unicode2ascii(a.parent.text)
                        break
            if info is None and b"we didn't find this beer" in req.content:
                info = unicode2ascii(req.content.decode())
            row[3] = image_url
            row[4] = image_path
            row[5] = info
            if image_url and (os.path.exists(image_path)):
                row[7] = os.stat(image_path).st_size
            elif image_url and (not os.path.exists(image_path)):
                # The thumbnail lives one directory level down; rebuild the
                # full-size URL two levels up and fetch it.
                p, _ = os.path.split(image_url)
                p, _ = os.path.split(p)
                img_url = chr(47).join([p, image_basename])
                row[6] = img_url
                try:
                    req = requests.get(img_url, proxies=proxies)
                except Exception as err:
                    print('Error (BeerImage) image:')
                    print(' ' * 4, beerurl.encode())
                    print(' ' * 4, err)
                else:
                    iLen = len(req.content)
                    if iLen > 0:
                        try:
                            # BUGFIX: context manager closes the handle
                            # (was a bare open().write() leak).
                            with open(image_path,
                                      ReadWriteFile.WRITE_BINARY_NEW) as fh:
                                fh.write(req.content)
                        except Exception as err:
                            # BUGFIX: fixed garbled message
                            # (was 'image)save:').
                            print('Error (BeerImage) image save:')
                            print(' ' * 4, beerurl.encode())
                            print(' ' * 4, err)
                        else:
                            row[7] = iLen
    return (beerurl, beername, row)
def beerRows(self, print_=False):
    """Flatten BeerAdvocate beer data into (FIELDS, ROWS).

    Builds one row per beer page: brewery, beer name, the parsed BA-score
    and Bros-score blocks, the STATS counters, the INFO text fields and
    finally the image list.  Duplicate rows (identical values) are skipped
    via a hash set.  When ``print_`` is true, header and rows are also
    echoed tab-separated to stdout.
    """
    # Label -> default pairs for the numeric stats block.
    STATS = [
        ('Reviews:', -1),
        ('Ratings:', -1),
        ('Avg:', -1),
        ('pDev:', '-1%'),
        ('Wants:', -1),
        ('Gots:', -1),
        ('For Trade:', -1),
    ]
    # Label -> default pairs for the textual info block.
    INFO = [
        ('Brewed by:', str()),
        ('Style:', str()),
        ('Alcohol by volume (ABV):', str()),
        ('Availability:', str()),
        ('Notes / Commercial Description:', str()),
        ('Added by', str()),
    ]
    FIELDS = [
        'brewery',
        'beer',
        'ba-score',
        'ba-desc',
        'ba-reviews',
        'bros-score',
        'bros-desc',
    ]
    FIELDS.extend([n for n, _ in STATS])
    FIELDS.extend([n for n, _ in INFO])
    if print_:
        print(chr(9).join(map(str, FIELDS)))  # chr(9) == tab
    FIELDS.append(self.STR_BEER_IMAGES)
    ROWS = []
    iROWS = set()  # hashes of emitted rows, for de-duplication
    beers = OrderedDict()
    beer_number = 0
    for b_url, b_name in self.beerPages:
        beers[b_name] = OrderedDict()
        #print(b_name, b_url.encode())
        for beer_url, beer_name in self.beerPages[(b_url, b_name)]:
            # Only canonical beer URLs end with '/' (chr(47)).
            if not beer_url.endswith(chr(47)):
                continue
            #print(' '*4, beer_name, beer_url.encode())
            beerData = self.beerData[(beer_url, beer_name)]
            # BA score block: decode, drop tabs, split into lines, keep
            # what follows the section label.
            ba_score = beerData[self.STR_BA_SCORE]
            ba_score = ba_score.decode().replace(chr(9), str()).split(chr(10))
            ba_score = ba_score[ba_score.index(self.STR_BA_SCORE) + 2:]
            ba_score_index = -1
            if ba_score[0].isdigit():
                ba_score_index = int(ba_score[0])
            elif ba_score[0] == '-':
                ba_score_index = 0
            # Review count is the numeric run on the block's last line.
            review, = re.findall('[\d,.]+', ba_score[-1])
            # Bros score block, same layout as the BA score block.
            the_bros = beerData[self.STR_THE_BROS]
            the_bros = the_bros.decode().replace(chr(9), str()).split(chr(10))
            the_bros = the_bros[the_bros.index(self.STR_THE_BROS) + 2:]
            the_bros_index = -1
            if the_bros[0].isdigit():
                the_bros_index = int(the_bros[0])
            elif the_bros[0] == '-':
                the_bros_index = 0
            # Stats block: label lines followed by their value lines.
            beer_stats = beerData[self.STR_BEER_STATS]
            beer_stats = beer_stats.decode().replace(chr(9),
                                                     str()).split(chr(10))
            beer_stats = beer_stats[beer_stats.index(self.STR_BEER_STATS) + 3:]
            stats = OrderedDict(STATS)
            for name in stats:
                if name in beer_stats:
                    value = beer_stats[beer_stats.index(name) + 1]
                    if not value:
                        # Value may sit one line further down.
                        value = beer_stats[beer_stats.index(name) + 2]
                    if value == 'NAN%':
                        continue
                    stats[name] = str2val(value)
            # Info block: for each known label, find the span up to the
            # next known label and keep the text in between.
            beer_info = beerData[self.STR_BEER_INFO]
            beer_info = beer_info.decode().replace(chr(9),
                                                   str()).split(chr(10))
            beer_info = beer_info[beer_info.index(self.STR_BEER_INFO) + 2:]
            info = OrderedDict(INFO)
            names = list(info.keys())
            #print(names)
            #print(beer_info)
            for f, n in zip(names, names[1:] + [None]):
                s, e = -1, -1
                for i, v in enumerate(beer_info):
                    if f in v:
                        s = i
                    if n and n in v:
                        e = i + 1
                        break
                if s > -1 and e > -1:
                    d = chr(32).join(beer_info[s:e]).strip()
                    d = d[d.find(f) + len(f):d.find(n)].strip()
                elif s > -1 and e == -1:
                    # Last label: take everything to the end of the block.
                    d = chr(32).join(beer_info[s:]).strip()
                    d = d[d.find(f) + len(f):].strip()
                else:
                    continue
                if d in ('No notes at this time.', 'not listed'):
                    continue
                info[f] = unicode2ascii(d)
            row = [
                b_name,
                beer_name,
                ba_score_index,
                str2val(ba_score[1]),
                str2val(review),
                the_bros_index,
                the_bros[1],
            ]
            row.extend(stats.values())
            row.extend(info.values())
            if print_:
                print(chr(9).join(map(str, row)))
            # Skip rows already emitted with identical values.
            id_ = hash(tuple(row))
            if id_ in iROWS:
                continue
            iROWS.add(id_)
            row.append(beerData[self.STR_BEER_IMAGES])
            ROWS.append(row)
            beer_number += 1
    print('Beers:', beer_number)
    return FIELDS, ROWS
def collectCountryBrewers(url_base, countryurl, countryname, proxies):
    """Collect all brewery profile links reachable from one country page.

    Follows the brewery/city list links found on the country page, then
    walks any '?start=NN' pagination discovered on each list page.
    Returns ``(countryurl, countryname, brewers)`` with brewers mapping
    absolute brewery URLs to names.
    """

    def harvest_profiles(soup):
        # Every '/beer/profile' anchor carries the brewery name in a <b>.
        for a in soup.find_all(TAG_A, href=re.compile('/beer/profile')):
            brewers[url_base + a[ATTRIB_HREF]] = unicode2ascii(a.b.text)

    brewers = OrderedDict()
    prefixies = []
    try:
        req = requests.get(countryurl, proxies=proxies)
    except Exception as err:
        print('Error (CountryBrewers) request:')
        print(' ' * 4, countryurl.encode())
        print(' ' * 4, err)
    else:
        try:
            soup = BeautifulSoup(req.content, LXML)
        except Exception as err:
            print('Error (CountryBrewers) soup:')
            print(' ' * 4, countryurl.encode())
            print(' ' * 4, err)
        else:
            # List pages reachable from the country page.
            urls = []
            for a in soup.find_all(
                    TAG_A, href=re.compile(r'/place/list/[\w\W\d]+brewery=Y')):
                urls.append(url_base + a[ATTRIB_HREF])
            for a in soup.find_all(
                    TAG_A, href=re.compile(r'/place/list/\?city=[\w\W\d]+')):
                urls.append(url_base + a[ATTRIB_HREF])
            for pos, url in enumerate(urls, 1):
                print(pos)
                try:
                    req = requests.get(url, proxies=proxies)
                except Exception as err:
                    print('Error (CountryBrewers) request-1-{}:'.format(pos))
                    print(' ' * 4, url.encode())
                    print(' ' * 4, err)
                else:
                    try:
                        soup = BeautifulSoup(req.text, LXML)
                    except Exception as err:
                        print('Error (CountryBrewers) soup-1-{}:'.format(pos))
                        print(' ' * 4, url.encode())
                        print(' ' * 4, err)
                    else:
                        # Extract the pagination prefix/suffix around the
                        # numeric '?start=NN' parameter.
                        prefix_s = str()
                        prefix_e = str()
                        indexes = []
                        for a in soup.find_all(
                                TAG_A,
                                href=re.compile(r'/place/list/\?start=[\d]+')):
                            ids = re.findall(r'[\d]+', a[ATTRIB_HREF])
                            if ids:
                                # chr(61) == '='
                                prefix_s, prefix_e = a[ATTRIB_HREF].split(
                                    chr(61), 1)
                                # BUGFIX: lstrip() strips a character *set*;
                                # drop the exact leading index string
                                # instead (equivalent here, and correct for
                                # numbers with repeated digits).
                                if prefix_e.startswith(ids[0]):
                                    prefix_e = prefix_e[len(ids[0]):]
                                indexes.append(int(ids[0]))
                        harvest_profiles(soup)
                        if indexes:
                            prefixies.append(
                                (min(indexes), max(indexes),
                                 prefix_s, prefix_e))
    # Walk the remaining pages of every paginated list (20 rows per page).
    for minIndex, maxIndex, prefix_s, prefix_e in prefixies:
        for pos, index in enumerate(range(minIndex, maxIndex + 20, 20)):
            url = url_base + prefix_s + '=' + str(index) + prefix_e
            try:
                req = requests.get(url, proxies=proxies)
            except Exception as err:
                print('Error (CountryBrewers) request-2-{}:'.format(pos))
                print(' ' * 4, url.encode())
                print(' ' * 4, err)
            else:
                try:
                    soup = BeautifulSoup(req.text, LXML)
                except Exception as err:
                    print('Error (CountryBrewers) soup-2-{}:'.format(pos))
                    # BUGFIX: 'brewery_url' could be unbound here (NameError
                    # when no profile was seen yet); report the URL that
                    # actually failed to parse.
                    print(' ' * 4, url.encode())
                    print(' ' * 4, err)
                else:
                    harvest_profiles(soup)
    return countryurl, countryname, brewers
def collectBeerPages(beerurl, beername, image_basepath, proxies):
    """Scrape one BeerAdvocate beer page: summary text blocks and images.

    Returns ``(beerurl, beername, beerData)``; beerData maps the four
    summary labels to their raw (bytes) section text and STR_BEER_IMAGES to
    the (src, local_path) pairs available on disk after the call.
    """
    LABELS = (BeerAdvocateGrabber.STR_BA_SCORE,
              BeerAdvocateGrabber.STR_THE_BROS,
              BeerAdvocateGrabber.STR_BEER_STATS,
              BeerAdvocateGrabber.STR_BEER_INFO)
    beerData = OrderedDict()
    beerData[BeerAdvocateGrabber.STR_BEER_IMAGES] = []
    for label in LABELS:
        beerData[label] = bytes()
    try:
        req = requests.get(beerurl, proxies=proxies)
    except Exception as err:
        print('Error (BeerPages) request:')
        print(' ' * 4, beerurl.encode())
        print(' ' * 4, err)
    else:
        try:
            soup = BeautifulSoup(req.content, LXML)
        except Exception as err:
            print('Error (BeerPages) soup:')
            print(' ' * 4, beerurl.encode())
            print(' ' * 4, err)
        else:
            for b in soup.find_all(TAG_B):
                text = unicode2ascii(b.text)
                # NOTE(review): lstrip() strips a char *set*, not a prefix;
                # in practice the section text does not start with label
                # characters so the label survives -- beerRows() relies on
                # finding it.  Kept deliberately.
                if text in LABELS:
                    beerData[text] = b.parent.text.lstrip(text).encode()
            for img in soup.find_all(TAG_IMG):
                if '/beers/' not in img[ATTRIB_SRC]:
                    continue
                basename = os.path.basename(img[ATTRIB_SRC])
                # Skip the site's generic placeholder artwork.
                if basename in ('c_beer_image.gif', 'placeholder-beer.jpg'):
                    continue
                image_path = os.path.join(image_basepath, basename)
                if os.path.exists(image_path):
                    beerData[BeerAdvocateGrabber.STR_BEER_IMAGES].append(
                        (img[ATTRIB_SRC], image_path))
                    continue
                try:
                    req = requests.get(img[ATTRIB_SRC], proxies=proxies)
                except Exception as err:
                    print('Error (BeerPages) BeerImage request:')
                    print(' ' * 4, img[ATTRIB_SRC].encode())
                    print(' ' * 4, err)
                else:
                    try:
                        # BUGFIX: context manager closes the file handle
                        # (was a bare open().write() leak).
                        with open(image_path,
                                  ReadWriteFile.WRITE_BINARY_NEW) as fh:
                            fh.write(req.content)
                    except Exception as err:
                        print('Error (BeerPages) BeerImage save:')
                        print(' ' * 4, err)
                    else:
                        beerData[
                            BeerAdvocateGrabber.STR_BEER_IMAGES].append(
                                (img[ATTRIB_SRC], image_path))
    return (beerurl, beername, beerData)