def _categorize(self):
    """Promote temporary links that look like wine product pages.

    Reads a batch of ``WebLinkWineTemp`` entities and, for each one whose
    link matches a known product-URL fragment and is not yet stored as a
    ``WebLinkWine``, saves a new ``WebLinkWine`` with the same link and title.
    """
    # Known product-URL fragments: sku ; BuyWine/Item ; bwe ; ...
    product_link = re.compile(
        r"BuyWine/Item/\d+|sku|skuIT-\d+|bwe\d+|wines/\d+|/wine/|Apply/Vintage/\d+",
        re.I)

    # Batch is capped at 50 to avoid running into the datastore free quota.
    for candidate in WebLinkWineTemp.query().fetch(50):
        if not product_link.findall(candidate.link):
            continue

        # Skip links that have already been categorized.
        already_stored = WebLinkWine.query(WebLinkWine.link == candidate.link)
        if already_stored.count() != 0:
            continue

        wine_link = WebLinkWine()
        wine_link.link = candidate.link
        wine_link.title = candidate.title
        wine_link.put()
def _search_price(self):
    """Fetch the current price for stored wine links and record it.

    Reads up to 50 ``WebLinkWine`` entities. For every link that belongs to
    a supported shop (belmontwine, winebid, k&l) the page is downloaded, the
    price text is read from the shop-specific element, and a
    ``WinePriceInfo`` entity stamped with ``datetime.now()`` is stored.

    Links of other shops, pages that lack the expected price element, and
    empty price strings are skipped. Errors raised by ``urllib2`` while
    downloading are not caught here and propagate to the caller.
    """
    # One rule per shop:
    #   (link pattern, price tag, price css class, nested tag or None)
    # The patterns are anchored on the host and then require the product id.
    # The previous patterns chained zero-width lookaheads in front of ``$``
    # and therefore could never match any link.
    site_rules = (
        # belmontwine -- accepts both "bwe123" and "bwe/123" style ids
        (re.compile(r'^https?://www\.belmontwine\.com/.*bwe/?\d+', re.I),
         'td', 'detail-price-txt', None),
        # winebid
        (re.compile(r'^https?://www\.winebid\.com/Apply/Vintage/\d+', re.I),
         'div', 'price', None),
        # k&l -- the price text sits in a <strong> inside the price span
        (re.compile(r'^https?://www\.klwines\.com/.*sku=\d+', re.I),
         'span', 'price', 'strong'),
    )

    def fetch_page(url):
        """Download ``url`` and return the raw body, closing the response."""
        # TODO: need to add new mechanism to prevent fetch javascript
        response = urllib2.urlopen(urllib2.Request(url))
        try:
            return response.read()
        finally:
            response.close()

    def read_price(page, tag, css_class, inner_tag):
        """Return the stripped price text from ``page``, or '' if absent."""
        node = BeautifulSoup(page).find(tag, {"class": css_class})
        if node is not None and inner_tag is not None:
            node = node.find(inner_tag)
        # ``.string`` is None when the element has no single text child.
        if node is None or node.string is None:
            return ''
        return node.string.strip()

    for entity in WebLinkWine.query().fetch(50):
        link = entity.link
        for pattern, tag, css_class, inner_tag in site_rules:
            if not pattern.search(link):
                continue
            price = read_price(fetch_page(link), tag, css_class, inner_tag)
            if price:
                wine_price = WinePriceInfo(link=link,
                                           current_price=price,
                                           created_datetime=datetime.now())
                wine_price.put()
            # A link belongs to at most one shop.
            break