def replicate_type():
    """Re-send every restaurant document under the ``restaurants_back`` type.

    Pages through ``LocationRestaurantEs`` with a ``match_all`` query, 100
    hits per request. For each non-empty page, every hit's ``_source`` is
    passed to ``LocationRestaurantEs.bulk_data`` with
    ``type_name="restaurants_back"``; the concatenated result is handed to
    ``LocationRestaurantEs.send``. A progress bar is advanced once per page
    and the total hit count is printed at the end.
    """
    from ghost_spider.elastic import LocationRestaurantEs
    from ghost_spider import progressbar

    page_size = 100
    page_index = 0
    bar = None
    total = 0

    while True:
        response = LocationRestaurantEs.search({
            "query": {"match_all": {}},
            "size": page_size,
            "from": page_index * page_size,
        })
        hits = response["hits"]

        # The bar is created lazily because the total is only known once
        # the first response has arrived.
        if not bar:
            total = hits["total"]
            bar = progressbar.AnimatedProgressBar(end=total, width=100)

        documents = hits["hits"]
        if not documents:
            break
        page_index += 1

        # One bulk payload per page.
        payload = "".join(
            LocationRestaurantEs.bulk_data(
                document["_source"], type_name="restaurants_back")
            for document in documents
        )

        # The result of ``+`` is discarded on purpose: the bar presumably
        # advances itself in place via __add__ -- TODO confirm.
        bar + page_size
        bar.show_progress()
        LocationRestaurantEs.send(payload)

    if bar:
        bar + total
        bar.show_progress()
    print("total %s" % total)
def _split_area_and_genres(anchors):
    """Split genre entries into the first area label and the genre labels.

    Each entry is scanned for ``genrecd=<digits>`` tokens and for the text
    captured by ``>(.*)</``. An entry containing the token ``genrecd=01``
    is treated as an area: the first such entry that has captured text
    supplies the area and later ones are ignored. Every other entry
    contributes its captured text to the genre list when that text is
    non-empty.

    Returns a ``(area, genres)`` tuple; ``area`` is ``None`` when no area
    entry with captured text was seen (it may be an empty string when the
    captured text itself is empty).
    """
    import re

    code_pattern = re.compile(r'genrecd=\d+')
    label_pattern = re.compile(r'>(.*)</')

    area = None
    genres = []
    for anchor in anchors:
        # NOTE(review): the indentation of this branch was lost in the
        # source; the non-area branch is read as the ``else`` of the
        # ``genrecd=01`` test -- confirm against the original layout.
        if u'genrecd=01' in code_pattern.findall(anchor):
            if area is None:
                labels = label_pattern.findall(anchor)
                # Guard against entries without any ``>...</`` text, which
                # previously raised IndexError and aborted the whole run.
                if labels:
                    area = labels[0]
        else:
            labels = label_pattern.findall(anchor)
            if labels and labels[0]:
                genres.append(labels[0])
    return area, genres


def update_location():
    """Recompute ``area``, ``area_ascii``, ``genre`` and ``kind`` in bulk.

    Pages through ``LocationRestaurantEs`` (``match_all`` sorted by
    ``name.untouched`` ascending, 100 hits per request). For every hit:

    * ``page_body.genre`` is split into an area label and genre labels;
    * when no area label is found, the last ``page_body.breadcrumbs``
      entry is used if there is more than one breadcrumb, otherwise
      ``area`` and ``area_ascii`` are both set to ``''``;
    * a non-empty area is passed to ``LocationRestaurantEs.analyze`` with
      the ``romaji_ascii_normal_analyzer`` to fill ``area_ascii``;
    * ``kind`` comes from ``LocationHotelSelectors.convert_latte_kind``
      applied to the genre labels.

    The modified documents of each page are sent as one bulk ``update``
    payload. The total hit count is printed once and a progress bar is
    advanced per page.
    """
    from ghost_spider.elastic import LocationRestaurantEs
    from ghost_spider.helper import LocationHotelSelectors
    from ghost_spider import progressbar

    analyzer = 'romaji_ascii_normal_analyzer'
    size = 100
    page = 0
    progress = None
    total = 0
    query = {"query": {"match_all": {}}, "size": size, "from": 0}
    query["sort"] = [{"name.untouched": "asc"}]

    while True:
        query["from"] = page * size
        results = LocationRestaurantEs.search(query)

        # The bar is created lazily because the total is only known once
        # the first response has arrived.
        if not progress:
            total = results["hits"]["total"]
            print("total %s" % total)
            progress = progressbar.AnimatedProgressBar(end=total, width=100)

        hits = results["hits"]["hits"]
        if not hits:
            break
        page += 1

        actions = []
        for hit in hits:
            location = hit["_source"]
            page_body = location["page_body"]

            area, genre = _split_area_and_genres(page_body["genre"])
            if area is None:
                breadcrumbs = page_body["breadcrumbs"]
                if len(breadcrumbs) > 1:
                    area = breadcrumbs[-1]
                else:
                    area = ''
                    location['area_ascii'] = ''

            location['area'] = area
            if area:
                location['area_ascii'] = LocationRestaurantEs.analyze(
                    area, analyzer)

            location['genre'] = genre
            location['kind'] = LocationHotelSelectors.convert_latte_kind(genre)
            actions.append(LocationRestaurantEs.bulk_data(
                location, action="update", data_id=hit["_id"]))

        # The result of ``+`` is discarded on purpose: the bar presumably
        # advances itself in place via __add__ -- TODO confirm.
        progress + size
        progress.show_progress()
        LocationRestaurantEs.send("".join(actions))

    if progress:
        progress + total
        progress.show_progress()
    print(" ")