class GooglePlacesParser():
    """Build Google Places search URLs and feed them to a work queue.

    ``save_url_ToQueue`` produces one URL per (location, type group) pair and
    puts it on the queue; ``parse_html`` fetches one URL, stores the results
    and puts any follow-up URL (next page, or same page with a fresh API key)
    back on the queue.

    Attributes:
        crud: ``MongoCRUD`` instance used to read locations and store results.
        keys_count: the module-level ``app_keys`` mapping of API key -> number
            of URLs built with it. It is the shared object itself, so the
            counts written by ``app_keys_count`` are visible module-wide.
        keys: list of API keys that have not been activated yet.
        key_begin: the API key currently used when building URLs.
        redis_queue: object whose ``put(url)`` method receives the URLs.
    """

    def __init__(self, redis_que):
        """Load the API keys, activate the first one and keep the queue.

        Args:
            redis_que: queue-like object; only its ``put(url)`` method is used.

        Exits the process (via ``app_keys_pop``) if no API key is configured.
        """
        self.crud = MongoCRUD()
        self.keys_count = app_keys
        # list(...) guarantees a real list, so pop() works whether keys()
        # returns a list (Python 2) or a view (Python 3).
        self.keys = list(app_keys.keys())
        self.key_begin = self.app_keys_pop()
        self.redis_queue = redis_que

    def app_keys_pop(self):
        """Return the next unused API key, or exit when none are left."""
        if self.keys:
            return self.keys.pop()
        print("*------*-*all app keys have been used*-*-----*")
        sys.exit()

    def app_keys_count(self, key):
        """Count one use of ``key`` and return the key to put in the URL.

        If ``key`` has already been counted more than 999 times, the next
        unused key is activated instead (exiting the process if there is
        none). The chosen key becomes ``self.key_begin``.
        """
        count = self.keys_count[key]
        if count > 999:
            key = self.app_keys_pop()
            count = self.keys_count[key]
        count += 1
        self.keys_count[key] = count
        self.key_begin = key
        print(self.key_begin)
        print(count)
        return key

    def change_radius(self):
        """Return the search radius sent in the request (500 m)."""
        return 500

    def change_language(self):
        """Return the language code sent in the request."""
        return 'zh-TW'

    def get_url(self, location, type):
        """Build the search URL for one location and one group of types.

        Args:
            location: mapping with ``'lat'`` and ``'lng'`` entries.
            type: iterable of place-type strings, joined with ``|``.

        Returns:
            The URL string, ending in an empty ``&pagetoken=`` parameter so
            that ``parse_html`` can fill it in for follow-up pages.

        Side effect: counts one use of the active key and may rotate it
        (see ``app_keys_count``).
        """
        print('*********')
        parts = [
            'https://maps.googleapis.com/maps/api/place/search/json?sensor=false',
            '&language=%s' % self.change_language(),
            '&location=' + '%s,%s' % (location['lat'], location['lng']),
            '&radius=%s' % self.change_radius(),  # 500 m
            '&types=%s' % '|'.join(type),
            '&key=%s' % self.app_keys_count(self.key_begin),
            '&pagetoken=',
        ]
        return ''.join(parts)

    def save_url_ToQueue(self):
        """Queue one search URL per (location, type group) pair.

        Each location is marked through ``update_location_status`` once its
        URLs have been queued. ``types`` is a module-level iterable of type
        groups defined outside this class.

        No up-front check of ``self.keys`` is made: that list only holds the
        *spare* keys, so testing it would wrongly abort a run that has a
        single, still usable, active key. Real exhaustion is still reported
        by ``app_keys_pop``.
        """
        for location in self.crud.read_all_locations():
            for place_types in types:
                url = self.get_url(location, place_types)
                print(url)
                self.redis_queue.put(url)
            self.crud.update_location_status(location['_id'])

    def _with_pagetoken(self, url, token):
        """Return ``url`` with everything from ``&pagetoken=`` on replaced."""
        replacement = '&pagetoken=%s' % token
        # A callable replacement inserts the text literally, so backslashes in
        # the token can never be read as regex escapes.
        return re.sub(r'&pagetoken=.*', lambda match: replacement, url)

    def _with_key(self, url, key):
        """Return ``url`` with its ``key`` parameter replaced by ``key``."""
        replacement = '&key=%s&pagetoken' % key
        return re.sub(r'&key=.*&pagetoken', lambda match: replacement, url)

    def parse_html(self, url):
        """Fetch one search URL, store its results and queue any follow-up.

        * ``OK``: the results are saved; if the response carries a
          ``next_page_token`` the URL for the next page is queued.
        * ``OVER_QUERY_LIMIT``: the next API key is activated (exiting the
          process if none is left) and the same request is queued again with
          that key.
        * any other status: nothing is done.

        Args:
            url: a URL produced by ``get_url`` (possibly already rewritten).
        """
        # Fixed pause before every request; presumably rate limiting and
        # giving a fresh next_page_token time to become valid - confirm.
        time.sleep(2)
        # Browser is defined elsewhere; get_html() is used here as a mapping
        # (presumably the decoded JSON response).
        json_response = Browser(url).get_html()
        status = json_response['status']

        if status == 'OK':
            # insert to mongo
            self.crud.save_map_data_insert(json_response['results'])
            if 'next_page_token' in json_response:
                next_url = self._with_pagetoken(
                    url, json_response['next_page_token'])
                self.redis_queue.put(next_url)
        elif status == 'OVER_QUERY_LIMIT':
            self.key = self.app_keys_pop()
            # Keep URL building in step with the rotation, otherwise get_url
            # would go on handing out the key that just hit its quota.
            self.key_begin = self.key
            self.redis_queue.put(self._with_key(url, self.key))
class GooglePlacesParser():
    """Query the Google Places search endpoint and store the results.

    ``run`` walks every stored location and every type group, builds the
    search URL and hands it to ``parse_html``, which follows result pages and
    rotates API keys when a key reports ``OVER_QUERY_LIMIT``.

    Attributes:
        crud: ``MongoCRUD`` instance used to read locations and store results.
        keys: the module-level ``app_keys`` object holding the API keys not
            activated yet (assumed to be a list of key strings - confirm).
            It is the shared object itself, so popping a key removes it
            module-wide.
        key: the API key currently placed in request URLs.
    """

    def __init__(self):
        """Load the API keys and activate the first one.

        Exits the process (via ``app_keys_pop``) if no API key is configured.
        """
        self.crud = MongoCRUD()
        self.keys = app_keys
        self.key = self.app_keys_pop()

    def app_keys_pop(self):
        """Return the next unused API key, or exit when none are left."""
        if len(self.keys) > 0:
            return self.keys.pop()
        print("*------*-*all app keys have been used*-*-----*")
        sys.exit()

    def change_radius(self):
        """Return the search radius sent in the request (500 m)."""
        return 500

    def change_language(self):
        """Return the language code sent in the request."""
        return 'zh-TW'

    def get_url(self, location, type):
        """Build the search URL for one location and one group of types.

        Args:
            location: mapping with ``'lat'`` and ``'lng'`` entries.
            type: iterable of place-type strings; they are appended to the
                fixed ``establishment`` type, separated by ``|``.

        Returns:
            The URL string, ending in an empty ``&pagetoken=`` parameter so
            that ``parse_html`` can fill it in for follow-up pages.
        """
        parts = [
            'https://maps.googleapis.com/maps/api/place/search/json?sensor=false',
            '&language=%s' % self.change_language(),
            '&location=' + '%s,%s' % (location['lat'], location['lng']),
            '&radius=%s' % self.change_radius(),  # 500 m
            '&types=establishment|%s' % '|'.join(type),
            '&key=%s' % self.key,
            '&pagetoken=',
        ]
        return ''.join(parts)

    def run(self):
        """Fetch and store places for every location and type group.

        Each location is marked through ``update_location_status`` once all
        of its type groups have been processed. ``types`` is a module-level
        iterable of type groups defined outside this class.

        No up-front check of ``self.keys`` is made: that container only holds
        the *spare* keys, so testing it would wrongly abort a run that has a
        single, still usable, active key. Real exhaustion is still reported
        by ``app_keys_pop``.
        """
        for location in self.crud.read_all_locations():
            for place_types in types:
                url = self.get_url(location, place_types)
                print(url)
                self.parse_html(url)
            self.crud.update_location_status(location['_id'])

    def _with_pagetoken(self, url, token):
        """Return ``url`` with everything from ``&pagetoken=`` on replaced."""
        replacement = '&pagetoken=%s' % token
        # A callable replacement inserts the text literally, so backslashes in
        # the token can never be read as regex escapes.
        return re.sub(r'&pagetoken=.*', lambda match: replacement, url)

    def _with_key(self, url, key):
        """Return ``url`` with its ``key`` parameter replaced by ``key``."""
        replacement = '&key=%s&pagetoken' % key
        return re.sub(r'&key=.*&pagetoken', lambda match: replacement, url)

    def parse_html(self, url):
        """Fetch ``url`` and every follow-up page, storing all results.

        The loop keeps requesting until a response ends the chain:

        * ``OK``: the results are saved; if a ``next_page_token`` is present
          the next page is requested, otherwise the method returns.
        * ``OVER_QUERY_LIMIT``: the next API key is activated (exiting the
          process if none is left) and the same request is retried with it.
        * any other status: the method returns without storing anything.

        Written as a loop rather than self-recursion so that long chains of
        pages and key rotations do not grow the call stack.

        Args:
            url: a URL produced by ``get_url``.
        """
        while True:
            # Fixed pause before every request; presumably rate limiting and
            # giving a fresh next_page_token time to become valid - confirm.
            time.sleep(2)
            # Browser is defined elsewhere; get_html() is used here as a
            # mapping (presumably the decoded JSON response).
            json_response = Browser(url).get_html()
            status = json_response['status']

            if status == 'OK':
                # insert to mongo
                self.crud.save_map_data_insert(json_response['results'])
                if 'next_page_token' not in json_response:
                    return
                url = self._with_pagetoken(
                    url, json_response['next_page_token'])
            elif status == 'OVER_QUERY_LIMIT':
                self.key = self.app_keys_pop()
                url = self._with_key(url, self.key)
            else:
                return