def __init__(self, redis_que):
    """Set up storage access, the API-key pool and the outgoing queue.

    Args:
        redis_que: Object stored unchanged on ``self.redis_queue``.
    """
    self.crud = MongoCRUD()

    # The module-level ``app_keys`` object is shared, not copied: it is kept
    # whole as ``keys_count`` and its keys form the pool in ``keys``.
    key_table = app_keys
    self.keys_count = key_table
    self.keys = key_table.keys()

    # The first key is reserved through the class's own app_keys_pop().
    self.key_begin = self.app_keys_pop()
    self.redis_queue = redis_que
class Extractor(Worker):
    """Worker that stores a fetched page's results and schedules the next page."""

    def __init__(self, exit_flag, job_queue, done_queue, api_key_manager):
        """Initialise the ``Worker`` base and open a Mongo accessor.

        ``api_key_manager`` is accepted but not used by this constructor; the
        trailing ``-1`` is forwarded to ``Worker.__init__`` unchanged.
        """
        Worker.__init__(self, exit_flag, job_queue, done_queue, -1)
        self.crud = MongoCRUD()

    def do_job(self, job):
        """Persist the results carried by ``job`` and queue its successor.

        Jobs whose ``data["status"]`` is not 1 are ignored. Otherwise the
        ``"result"`` entry of ``data["page_json"]`` is saved, and when the page
        carries a ``"next_page_token"`` a follow-up ``GooglePOIJob`` for the
        next page index is appended to ``job.new_urls``.
        """
        if job.data["status"] != 1:
            return

        page = job.data["page_json"]
        self.crud.save_map_data_insert(page["result"])

        if "next_page_token" not in page:
            return

        token = page["next_page_token"]
        follow_up_url = JobURL.next_url(job.data["base_url"], token)
        follow_up = GooglePOIJob(follow_up_url, job.page_idx + 1)
        job.new_urls.append(follow_up)
class Extractor(Worker):
    """Worker that saves page results to Mongo and chains paginated jobs."""

    def __init__(self, exit_flag, job_queue, done_queue, api_key_manager):
        """Delegate to ``Worker.__init__`` and create the Mongo accessor.

        ``api_key_manager`` is part of the signature but is not read here.
        """
        base_args = (exit_flag, job_queue, done_queue, -1)
        Worker.__init__(self, *base_args)
        self.crud = MongoCRUD()

    def do_job(self, job):
        """Store ``job``'s results; enqueue the next page when one is announced.

        Nothing happens unless ``job.data['status']`` equals 1. Returns None.
        """
        succeeded = job.data['status'] == 1
        if not succeeded:
            return

        page_json = job.data['page_json']
        rows = page_json['result']
        self.crud.save_map_data_insert(rows)

        has_more = 'next_page_token' in page_json
        if has_more:
            token = page_json['next_page_token']
            url_for_next = JobURL.next_url(job.data['base_url'], token)
            next_index = job.page_idx + 1
            successor = GooglePOIJob(url_for_next, next_index)
            job.new_urls.append(successor)
def __init__(self, redis_que):
    """Create the Mongo accessor, load the key pool and keep the queue.

    Args:
        redis_que: Kept as ``self.redis_queue`` without modification.
    """
    self.crud = MongoCRUD()
    # ``keys_count`` is the shared ``app_keys`` object itself; ``keys`` is the
    # collection of its keys that app_keys_pop() is called against below.
    self.keys_count, self.keys = app_keys, app_keys.keys()
    self.key_begin = self.app_keys_pop()
    self.redis_queue = redis_que
def __init__(self, exit_flag, job_queue, done_queue, api_key_manager):
    """Initialise the ``Worker`` base class and open a Mongo accessor.

    The first three arguments are forwarded to ``Worker.__init__`` followed
    by a literal ``-1``. ``api_key_manager`` is accepted but not used here.
    """
    worker_args = (exit_flag, job_queue, done_queue, -1)
    Worker.__init__(self, *worker_args)
    self.crud = MongoCRUD()
def api_crud(audi_file_type=None, audi_file_id=None):
    """Run the CRUD operation selected by the current request's HTTP verb.

    GET reads, DELETE deletes, PUT updates and POST creates, each through a
    ``MongoCRUD`` built for ``audi_file_type`` / ``audi_file_id``. PUT and
    POST additionally pass the request's JSON body as ``data``.

    Args:
        audi_file_type: File type; must be one of ``VALID_FORMAT``.
        audi_file_id: Identifier handed to ``MongoCRUD`` as ``id``.

    Returns:
        ``jsonify(...)`` of the operation's result on success, otherwise a
        ``(body, status)`` tuple: 422 for an unsupported file type, 405 for
        an HTTP verb this view does not handle, 500 for any other failure.
    """
    try:
        if audi_file_type not in VALID_FORMAT:
            raise UnsupportedRequestError(
                "Only formats {} are allowed".format(", ".join(VALID_FORMAT)))

        request_method = request.method

        if request_method in ("GET", "DELETE"):
            data_obj = MongoCRUD(audi_file_type, id=audi_file_id)
            if request_method == "GET":
                return jsonify(data_obj.read())
            return jsonify(data_obj.delete())

        if request_method in ("PUT", "POST"):
            data = request.json
            data_obj = MongoCRUD(audi_file_type, id=audi_file_id, data=data)
            if request_method == "PUT":
                return jsonify(data_obj.update())
            return jsonify(data_obj.create())

        # Previously any other verb fell off the end and returned None, which
        # is not a valid view response; answer explicitly instead.
        return {
            "result": {
                "error": "Method {} is not allowed".format(request_method)
            }
        }, 405
    except UnsupportedRequestError as error:
        # ``.message`` is not guaranteed on every exception class; use it when
        # present and fall back to the exception's string form otherwise.
        return {"result": {"error": getattr(error, "message", str(error))}}, 422
    except Exception as exc:
        return {"result": {"error": str(exc)}}, 500
class GooglePlacesParser():
    """Builds Google Places search URLs and feeds them through a queue.

    API keys come from the module-level ``app_keys`` mapping, which also
    serves as the per-key usage counter (``keys_count``). URLs produced by
    ``save_url_ToQueue`` and re-issued by ``parse_html`` are pushed with
    ``redis_queue.put``.
    """

    def __init__(self, redis_que):
        """Open the Mongo accessor, load the key pool and reserve a first key.

        Args:
            redis_que: Queue-like object exposing ``put(url)``.
        """
        self.crud = MongoCRUD()
        self.keys_count = app_keys
        # A real list, so .pop() works even where keys() returns a view.
        self.keys = list(app_keys.keys())
        self.key_begin = self.app_keys_pop()
        self.redis_queue = redis_que

    def app_keys_pop(self):
        """Remove and return the next unused key.

        Prints a notice and terminates the process through ``sys.exit()``
        when the pool is empty.
        """
        if self.keys:
            return self.keys.pop()
        print("*------*-*all app keys have been used*-*-----*")
        sys.exit()

    def app_keys_count(self, key):
        """Record one more use of ``key`` and return the key to use.

        When the stored count for ``key`` already exceeds 999 a fresh key is
        taken from the pool and counted instead. The chosen key becomes
        ``self.key_begin``; the key and its new count are printed.
        """
        count = self.keys_count[key]
        if count > 999:
            key = self.app_keys_pop()
            count = self.keys_count[key]
        count = count + 1
        self.keys_count[key] = count
        self.key_begin = key
        print(self.key_begin)
        print(count)
        return key

    def change_radius(self):
        """Return the search radius placed in the URL (500)."""
        return 500

    def change_language(self):
        """Return the language code placed in the URL ('zh-TW')."""
        return 'zh-TW'

    def get_url(self, location, type):
        """Build the search URL for one location and one group of types.

        Args:
            location: Mapping providing ``'lat'`` and ``'lng'``.
            type: Iterable of strings, joined with ``'|'``.

        Returns:
            The URL, ending in an empty ``&pagetoken=`` parameter that
            ``parse_html`` later fills in. Counts one use of the current key.
        """
        print('*********')
        url = 'https://maps.googleapis.com/maps/api/place/search/json?sensor=false'
        url += '&language=%s' % self.change_language()
        url += '&location=' + '%s,%s' % (location['lat'], location['lng'])
        url += '&radius=%s' % self.change_radius()  # 500 m
        url += '&types=%s' % '|'.join(type)
        url += '&key=%s' % self.app_keys_count(self.key_begin)
        url += '&pagetoken='
        return url

    def save_url_ToQueue(self):
        """Queue one search URL per (location, types) pair.

        Exits the process when no spare key is left in the pool.
        """
        all_locations = self.crud.read_all_locations()
        if len(self.keys) > 0:
            for location in all_locations:
                for type in types:
                    url = self.get_url(location, type)
                    print(url)
                    self.redis_queue.put(url)
                # NOTE(review): assumed to run once per location, after all
                # of its URLs were queued -- confirm against the original.
                self.crud.update_location_status(location['_id'])
        else:
            print("*------*-*all app keys have been used*-*-----*")
            sys.exit()

    def parse_html(self, url):
        """Fetch ``url``, store its results and re-queue any follow-up URL.

        On ``'OK'`` the results are saved and, if a ``next_page_token`` is
        present, the URL for the next page is pushed to the queue. On
        ``'OVER_QUERY_LIMIT'`` a fresh key is swapped into the URL and the
        URL is pushed again. Any other status is ignored.
        """
        time.sleep(2)
        br = Browser(url)
        josn_response = br.get_html()
        status = josn_response['status']
        if status == 'OK':
            results = josn_response['results']
            # insert to mongo
            self.crud.save_map_data_insert(results)
            if 'next_page_token' in josn_response:
                pagetoken = '&pagetoken=%s' % josn_response['next_page_token']
                url = re.sub(r'&pagetoken=.*', pagetoken, url)
                # Was a bare save_url_ToQueue(url): an undefined name, and
                # that method takes no URL. Push the URL the same way
                # save_url_ToQueue does.
                self.redis_queue.put(url)
        elif status == 'OVER_QUERY_LIMIT':
            self.key = self.app_keys_pop()
            # get_url() reads key_begin, so rotate it too; otherwise new URLs
            # would keep using the key that was just rejected.
            self.key_begin = self.key
            url = re.sub(r'&key=.*&pagetoken', '&key=%s&pagetoken' % self.key, url)
            self.redis_queue.put(url)
class EuQueue(object):
    """Queue over a Redis list, bundled with Google Places URL/key helpers.

    ``save_html`` appends to the tail of the Redis list ``name`` (RPUSH) and
    ``get_html`` removes from its head (LPOP), so items come back in the
    order they were saved.

    NOTE(review): ``parse_html_save`` looks half-ported and cannot complete
    its 'OK' or 'OVER_QUERY_LIMIT' branches as written -- see the notes in
    that method.
    """

    def __init__(self,name,redis_que):
        """Store the collaborators and reserve the first API key.

        Args:
            name: Name of the Redis list used by ``save_html``/``get_html``.
            redis_que: Stored on ``self.redis_queue``; no method of this
                class reads it.
        """
        self.crud = MongoCRUD()
        # The shared ``app_keys`` mapping doubles as the per-key usage table
        # that app_keys_count() reads and increments.
        self.keys_count = app_keys
        # NOTE(review): app_keys_pop() calls .pop() on this with no argument,
        # so keys() must yield a list here -- confirm the interpreter version.
        self.keys = app_keys.keys()
        self.key_begin = self.app_keys_pop()
        # self.key = app_keys_count(self.key_begin)
        self.redis_queue = redis_que
        self.name = name
        # Client is built with no arguments.
        self.db = redis.Redis()

    def app_keys_pop(self):
        """Remove and return the next key from ``self.keys``.

        Prints a notice and calls ``sys.exit()`` when the pool is empty.
        """
        if len(self.keys) > 0:
            key = self.keys.pop()
            return key
        else:
            print "*------*-*all app keys have been used*-*-----*"
            sys.exit()

    def app_keys_count(self, key):
        """Count one more use of ``key`` and return the key to use.

        If the stored count for ``key`` is above 999 a new key is popped and
        counted instead. The chosen key is saved as ``self.key_begin`` and
        both the key and its new count are printed.
        """
        count = self.keys_count[key]
        # NOTE(review): 999 is presumably a per-key request quota -- confirm.
        if count > 999:
            key = self.app_keys_pop()
            count = self.keys_count[key]
        count = count + 1
        self.keys_count[key] = count
        self.key_begin = key
        print self.key_begin
        print count
        return key

    def change_radius(self):
        """Return the search radius placed in the URL (500)."""
        radius = 500
        return radius

    def change_language(self):
        """Return the language code placed in the URL ('zh-TW')."""
        language = 'zh-TW'
        return language

    def get_url(self, location, type):
        """Build a Places search URL for ``location`` and the given types.

        ``location`` must provide ``'lat'`` and ``'lng'``; ``type`` is joined
        with ``'|'``. Calling this counts one use of the current key through
        ``app_keys_count``. The URL ends with an empty ``&pagetoken=``.
        """
        print '*********'
        url = 'https://maps.googleapis.com/maps/api/place/search/json?sensor=false'
        url += '&language=%s' % self.change_language()
        url += '&location=' + '%s,%s' % (location['lat'], location['lng'])
        url += '&radius=%s' % self.change_radius() # 500 m
        url += '&types=%s' % '|'.join(type)
        url += '&key=%s' % self.app_keys_count(self.key_begin)
        url += '&pagetoken='
        return url

    def save_html(self, item):
        """Print ``item`` and append it to the tail of the Redis list."""
        print "save html into eu_queue:", item
        self.db.rpush(self.name, item)

    def get_html(self):
        """Pop and return the head of the Redis list (whatever LPOP yields)."""
        item = self.db.lpop(self.name)
        return item

    def parse_html_save(self, item):
        """Save one response's results and follow paging / key rotation.

        NOTE(review): needs an owner's decision before it can be repaired:
          * ``url`` is read in both branches but is never assigned
            beforehand and is not a parameter;
          * ``self.parse_html`` is not defined on this class;
          * status and paging are read from ``item`` while the data is read
            from the value popped by ``get_html()``; which of the two is the
            parsed response is unclear -- confirm;
          * the rotated key is stored in ``self.key``, but ``get_url`` reads
            ``self.key_begin``.
        """
        # Show the source
        time.sleep(2)
        josn_response = self.get_html()
        status = item['status']
        if status == 'OK':
            results = josn_response['results']
            # insert to mongo
            self.crud.save_html_insert(results)
            if 'next_page_token' in item:
                pagetoken = '&pagetoken=%s' % josn_response['next_page_token']
                url = re.sub(r'&pagetoken=.*', pagetoken, url)
                self.parse_html(url)
            else:
                pass
        elif status == 'OVER_QUERY_LIMIT':
            self.key = self.app_keys_pop()
            url = re.sub(r'&key=.*&pagetoken', '&key=%s&pagetoken' % self.key, url)
            self.parse_html(url)
        else:
            return
'''
def __init__(self):
    """Open the Mongo accessor and reserve the first API key.

    ``self.keys`` is the module-level ``app_keys`` object itself, not a copy.
    """
    self.crud, self.keys = MongoCRUD(), app_keys
    # Presumably app_keys_pop() draws from self.keys, so it runs afterwards.
    self.key = self.app_keys_pop()
class GooglePlacesParser():
    """Walks stored locations and harvests Google Places search results.

    For every location and every entry of the module-level ``types`` a
    search URL is built, fetched through ``Browser`` and the returned
    results are written via ``MongoCRUD``.
    """

    def __init__(self):
        """Open the Mongo accessor and reserve the first API key."""
        self.crud = MongoCRUD()
        self.keys = app_keys
        self.key = self.app_keys_pop()

    def app_keys_pop(self):
        """Pop and return the next key; print a notice and exit if none remain."""
        if not self.keys:
            print("*------*-*all app keys have been used*-*-----*")
            sys.exit()
        else:
            return self.keys.pop()

    def change_radius(self):
        """Return the search radius placed in the URL."""
        return 500

    def change_language(self):
        """Return the language code placed in the URL."""
        return 'zh-TW'

    def get_url(self, location, type):
        """Assemble the search URL for ``location`` and the given types.

        ``location`` supplies ``'lat'``/``'lng'``; ``type`` is joined with
        ``'|'`` after the fixed ``establishment`` type. The URL ends in an
        empty ``&pagetoken=`` that ``parse_html`` rewrites when paging.
        """
        pieces = [
            'https://maps.googleapis.com/maps/api/place/search/json?sensor=false',
            '&language=%s' % self.change_language(),
            '&location=' + '%s,%s' % (location['lat'], location['lng']),
            '&radius=%s' % self.change_radius(),  # 500 m
            '&types=establishment|%s' % '|'.join(type),
            '&key=%s' % self.key,
            '&pagetoken=',
        ]
        return ''.join(pieces)

    def run(self):
        """Fetch every location/type combination, then mark the location done.

        Exits the process up front when the key pool is empty.
        """
        pending_locations = self.crud.read_all_locations()
        if not self.keys:
            print("*------*-*all app keys have been used*-*-----*")
            sys.exit()
        else:
            for place in pending_locations:
                for type_group in types:
                    search_url = self.get_url(place, type_group)
                    print(search_url)
                    self.parse_html(search_url)
                # NOTE(review): assumed to run once per location, after all
                # of its type groups -- confirm against the original layout.
                self.crud.update_location_status(place['_id'])

    def parse_html(self, url):
        """Fetch ``url`` and keep following it until there is nothing left.

        Each round waits two seconds, downloads the page and inspects its
        ``status``: ``'OK'`` saves the results and continues with the next
        page when a ``next_page_token`` is present; ``'OVER_QUERY_LIMIT'``
        swaps in a fresh key and retries; anything else stops.
        """
        while True:
            time.sleep(2)
            payload = Browser(url).get_html()
            outcome = payload['status']

            if outcome == 'OK':
                # insert to mongo
                self.crud.save_map_data_insert(payload['results'])
                if 'next_page_token' not in payload:
                    return
                token_param = '&pagetoken=%s' % payload['next_page_token']
                url = re.sub(r'&pagetoken=.*', token_param, url)
            elif outcome == 'OVER_QUERY_LIMIT':
                self.key = self.app_keys_pop()
                url = re.sub(r'&key=.*&pagetoken', '&key=%s&pagetoken' % self.key, url)
            else:
                return
def save(self, results):
    """Persist ``results`` through a freshly created ``MongoCRUD``.

    Hands ``results`` to ``save_circle_centers``; returns None.
    """
    MongoCRUD().save_circle_centers(results)
class EuQueue(object):
    """Redis-list queue that also carries the Places URL and API-key helpers.

    Items pushed with ``save_html`` go to the tail of the Redis list called
    ``name`` (RPUSH); ``get_html`` takes from the head (LPOP), giving
    first-in, first-out order.

    NOTE(review): ``parse_html_save`` cannot finish its 'OK' or
    'OVER_QUERY_LIMIT' paths as written; details are in its docstring.
    """

    def __init__(self, name, redis_que):
        """Create the collaborators and take the first API key from the pool.

        Args:
            name: Redis list name used by ``save_html`` and ``get_html``.
            redis_que: Kept as ``self.redis_queue``; not read by any method
                of this class.
        """
        self.crud = MongoCRUD()
        # ``app_keys`` itself is the usage table updated by app_keys_count().
        self.keys_count = app_keys
        # NOTE(review): .pop() is later called on this without an argument,
        # which requires a list -- confirm keys() returns one here.
        self.keys = app_keys.keys()
        self.key_begin = self.app_keys_pop()
        # self.key = app_keys_count(self.key_begin)
        self.redis_queue = redis_que
        self.name = name
        # Redis client constructed with no arguments.
        self.db = redis.Redis()

    def app_keys_pop(self):
        """Return the next key popped from ``self.keys``.

        When the pool is empty a notice is printed and ``sys.exit()`` is
        called.
        """
        if len(self.keys) > 0:
            key = self.keys.pop()
            return key
        else:
            print "*------*-*all app keys have been used*-*-----*"
            sys.exit()

    def app_keys_count(self, key):
        """Increment the usage count of ``key`` and return the key in use.

        A key whose stored count exceeds 999 is replaced by a newly popped
        one before counting. Sets ``self.key_begin`` and prints the key and
        its updated count.
        """
        count = self.keys_count[key]
        # NOTE(review): the 999 threshold looks like a request quota -- confirm.
        if count > 999:
            key = self.app_keys_pop()
            count = self.keys_count[key]
        count = count + 1
        self.keys_count[key] = count
        self.key_begin = key
        print self.key_begin
        print count
        return key

    def change_radius(self):
        """Return the radius value used in search URLs (500)."""
        radius = 500
        return radius

    def change_language(self):
        """Return the language value used in search URLs ('zh-TW')."""
        language = 'zh-TW'
        return language

    def get_url(self, location, type):
        """Return a Places search URL for ``location`` and ``type``.

        Reads ``location['lat']`` and ``location['lng']`` and joins ``type``
        with ``'|'``. The key comes from ``app_keys_count``, so every call
        counts as one use. The result ends with an empty ``&pagetoken=``.
        """
        print '*********'
        url = 'https://maps.googleapis.com/maps/api/place/search/json?sensor=false'
        url += '&language=%s' % self.change_language()
        url += '&location=' + '%s,%s' % (location['lat'], location['lng'])
        url += '&radius=%s' % self.change_radius() # 500 m
        url += '&types=%s' % '|'.join(type)
        url += '&key=%s' % self.app_keys_count(self.key_begin)
        url += '&pagetoken='
        return url

    def save_html(self, item):
        """Print ``item`` and RPUSH it onto the Redis list."""
        print "save html into eu_queue:", item
        self.db.rpush(self.name, item)

    def get_html(self):
        """LPOP one entry from the Redis list and return it as received."""
        item = self.db.lpop(self.name)
        return item

    def parse_html_save(self, item):
        """Store the results of a response, then handle paging or key rotation.

        NOTE(review): this method needs a design decision before a fix:
          * both branches read ``url``, which is neither a parameter nor
            assigned earlier in the method;
          * ``self.parse_html`` does not exist on this class;
          * ``item`` supplies the status and paging check while the popped
            ``get_html()`` value supplies the data; it is unclear which one
            is meant to be the parsed response -- confirm;
          * the new key lands in ``self.key`` although ``get_url`` uses
            ``self.key_begin``.
        """
        # Show the source
        time.sleep(2)
        josn_response = self.get_html()
        status = item['status']
        if status == 'OK':
            results = josn_response['results']
            # insert to mongo
            self.crud.save_html_insert(results)
            if 'next_page_token' in item:
                pagetoken = '&pagetoken=%s' % josn_response['next_page_token']
                url = re.sub(r'&pagetoken=.*', pagetoken, url)
                self.parse_html(url)
            else:
                pass
        elif status == 'OVER_QUERY_LIMIT':
            self.key = self.app_keys_pop()
            url = re.sub(r'&key=.*&pagetoken', '&key=%s&pagetoken' % self.key, url)
            self.parse_html(url)
        else:
            return
'''