def get(self):
    """Admin handler: flush memcache, reset the crawl position, or dump stats.

    Query parameters (any truthy string triggers the action):
      flushcache -- wipe all of memcache, logging on failure
      resetpos   -- reset the crawl index to 1 in both memcache and datastore
      getstats   -- write memcache stats (plus the datastore index) as JSON
    When getstats is absent, renders the admin page template instead.
    """
    def _param(name):
        # Sanitize a query parameter: HTML-escape, lowercase, cap at 50 chars,
        # then URL-unquote -- same pipeline for every flag.
        return urllib.unquote(cgi.escape(self.request.get(name)).lower()[:50])

    flush_cache = _param('flushcache')
    reset_pos = _param('resetpos')
    get_stats = _param('getstats')

    if flush_cache and not memcache.flush_all():
        logging.error("Error flushing memcache")

    if reset_pos:
        # Reset both copies of the crawl position.
        memcache.set("index", 1, 86400)
        SearchPosition(key_name="index", position=1).put()

    if get_stats:
        stats = memcache.get_stats()
        entity = SearchPosition.get_by_key_name("index")
        # -1 signals "no position stored yet" to the stats consumer.
        stats['indexds'] = entity.position if entity else -1
        self.response.out.write(json.dumps(stats))
    else:
        page = jinja_environment.get_template('html/admin_page.html')
        self.response.out.write(page.render({}))
def get(self):
    """Crawl dispatcher: fan out NUM_THREADS worker tasks from the current index.

    Reads the crawl position from memcache, falling back to the datastore
    (and defaulting to 1 on first run), enqueues one /crawl/worker task per
    position in [index, index + NUM_THREADS), then advances BOTH the
    memcache and datastore copies of the position by NUM_THREADS.
    """
    index = memcache.get("index")
    if not index:
        index_from_ds = SearchPosition.get_by_key_name("index")
        if not index_from_ds:
            index = 1
            # First run: seed the datastore with the starting position.
            SearchPosition(key_name="index", position=index).put()
        else:
            index = index_from_ds.position
        memcache.add("index", index, 86400)

    # Spawn one worker task per position in this batch.
    for i in range(index, index + NUM_THREADS):
        taskqueue.add(url='/crawl/worker', params={'index': i})  # , target='backend'

    # BUG FIX: the original called memcache.incr("index") with no delta,
    # advancing memcache by 1 while the datastore (below) advanced by
    # NUM_THREADS -- the two copies of the index drifted apart on every
    # request. Advance memcache by the full batch size instead.
    if not memcache.incr("index", delta=NUM_THREADS):
        logging.error("Memcache incr failed")

    # Persist the advanced position to the datastore.
    index_from_ds = SearchPosition.get_by_key_name("index")
    if index_from_ds:
        index_from_ds.position = index + NUM_THREADS
    else:
        # BUG FIX: the original stored the stale `index` here, losing the
        # batch advance whenever the entity was missing.
        index_from_ds = SearchPosition(key_name="index",
                                       position=index + NUM_THREADS)
    index_from_ds.put()
def get(self):
    """Admin handler (webapp-templates variant): flush cache, reset position, or stats.

    Query parameters (any truthy string triggers the action):
      flushcache -- wipe all of memcache, logging on failure
      resetpos   -- reset the crawl index to 1 in both memcache and datastore
      getstats   -- write memcache stats (plus the datastore index) as JSON
    When getstats is absent, renders admin_page.html via the webapp template
    module instead.
    """
    def _param(name):
        # Sanitize a query parameter: HTML-escape, lowercase, cap at 50 chars,
        # then URL-unquote.
        return urllib.unquote(cgi.escape(self.request.get(name)).lower()[:50])

    flush_cache = _param('flushcache')
    reset_pos = _param('resetpos')
    get_stats = _param('getstats')

    if flush_cache and not memcache.flush_all():
        logging.error("Error flushing memcache")

    if reset_pos:
        # Reset both copies of the crawl position.
        memcache.set("index", 1, 86400)
        SearchPosition(key_name="index", position=1).put()

    if get_stats:
        stats = memcache.get_stats()
        entity = SearchPosition.get_by_key_name("index")
        # -1 signals "no position stored yet" to the stats consumer.
        stats['indexds'] = entity.position if entity else -1
        self.response.out.write(json.dumps(stats))
    else:
        page_path = os.path.join(os.path.dirname(__file__), 'admin_page.html')
        self.response.out.write(template.render(page_path, {}))
def crawlPerson(index):
    """Crawl a single person record and store the result.

    If *index* is truthy, crawl that index directly with no locking or
    position bookkeeping. Otherwise acquire the shared mutex, read the
    current position from the datastore (seeding it at 1 if absent), crawl
    it, and on success advance the position by 1. When the crawler reports
    'end of database' the position wraps back to 1.

    Raises:
        Exception: when the crawler reports 'page_not_found' for the index.
    """
    logging.info("In CrawlPerson")
    if index:
        # Explicit index supplied by the caller: crawl it and return.
        result = Crawler().getMap(index)
        logging.info(str(result))
        putResult(result)
        return

    mutex = Mutex('mutex lock')
    try:
        mutex.lock()
        # NOTE(review): this path addresses the entity via get_by_id/id=
        # while the request handlers use get_by_key_name/key_name= for the
        # same "index" record -- confirm both resolve to the same entity.
        index_from_ds = SearchPosition.get_by_id("index")
        if index_from_ds:
            index = index_from_ds.position
        else:
            index_from_ds = SearchPosition(id='index', position=1)
            index_from_ds.put()
            index = 1
        result = Crawler().getMap(index)
        logging.info(str(result))
        if 'error' in result:
            logging.warn("error at index" + str(index) + ", error is " + result['error'])
            if result['error'] == 'page_not_found':
                logging.warn("Invalid index: " + str(index))
                raise Exception()
            if result['error'] == 'end of database':
                logging.warn("Index out of range: " + str(index))
                # Wrap the crawl position back to the start.
                index_from_ds.position = 1
                index_from_ds.put()
        else:
            logging.info("putting results")
            putResult(result)
            index_from_ds.position = int(index) + 1
            logging.info("INCREMENT " + str(index))
            index_from_ds.put()
        # BUG FIX: the original called mutex.unlock() here AND in `finally`,
        # releasing the lock twice on the success path. The original's
        # `except Exception as e: raise e` was also removed: it re-raised
        # with a truncated traceback and served no purpose, since `finally`
        # runs on exceptions regardless.
    finally:
        mutex.unlock()
def crawlPerson(index):
    """Crawl the person at the current datastore position and store the result.

    Acquires the shared mutex, reads the current position from the datastore
    (seeding it at 1 if absent -- note the incoming *index* argument is
    always overwritten by the stored position), crawls it, and on success
    advances the position by 1. When the crawler reports 'end of database'
    the position wraps back to 1.

    Raises:
        Exception: when the crawler reports 'page_not_found' for the index.
    """
    logging.info("In CrawlPerson")
    mutex = Mutex('mutex lock')
    try:
        mutex.lock()
        # NOTE(review): this path addresses the entity via get_by_id/id=
        # while the request handlers use get_by_key_name/key_name= for the
        # same "index" record -- confirm both resolve to the same entity.
        index_from_ds = SearchPosition.get_by_id("index")
        if index_from_ds:
            index = index_from_ds.position
        else:
            index_from_ds = SearchPosition(id='index', position=1)
            index_from_ds.put()
            index = 1
        result = Crawler().getMap(index)
        logging.info(str(result))
        if 'error' in result:
            logging.warn("error at index" + str(index) + ", error is " + result['error'])
            if result['error'] == 'page_not_found':
                logging.warn("Invalid index: " + str(index))
                raise Exception()
            if result['error'] == 'end of database':
                logging.warn("Index out of range: " + str(index))
                # Wrap the crawl position back to the start.
                index_from_ds.position = 1
                index_from_ds.put()
        else:
            logging.info("putting results")
            putResult(result)
            index_from_ds.position = int(index) + 1
            logging.info("INCREMENT " + str(index))
            index_from_ds.put()
        # BUG FIX: the original called mutex.unlock() here AND in `finally`,
        # releasing the lock twice on the success path. The original's
        # `except Exception as e: raise e` was also removed: it re-raised
        # with a truncated traceback and served no purpose, since `finally`
        # runs on exceptions regardless.
    finally:
        mutex.unlock()