def put_mock_log(self, index_name):
    with open('../data/logs/njnert_logs/njnet_access_mix.log') as f:
        for no, line in enumerate(f):
            d = Document(message=line)
            d.save(using=self.client, index=index_name)
            if no % 1000 == 0:
                print('put {} rows'.format(no))
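# The loop above makes one round trip per log line, which is slow for large
# files. A minimal sketch of the same load via elasticsearch-py's bulk helper;
# put_mock_log_bulk is a hypothetical name, and self.client is assumed to be
# an elasticsearch-py client, as save(using=self.client) above suggests.
from elasticsearch.helpers import bulk

def put_mock_log_bulk(self, index_name):
    with open('../data/logs/njnert_logs/njnet_access_mix.log') as f:
        # Stream actions to the _bulk API; bulk() returns (success_count, errors).
        actions = ({'_index': index_name, '_source': {'message': line}}
                   for line in f)
        bulk(self.client, actions)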
def pg2es():
    # Copy each Django `places` row into Elasticsearch as GeoJSON.
    for row in places.objects.all():
        geojson = serialize('geojson', [row], geometry_field='geom')
        # Attach the serialized feature to the document before saving.
        doc = Document(geojson=geojson)
        doc.save(index="places2")
        print(geojson)
def worker_abort(worker):
    # gunicorn server hook: runs when a worker is aborted (e.g. on timeout),
    # so record the failed request in .datashader_tiles before the worker dies.
    current_app = worker.app.wsgi()
    es = Elasticsearch(
        current_app.config.get("ELASTIC").split(","),
        verify_certs=False,
        timeout=120,
    )
    doc = Document(
        url=worker.current_request.uri,
        host=socket.gethostname(),
        pid=os.getpid(),
        timestamp=datetime.now(),
        timeout=True,
        error="Hit gunicorn timeout prior to request completion",
    )
    doc.save(using=es, index=".datashader_tiles")
def main():
    import time
    start = time.time()
    index_name = 'test_index'
    es_index = Index(index_name)
    if not es_index.exists():
        # Number of shards should match the number of data nodes
        es_index.settings(number_of_shards=1)
        es_index.save()  # create the index before attaching an alias
        es_index.put_alias(using='default', name="sss3")
    else:
        es_index.put_alias(using='default', name="sss3")
    doc = Document(first_name='cheng', last_name="unknowname", hometown='China')
    doc.save(using='default', index=index_name)
    print(time.time() - start)
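# A hedged alternative to wiring up Index by hand: elasticsearch-dsl lets the
# mapping live on a Document subclass, and init() creates the index from it.
# Person is a hypothetical name; the fields mirror the document saved above.
from elasticsearch_dsl import Document, Keyword, Text

class Person(Document):
    first_name = Text()
    last_name = Text()
    hometown = Keyword()

    class Index:
        name = 'test_index'
        settings = {'number_of_shards': 1}

Person.init(using='default')  # creates test_index with this mapping if missing
Person(first_name='cheng', last_name='unknowname', hometown='China').save(using='default')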
def save(self, es_connection: Elasticsearch, document: Document):
    """
    Saves the client document in Elasticsearch. This should be overridden
    by the Document DAO.

    :param es_connection: active Elasticsearch connection
    :param document: the document to persist
    :return: doc_status, doc_meta
    """
    return document.save(using=es_connection)
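# A minimal sketch of the specialization the docstring calls for. ClientDocument
# and ClientDocumentDao are hypothetical names, and refresh=True is one plausible
# reason to override the generic save(), not part of the original code.
from elasticsearch import Elasticsearch
from elasticsearch_dsl import Document

class ClientDocument(Document):
    class Index:
        name = 'client_documents'

class ClientDocumentDao:
    def save(self, es_connection: Elasticsearch, document: ClientDocument):
        # Make the document searchable immediately after indexing.
        return document.save(using=es_connection, refresh=True)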
def insert_or_ignore(job: elasticsearch_dsl.Document, alias='default', index=JOB_INDEX):
    """Inserts the training job into the elasticsearch index if no job with
    the same name and creation timestamp exists.
    """
    if index == JOB_INDEX and 'slack' not in job.raw_log:
        print('job is incomplete, returning')
        return
    matches = index.search() \
        .query('match', job_name=job.job_name) \
        .query('match', created_at=job.created_at) \
        .count()
    if matches == 0:
        job.save(using=alias)
    else:
        print('job {} created at {} exists'.format(
            job.job_name, job.created_at))
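# Hypothetical setup insert_or_ignore() assumes: JOB_INDEX must be an
# elasticsearch_dsl.Index (the helper calls index.search() on it), and the job
# document needs job_name, created_at, and raw_log fields. Names are sketches.
from elasticsearch_dsl import Date, Document, Index, Keyword, Text

JOB_INDEX = Index('training_jobs')

class TrainingJob(Document):
    job_name = Keyword()
    created_at = Date()
    raw_log = Text()

    class Index:
        name = 'training_jobs'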
def save(self, **kwargs):
    return Document.save(self, **kwargs)
def merge_generated_parameters(params, idx, hash):
    """
    :param params: base parameters extracted from the request
    :param idx: index name
    :param hash: parameter hash identifying the layer
    :return: params with the generated parameters merged in
    """
    layer_id = "%s_%s" % (hash, socket.gethostname())
    es = Elasticsearch(
        current_app.config.get("ELASTIC").split(","),
        verify_certs=False,
        timeout=120
    )

    # See if the hash exists
    try:
        doc = Document.get(id=layer_id, using=es, index=".datashader_layers")
    except NotFoundError:
        doc = None

    if not doc:
        # If not, create the hash in the db, but only if it does not already exist
        try:
            doc = Document(
                _id=layer_id,
                creating_host=socket.gethostname(),
                creating_pid=os.getpid(),
                creating_timestamp=datetime.now(),
                generated_params=None,
                params=params,
            )
            doc.save(using=es, index=".datashader_layers", op_type="create", skip_empty=False)
            current_app.logger.debug("Created Hash document")
        except ConflictError:
            current_app.logger.debug("Hash document now exists, continuing")

        # Re-fetch to get the sequence number correct
        doc = Document.get(id=layer_id, using=es, index=".datashader_layers")

    # Check for generator timeouts: if generation started more than five minutes
    # ago and never completed, clear that entry so another worker can retry
    if doc.to_dict().get("generated_params", {}).get("generation_start_time") and \
            datetime.now() > datetime.strptime(
                doc.to_dict().get("generated_params", {}).get("generation_start_time"),
                "%Y-%m-%dT%H:%M:%S.%f") + timedelta(seconds=5 * 60):
        try:
            doc.update(using=es, index=".datashader_layers", retry_on_conflict=0,
                       refresh=True, generated_params=None)
        except ConflictError:
            current_app.logger.debug("Abandoned resetting parameters due to conflict; other process has completed.")

    # Loop-check if the generated params are missing/in-process/complete
    timeout_at = datetime.now() + timedelta(seconds=45)
    while not doc.to_dict().get("generated_params", {}).get("complete", False):
        if datetime.now() > timeout_at:
            current_app.logger.info("Hit timeout waiting for generated parameters to be placed into database")
            break
        # If missing, mark them as in generation
        if not doc.to_dict().get("generated_params", None):
            # Mark them as being generated, but do so with concurrency control
            # https://www.elastic.co/guide/en/elasticsearch/reference/current/optimistic-concurrency-control.html
            current_app.logger.info("Discovering generated parameters")
            generated_params = dict()
            generated_params["complete"] = False
            generated_params["generation_start_time"] = datetime.now()
            generated_params["generating_host"] = socket.gethostname()
            generated_params["generating_pid"] = os.getpid()
            try:
                doc.update(using=es, index=".datashader_layers", retry_on_conflict=0,
                           refresh=True, generated_params=generated_params)
            except ConflictError:
                current_app.logger.debug("Abandoned generating parameters due to conflict; will wait for other process to complete.")
                break
            # Generate and save off parameters
            current_app.logger.warning("Discovering generated params")
            generated_params.update(generate_global_params(params, idx))
            generated_params["generation_complete_time"] = datetime.now()
            generated_params["complete"] = True
            # Store off generated params
            doc.update(using=es, index=".datashader_layers", retry_on_conflict=0,
                       refresh=True, generated_params=generated_params)
            break
        else:
            time.sleep(1)
            doc = Document.get(id=layer_id, using=es, index=".datashader_layers")

    # We now have params, so use them
    params["generated_params"] = doc.to_dict().get("generated_params")
    return params
def get_tms(idx, x: int, y: int, z: int):
    tile_height_px = 256
    tile_width_px = 256

    # Validate that the request came from the proxy if proxy mode is enabled
    tms_key = current_app.config.get("TMS_KEY")
    tms_proxy_key = request.headers.get("TMS_PROXY_KEY")
    if tms_key is not None:
        if tms_key != tms_proxy_key:
            current_app.logger.warning(
                "TMS must be accessed via reverse proxy: keys %s != %s",
                tms_key,
                tms_proxy_key,
            )
            return Response("TMS must be accessed via reverse proxy", status=403)

    # TMS tile coordinates
    x = int(x)
    y = int(y)
    z = int(z)

    es = Elasticsearch(
        current_app.config.get("ELASTIC").split(","),
        verify_certs=False,
        timeout=120,
    )

    # Get hash and parameters
    try:
        parameter_hash, params = extract_parameters(request)
    except Exception as e:
        current_app.logger.exception("Error while extracting parameters")
        params = {"user": request.headers.get("es-security-runas-user", None)}
        # Create an error entry in .datashader_tiles
        doc = Document(
            idx=idx,
            x=x,
            y=y,
            z=z,
            url=request.url,
            host=socket.gethostname(),
            pid=os.getpid(),
            timestamp=datetime.now(),
            params=params,
            error=repr(e),
        )
        doc.save(using=es, index=".datashader_tiles")
        # Generate and return an error tile
        return error_tile_response(e, tile_height_px, tile_width_px)

    cache_dir = Path(current_app.config["CACHE_DIRECTORY"])
    tile_name = f"{idx}/{parameter_hash}/{z}/{x}/{y}.png"
    tile_id = "%s_%s_%s_%s_%s" % (idx, parameter_hash, z, x, y)
    force = request.args.get("force")

    # Check if the cached image already exists
    c = get_cache(cache_dir, tile_name)
    if c is not None and force is None:
        current_app.logger.info("Hit cache (%s), returning", parameter_hash)
        # Return the cached value and bump the hit counter
        img = c
        try:
            body = {"script": {"source": "ctx._source.cache_hits++"}}
            es.update(".datashader_tiles", tile_id, body=body, retry_on_conflict=5)
        except NotFoundError:
            current_app.logger.warning("Unable to find cached tile entry in .datashader_tiles")
    else:
        # Generate a tile
        if force is not None:
            current_app.logger.info(
                "Forced cache flush, generating a new tile %s/%s/%s", z, x, y
            )
        else:
            current_app.logger.info(
                "No cache (%s), generating a new tile %s/%s/%s", parameter_hash, z, x, y
            )
        check_cache_dir(cache_dir, idx)

        headers = get_es_headers(request_headers=request.headers, user=params["user"])
        current_app.logger.debug("Loaded input headers %s", request.headers)
        current_app.logger.debug("Loaded elasticsearch headers %s", headers)

        # Get or generate extended parameters
        params = merge_generated_parameters(params, idx, parameter_hash)

        # Separate call for ellipses/tracks
        t1 = datetime.now()
        try:
            if params["render_mode"] in ["ellipses", "tracks"]:
                img, metrics = generate_nonaggregated_tile(idx, x, y, z, params)
            else:
                img, metrics = generate_tile(idx, x, y, z, params)
        except Exception as e:
            logging.exception("Exception Generating Tile for request %s", request)
            # Create an error entry in .datashader_tiles
            doc = Document(
                hash=parameter_hash,
                idx=idx,
                x=x,
                y=y,
                z=z,
                url=request.url,
                host=socket.gethostname(),
                pid=os.getpid(),
                timestamp=datetime.now(),
                params=params,
                error=repr(e),
            )
            doc.save(using=es, index=".datashader_tiles")
            # Generate an error tile; don't cache it
            return error_tile_response(e, tile_height_px, tile_width_px)
        et = (datetime.now() - t1).total_seconds()

        # Make an entry into .datashader_tiles
        doc = Document(
            _id=tile_id,
            hash=parameter_hash,
            idx=idx,
            x=x,
            y=y,
            z=z,
            url=request.url,
            host=socket.gethostname(),
            pid=os.getpid(),
            render_time=et,
            timestamp=datetime.now(),
            params=params,
            metrics=metrics,
            cache_hits=0,
        )
        doc.save(using=es, index=".datashader_tiles")

        # Store the image in the cache as well
        set_cache(cache_dir, tile_name, img)

    resp = Response(img, status=200)
    resp.headers["Content-Type"] = "image/png"
    resp.headers["Access-Control-Allow-Origin"] = "*"
    resp.headers["Datashader-Parameter-Hash"] = parameter_hash
    resp.headers["Datashader-RunAs-User"] = params.get("user", "")
    resp.cache_control.max_age = 60
    return resp
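# Hypothetical Flask wiring for the view above; the actual route and blueprint
# names in the serving app may differ.
from flask import Blueprint

tms_blueprint = Blueprint('tms', __name__)

@tms_blueprint.route('/tms/<idx>/tile/<int:z>/<int:x>/<int:y>.png')
def tms_tile(idx, x, y, z):
    return get_tms(idx, x, y, z)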