def put_mock_log(self, index_name):
    with open('../data/logs/njnert_logs/njnet_access_mix.log') as f:
        for no, line in enumerate(f):
            d = Document(message=line)
            d.save(using=self.client, index=index_name)
            if no % 1000 == 0:
                print('put {} rows'.format(no))
def pg2es():
    for row in places.objects.all():
        geojson = serialize('geojson', [row], geometry_field='geom')
        # Carry the serialized GeoJSON on the document so each row is indexed
        doc = Document(geojson=geojson)
        doc.save(index="places2")
        print(geojson)
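# Imports the snippet above presumably relies on: Django's serializer (the
# 'geojson' format ships with django.contrib.gis) and elasticsearch_dsl's
# Document; `places` is assumed to be a GeoDjango model with a `geom` field:
from django.core.serializers import serialize
from elasticsearch_dsl import Document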
def worker_abort(worker):
    current_app = worker.app.wsgi()
    es = Elasticsearch(
        current_app.config.get("ELASTIC").split(","),
        verify_certs=False,
        timeout=120,
    )
    doc = Document(
        url=worker.current_request.uri,
        host=socket.gethostname(),
        pid=os.getpid(),
        timestamp=datetime.now(),
        timeout=True,
        error="Hit gunicorn timeout prior to request completion",
    )
    doc.save(using=es, index=".datashader_tiles")
def upgrade_downgrade(action):
    """Upgrade or downgrade index holdings.

    Correct items_count and public_items_count for holdings of type serial.

    :param str action: upgrade or downgrade.
    """
    index = HoldingsSearch.Meta.index
    query = HoldingsSearch()\
        .filter('term', holdings_type='serial')\
        .source(['pid'])
    ids = [(h.meta.id, h.pid) for h in query.scan()]
    count = 0
    LOGGER.info(f'Indexing {len(ids)} records ....')
    for (_id, pid) in ids:
        document = Document.get(_id, index=index, using=current_search_client)
        items_count, public_items_count = get_counts(pid, action)
        document.update(
            items_count=items_count,
            public_items_count=public_items_count,
            index=index,
            using=current_search_client,
            refresh=True,
        )
        count += 1
    LOGGER.info(f'{count} records indexed.')
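# Typical invocations, per the docstring above:
#   upgrade_downgrade('upgrade')
#   upgrade_downgrade('downgrade')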
def spotlight_courses(self):
    """Query Elasticsearch for courses whose ids match those of the stored,
    active CourseSpotlight objects."""
    course_spotlights = CourseSpotlight.objects.filter(active=True)
    id_list = []
    result = []
    for spotlight in course_spotlights:
        id_list.append(spotlight.course_id)
    docs = Document.mget(
        id_list,
        using='default',
        index=self.index,
        raise_on_error=True,
        missing='none',
    )
    for doc in docs:
        if doc is None:
            # missing='none' yields a None placeholder for each absent id
            continue
        curr_dict = doc.to_dict(include_meta=True, skip_empty=True)
        obj_data = curr_dict["_source"]
        meta = {}
        meta["id"] = curr_dict["_id"]
        meta["index"] = curr_dict["_index"]
        obj_data["meta"] = meta
        result.append(obj_data)
    return result
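# For reference, Document.mget's `missing` argument controls how absent ids
# come back: 'none' (used above) yields a None placeholder per missing id,
# 'skip' drops them from the result list, and 'raise' raises an error.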
def main():
    import time
    start = time.time()
    index_name = 'test_index'
    es_index = Index(index_name)
    if not es_index.exists():
        es_index.put_alias(using='default')
        # Shard count matches the number of data nodes
        es_index.settings(number_of_shards=1)
        es_index.save()
    else:
        es_index.put_alias(using='default', name="sss3")
    doc = Document(first_name='cheng', last_name="unknowname", hometown='China')
    doc.save(using='default', index=index_name)
    print(time.time() - start)
def save(self, es_connection: Elasticsearch, document: Document):
    """Save the client_document in Elasticsearch.

    This should be overridden by the Document DAO.

    :param es_connection:
    :param document:
    :return: doc_status, doc_meta
    """
    return document.save(using=es_connection)
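# Hedged sketch of the kind of override the docstring above anticipates;
# the subclass name, base class, index name, and refresh choice are all
# assumptions, not part of the original code:
class ClientDocumentDao(DocumentDao):  # hypothetical base class
    def save(self, es_connection: Elasticsearch, document: Document):
        # Route client documents to a dedicated index and refresh immediately
        return document.save(using=es_connection, index='client_documents',
                             refresh=True)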
async def update(doc: elasticsearch_dsl.Document, doc_id: uuid.UUID):
    """Update a document in the index.

    Parameters:
        doc: The document carrying the updated fields
        doc_id: The id of the document to update
    """
    doc_type = type(doc)
    old_doc = await fc.run_in_threadpool(doc_type.get, id=doc_id)
    await fc.run_in_threadpool(old_doc.update, **doc.to_dict())
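# Hedged usage sketch, assuming `fc` is fastapi.concurrency (which re-exports
# Starlette's run_in_threadpool); the route and the CourseDoc Document
# subclass are hypothetical:
from fastapi import FastAPI

app = FastAPI()

@app.put("/docs/{doc_id}")
async def put_doc(doc_id: uuid.UUID, payload: dict):
    await update(CourseDoc(**payload), doc_id)  # CourseDoc is hypothetical
    return {"status": "updated"}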
def search(*, document: Document, sort, limit, offset, **kwargs):
    s = document.search()
    # TODO: Define search criteria
    if sort:
        s = SearchClass._sort_helper(s, sort, kwargs)
    s = SearchClass._pagination_helper(s, limit, offset)
    return s
def insert_or_ignore(job: elasticsearch_dsl.Document, alias='default', index=JOB_INDEX):
    """Inserts the training job into the Elasticsearch index if no job with
    the same name and creation timestamp exists.
    """
    if index == JOB_INDEX and 'slack' not in job.raw_log:
        print('job is incomplete, returning')
        return
    matches = index.search() \
        .query('match', job_name=job.job_name) \
        .query('match', created_at=job.created_at) \
        .count()
    if matches == 0:
        job.save(using=alias)
    else:
        print('job {} created at {} exists'.format(
            job.job_name, job.created_at))
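# Note: the function above calls index.search(), so JOB_INDEX is presumably
# an elasticsearch_dsl.Index object rather than a plain string; a minimal
# sketch (the index name is illustrative):
from elasticsearch_dsl import Index

JOB_INDEX = Index('training-jobs')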
def delete_by_doc_id(self, es_connection: Elasticsearch, document: Document = None, *args):
    """Delete the client_document in Elasticsearch.

    This should be overridden by the Document DAO.

    :param es_connection:
    :param document:
    :return: doc_status, doc_meta
    """
    return document.delete(using=es_connection)
def update(cls, _id, date, data):
    """Update all data for a record.

    :param str _id: Elasticsearch document ID.
    :param str date: Log date, useful for getting the right index.
    :param dict data: New record data.
    """
    index = cls.get_index({'date': date})
    document = Document.get(_id, index=index, using=current_search_client)
    # Assign each property to the document
    for key, item in data.items():
        document[key] = item
    result = document.save(
        index=index,
        using=current_search_client,
        refresh=True,
    )
    if result != 'updated':
        raise Exception('Operation log cannot be updated.')
def get(self, request):
    id = request.GET.get("id", "")
    doc = Document.get(id=id, index='policynew', using=client).to_dict()
    link = doc['link']
    title = doc['title']
    policy_object_list = []

    # Fetch the recommended policies from the database
    policy_recommend = PolicyRecommend.objects.filter(title=title)
    if len(policy_recommend) > 0:
        policy_object = policy_recommend[0]
        policy_str = policy_object.recommend
        policy_items = policy_str.split(",")[:3]
        policy_names = [item.split(' ')[0] for item in policy_items]
        policy_scores = [
            item.split(' ')[1] for item in policy_items
            if len(item.split(' ')[1]) == 7
        ]
        print(policy_scores)
        policy_recommend_urls = []
        for policy_name in policy_names:
            resp = Search(using=client, index='policynew').query("match", title=policy_name)
            response = resp.execute()
            link_str = response['hits']['hits'][0]["_id"]
            if policy_name == response[0].title:
                policy_recommend_url = "http://127.0.0.1:8000/detail/?id=" + link_str
            else:
                policy_recommend_url = ""
            policy_recommend_urls.append(policy_recommend_url)
        policy_object_list = list(
            zip(policy_names, policy_scores, policy_recommend_urls))

    # Fetch the related policies from the database
    relation_policies_dict = {}
    relations = RelationPolicies.objects.filter(title=title)
    if len(relations) == 1:
        # print(relations[0].relation_policies)
        relation_policies = set(relations[0].relation_policies.split(','))
        # print(relation_policies)
        for relation_policy in relation_policies:
            if relation_policy != '':
                resp = Search(using=client, index='policynew').query(
                    "match", title=relation_policy)
                response = resp.execute()
                link_str = response['hits']['hits'][0]["_id"]
                if relation_policy == response[0].title:
                    relation_policy_url = "http://127.0.0.1:8000/detail/?id=" + link_str
                else:
                    relation_policy_url = ""
                relation_policies_dict[relation_policy] = relation_policy_url

    # Build the result object to return
    result_dict = {}
    result_dict['link'] = link
    if policy_object_list != []:
        print(policy_object_list)
        result_dict['policy_object_list'] = policy_object_list
    if relation_policies_dict != {}:
        result_dict['relation_policies_dict'] = relation_policies_dict
    return render(
        request, 'detail.html', {
            'link': link,
            'relation_policies_dict': relation_policies_dict,
            'policy_object_list': policy_object_list
        })
def merge_generated_parameters(params, idx, hash):
    """
    :param params:
    :param idx:
    :param hash:
    :return:
    """
    layer_id = "%s_%s" % (hash, socket.gethostname())
    es = Elasticsearch(
        current_app.config.get("ELASTIC").split(","),
        verify_certs=False,
        timeout=120,
    )

    # See if the hash exists
    try:
        doc = Document.get(id=layer_id, using=es, index=".datashader_layers")
    except NotFoundError:
        doc = None

    if not doc:
        # If not, create the hash in the db, but only if it does not already exist
        try:
            doc = Document(
                _id=layer_id,
                creating_host=socket.gethostname(),
                creating_pid=os.getpid(),
                creating_timestamp=datetime.now(),
                generated_params=None,
                params=params,
            )
            doc.save(using=es, index=".datashader_layers", op_type="create", skip_empty=False)
            current_app.logger.debug("Created Hash document")
        except ConflictError:
            current_app.logger.debug("Hash document now exists, continuing")

        # Re-fetch to get the sequence number correct
        doc = Document.get(id=layer_id, using=es, index=".datashader_layers")

    # Check for generator timeouts
    if doc.to_dict().get("generated_params", {}).get("generation_start_time") and \
            datetime.now() > datetime.strptime(
                doc.to_dict().get("generated_params", {}).get("generation_start_time"),
                "%Y-%m-%dT%H:%M:%S.%f") + timedelta(seconds=5*60):
        # Something caused the worker generating the params to time out, so clear that entry
        try:
            doc.update(using=es, index=".datashader_layers", retry_on_conflict=0, refresh=True,
                       generated_params=None)
        except ConflictError:
            current_app.logger.debug("Abandoned resetting parameters due to conflict; other process has completed.")

    # Loop-check if the generated params are missing/in-process/complete
    timeout_at = datetime.now() + timedelta(seconds=45)
    while not doc.to_dict().get("generated_params", {}).get("complete", False):
        if datetime.now() > timeout_at:
            current_app.logger.info("Hit timeout waiting for generated parameters to be placed into database")
            break
        # If missing, mark them as in generation
        if not doc.to_dict().get("generated_params", None):
            # Mark them as being generated, but do so with concurrency control
            # https://www.elastic.co/guide/en/elasticsearch/reference/current/optimistic-concurrency-control.html
            current_app.logger.info("Discovering generated parameters")
            generated_params = dict()
            generated_params["complete"] = False
            generated_params["generation_start_time"] = datetime.now()
            generated_params["generating_host"] = socket.gethostname()
            generated_params["generating_pid"] = os.getpid()
            try:
                doc.update(using=es, index=".datashader_layers", retry_on_conflict=0, refresh=True,
                           generated_params=generated_params)
            except ConflictError:
                current_app.logger.debug("Abandoned generating parameters due to conflict; will wait for other process to complete.")
                break
            # Generate and save off parameters
            current_app.logger.warn("Discovering generated params")
            generated_params.update(generate_global_params(params, idx))
            generated_params["generation_complete_time"] = datetime.now()
            generated_params["complete"] = True
            # Store off generated params
            doc.update(using=es, index=".datashader_layers", retry_on_conflict=0, refresh=True,
                       generated_params=generated_params)
            break
        else:
            time.sleep(1)
            doc = Document.get(id=layer_id, using=es, index=".datashader_layers")

    # We now have params, so use them
    params["generated_params"] = doc.to_dict().get("generated_params")
    return params
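# The concurrency-control pattern the function above leans on, shown in
# isolation: with retry_on_conflict=0, Document.update() sends the fetched
# document's seq_no/primary_term, so a competing writer surfaces as
# ConflictError instead of silently overwriting. Index, id, and field names
# here are illustrative:
from elasticsearch import Elasticsearch
from elasticsearch.exceptions import ConflictError
from elasticsearch_dsl import Document

es = Elasticsearch()
doc = Document.get(id='layer-123', using=es, index='.datashader_layers')
try:
    doc.update(using=es, index='.datashader_layers',
               retry_on_conflict=0, refresh=True, claimed_by='worker-1')
except ConflictError:
    pass  # another process won the race; poll until it finishes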
def handle(self, **options):
    autodiscover_modules('search_indices')
    for search_class in Document.__subclasses__():
        search_class.init()
        _LOG.info('search_class:[%s] initialized', search_class)
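# A minimal sketch of a search class this command would discover and
# initialize; the class and index names are illustrative. Note that
# __subclasses__() only returns *direct* subclasses of Document:
from elasticsearch_dsl import Document, Keyword, Text

class ArticleDocument(Document):
    title = Text()
    slug = Keyword()

    class Index:
        name = 'articles'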
def delete(self, **kwargs):
    return Document.delete(self, **kwargs)

def update(self, **fields):
    return Document.update(self, **fields)

def save(self, **kwargs):
    return Document.save(self, **kwargs)

def search_by_id(self, id):
    return Document.get(id, using=self.__es, index=self.__index)
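# Hedged usage sketch for the thin wrappers above, assuming they live on a
# repository class holding a client and an index name (names hypothetical):
#
#   repo = DocumentRepository(es_client, index='clients')
#   doc = repo.search_by_id('42')
#   doc.status = 'archived'
#   doc.save()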
def get_tms(idx, x: int, y: int, z: int):
    tile_height_px = 256
    tile_width_px = 256

    # Validate request is from proxy if proxy mode is enabled
    tms_key = current_app.config.get("TMS_KEY")
    tms_proxy_key = request.headers.get("TMS_PROXY_KEY")
    if tms_key is not None:
        if tms_key != tms_proxy_key:
            current_app.logger.warning(
                "TMS must be accessed via reverse proxy: keys %s != %s",
                tms_key,
                tms_proxy_key,
            )
            return Response("TMS must be accessed via reverse proxy", status=403)

    # TMS tile coordinates
    x = int(x)
    y = int(y)
    z = int(z)

    es = Elasticsearch(
        current_app.config.get("ELASTIC").split(","),
        verify_certs=False,
        timeout=120,
    )

    # Get hash and parameters
    try:
        parameter_hash, params = extract_parameters(request)
    except Exception as e:
        current_app.logger.exception("Error while extracting parameters")
        params = {"user": request.headers.get("es-security-runas-user", None)}
        # Create an error entry in .datashader_tiles
        doc = Document(
            idx=idx,
            x=x,
            y=y,
            z=z,
            url=request.url,
            host=socket.gethostname(),
            pid=os.getpid(),
            timestamp=datetime.now(),
            params=params,
            error=repr(e),
        )
        doc.save(using=es, index=".datashader_tiles")
        # Generate and return an error tile
        return error_tile_response(e, tile_height_px, tile_width_px)

    cache_dir = Path(current_app.config["CACHE_DIRECTORY"])
    tile_name = f"{idx}/{parameter_hash}/{z}/{x}/{y}.png"
    tile_id = "%s_%s_%s_%s_%s" % (idx, parameter_hash, z, x, y)
    force = request.args.get("force")

    # Check if the cached image already exists
    c = get_cache(cache_dir, tile_name)
    if c is not None and force is None:
        current_app.logger.info("Hit cache (%s), returning", parameter_hash)
        # Return cached value
        img = c
        try:
            body = {"script": {"source": "ctx._source.cache_hits++"}}
            es.update(".datashader_tiles", tile_id, body=body, retry_on_conflict=5)
        except NotFoundError:
            current_app.logger.warn("Unable to find cached tile entry in .datashader_tiles")
    else:
        # Generate a tile
        if force is not None:
            current_app.logger.info(
                "Forced cache flush, generating a new tile %s/%s/%s", z, x, y
            )
        else:
            current_app.logger.info(
                "No cache (%s), generating a new tile %s/%s/%s", parameter_hash, z, x, y
            )
        check_cache_dir(cache_dir, idx)

        headers = get_es_headers(request_headers=request.headers, user=params["user"])
        current_app.logger.debug("Loaded input headers %s", request.headers)
        current_app.logger.debug("Loaded elasticsearch headers %s", headers)

        # Get or generate extended parameters
        params = merge_generated_parameters(params, idx, parameter_hash)

        # Separate call for ellipses
        t1 = datetime.now()
        try:
            if params["render_mode"] in ["ellipses", "tracks"]:
                img, metrics = generate_nonaggregated_tile(idx, x, y, z, params)
            else:
                img, metrics = generate_tile(idx, x, y, z, params)
        except Exception as e:
            logging.exception("Exception Generating Tile for request %s", request)
            # Create an error entry in .datashader_tiles
            doc = Document(
                hash=parameter_hash,
                idx=idx,
                x=x,
                y=y,
                z=z,
                url=request.url,
                host=socket.gethostname(),
                pid=os.getpid(),
                timestamp=datetime.now(),
                params=params,
                error=repr(e),
            )
            doc.save(using=es, index=".datashader_tiles")
            # Generate an error tile; don't cache it
            return error_tile_response(e, tile_height_px, tile_width_px)
        et = (datetime.now() - t1).total_seconds()

        # Make entry into .datashader_tiles
        doc = Document(
            _id=tile_id,
            hash=parameter_hash,
            idx=idx,
            x=x,
            y=y,
            z=z,
            url=request.url,
            host=socket.gethostname(),
            pid=os.getpid(),
            render_time=et,
            timestamp=datetime.now(),
            params=params,
            metrics=metrics,
            cache_hits=0,
        )
        doc.save(using=es, index=".datashader_tiles")

        # Store image as well
        set_cache(cache_dir, tile_name, img)

    resp = Response(img, status=200)
    resp.headers["Content-Type"] = "image/png"
    resp.headers["Access-Control-Allow-Origin"] = "*"
    resp.headers["Datashader-Parameter-Hash"] = parameter_hash
    resp.headers["Datashader-RunAs-User"] = params.get("user", "")
    resp.cache_control.max_age = 60
    return resp