Example #1
    def put_mock_log(self, index_name):
        with open('../data/logs/njnert_logs/njnet_access_mix.log') as f:
            for no, line in enumerate(f):
                d = Document(message=line)
                d.save(using=self.client, index=index_name)
                if no % 1000 == 0:
                    print('put {} rows'.format(no))
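Saving one Document per log line issues one HTTP request per row. Where throughput matters, the bulk helper from the low-level client batches the requests. A minimal sketch under the same assumptions (the function name and the path parameter are hypothetical):

from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk

def put_mock_log_bulk(client: Elasticsearch, index_name: str, path: str):
    # Stream one bulk action per log line instead of one save() call per line
    def actions():
        with open(path) as f:
            for line in f:
                yield {'_index': index_name, '_source': {'message': line}}
    bulk(client, actions())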
Example #2
def pg2es():

    for row in places.objects.all():
        geojson = serialize('geojson', [row], geometry_field='geom')

        # One document per row, carrying the serialized GeoJSON payload
        doc = Document(geojson=geojson)
        doc.save(index="places2")

        print(geojson)
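The loop above stores the serialized GeoJSON as an opaque string. A sketch of an alternative that maps the geometry explicitly so Elasticsearch can index it spatially; PlaceDocument is a hypothetical subclass and a configured default connection is assumed:

import json
from elasticsearch_dsl import Document, GeoShape, Text

class PlaceDocument(Document):
    name = Text()
    geom = GeoShape()

    class Index:
        name = 'places2'

def pg2es_mapped():
    for row in places.objects.all():
        feature = json.loads(
            serialize('geojson', [row], geometry_field='geom')
        )['features'][0]
        PlaceDocument(name=str(row), geom=feature['geometry']).save()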
Example #3
def worker_abort(worker):
    current_app = worker.app.wsgi()

    es = Elasticsearch(
        current_app.config.get("ELASTIC").split(","),
        verify_certs=False,
        timeout=120,
    )
    doc = Document(url=worker.current_request.uri,
                   host=socket.gethostname(),
                   pid=os.getpid(),
                   timestamp=datetime.now(),
                   timeout=True,
                   error="Hit gunicorn timeout prior to request completion")
    doc.save(using=es, index=".datashader_tiles")
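worker_abort is one of gunicorn's server hooks: it fires in a worker that receives SIGABRT, which is how gunicorn terminates workers that exceed the request timeout, so the function above records the aborted request in Elasticsearch. The hook is picked up by name from the gunicorn config module; a minimal sketch of the wiring:

# gunicorn.conf.py
timeout = 60  # seconds before a stuck worker is aborted

def worker_abort(worker):
    # Called in the worker process on SIGABRT, just before it dies
    worker.log.warning("worker pid %s aborted", worker.pid)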
Example #4
def upgrade_downgrade(action):
    """Upgrade or downgrade index holdings.

    Correct items_count and public_items_count for holdings of type serial.
    :param str action: upgrade or downgrade.
    """
    index = HoldingsSearch.Meta.index
    query = HoldingsSearch()\
        .filter('term', holdings_type='serial') \
        .source(['pid'])

    ids = [(h.meta.id, h.pid) for h in query.scan()]
    count = 0

    LOGGER.info(f'Indexing {len(ids)} records ....')
    for (_id, pid) in ids:
        document = Document.get(_id, index=index, using=current_search_client)
        items_count, public_items_count = get_counts(pid, action)

        document.update(items_count=items_count,
                        public_items_count=public_items_count,
                        index=index,
                        using=current_search_client,
                        refresh=True)
        count += 1
        LOGGER.info(f'{count} records indexed.')
Example #5
    def spotlight_courses(self):
        """This method queries elasticsearch for courses with ids matching the
            ids of stored CourseSpotlight objects that are active"""
        course_spotlights = CourseSpotlight.objects.filter(active=True)
        id_list = []
        result = []

        for spotlight in course_spotlights:
            id_list.append(spotlight.course_id)

        docs = Document.mget(
            id_list,
            using='default',
            index=self.index,
            raise_on_error=True,
            missing='none',
        )

        for doc in docs:
            curr_dict = doc.to_dict(include_meta=True, skip_empty=True)
            obj_data = curr_dict["_source"]
            meta = {}

            meta["id"] = curr_dict["_id"]
            meta["index"] = curr_dict["_index"]
            obj_data["meta"] = meta
            result.append(obj_data)

        return result
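Note that with missing='none', mget returns None in place of every id that was not found, so the loop above would raise an AttributeError for a spotlighted course that is no longer indexed. A defensive variant of the loop, same names:

        for doc in docs:
            if doc is None:
                # The id stored on the CourseSpotlight no longer exists in the index
                continue
            curr_dict = doc.to_dict(include_meta=True, skip_empty=True)
            obj_data = curr_dict["_source"]
            obj_data["meta"] = {"id": curr_dict["_id"], "index": curr_dict["_index"]}
            result.append(obj_data)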
Example #6
def main():
    import time
    start = time.time()
    index_name = 'test_index'
    es_index = Index(index_name)
    if not es_index.exists():
        # One primary shard (e.g. matching the number of data nodes)
        es_index.settings(number_of_shards=1)
        es_index.save()
        # The alias can only be attached once the index exists
        es_index.put_alias(using='default', name="sss3")
    else:
        es_index.put_alias(using='default', name="sss3")
    doc = Document(first_name='cheng',
                   last_name="unknowname",
                   hometown='China')
    doc.save(using='default', index=index_name)
    print(time.time() - start)
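A related pattern (a sketch, same index and alias names) declares settings and aliases on the Index object up front, so a single save() creates the index with everything attached and put_alias is only needed when the index already exists:

    es_index = Index(index_name)
    es_index.settings(number_of_shards=1)
    es_index.aliases(sss3={})
    if not es_index.exists(using='default'):
        # Creates the index together with its settings and alias
        es_index.save(using='default')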
Example #7
    def save(self, es_connection: Elasticsearch, document: Document):
        """
        Saves the client_document in Elasticsearch. This should be overridden by the Document DAO

        :param es_connection:
        :param document:
        :return: doc_status, doc_meta
        """
        return document.save(using=es_connection)
Example #8
async def update(doc: elasticsearch_dsl.Document, doc_id: uuid.UUID):
    """Update a document in the index

    Parameters:
        doc: The document carrying the field updates
        doc_id: The id of the document to update
    """
    doc_type = type(doc)
    old_doc = await fc.run_in_threadpool(doc_type.get, id=doc_id)
    await fc.run_in_threadpool(old_doc.update, **doc.to_dict())
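fc here presumably refers to fastapi.concurrency (or starlette.concurrency), whose run_in_threadpool keeps the blocking elasticsearch_dsl calls off the event loop. A usage sketch, where BlogPost is a hypothetical Document subclass:

import uuid

# Inside an async endpoint: the instance carries only the changed fields
new_values = BlogPost(title='edited title')
await update(new_values, doc_id=uuid.uuid4())  # in practice, an existing document's id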
Example #9
    def search(*, document: Document, sort, limit, offset, **kwargs):
        s = document.search()

        # TODO: Define search criteria
        if sort:
            s = SearchClass._sort_helper(s, sort, kwargs)

        s = SearchClass._pagination_helper(s, limit, offset)

        return s
Example #10
def insert_or_ignore(job: elasticsearch_dsl.Document, alias='default',
                     index=JOB_INDEX):
    """Inserts the training job into the elasticsearch index
    if no job with the same name and creation timestamp exists.
    """
    if index == JOB_INDEX and 'slack' not in job.raw_log:
        print('job is incomplete, returning')
        return

    matches = index.search() \
        .query('match', job_name=job.job_name) \
        .query('match', created_at=job.created_at) \
        .count()

    if matches == 0:
        job.save(using=alias)
    else:
        print('job {} created at {} exists'.format(
            job.job_name, job.created_at))
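The count-then-save check is not atomic: two concurrent writers can both observe zero matches and both save. A sketch of an alternative (not the original author's code) that hashes the natural key into a deterministic _id and lets Elasticsearch reject the duplicate server-side, mirroring the op_type='create' pattern in Example #14:

import hashlib
from elasticsearch.exceptions import ConflictError

def insert_or_ignore_atomic(job: elasticsearch_dsl.Document, alias='default'):
    # A stable _id derived from the natural key makes duplicates conflict server-side
    key = '{}:{}'.format(job.job_name, job.created_at).encode()
    job.meta.id = hashlib.sha1(key).hexdigest()
    try:
        job.save(using=alias, op_type='create')  # rejected if the _id already exists
    except ConflictError:
        print('job {} created at {} exists'.format(job.job_name, job.created_at))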
Example #11
    def delete_by_doc_id(self,
                         es_connection: Elasticsearch,
                         document: Document = None,
                         *args):
        """
        Deletes the client_document in Elasticsearch. This should be overridden by the Document DAO

        :param es_connection:
        :param document:
        :return: doc_status, doc_meta
        """
        return document.delete(using=es_connection)
Example #12
    def update(cls, _id, date, data):
        """Update all data for a record.

        :param str _id: Elasticsearch document ID.
        :param str date: Log date, useful for getting the right index.
        :param dict data: New record data.
        """
        index = cls.get_index({'date': date})

        document = Document.get(_id, index=index, using=current_search_client)

        # Assign each property to the document
        for key, item in data.items():
            document[key] = item

        result = document.save(
            index=index,
            using=current_search_client,
            refresh=True,
        )

        if result != 'updated':
            raise Exception('Operation log cannot be updated.')
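Because the whole document is fetched and re-saved, any concurrent write between get() and save() is silently overwritten. Document.update sends only the changed fields as a partial update; a condensed sketch under the same assumptions:

    document = Document.get(_id, index=index, using=current_search_client)
    document.update(index=index, using=current_search_client, refresh=True, **data)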
Example #13
    def get(self, request):
        id = request.GET.get("id", "")
        doc = Document.get(id=id, index='policynew', using=client).to_dict()
        link = doc['link']
        title = doc['title']

        policy_object_list = []
        # Fetch the recommended policies from the database
        policy_recommend = PolicyRecommend.objects.filter(title=title)
        if len(policy_recommend) > 0:
            policy_object = policy_recommend[0]
            policy_str = policy_object.recommend
            policy_items = policy_str.split(",")[:3]
            policy_names = [item.split(' ')[0] for item in policy_items]
            policy_scores = [
                item.split(' ')[1] for item in policy_items
                if len(item.split(' ')[1]) == 7
            ]
            print(policy_scores)
            policy_recommend_urls = []
            for policy_name in policy_names:
                resp = Search(using=client,
                              index='policynew').query("match",
                                                       title=policy_name)
                response = resp.execute()
                link_str = response['hits']['hits'][0]["_id"]
                if (policy_name == response[0].title):
                    policy_recommend_url = "http://127.0.0.1:8000/detail/?id=" + link_str
                else:
                    policy_recommend_url = ""
                policy_recommend_urls.append(policy_recommend_url)
            policy_object_list = list(
                zip(policy_names, policy_scores, policy_recommend_urls))
            # relation_policies_dict[relation_policy] = relation_policy_url

        # Fetch the related policies from the database
        relation_policies_dict = {}
        relations = RelationPolicies.objects.filter(title=title)
        if len(relations) == 1:
            # print(relations[0].relation_policies)
            relation_policies = set(relations[0].relation_policies.split(','))
            # print(relation_policies)
            for relation_policy in relation_policies:
                if (relation_policy != ''):
                    resp = Search(using=client, index='policynew').query(
                        "match", title=relation_policy)
                    response = resp.execute()
                    link_str = response['hits']['hits'][0]["_id"]
                    if (relation_policy == response[0].title):
                        relation_policy_url = "http://127.0.0.1:8000/detail/?id=" + link_str
                    else:
                        relation_policy_url = ""
                    relation_policies_dict[
                        relation_policy] = relation_policy_url

        # Build the result object to return
        result_dict = {}
        result_dict['link'] = link
        if policy_object_list != []:
            print(policy_object_list)
            result_dict['policy_object_list'] = policy_object_list
        if relation_policies_dict != {}:
            result_dict['relation_policies_dict'] = relation_policies_dict
        return render(
            request, 'detail.html', {
                'link': link,
                'relation_policies_dict': relation_policies_dict,
                'policy_object_list': policy_object_list
            })
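The recommended-policy and related-policy branches repeat the same search-by-title lookup. A small helper (hypothetical name, same client and index) would remove the duplication:

    def policy_detail_url(self, title):
        # Return the detail URL for an exact-title match, or '' if the top hit differs
        response = Search(using=client,
                          index='policynew').query("match", title=title).execute()
        if response.hits and response[0].title == title:
            return "http://127.0.0.1:8000/detail/?id=" + response.hits[0].meta.id
        return ""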
Example #14
def merge_generated_parameters(params, idx, hash):
    """

    :param params:
    :param paramsfile:
    :param idx:
    :return:
    """

    layer_id = "%s_%s" % (hash, socket.gethostname())
    es = Elasticsearch(
        current_app.config.get("ELASTIC").split(","),
        verify_certs=False,
        timeout=120
    )

    #See if the hash exists
    try:
        doc = Document.get(id=layer_id, using=es, index=".datashader_layers")
    except NotFoundError:
        doc = None

    if not doc:
        #If not, create the hash document, guarding against concurrent creation
        try:
            doc = Document(_id=layer_id,
                            creating_host=socket.gethostname(),
                            creating_pid=os.getpid(),
                            creating_timestamp=datetime.now(),
                            generated_params=None,
                            params=params)
            doc.save(using=es, index=".datashader_layers", op_type="create", skip_empty=False)
            current_app.logger.debug("Created Hash document")
        except ConflictError:
            current_app.logger.debug("Hash document now exists, continuing")

        #re-fetch to get sequence number correct
        doc = Document.get(id=layer_id, using=es, index=".datashader_layers")

    #Check for generator timeouts:
    if doc.to_dict().get("generated_params", {}).get("generation_start_time") and \
                datetime.now() > datetime.strptime(doc.to_dict().get("generated_params", {}).get("generation_start_time"),"%Y-%m-%dT%H:%M:%S.%f")+timedelta(seconds=5*60):
        #Something caused the worker generating the params to time out so clear that entry
        try:
            doc.update(using=es, index=".datashader_layers", retry_on_conflict=0, refresh=True, \
                generated_params=None)
        except ConflictError:
            current_app.logger.debug("Abandoned resetting parameters due to conflict, other process has completed.")

    #Loop-check whether the generated params are missing/in-process/complete
    timeout_at = datetime.now()+timedelta(seconds=45)
    while doc.to_dict().get("generated_params", {}).get("complete", False) == False:
        if datetime.now() > timeout_at:
            current_app.logger.info("Hit timeout waiting for generated parameters to be placed into database")
            break
        #If missing, mark them as in generation
        if not doc.to_dict().get("generated_params", None):
            #Mark them as being generated but do so with concurrency control
            #https://www.elastic.co/guide/en/elasticsearch/reference/current/optimistic-concurrency-control.html
            current_app.logger.info("Discovering generated parameters")
            generated_params = dict()
            generated_params["complete"] = False
            generated_params["generation_start_time"] = datetime.now()
            generated_params["generating_host"] = socket.gethostname()
            generated_params["generating_pid"] = os.getpid()
            try:
                doc.update(using=es, index=".datashader_layers", retry_on_conflict=0, refresh=True, \
                    generated_params=generated_params)
            except ConflictError:
                current_app.logger.debug("Abandoned generating parameters due to conflict, will wait for other process to complete.")
                break
            #Generate and save off parameters
            current_app.logger.warn("Discovering generated params")
            generated_params.update(generate_global_params(params, idx))
            generated_params["generation_complete_time"] = datetime.now()
            generated_params["complete"] = True
            #Store off generated params
            doc.update(using=es, index=".datashader_layers", retry_on_conflict=0, refresh=True, \
                    generated_params=generated_params)
            break
        else:
            time.sleep(1)
            doc = Document.get(id=layer_id, using=es, index=".datashader_layers")

    #We now have params so use them
    params["generated_params"] = doc.to_dict().get("generated_params")
    return params
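The retry_on_conflict=0 updates implement the claim-then-work pattern with optimistic concurrency control: Document.get records the document's sequence number in doc.meta (hence the re-fetch above), so a competing writer makes update() raise ConflictError instead of silently overwriting. The core of the pattern, condensed:

    try:
        # Claim the work; this loses cleanly if another process got there first
        doc.update(using=es, index=".datashader_layers", retry_on_conflict=0,
                   generated_params={"complete": False})
    except ConflictError:
        pass  # another process holds the claim; fall back to polling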
Example #15
    def handle(self, **options):
        autodiscover_modules('search_indices')
        for search_class in Document.__subclasses__():
            search_class.init()
            _LOG.info('search_class:[%s] initialized', search_class)
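Document.__subclasses__() only sees classes that have already been imported, which is why the command autodiscovers each app's search_indices module before calling init(). A hypothetical myapp/search_indices.py that this command would pick up:

# myapp/search_indices.py (hypothetical)
from elasticsearch_dsl import Document, Text

class ArticleDocument(Document):
    title = Text()

    class Index:
        name = 'articles'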
Example #16
    def delete(self, **kwargs):
        return Document.delete(self, **kwargs)
Example #17
    def update(self, **fields):
        return Document.update(self, **fields)
Example #18
    def save(self, **kwargs):
        return Document.save(self, **kwargs)
Example #19
    def search_by_id(self, id):
        return Document.get(id, using=self.__es, index=self.__index)
Example #20
def get_tms(idx, x: int, y: int, z: int):
    tile_height_px = 256
    tile_width_px = 256

    # Validate request is from proxy if proxy mode is enabled
    tms_key = current_app.config.get("TMS_KEY")
    tms_proxy_key = request.headers.get("TMS_PROXY_KEY")
    if tms_key is not None:
        if tms_key != tms_proxy_key:
            current_app.logger.warning(
                "TMS must be accessed via reverse proxy: keys %s != %s",
                tms_key,
                tms_proxy_key,
            )
            return Response("TMS must be accessed via reverse proxy", status=403)

    # TMS tile coordinates
    x = int(x)
    y = int(y)
    z = int(z)

    es = Elasticsearch(
        current_app.config.get("ELASTIC").split(","),
        verify_certs=False,
        timeout=120,
    )

    # Get hash and parameters
    try:
        parameter_hash, params = extract_parameters(request)
    except Exception as e:
        current_app.logger.exception("Error while extracting parameters")
        params = {"user": request.headers.get("es-security-runas-user", None)}
        #Create an error entry in .datashader_tiles
        doc = Document(
            idx=idx,
            x=x,
            y=y,
            z=z,
            url=request.url,
            host=socket.gethostname(),
            pid=os.getpid(),
            timestamp=datetime.now(),
            params=params,
            error=repr(e)
        )
        doc.save(using=es, index=".datashader_tiles")
        #Generate and return an error tile
        return error_tile_response(e, tile_height_px, tile_width_px)

    cache_dir = Path(current_app.config["CACHE_DIRECTORY"])
    tile_name = f"{idx}/{parameter_hash}/{z}/{x}/{y}.png"
    tile_id = "%s_%s_%s_%s_%s" % (idx, parameter_hash, z, x, y)
    force = request.args.get("force")

    # Check if the cached image already exists
    c = get_cache(cache_dir, tile_name)
    if c is not None and force is None:
        current_app.logger.info("Hit cache (%s), returning", parameter_hash)
        # Return Cached Value
        img = c
        try:
            body = {"script" : {"source": "ctx._source.cache_hits++"}}
            es.update(".datashader_tiles", tile_id, body=body, retry_on_conflict=5)
        except NotFoundError:
            current_app.logger.warn("Unable to find cached tile entry in .datashader_tiles")
    else:
        # Generate a tile
        if force is not None:
            current_app.logger.info(
                "Forced cache flush, generating a new tile %s/%s/%s", z, x, y
            )
        else:
            current_app.logger.info(
                "No cache (%s), generating a new tile %s/%s/%s", parameter_hash, z, x, y
            )

        check_cache_dir(cache_dir, idx)

        headers = get_es_headers(request_headers=request.headers, user=params["user"])
        current_app.logger.debug("Loaded input headers %s", request.headers)
        current_app.logger.debug("Loaded elasticsearch headers %s", headers)

        # Get or generate extended parameters
        params = merge_generated_parameters(params, idx, parameter_hash)

        # Separate call for ellipse
        t1 = datetime.now()
        try:
            if params["render_mode"] in ["ellipses", "tracks"]:
                img, metrics = generate_nonaggregated_tile(idx, x, y, z, params)
            else:
                img, metrics = generate_tile(idx, x, y, z, params)
        except Exception as e:
            logging.exception("Exception Generating Tile for request %s", request)
            #Create an error entry in .datashader_tiles
            doc = Document(
                hash=parameter_hash,
                idx=idx,
                x=x,
                y=y,
                z=z,
                url=request.url,
                host=socket.gethostname(),
                pid=os.getpid(),
                timestamp=datetime.now(),
                params=params,
                error=repr(e)
            )
            doc.save(using=es, index=".datashader_tiles")
            # generate an error tile / don't cache it
            return error_tile_response(e, tile_height_px, tile_width_px)
        et = (datetime.now() - t1).total_seconds()
        # Make entry into .datashader_tiles
        doc = Document(
            _id=tile_id,
            hash=parameter_hash,
            idx=idx,
            x=x,
            y=y,
            z=z,
            url=request.url,
            host=socket.gethostname(),
            pid=os.getpid(),
            render_time=et,
            timestamp=datetime.now(),
            params=params,
            metrics=metrics,
            cache_hits=0,
        )
        doc.save(using=es, index=".datashader_tiles")

        # Store image as well
        set_cache(cache_dir, tile_name, img)

    resp = Response(img, status=200)
    resp.headers["Content-Type"] = "image/png"
    resp.headers["Access-Control-Allow-Origin"] = "*"
    resp.headers["Datashader-Parameter-Hash"] = parameter_hash
    resp.headers["Datashader-RunAs-User"] = params.get("user", "")
    resp.cache_control.max_age = 60
    return resp