def es_bulk_add(worker_name, dirlist, filelist, cliargs, totalcrawltime=None):
    """Bulk add directory and file docs to the ES index, then record a
    per-worker stats doc.

    Args:
        worker_name: name of the crawl worker (stored in the stats doc)
        dirlist: list of directory docs to index
        filelist: list of file docs to index
        cliargs: dict of CLI args; 'index' selects the target ES index
        totalcrawltime: optional total crawl time in seconds; recorded
            as 0 when not supplied

    Uses the module-level ``es`` client and ``config`` dict.
    """
    starttime = time.time()
    docs = dirlist + filelist
    index_bulk_add(es, docs, config, cliargs)
    data = {
        "worker_name": worker_name,
        "dir_count": len(dirlist),
        "file_count": len(filelist),
        "bulk_time": round(time.time() - starttime, 6),
        # guard: round(None, 6) raises TypeError when totalcrawltime is
        # left at its default
        "crawl_time": round(totalcrawltime, 6) if totalcrawltime is not None else 0,
        "indexing_date": datetime.utcnow().isoformat()
    }
    es.index(index=cliargs['index'], doc_type='worker', body=data)
def es_bulk_add(worker_name, dirlist, filelist, cliargs, totalcrawltime=None):
    """Bulk add directory and file docs to the ES index, then record a
    per-worker stats doc.

    When cliargs['chunkfiles'] is set, directory docs carrying a
    'chunkpath' key belong to a chunked directory crawl: a doc for that
    directory may already exist in the index, so instead of indexing a
    duplicate we emit an ES update op that accumulates the new crawl
    time onto the existing doc's 'crawl_time'.

    Args:
        worker_name: name of the crawl worker (stored in the stats doc)
        dirlist: list of directory docs to index
        filelist: list of file docs to index
        cliargs: dict of CLI args; uses 'index', 'chunkfiles',
            'noworkerdocs'
        totalcrawltime: optional total crawl time in seconds; recorded
            as 0 when not supplied

    Uses the module-level ``es`` client and ``config`` dict.
    """
    if cliargs['chunkfiles']:
        # refresh once (hoisted out of the loop) so the searches below
        # can see recently indexed directory docs
        es.indices.refresh(index=cliargs['index'])
        updated_dirlist = []
        for d in dirlist:
            try:
                # these keys exist only on docs from a chunked dir crawl;
                # the try covers ONLY this probe so unrelated KeyErrors
                # (e.g. from the search result) are no longer swallowed
                path = d['chunkpath']
                crawltime = d['crawl_time']
            except KeyError:
                # not part of a chunked dir — index the doc as-is
                updated_dirlist.append(d)
                continue
            filename = os.path.basename(path)
            # parent path of the chunked directory
            parentpath = os.path.abspath(os.path.join(path, os.pardir))
            data = {
                "size": 1,
                "_source": ['crawl_time'],
                "query": {
                    "query_string": {
                        "query": "filename: \"" + filename + "\" AND path_parent: \"" + parentpath + "\""
                    }
                }
            }
            res = es.search(index=cliargs['index'], doc_type='directory',
                            body=data,
                            request_timeout=config['es_timeout'])
            hits = res['hits']['hits']
            if not hits:
                # NOTE(review): no existing doc found — the chunked dir
                # doc is dropped entirely (original behavior preserved);
                # confirm this is intended rather than indexing d as-is
                continue
            docid = hits[0]['_id']
            updated_crawltime = hits[0]['_source']['crawl_time'] + crawltime
            # replace the doc with an update op that accumulates crawl_time
            updated_dirlist.append({
                '_op_type': 'update',
                '_index': cliargs['index'],
                '_type': 'directory',
                '_id': docid,
                'doc': {
                    'crawl_time': updated_crawltime
                }
            })
        dirlist = updated_dirlist

    starttime = time.time()
    docs = dirlist + filelist
    index_bulk_add(es, docs, config, cliargs)

    if not cliargs['noworkerdocs']:
        data = {
            "worker_name": worker_name,
            "dir_count": len(dirlist),
            "file_count": len(filelist),
            "bulk_time": round(time.time() - starttime, 6),
            # guard: round(None, 6) raises TypeError when totalcrawltime
            # is left at its default
            "crawl_time": round(totalcrawltime, 6) if totalcrawltime is not None else 0,
            "indexing_date": datetime.utcnow().isoformat()
        }
        es.index(index=cliargs['index'], doc_type='worker', body=data)