Example #1
    def run(self):
        schema_file = self.get_schemafile()
        assert os.path.exists(
            schema_file
        ), 'No schema file available for index %s' % self.index_name

        es_client = elasticsearch.Elasticsearch(config.es_host())

        # Get all of the endpoints served by this index and create an
        # `EndpointExport` object for each one so that it is exported properly.
        #
        # Endpoint exports can be:
        #   date range based (quarterly output)
        #   filter based (index serves many endpoints)
        #   vanilla (endpoint is 1-to-1 with index and is exported all at once)
        endpoints = self.get_endpoints()
        endpoint_batches = []
        for endpoint in endpoints:
            chunks = CUSTOM_CHUNKS.get(endpoint, DEFAULT_CHUNKS)
            if endpoint in RANGE_ENDPOINT_MAP:
                params = RANGE_ENDPOINT_MAP[endpoint]
                params['chunks'] = chunks
                # Extend rather than reassign so batches built for earlier
                # endpoints are not discarded.
                endpoint_batches.extend(_make_date_range_endpoint_batch(
                    endpoint, params))
            elif endpoint in FILTERED_ENPOINT_MAP:
                params = FILTERED_ENPOINT_MAP[endpoint]
                query = EndpointExport.build_term_filter(**params)
                endpoint_batches.append(
                    EndpointExport(endpoint, query=query, chunks=chunks))
            else:
                endpoint_batches.append(EndpointExport(endpoint,
                                                       chunks=chunks))

        # Dump each of the `EndpointExport` objects in the list
        for ep in endpoint_batches:
            # The output_dir will be the same for all outputs, once you factor out
            # the endpoint, so we can safely look at the first one only.
            output_dir = dirname(dirname(self.output()[0].path))
            endpoint_dir = join(output_dir, ep.endpoint[1:])
            index_util.dump_index(es_client,
                                  ep.index_name,
                                  ep.endpoint,
                                  join(endpoint_dir, ep.partition),
                                  cleaner=omit_internal_keys,
                                  query=ep.query,
                                  chunks=ep.chunks)
            # Copy the current JSON schema alongside the dumped data so that it
            # is included in the sync to S3.
            common.shell_cmd('cp %s %s', schema_file, endpoint_dir)
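This snippet assumes the surrounding module's imports (os, elasticsearch, dirname/join from os.path, and openfda's config, common, and index_util helpers). The dump loop only touches a handful of `EndpointExport` attributes; the minimal stand-in below is a hypothetical sketch for illustration, not openfda's actual class:

class EndpointExport(object):
    # Hypothetical stand-in documenting the attributes the dump loop reads;
    # the real class ships with openfda's index_util module.
    def __init__(self, endpoint, query=None, chunks=None, partition=''):
        self.endpoint = endpoint      # e.g. '/drug/event'
        self.index_name = endpoint[1:].replace('/', '')  # assumption: derived from the endpoint path
        self.query = query            # None means a vanilla full-index dump
        self.chunks = chunks          # records per output chunk
        self.partition = partition    # e.g. a quarter label for date-range exports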
Example #2
 def map(self, key, value, output):
     es_client = elasticsearch.Elasticsearch(config.es_host())
     ep = common.ObjectDict(value)
     schema_file = join(SCHEMA_DIR, ep.index_name + '_schema.json')
     endpoint_dir = join(self.output_dir, ep.endpoint[1:])
     target_dir = join(endpoint_dir, ep.partition)
     common.shell_cmd('mkdir -p %s', target_dir)
     index_util.dump_index(es_client,
                           ep.index_name,
                           ep.endpoint,
                           target_dir,
                           cleaner=omit_internal_keys,
                           query=ep.query,
                           chunks=ep.chunks)
     # Copy the current JSON schema to the zip location so that it is
     # included in the sync to S3.
     common.shell_cmd('cp %s %s', schema_file, endpoint_dir)
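Examples #2 through #4 run the same dump as a mapper task: each map call receives one serialized `EndpointExport` as `value`, and `common.ObjectDict(value)` restores attribute access. How the (key, value) pairs are produced is not shown here; a hedged sketch of such a driver (an assumption, not openfda's actual mapreduce wiring) could be:

def to_map_input(endpoint_batches):
    # Hypothetical driver wiring (assumption): flatten each EndpointExport
    # into a plain dict so it can travel through the framework as `value`.
    return [(str(i), vars(ep)) for i, ep in enumerate(endpoint_batches)]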
Example #3
File: pipeline.py Project: FDA/openfda
 def map(self, key, value, output):
   es_client = elasticsearch.Elasticsearch(config.es_host(), timeout=120)
   ep = common.ObjectDict(value)
   schema_file = join(SCHEMA_DIR, ep.index_name + '_schema.json')
   endpoint_dir = join(self.output_dir, ep.endpoint[1:])
   target_dir = join(endpoint_dir, ep.partition)
   common.shell_cmd('mkdir -p %s', target_dir)
   index_util.dump_index(es_client,
                         ep.index_name,
                         ep.endpoint,
                         target_dir,
                         cleaner=omit_internal_keys,
                         query=ep.query,
                         chunks=ep.chunks)
   # Copy the current JSON schema to the zip location so that it is
   # included in the sync to S3.
   common.shell_cmd('cp %s %s', schema_file, endpoint_dir)
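The only change from Example #2 is the explicit timeout=120 when constructing the client; elasticsearch-py's default request timeout is 10 seconds, which a large index dump can easily exceed:

# Raise the per-request timeout (elasticsearch-py defaults to 10s) so that
# long-running dump/scroll requests against a large index do not fail.
es_client = elasticsearch.Elasticsearch(config.es_host(), timeout=120)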
Example #4
 def map(self, key, value, output):
     es_client = elasticsearch.Elasticsearch(config.es_host(), timeout=120)
     ep = common.ObjectDict(value)
     schema_file = join(SCHEMA_DIR, ep.index_name + '_schema.json')
     endpoint_dir = join(self.output_dir, ep.endpoint[1:])
     target_dir = join(endpoint_dir, ep.partition)
     common.shell_cmd('mkdir -p %s', target_dir)
     index_util.dump_index(es_client,
                           ep.index_name,
                           ep.endpoint,
                           target_dir,
                           cleaner=omit_internal_keys,
                           query=ep.query,
                           chunks=ep.chunks)
     # Copy the current JSON schema to the zip location so that it is included
     # in the sync to S3. flock is required to avoid a race condition when
     # concurrent map tasks copy the schema file.
     common.shell_cmd_quiet('flock --verbose %s cp %s %s', schema_file,
                            schema_file, endpoint_dir)
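Shelling out to flock(1) serializes the copy across tasks. A pure-Python equivalent sketch using fcntl (assuming a POSIX host, as the shell version already does):

import fcntl
import shutil

# Hold an exclusive advisory lock on the schema file for the duration of the
# copy so that concurrent map tasks do not interleave their copies.
with open(schema_file, 'rb') as src:
    fcntl.flock(src, fcntl.LOCK_EX)
    try:
        shutil.copy(schema_file, endpoint_dir)
    finally:
        fcntl.flock(src, fcntl.LOCK_UN)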