def pluck(pluck_list, data):
    ''' Build a flat dictionary holding only the requested entries of `data`.

    Each entry of `pluck_list` is a key understood by
    `common.ObjectDict.get_nested`. The value found under that key is stored
    in the result under the text following the last '.' of the key (the whole
    key when it contains no '.').
    '''
    # TODO(hansnelsen): If there is collision on a document key names, a better
    #                   way of naming needs to be found. It just so happens that
    #                   there is no collision on a key name for any particular
    #                   table.
    source = common.ObjectDict(data)
    # When two keys share the same final segment, the later one in
    # `pluck_list` overwrites the earlier one.
    return {
        dotted_key.rsplit('.', 1)[-1]: source.get_nested(dotted_key)
        for dotted_key in pluck_list
    }
def map(self, key, value, output):
    ''' Dump one partition of an Elasticsearch index to disk and stage the
    index's JSON schema next to it.

    `value` is wrapped in a `common.ObjectDict`; its `index_name`, `endpoint`,
    `partition`, `query` and `chunks` attributes are read. `key` and `output`
    are not used by this method.

    The dump is written to `<self.output_dir>/<endpoint[1:]>/<partition>` and
    the schema file is copied into `<self.output_dir>/<endpoint[1:]>`.
    '''
    es_client = elasticsearch.Elasticsearch(config.es_host())
    ep = common.ObjectDict(value)
    schema_file = join(SCHEMA_DIR, ep.index_name + '_schema.json')
    # The first character of the endpoint is dropped (presumably a leading
    # '/', so that join() does not treat it as an absolute path -- confirm).
    endpoint_dir = join(self.output_dir, ep.endpoint[1:])
    target_dir = join(endpoint_dir, ep.partition)
    common.shell_cmd('mkdir -p %s', target_dir)

    index_util.dump_index(es_client,
                          ep.index_name,
                          ep.endpoint,
                          target_dir,
                          cleaner=omit_internal_keys,
                          query=ep.query,
                          chunks=ep.chunks)

    # Copy the current JSON schema to the zip location so that it is included
    # in the sync to s3. Every partition of an endpoint copies the same schema
    # file into the same endpoint_dir, so the copy is serialized with flock
    # (locking on the schema file itself) to avoid a race condition when
    # several partitions are processed at the same time.
    common.shell_cmd('flock %s cp %s %s', schema_file, schema_file,
                     endpoint_dir)
def map(self, key, value, output):
    ''' Export a single index partition from Elasticsearch and place the
    matching JSON schema alongside the exported data.

    `value` describes the export; it is wrapped in a `common.ObjectDict` and
    its `index_name`, `endpoint`, `partition`, `query` and `chunks` attributes
    are read. `key` and `output` are not used here.
    '''
    client = elasticsearch.Elasticsearch(config.es_host(), timeout=120)
    spec = common.ObjectDict(value)

    # Directory layout: <output_dir>/<endpoint minus first char>/<partition>
    schema_path = join(SCHEMA_DIR, spec.index_name + '_schema.json')
    endpoint_root = join(self.output_dir, spec.endpoint[1:])
    partition_dir = join(endpoint_root, spec.partition)
    common.shell_cmd('mkdir -p %s', partition_dir)

    dump_options = {
        'cleaner': omit_internal_keys,
        'query': spec.query,
        'chunks': spec.chunks,
    }
    index_util.dump_index(client, spec.index_name, spec.endpoint,
                          partition_dir, **dump_options)

    # The schema travels with the data so that it is part of the sync to s3.
    # The copy runs under flock (locking on the schema file itself) because a
    # race condition is otherwise possible when copying the schema file.
    common.shell_cmd_quiet('flock --verbose %s cp %s %s',
                           schema_path, schema_path, endpoint_root)