Code example #1
def pluck(pluck_list, data):
  ''' A helper function for extracting a specific subset of keys from a
      dictionary.
  '''
  # TODO(hansnelsen): If there is a collision on document key names, a better
  #                   way of naming needs to be found. It just so happens that
  #                   there is no collision on a key name for any particular
  #                   table.
  result = {}
  data = common.ObjectDict(data)
  for key in pluck_list:
    new_key = key.split('.')[-1]
    result[new_key] = data.get_nested(key)
  return result
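
For context, a hedged usage sketch: assuming openfda's common.ObjectDict.get_nested resolves dot-separated key paths, pluck keeps only the requested keys and renames each one to its last path segment. The sample record below is invented for illustration.

record = {
  'openfda': {'brand_name': 'Aspirin'},
  'effective_time': '20140101',
}
# Keep only the two requested keys; 'openfda.brand_name' becomes 'brand_name'.
subset = pluck(['openfda.brand_name', 'effective_time'], record)
# subset == {'brand_name': 'Aspirin', 'effective_time': '20140101'}
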
Code example #2
 def map(self, key, value, output):
     es_client = elasticsearch.Elasticsearch(config.es_host())
     ep = common.ObjectDict(value)
     schema_file = join(SCHEMA_DIR, ep.index_name + '_schema.json')
     endpoint_dir = join(self.output_dir, ep.endpoint[1:])
     target_dir = join(endpoint_dir, ep.partition)
     common.shell_cmd('mkdir -p %s', target_dir)
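     # Dump the documents of this index/endpoint partition from Elasticsearch
     # into files under target_dir.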
     index_util.dump_index(es_client,
                           ep.index_name,
                           ep.endpoint,
                           target_dir,
                           cleaner=omit_internal_keys,
                           query=ep.query,
                           chunks=ep.chunks)
     # Copy the current JSON schema to the zip location so that it is included
     # in the sync to s3
     common.shell_cmd('cp %s %s', schema_file, endpoint_dir)
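
Both examples rely on common.ObjectDict for attribute-style access (ep.index_name, ep.endpoint) and, in pluck above, for dotted-path lookups via get_nested. The following is only a minimal sketch of such a wrapper under those assumptions, not the actual openfda implementation.

class ObjectDict(dict):
  ''' Illustrative dict wrapper: keys are readable as attributes, and
      get_nested resolves dot-separated paths into nested dictionaries.
  '''
  def __getattr__(self, name):
    try:
      return self[name]
    except KeyError:
      raise AttributeError(name)

  def get_nested(self, dotted_key, default=None):
    value = self
    for part in dotted_key.split('.'):
      if not isinstance(value, dict) or part not in value:
        return default
      value = value[part]
    return value
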
Code example #3
File: pipeline.py  Project: zheismysavior/openfda
 def map(self, key, value, output):
     es_client = elasticsearch.Elasticsearch(config.es_host(), timeout=120)
     ep = common.ObjectDict(value)
     schema_file = join(SCHEMA_DIR, ep.index_name + '_schema.json')
     endpoint_dir = join(self.output_dir, ep.endpoint[1:])
     target_dir = join(endpoint_dir, ep.partition)
     common.shell_cmd('mkdir -p %s', target_dir)
     index_util.dump_index(es_client,
                           ep.index_name,
                           ep.endpoint,
                           target_dir,
                           cleaner=omit_internal_keys,
                           query=ep.query,
                           chunks=ep.chunks)
     # Copy the current JSON schema to the zip location so that it is included
     # in the sync to s3. flock is required to avoid a race condition when
     # copying the schema file.
     common.shell_cmd_quiet('flock --verbose %s cp %s %s', schema_file,
                            schema_file, endpoint_dir)
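
The flock wrapper in this variant serializes the schema copy so that concurrent copies of the same file do not race. Below is a minimal Python sketch of the same idea using an advisory fcntl lock instead of the flock CLI; copy_with_lock is a hypothetical helper, not part of openfda.

import fcntl
import shutil

def copy_with_lock(src, dst_dir):
  # Hold an exclusive advisory lock on src for the duration of the copy so
  # that other processes following the same convention wait instead of racing.
  with open(src) as f:
    fcntl.flock(f, fcntl.LOCK_EX)
    try:
      shutil.copy(src, dst_dir)
    finally:
      fcntl.flock(f, fcntl.LOCK_UN)
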