def _unique_in_order(values):
    """Return the items of `values` without duplicates, keeping first-seen order.

    Hashable items are tracked in a set. Items that cannot be hashed
    (for example dicts or nested lists) are compared by equality against
    the unhashable items seen so far instead of raising TypeError.
    """
    seen_hashable = set()
    seen_unhashable = []
    unique = []
    for value in values:
        try:
            if value in seen_hashable:
                continue
            seen_hashable.add(value)
        except TypeError:
            # Unhashable item: fall back to a linear equality scan.
            if value in seen_unhashable:
                continue
            seen_unhashable.append(value)
        unique.append(value)
    return unique


def deduplicate(fname, q=None):
    """Remove duplicate values from the list field `fname` of matching journals.

    Queries `Journal` with `q`, and for every hit that contains `fname`
    as a list, drops repeated values while keeping the order in which the
    values first appear. Only records whose list actually shrank are
    saved. `Journal.refresh()` is called once after the loop.

    Records where `fname` is present but is not a list are reported by
    writing a line to `strange` (a name defined elsewhere in this module;
    presumably an open log file -- confirm) and are left untouched.

    :param fname: name of the field to deduplicate.
    :param q: query passed through to `Journal.query`; None by default.
    :returns: the number of records that were changed and saved.
    :raises Exception: if the query reports a total of zero (or fewer) hits.
    :raises KeyError: if the response has no 'hits' entry; a diagnostic is
        printed first.
    """
    # TODO do this using the scroll API http://www.elasticsearch.org/guide/reference/api/search/search-type/
    everything = Journal.query(q=q, size=10000000)
    processed = 0

    try:
        if everything['hits']['total'] <= 0:
            raise Exception('Nothing to deduplicate')
    except KeyError:
        # Single-argument print(...) emits the same text as the Python 2
        # statement `print message, everything`.
        # NOTE: execution continues after the diagnostic; if 'hits' is
        # really missing, the loop below raises KeyError.
        print('%s %s' % (
            'ES returned a strange result, probably an error. Is '
            'your index missing? Here is the original response:'
            '\n ',
            everything,
        ))

    for record in everything['hits']['hits']:
        instance = Journal(**record['_source'])

        if fname not in instance:
            # The field we're deduplicating is not in this record. Move on.
            continue

        values = instance[fname]
        if not isinstance(values, list):
            strange.write(instance['id'] + ' - ' + fname + ' not a '
                          'list' + '\n')
            continue

        unique = _unique_in_order(values)
        if len(unique) != len(values):
            # Only touch (and persist) records that really had duplicates.
            instance[fname] = unique
            instance.save()
            processed += 1

    Journal.refresh()
    return processed
def rename_field(src, dst, q=None):
    """Rename field `src` to `dst` on every journal matching `q`.

    For each hit that contains `src`:

    * if `dst` is already present, the two values are combined with
      `Journal.make_merge_list(instance[dst], instance[src])` and the
      second element of its result is stored under `dst`;
    * otherwise the value is simply copied to `dst`.

    The `src` field is then deleted and the record saved.
    `Journal.refresh()` is called once after the loop.

    If `src` and `dst` are the same name no record is modified: running
    the rename would merge the field with itself and then delete it,
    destroying the data.

    :param src: name of the field to rename.
    :param dst: new name for the field.
    :param q: query passed through to `Journal.query`; None by default.
    :returns: the total number of hits reported by the query (not the
        number of records actually changed).
    :raises Exception: if the query reports a total of zero (or fewer) hits.
    :raises KeyError: if the response has no 'hits' entry; a diagnostic is
        printed first.
    """
    # TODO do this using the scroll API http://www.elasticsearch.org/guide/reference/api/search/search-type/
    everything = Journal.query(q=q, size=10000000)

    try:
        if everything['hits']['total'] <= 0:
            raise Exception('Nothing to rename')
    except KeyError:
        # Single-argument print(...) emits the same text as the Python 2
        # statement `print message, everything`.
        # NOTE: execution continues after the diagnostic; if 'hits' is
        # really missing, the code below raises KeyError.
        print('%s %s' % (
            'ES returned a strange result, probably an error. Is '
            'your index missing? Here is the original response:'
            '\n ',
            everything,
        ))

    # Renaming a field onto itself must be a no-op: the delete below would
    # otherwise remove the very field that was just written.
    if src != dst:
        for record in everything['hits']['hits']:
            instance = Journal(**record['_source'])

            if src not in instance:
                # The field we're renaming is not in this record. Move on.
                continue

            if dst in instance:
                # Both source and destination are present: merge them as
                # a list. Only the second element of the result is kept.
                _unused, instance[dst] = Journal.make_merge_list(
                    instance[dst], instance[src]
                )
            else:
                instance[dst] = instance[src]

            del instance[src]  # delete original field
            instance.save()

    Journal.refresh()
    return everything['hits']['total']
def delete_field(name, q=None):
    """Delete field `name` from every journal matching `q`.

    Each hit that contains `name` has the field removed and is saved;
    hits without the field are skipped. `Journal.refresh()` is called
    once after the loop.

    :param name: name of the field to delete.
    :param q: query passed through to `Journal.query`; None by default.
    :returns: the total number of hits reported by the query (not the
        number of records actually changed).
    :raises Exception: if the query reports a total of zero (or fewer) hits.
    :raises KeyError: if the response has no 'hits' entry; a diagnostic is
        printed first.
    """
    # TODO do this using the scroll API http://www.elasticsearch.org/guide/reference/api/search/search-type/
    response = Journal.query(q=q, size=10000000)

    try:
        total = response['hits']['total']
    except KeyError:
        # Single-argument print(...) emits the same text as the Python 2
        # statement `print message, response`.
        # NOTE: execution continues after the diagnostic; if 'hits' is
        # really missing, the loop below raises KeyError.
        print('%s %s' % (
            'ES returned a strange result, probably an error. Is '
            'your index missing? Here is the original response:'
            '\n ',
            response,
        ))
    else:
        if total <= 0:
            raise Exception('Nothing to delete')

    for hit in response['hits']['hits']:
        journal = Journal(**hit['_source'])
        # Records that do not carry the field are left alone.
        if name in journal:
            del journal[name]
            journal.save()

    Journal.refresh()
    return response['hits']['total']