def deduplicate(fname, q=None):
    """Remove duplicate values from the list field ``fname`` of Journal records.

    Every record returned by ``Journal.query`` is loaded. Records that do not
    contain ``fname`` are skipped; records whose ``fname`` is not a list are
    reported to ``strange`` and skipped. A record is saved only when removing
    duplicates actually shortened its list. The first occurrence of each value
    is kept, so the relative order of the remaining values is preserved.

    :param fname: name of the field to deduplicate.
    :param q: query passed through to ``Journal.query`` (``None`` by default).
    :returns: the number of records that were changed and saved.
    :raises Exception: if the response reports a total of zero (or fewer) hits.
    :raises KeyError: if the response lacks the expected ``hits``/``total``
        keys; the raw response is printed before the error propagates.
    """
    # TODO do this using the scroll API http://www.elasticsearch.org/guide/reference/api/search/search-type/
    everything = Journal.query(q=q, size=10000000)
    processed = 0

    try:
        if everything['hits']['total'] <= 0:
            raise Exception('Nothing to deduplicate')
    except KeyError:
        print 'ES returned a strange result, probably an error. Is ' \
              'your index missing? Here is the original response:' \
              '\n ', everything
        # The response is malformed: stop here instead of falling through to
        # the loop below, which would index into the same broken structure.
        raise

    for record in everything['hits']['hits']:
        instance = Journal(**record['_source'])

        if fname not in instance:
            # The field we're deduplicating is not in this record. Move on.
            continue

        values = instance[fname]
        if not isinstance(values, list):
            # NOTE(review): `strange` is not defined in this function; it is
            # presumably a writable file-like object at module level - confirm.
            strange.write(instance['id'] + ' - ' + fname + ' not a list\n')
            continue

        # Order-preserving deduplication: keep the first occurrence of each
        # value. Values must be hashable, as with the set() based approach.
        seen = set()
        unique = []
        for value in values:
            if value not in seen:
                seen.add(value)
                unique.append(value)

        # Only touch (and count) records that actually contained duplicates.
        if len(unique) != len(values):
            instance[fname] = unique
            instance.save()
            processed += 1

    Journal.refresh()
    return processed
def rename_field(src, dst, q=None):
    """Rename the field ``src`` to ``dst`` on Journal records.

    Every record returned by ``Journal.query`` is loaded. Records without
    ``src`` are skipped. When a record already has ``dst``, the two values are
    combined through ``Journal.make_merge_list(dst_value, src_value)`` and the
    second element of the returned pair is stored in ``dst``; otherwise the
    ``src`` value is copied to ``dst``. ``src`` is then deleted and the record
    is saved.

    :param src: name of the field to rename.
    :param dst: new name of the field.
    :param q: query passed through to ``Journal.query`` (``None`` by default).
    :returns: the total hit count reported by the query response. Note this is
        the number of matched records, not the number actually renamed.
    :raises Exception: if the response reports a total of zero (or fewer) hits.
    :raises KeyError: if the response lacks the expected ``hits``/``total``
        keys; the raw response is printed before the error propagates.
    """
    # TODO do this using the scroll API http://www.elasticsearch.org/guide/reference/api/search/search-type/
    everything = Journal.query(q=q, size=10000000)

    try:
        if everything['hits']['total'] <= 0:
            raise Exception('Nothing to rename')
    except KeyError:
        print 'ES returned a strange result, probably an error. Is ' \
              'your index missing? Here is the original response:' \
              '\n ', everything
        # The response is malformed: stop here instead of falling through to
        # the loop below, which would index into the same broken structure.
        raise

    for record in everything['hits']['hits']:
        instance = Journal(**record['_source'])

        if src not in instance:
            # The field we're renaming is not in this record. Move on.
            continue

        if dst in instance:
            # If both source and destination are present, merge them as
            # a list. The first element of the returned pair is not needed.
            _ignored, instance[dst] = Journal.make_merge_list(
                instance[dst], instance[src]
            )
        else:
            instance[dst] = instance[src]

        del instance[src]  # delete original field
        instance.save()

    Journal.refresh()
    return everything['hits']['total']
def delete_field(name, q=None):
    """Delete the field ``name`` from Journal records.

    Every record returned by ``Journal.query`` is loaded. Records that do not
    contain ``name`` are skipped; for the others the field is removed and the
    record is saved.

    :param name: name of the field to delete.
    :param q: query passed through to ``Journal.query`` (``None`` by default).
    :returns: the total hit count reported by the query response. Note this is
        the number of matched records, not the number actually modified.
    :raises Exception: if the response reports a total of zero (or fewer) hits.
    :raises KeyError: if the response lacks the expected ``hits``/``total``
        keys; the raw response is printed before the error propagates.
    """
    # TODO do this using the scroll API http://www.elasticsearch.org/guide/reference/api/search/search-type/
    everything = Journal.query(q=q, size=10000000)

    try:
        if everything['hits']['total'] <= 0:
            raise Exception('Nothing to delete')
    except KeyError:
        print 'ES returned a strange result, probably an error. Is ' \
              'your index missing? Here is the original response:' \
              '\n ', everything
        # The response is malformed: stop here instead of falling through to
        # the loop below, which would index into the same broken structure.
        raise

    for record in everything['hits']['hits']:
        instance = Journal(**record['_source'])

        if name not in instance:
            # The field we're deleting is not in this record. Move on.
            continue

        del instance[name]
        instance.save()

    Journal.refresh()
    return everything['hits']['total']