def index(fname, index_name, keys_to_tag):
    fptr = open(fname, 'rb')
    line_count = 0
    conn = ES(["localhost:9200"])
    if not conn.exists_index(index_name):
        conn.create_index(index_name)
    start = time.clock()
    numb_exceptions = 0

    for line in fptr:
        if ((line_count % 10000) == 0):
            end = time.clock()
            minutes = (end - start) / 60.0
            print 'File: %s Done with %d took %f min. ' %(fname, line_count, minutes)
            print 'number of exceptions ', numb_exceptions
        line_count += 1
        data = json.loads(line)
        if not data.get('tags'):
            continue
        post_id = int(data['post_id'])
        found_content = False
        for k in keys_to_tag:
            if data.get(k):
                found_content = True
        if not found_content:
            continue
        index_data = dict()
        for k in keys_to_tag:
            value = data.get(k)
            if (value and (k == 'content')):
                try:
                    stripped_value = utils.strip_tags(value)
                except Exception:
                    stripped_value = value
                index_data[k] = stripped_value
        if post_id and data:
            try:
                conn.index(index_data, index_name, "test-type", post_id)
            except Exception:
                numb_exceptions += 1
                continue

    print 'number of exceptions ', numb_exceptions
from __future__ import unicode_literals

from pyes import ES


if __name__ == "__main__":
    # Create each of the listed indices on the local ES node if it is missing.
    conn = ES(["localhost:9200"])
    indices = ("content_index", "title_index")
    # The loop variable is named ``index_name`` rather than ``index`` so that
    # it does not rebind the module-level ``index`` function.
    for index_name in indices:
        if not conn.exists_index(index_name):
            conn.create_index(index_name)