Example #1
0
from datetime import datetime
from epoms.config import EPOMSConfig
from epoms.es import *

# Name of the index this script drops and recreates.
INDEX_NAME = 'epoms'
# NOTE(review): TIMEOUT is not referenced anywhere in the visible script.
TIMEOUT = 300

config = EPOMSConfig()
es = ES().init()

# TODO:
# - Accept a parameter and delete only the specified index.

# Drop the existing index before recreating it. 400/404 are passed via
# `ignore` so the call is tolerated when the index is absent (this assumes
# `es` is an elasticsearch-py client -- confirm against epoms.es).
es.indices.delete(index=INDEX_NAME, ignore=[400, 404])

# Recreate the index using the mapping stored under the 'index_schema'
# config key.
body = {
    "mappings": config.get('index_schema'),
}

res = es.indices.create(index=INDEX_NAME, body=body)

# Parenthesised single-argument print behaves the same as the Python 2
# statement and is also valid as the Python 3 function call.
print(res)
Example #2
0
from datetime import datetime
from os import listdir
from os.path import isfile, join
from concurrent import futures

from epoms.config import EPOMSConfig
from epoms.news_extraction import NewsExtraction
from epoms.es import *
from epoms.db import *


# Script-wide settings.
INDEX_NAME = 'epoms'
TIMEOUT = 300
MAX_WORKER = 1

# Load the project configuration and pull the node list out of its
# 'elasticsearch' section before initialising the ES wrapper.
config = EPOMSConfig()
es_nodes = config.get('elasticsearch')['nodes']
es = ES().init()

# Query for news rows whose indexed_time is unset.
# NOTE(review): `>> None` looks like peewee's IS NULL operator -- confirm
# against the News model in epoms.db.
news = News().select().where(News.indexed_time >> None)

def xstr(s):
    """Return '' for None, otherwise ``str()`` of *s* encoded as UTF-8.

    *s* must provide an ``encode`` method (i.e. be a text string) unless it
    is None. On Python 2 the encoded value is already a ``str``, so the
    outer ``str()`` call leaves it unchanged.

    NOTE(review): on Python 3, ``str()`` of a bytes object yields its
    ``b'...'`` repr rather than the text -- revisit if this is ever ported.
    """
    return '' if s is None else str(s.encode('utf-8'))

# Progress banner. Parenthesised single-argument print behaves the same as
# the Python 2 statement and is also valid as the Python 3 function call.
# The format arguments are wrapped in one-element tuples so that a
# tuple-valued es_nodes is rendered as a whole instead of being unpacked
# as several format arguments (which would raise TypeError).
print('Connecting %s' % (es_nodes,))
print('Indexing %d news' % (news.count(),))