from topic_tracking.util.mongo import MongoConnectionManager


if __name__ == '__main__':
    # Ad-hoc debugging script: pull a single resource out of MongoDB, build
    # the payload string for its terms and print what the ElasticSearch
    # `_analyze` endpoint of the resource index returns for that text.

    # MongoDB connection and the collection of processed resources
    mongo_host, mongo_port = 'localhost', 27017
    connections = MongoConnectionManager(mongo_host, mongo_port, MongoCodec())
    db_name = 'processed'
    resources = connections.get_collection(db_name, 'resources', Resource)

    # ElasticSearch client and index names
    es_client = ES('localhost:9200', timeout=60)
    resource_index = 'topic_tracking_resources'
    story_index = 'topic_tracking_stories'

    # helper that knows how to turn terms into a payload string
    helper = IndexHelper(es_client, resource_index, story_index)

    # fetch one resource from mongo
    sample = resources.find_one_model()

    # build and show the payload string for the resource's terms
    payload = helper._build_payload_string(sample.terms)
    pprint(payload)

    # ask the resource index to analyze that payload and show the response
    analyze_path = resource_index + '/_analyze'
    query_args = {'text': payload}
    analysis = es_client._send_request('GET', analyze_path, None, query_args)
    pprint(analysis)
# Re-indexing script: reads a YAML configuration file whose path is given as
# the first command-line argument, configures logging from it, then indexes
# every resource and every story of the 'processed' MongoDB database into the
# main ElasticSearch index.
if len(sys.argv) < 2:
    # Fail with a readable message instead of a bare IndexError traceback.
    sys.exit('usage: %s <config.yaml>' % sys.argv[0])
config_file = sys.argv[1]

# Use a context manager so the file handle is closed once parsing is done
# (`open` also works where the Python 2-only `file` builtin does not).
# NOTE(review): yaml.load can construct arbitrary Python objects from tagged
# input; if the config never relies on such tags, prefer yaml.safe_load.
with open(config_file, 'r') as config_stream:
    config = yaml.load(config_stream)

# logging
logging.config.dictConfig(config['logging'])
logger = logging.getLogger()

# MongoDB
mcm = mongo_from_config(config['mongo'])
database = config['mongo']['databases']['processed']
resource_collection = mcm.get_collection(database, 'resources', Resource)
story_collection = mcm.get_collection(database, 'stories', Story)

# elasticsearch
es = elasticsearch_from_config(config['elasticsearch'])
main_index = config['elasticsearch']['indexes']['main']
# NOTE(review): the 'open' index below was already disabled in the original
# script and is not re-indexed here - confirm that this is intended.
# open_index = config['elasticsearch']['indexes']['open']

# helpers
index_helper = IndexHelper(es)

# Index every resource, then every story, into the main index. The log
# arguments are passed lazily so the message is only formatted when DEBUG
# logging is actually enabled.
for resource in resource_collection.find_models():
    index_helper.index_resource(resource, main_index)
    logger.debug('Indexed resource %s.', resource)

for story in story_collection.find_models():
    index_helper.index_story(story, main_index)
    logger.debug('Indexed story %s.', story)

logger.info('Re-indexing complete.')