def start_server():
    # Exit cleanly on Ctrl+C.
    signal.signal(signal.SIGINT, signal_exit_handler)

    # Optionally run the backed-off search in a separate process so cache
    # misses can be resolved in the background while the server is up.
    backed_off_search_process = None
    if Config.get('CTI')['knowledge_base']['on_miss_backoff']:
        backed_off_search_process = Process(target=backed_off_search)
        backed_off_search_process.start()

    config = Config.get('server')
    server.run(host=config['host'], port=config['port'], threaded=True)

    if backed_off_search_process is not None:
        backed_off_search_process.join()
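# `start_server` registers `signal_exit_handler` for SIGINT, but the handler is
# not defined in this section. A minimal sketch of what such a handler might
# look like; the body below is an assumption, not the project's implementation:
def signal_exit_handler(signum, frame):
    # Log the interrupt and terminate the process cleanly.
    Logger.log(__name__, 'received signal ' + str(signum) + ', shutting down')
    raise SystemExit(0)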
def __get_contexts(self, titles):
    contexts = []
    misses = []

    # Split the requested titles into cache hits and misses.
    for title, context in self.__contexts_cache.get(titles).items():
        if context is not None:
            contexts.append(context)
        else:
            misses.append(title)

    if self.__on_miss_backoff:
        # Defer the misses: queue them for the background backed-off search
        # process instead of blocking this request on the search engine.
        storage = SQLiteDict.storage(
            Config.get('CTI')['backed_off_search']['storage_name'])
        for title in misses:
            storage[title] = title
    else:
        # Resolve the misses inline and cache the results for next time.
        new_contexts = self.__search_engine.contexts(misses)
        contexts += list(new_contexts.values())
        self.__contexts_cache.set_many(new_contexts)

    return contexts
def __init__(self, cache_name):
    self.__cache_name = cache_name

    # Without an Elasticsearch connection the cache cannot be backed by an
    # index, so log the failure and leave the instance unconfigured.
    if ES.connection('es') is None:
        Logger.log(__name__, 'could not connect to elasticsearch',
                   type='error')
        return

    ESService.create_index(
        self.__cache_name,
        Config.get('elasticsearch')['indices_settings'][self.__cache_name])
def clear_backedoff_storage():
    storage_path = Data.get(
        Config.get('CTI')['backed_off_search']['storage_name'])

    # The storage is a single file on disk; deleting it empties the queue.
    if os.path.isfile(storage_path):
        os.remove(storage_path)
        Logger.log(__name__, 'the backedoff search storage is now empty')
    else:
        Logger.log(__name__, 'the backedoff search storage is already empty')
def handle_cti_request(terms, context):
    if not terms:
        return {}

    scored = {}
    if Config.get('CTI')['max_processes_per_job'] == 1 or len(terms) == 1:
        # Single process: score the terms sequentially.
        for term in terms:
            scored[term] = round(cti.term_informativeness(term, context), 2)
    else:
        # Fan the terms out over a process pool; each worker returns a
        # (term, score) pair.
        with Pool(Config.get('CTI')['max_processes_per_job']) as p:
            for term, score in p.map(
                    partial(cti_job, cti=cti, context=context), terms):
                scored[term] = score

    return scored
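# `handle_cti_request` maps `cti_job` over the terms via a process pool, so
# `cti_job` presumably takes a term plus `cti` and `context` keyword arguments
# and returns a (term, score) pair. A minimal sketch under that assumption,
# mirroring the single-process branch above:
def cti_job(term, cti, context):
    # Score one term in its own worker process.
    return term, round(cti.term_informativeness(term, context), 2)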
def __init__(self):
    self.__config = Config.get('CTI')['knowledge_base']
    self.__on_miss_backoff = self.__config['on_miss_backoff']
    self.__search_engine = SearchEngine(
        top_n_contexts_per_query=self.__config['max_contexts_per_query'])
    self.__query_cache = QueryCache.create(self.__config['query_cache'])
    self.__contexts_cache = ContextsCache.create(
        self.__config['contexts_cache'])
def bootstrap_knowledge_base(dump_directory):
    Logger.log(__name__, 'bootstrapping knowledge base...')
    contexts_cache = ContextsCache.create(
        Config.get('CTI')['knowledge_base']['contexts_cache'])

    # Pre-populate the contexts cache from a Wikipedia dump so the server can
    # answer for known titles without hitting the search engine.
    for article in WikipediaUtils.get_articles(
            dump_directory, attributes=['title', 'context']):
        contexts_cache.set(article['title'], article['context'])
        Logger.log(__name__,
                   'bootstrapping got context for: ' + article['title'])
def backed_off_search():
    config = Config.get('CTI')['backed_off_search']
    contexts_cache = ContextsCache.create(
        Config.get('CTI')['knowledge_base']['contexts_cache'])
    search_engine = SearchEngine()

    Logger.log(__name__, 'backed off search process started')
    Logger.log(__name__, 'backed off search storage has ' +
               str(len(SQLiteDict.storage(config['storage_name']))) + ' items')

    searches_since_last_report = 0
    while True:
        # Pop the next queued title; an empty storage raises KeyError.
        try:
            title = SQLiteDict.storage(config['storage_name']).popitem()[0]
        except KeyError:
            sleep(config['empty_storage_wait_seconds'])
            continue

        contexts_cache.set(title, search_engine.context(title))
        Logger.log(__name__, 'backed off search got context for: ' + title)

        # Periodically report how many titles are still queued.
        searches_since_last_report += 1
        if searches_since_last_report > 30:
            searches_since_last_report = 0
            Logger.log(__name__, 'backed off search storage has ' +
                       str(len(SQLiteDict.storage(config['storage_name']))) +
                       ' items')

        # Throttle so the search engine is not hammered.
        sleep(config['seconds_between_searches'])
def __setup(self):
    self.__conns = {}
    self.__configs = Config.get('elasticsearch')['connections']
def clear_knowledge_base():
    ESService.delete_index(
        Config.get('CTI')['knowledge_base']['query_cache']['name'])
    ESService.delete_index(
        Config.get('CTI')['knowledge_base']['contexts_cache']['name'])
    Logger.log(__name__, 'the knowledge base is now empty')
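# For reference, the `Config.get` keys used throughout this section imply a
# configuration shaped roughly as below. The key names come from the code; the
# concrete values are illustrative assumptions only.
EXAMPLE_CONFIG = {
    'server': {
        'host': '0.0.0.0',
        'port': 5000,
    },
    'elasticsearch': {
        'connections': {},       # connection settings keyed by name, e.g. 'es'
        'indices_settings': {},  # per-index settings keyed by cache name
    },
    'CTI': {
        'max_processes_per_job': 4,
        'knowledge_base': {
            'on_miss_backoff': True,
            'max_contexts_per_query': 10,
            'query_cache': {'name': 'query_cache'},
            'contexts_cache': {'name': 'contexts_cache'},
        },
        'backed_off_search': {
            'storage_name': 'backed_off_search',
            'empty_storage_wait_seconds': 60,
            'seconds_between_searches': 1,
        },
    },
}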