def build_index(config, use_parallel=True, noconfirm=False):
    """Index the target collection of a build config into Elasticsearch.

    Loads ``config`` into a MongoDB-backed ``DataBuilder``, picks its target
    collection and, once the user confirms (or ``noconfirm`` is true), runs
    the create / clear-type / build / optimize sequence on an ``ESIndexer``
    whose index name is ``'genedoc_'`` plus the build config's name.

    :param config: build config identifier passed to ``load_build_config``.
    :param use_parallel: stored on the indexer as ``use_parallel``.
    :param noconfirm: when true, skip the "Continue?" prompt; it is also
        forwarded to ``delete_index_type``.
    :return: None. Progress and errors are reported with ``print``.
    """
    builder = DataBuilder(backend='mongodb')
    builder.load_build_config(config)
    source_collection = builder.pick_target_collection()
    # Resolved before the readiness check, so a missing 'name' key surfaces
    # at the same point it always did.
    index_name = 'genedoc_' + builder._build_config['name']

    if not source_collection:
        print("Error: target collection is not ready yet or failed to build.")
        return

    indexer = ESIndexer(mapping=builder.get_mapping())
    indexer.ES_INDEX_NAME = index_name
    indexer.step = 10000
    indexer.use_parallel = use_parallel

    server_url = indexer.conn.servers[0].geturl()
    target = "ES target: {}/{}/{}".format(server_url,
                                          indexer.ES_INDEX_NAME,
                                          indexer.ES_INDEX_TYPE)
    print(target)

    # The prompt is only evaluated when noconfirm is false (short-circuit).
    confirmed = noconfirm or ask("Continue?") == 'Y'
    if not confirmed:
        print("Aborted.")
        return

    # Disabled statements carried over from the original, for reference:
    #indexer.s = 609000
    #indexer.conn.indices.delete_index(indexer.ES_INDEX_NAME)
    indexer.create_index()
    indexer.delete_index_type(indexer.ES_INDEX_TYPE, noconfirm=noconfirm)
    indexer.build_index(source_collection, verbose=False)
    indexer.optimize()
def build_index(config, use_parallel=True, noconfirm=False):
    """Index the target collection of a build config into Elasticsearch.

    Loads ``config`` into a MongoDB-backed ``DataBuilder``, picks its target
    collection, and runs the create / clear-type / build / optimize sequence
    on an ``ESIndexer`` whose index name is ``'genedoc_'`` plus the build
    config's name.

    :param config: build config identifier passed to ``load_build_config``.
    :param use_parallel: stored on the indexer as ``use_parallel``.
    :param noconfirm: when true, run unattended: the "Continue?" prompt is
        skipped and the flag is forwarded to ``delete_index_type``.
    :return: None. Progress and errors are reported with ``print``.
    """
    bdr = DataBuilder(backend='mongodb')
    bdr.load_build_config(config)
    target_collection = bdr.pick_target_collection()
    target_es_index = 'genedoc_' + bdr._build_config['name']

    if not target_collection:
        print("Error: target collection is not ready yet or failed to build.")
        return

    es_idxer = ESIndexer(mapping=bdr.get_mapping())
    es_idxer.ES_INDEX_NAME = target_es_index
    es_idxer.step = 10000
    es_idxer.use_parallel = use_parallel

    es_server = es_idxer.conn.servers[0].geturl()
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("ES target: {}/{}/{}".format(es_server,
                                       es_idxer.ES_INDEX_NAME,
                                       es_idxer.ES_INDEX_TYPE))

    # Fix: noconfirm used to be ignored here, so unattended runs still
    # blocked on the interactive prompt. Short-circuit so ask() is only
    # called when confirmation is actually required.
    if not (noconfirm or ask("Continue?") == 'Y'):
        print("Aborted.")
        return

    #es_idxer.s = 609000
    #es_idxer.conn.indices.delete_index(es_idxer.ES_INDEX_NAME)
    es_idxer.create_index()
    es_idxer.delete_index_type(es_idxer.ES_INDEX_TYPE, noconfirm=noconfirm)
    es_idxer.build_index(target_collection, verbose=False)
    es_idxer.optimize()
def build_index(self, use_parallel=True):
    """Index this builder's target collection into Elasticsearch.

    Does nothing except log a message when ``get_target_collection()``
    returns a falsy value. Otherwise configures an ``ESIndexer`` with this
    builder's mapping and runs create / clear-type / build / optimize,
    without asking for confirmation.

    :param use_parallel: stored on the indexer as ``use_parallel``.
    :return: None.
    """
    source_collection = self.get_target_collection()
    if not source_collection:
        logging.info("Error: target collection is not ready yet or failed to build.")
        return

    indexer = ESIndexer(mapping=self.get_mapping())
    indexer.ES_INDEX_NAME = 'genedoc_' + self._build_config['name']
    indexer.step = 10000
    indexer.use_parallel = use_parallel

    # Disabled statements carried over from the original, for reference:
    #indexer.s = 609000
    #indexer.conn.indices.delete_index(indexer.ES_INDEX_NAME)
    indexer.create_index()
    indexer.delete_index_type(indexer.ES_INDEX_TYPE, noconfirm=True)
    indexer.build_index(source_collection, verbose=False)
    indexer.optimize()
def build_index(self, use_parallel=True):
    """Push the builder's target collection into its Elasticsearch index.

    The index is named ``'genedoc_'`` followed by the build config's
    ``'name'`` entry. When no target collection is available, a message is
    logged at INFO level and nothing is indexed.

    :param use_parallel: stored on the indexer as ``use_parallel``.
    :return: None.
    """
    not_ready_msg = "Error: target collection is not ready yet or failed to build."

    collection = self.get_target_collection()
    if collection:
        idxer = ESIndexer(mapping=self.get_mapping())
        idxer.ES_INDEX_NAME = 'genedoc_' + self._build_config['name']
        idxer.step = 10000
        idxer.use_parallel = use_parallel

        # Disabled statements carried over from the original, for reference:
        #idxer.s = 609000
        #idxer.conn.indices.delete_index(idxer.ES_INDEX_NAME)
        idxer.create_index()
        # noconfirm=True: this path never prompts.
        idxer.delete_index_type(idxer.ES_INDEX_TYPE, noconfirm=True)
        idxer.build_index(collection, verbose=False)
        idxer.optimize()
    else:
        logging.info(not_ready_msg)
def make_test_index():
    """Populate the 'gene_sample' index type with a handful of gene docs.

    Collects the ``_source`` of the search hits for three sample queries
    ('CDK2', 'BTK', 'insulin'), copies the 'gene' mapping of index
    'genedoc_2' onto index type 'gene_sample', and indexes every collected
    document under its own ``_id``. Progress is reported with ``print``.

    :return: None.
    """
    def get_sample_gene(gene):
        """Return the ``_source`` dicts of up to 1000 hits for *gene*."""
        qbdr = ESQueryBuilder(fields=['_source'], size=1000)
        _query = qbdr.dis_max_query(gene)
        _query = qbdr.add_species_custom_filters_score(_query)
        _q = {'query': _query}
        if qbdr.options:
            _q.update(qbdr.options)
        esq = ESQuery()
        res = esq._search(_q)
        return [h['_source'] for h in res['hits']['hits']]

    gli = (get_sample_gene('CDK2')
           + get_sample_gene('BTK')
           + get_sample_gene('insulin'))

    from utils.es import ESIndexer
    index_name = 'genedoc_2'
    index_type = 'gene_sample'
    esidxer = ESIndexer(None, None)
    conn = esidxer.conn

    # Clearing the old sample type is best-effort, so a failure here must not
    # stop the rebuild. Catch Exception rather than using a bare except so
    # that KeyboardInterrupt/SystemExit still propagate, and say what was
    # skipped instead of hiding it.
    try:
        esidxer.delete_index_type(index_type)
    except Exception as exc:
        print('Could not delete index type "%s" (continuing): %s'
              % (index_type, exc))

    mapping = dict(conn.get_mapping('gene', index_name)['gene'])
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(conn.put_mapping(index_type, mapping, [index_name]))

    print("Building index...")
    cnt = 0  # stays 0 when no documents were collected
    for cnt, doc in enumerate(gli, 1):
        conn.index(doc, index_name, index_type, doc['_id'])
        print('%s : %s' % (cnt, doc['_id']))
    print(conn.flush())
    print(conn.refresh())
    print('Done! - {} docs indexed.'.format(cnt))