Esempio n. 1
0
def sync_index(config, use_parallel=True, noconfirm=False):
    """Diff the MongoDB target collection against its Elasticsearch index.

    Loads ``config`` into a MongoDB-backed ``DataBuilder``, wraps the picked
    target collection and the ``genedoc_<name>`` ES index in their backend
    objects, prints a one-line summary of each side and, once confirmed,
    runs ``diff.diff_collections`` on the pair.

    :param config: build configuration handed to ``load_build_config``.
    :param use_parallel: stored on the ES indexer as ``use_parallel``.
    :param noconfirm: when true, the interactive "Continue?" prompt is
        skipped.
    :return: whatever ``diff.diff_collections(sync_src, sync_target)``
        returns, or ``None`` when the prompt is not answered with ``"Y"``.
    """
    bdr = DataBuilder(backend='mongodb')
    bdr.load_build_config(config)
    target_collection = bdr.pick_target_collection()
    target_es_index = 'genedoc_' + bdr._build_config['name']

    sync_src = backend.GeneDocMongoDBBackend(target_collection)

    es_idxer = ESIndexer(bdr.get_mapping())
    es_idxer.ES_INDEX_NAME = target_es_index
    es_idxer.step = 10000
    es_idxer.use_parallel = use_parallel
    sync_target = backend.GeneDocESBackend(es_idxer)

    # Single-argument print(...) behaves the same as a statement (Python 2)
    # and as a function (Python 3), so the block parses on both.
    print('\tsync_src:\t{:<40}{}\t{}'.format(target_collection.name,
                                             sync_src.name,
                                             sync_src.count()))
    print('\tsync_target\t{:<40}{}\t{}'.format(target_es_index,
                                               sync_target.name,
                                               sync_target.count()))
    if noconfirm or ask("Continue?") == "Y":
        return diff.diff_collections(sync_src, sync_target)
    return None
Esempio n. 2
0
def build_index(config, use_parallel=True, noconfirm=False):
    """Build the ``genedoc_<name>`` Elasticsearch index from MongoDB.

    Loads ``config`` into a MongoDB-backed ``DataBuilder``, picks the target
    collection and, after confirmation, creates the index, clears the
    existing index type, indexes the collection and optimizes the index.

    :param config: build configuration handed to ``load_build_config``.
    :param use_parallel: stored on the ES indexer as ``use_parallel``.
    :param noconfirm: when true, the "Continue?" prompt is skipped; the flag
        is also forwarded to ``delete_index_type``.
    :return: ``None``; progress and errors are reported on stdout.
    """
    bdr = DataBuilder(backend='mongodb')
    bdr.load_build_config(config)
    target_collection = bdr.pick_target_collection()
    target_es_index = 'genedoc_' + bdr._build_config['name']

    if not target_collection:
        print("Error: target collection is not ready yet or failed to build.")
        return

    es_idxer = ESIndexer(mapping=bdr.get_mapping())
    es_idxer.ES_INDEX_NAME = target_es_index
    es_idxer.step = 10000
    es_idxer.use_parallel = use_parallel
    es_server = es_idxer.conn.servers[0].geturl()
    # Single-argument print(...) is valid on both Python 2 and Python 3.
    print("ES target: {}/{}/{}".format(es_server,
                                       es_idxer.ES_INDEX_NAME,
                                       es_idxer.ES_INDEX_TYPE))

    # Honor noconfirm here as well, so an unattended run is not blocked by
    # the interactive prompt.
    if not (noconfirm or ask("Continue?") == 'Y'):
        print("Aborted.")
        return

    es_idxer.create_index()
    es_idxer.delete_index_type(es_idxer.ES_INDEX_TYPE, noconfirm=noconfirm)
    es_idxer.build_index(target_collection, verbose=False)
    es_idxer.optimize()
Esempio n. 3
0
def build_index(config, use_parallel=True, noconfirm=False):
    """Index the picked MongoDB target collection into Elasticsearch.

    The ES index is named ``genedoc_<name>``, where ``<name>`` comes from
    the loaded build config. Unless ``noconfirm`` is true the user is asked
    to confirm before the index is touched. ``noconfirm`` is also forwarded
    to ``delete_index_type``.

    Returns ``None``; the outcome is reported through ``print``.
    """
    builder = DataBuilder(backend='mongodb')
    builder.load_build_config(config)
    collection = builder.pick_target_collection()
    index_name = 'genedoc_' + builder._build_config['name']

    # Nothing to index: report and leave.
    if not collection:
        print("Error: target collection is not ready yet or failed to build.")
        return

    indexer = ESIndexer(mapping=builder.get_mapping())
    indexer.ES_INDEX_NAME = index_name
    indexer.step = 10000
    indexer.use_parallel = use_parallel

    server_url = indexer.conn.servers[0].geturl()
    target_line = "ES target: {}/{}/{}".format(server_url,
                                               indexer.ES_INDEX_NAME,
                                               indexer.ES_INDEX_TYPE)
    print(target_line)

    confirmed = noconfirm or ask("Continue?") == 'Y'
    if not confirmed:
        print("Aborted.")
        return

    # Create, clear the doc type, index, then optimize, in that order.
    indexer.create_index()
    indexer.delete_index_type(indexer.ES_INDEX_TYPE, noconfirm=noconfirm)
    indexer.build_index(collection, verbose=False)
    indexer.optimize()
Esempio n. 4
0
    def sync_index(self, use_parallel=True):
        """Diff this builder's target collection against its ES index.

        The ES index name is taken from ``target_collection.name`` of the
        object returned by ``get_target_collection()``.

        :param use_parallel: stored on the ES indexer as ``use_parallel``.
        :return: the result of ``diff.diff_collections(source, target)``.
        """
        from utils import diff

        source = self.get_target_collection()

        indexer = ESIndexer(self.get_mapping())
        indexer.ES_INDEX_NAME = source.target_collection.name
        indexer.step = 10000
        indexer.use_parallel = use_parallel

        target = databuild.backend.GeneDocESBackend(indexer)
        return diff.diff_collections(source, target)
Esempio n. 5
0
    def sync_index(self, use_parallel=True):
        """Compare the target collection with the matching ES index.

        An ``ESIndexer`` is built from ``get_mapping()``, pointed at the
        index named after ``target_collection.name`` of the source, and
        wrapped in ``GeneDocESBackend``. Both sides are then passed to
        ``diff.diff_collections``.

        :param use_parallel: copied onto the indexer's ``use_parallel``.
        :return: whatever ``diff.diff_collections`` returns.
        """
        from utils import diff

        src_backend = self.get_target_collection()

        idxer = ESIndexer(self.get_mapping())
        idxer.ES_INDEX_NAME = src_backend.target_collection.name
        idxer.step = 10000
        idxer.use_parallel = use_parallel
        es_backend = databuild.backend.GeneDocESBackend(idxer)

        result = diff.diff_collections(src_backend, es_backend)
        return result
Esempio n. 6
0
 def build_index(self, use_parallel=True):
     """Index the target collection into ``genedoc_<name>`` without prompts.

     ``<name>`` is read from ``self._build_config['name']``. When
     ``get_target_collection()`` returns a falsy value, a message is logged
     and nothing is indexed.

     :param use_parallel: stored on the ES indexer as ``use_parallel``.
     """
     collection = self.get_target_collection()
     if not collection:
         logging.info("Error: target collection is not ready yet or failed to build.")
         return

     indexer = ESIndexer(mapping=self.get_mapping())
     indexer.ES_INDEX_NAME = 'genedoc_' + self._build_config['name']
     indexer.step = 10000
     indexer.use_parallel = use_parallel

     # Create, clear the doc type (no confirmation), index, then optimize.
     indexer.create_index()
     indexer.delete_index_type(indexer.ES_INDEX_TYPE, noconfirm=True)
     indexer.build_index(collection, verbose=False)
     indexer.optimize()
Esempio n. 7
0
 def build_index(self, use_parallel=True):
     """Build the ``genedoc_<name>`` ES index from the target collection.

     Runs unattended: ``delete_index_type`` is called with
     ``noconfirm=True``. If no target collection is available, a message is
     logged via ``logging.info`` and the method returns without indexing.

     :param use_parallel: copied onto the indexer's ``use_parallel``.
     """
     src_collection = self.get_target_collection()
     if not src_collection:
         message = "Error: target collection is not ready yet or failed to build."
         logging.info(message)
         return

     index_name = 'genedoc_' + self._build_config['name']
     idxer = ESIndexer(mapping=self.get_mapping())
     idxer.ES_INDEX_NAME = index_name
     idxer.step = 10000
     idxer.use_parallel = use_parallel

     idxer.create_index()
     idxer.delete_index_type(idxer.ES_INDEX_TYPE, noconfirm=True)
     idxer.build_index(src_collection, verbose=False)
     idxer.optimize()
Esempio n. 8
0
def diff2src(use_parallel=True, noconfirm=False):
    """Interactively pick two ``genedoc*`` sources and diff them.

    Candidate sources are the MongoDB collections of the target database and
    the Elasticsearch indices whose names start with ``genedoc``. Each is
    recorded as a ``(name, count, kind)`` tuple with ``kind`` being
    ``'mongodb'`` or ``'es'``. The two picks are wrapped in the matching
    backend and passed to ``diff.diff_collections``.

    :param use_parallel: forwarded to ``diff.diff_collections``.
    :param noconfirm: when true, the "Continue?" prompt is skipped.
    :return: the result of ``diff.diff_collections``, or ``None`` when the
        prompt is not answered with ``"Y"``.
    """
    target_db = get_target_db()
    src_li = [(name, target_db[name].count(), 'mongodb')
              for name in sorted(target_db.collection_names())
              if name.startswith('genedoc')]

    es_idxer = ESIndexer()
    # NOTE(review): presumably cleared so that get_indices() is not limited
    # to a default index -- confirm against the ES client in use.
    es_idxer.conn.default_indices = []
    for es_idx in es_idxer.conn.indices.get_indices():
        if es_idx.startswith('genedoc'):
            es_idxer.ES_INDEX_NAME = es_idx
            src_li.append((es_idx, es_idxer.count()['count'], 'es'))

    print("Found {} sources:".format(len(src_li)))
    src_1 = _pick_one(src_li, "Pick first source above: ")
    # The first pick must not be offered again for the second choice.
    src_li.remove(src_1)
    # A bare `print` is a no-op expression when print is a function;
    # print("") emits the intended blank line on Python 2 and 3 alike.
    print("")
    src_2 = _pick_one(src_li, "Pick second source above: ")

    sync_li = []
    for src in (src_1, src_2):
        if src[2] == 'mongodb':
            b = backend.GeneDocMongoDBBackend(target_db[src[0]])
        elif src[2] == 'es':
            # A fresh indexer per ES source, so each backend keeps its own
            # index name.
            es_idxer = ESIndexer()
            es_idxer.ES_INDEX_NAME = src[0]
            es_idxer.step = 10000
            b = backend.GeneDocESBackend(es_idxer)
        sync_li.append(b)

    sync_src, sync_target = sync_li
    print('\tsync_src:\t{:<45}{}\t{}'.format(*src_1))
    print('\tsync_target\t{:<45}{}\t{}'.format(*src_2))
    if noconfirm or ask("Continue?") == "Y":
        return diff.diff_collections(sync_src,
                                     sync_target,
                                     use_parallel=use_parallel)
    return None
Esempio n. 9
0
def sync_index(config, use_parallel=True, noconfirm=False):
    """Compare the MongoDB target collection with its ``genedoc_`` ES index.

    A summary line for each side (name, backend name, count) is printed
    first. The diff runs only when ``noconfirm`` is true or the user answers
    ``"Y"`` to the prompt.

    :param config: build configuration handed to ``load_build_config``.
    :param use_parallel: stored on the ES indexer as ``use_parallel``.
    :param noconfirm: skip the interactive prompt when true.
    :return: the result of ``diff.diff_collections``, or ``None`` if the
        run is not confirmed.
    """
    builder = DataBuilder(backend='mongodb')
    builder.load_build_config(config)
    mongo_coll = builder.pick_target_collection()
    index_name = 'genedoc_' + builder._build_config['name']

    sync_src = backend.GeneDocMongoDBBackend(mongo_coll)

    indexer = ESIndexer(builder.get_mapping())
    indexer.ES_INDEX_NAME = index_name
    indexer.step = 10000
    indexer.use_parallel = use_parallel
    sync_target = backend.GeneDocESBackend(indexer)

    # Each summary is formatted and printed before the next one is built.
    src_summary = '\tsync_src:\t{:<40}{}\t{}'.format(
        mongo_coll.name, sync_src.name, sync_src.count())
    print(src_summary)
    target_summary = '\tsync_target\t{:<40}{}\t{}'.format(
        index_name, sync_target.name, sync_target.count())
    print(target_summary)

    if not noconfirm and ask("Continue?") != "Y":
        return None
    return diff.diff_collections(sync_src, sync_target)
Esempio n. 10
0
def diff2src(use_parallel=True, noconfirm=False):
    """Interactively pick two ``genedoc*`` sources and diff them.

    Sources are gathered from the target MongoDB database (collections) and
    from Elasticsearch (indices), keeping only names that start with
    ``genedoc``. Each entry is a ``(name, count, kind)`` tuple with ``kind``
    being ``'mongodb'`` or ``'es'``. The two picks are wrapped in the
    matching backend and passed to ``diff.diff_collections``.

    :param use_parallel: forwarded to ``diff.diff_collections``.
    :param noconfirm: when true, the "Continue?" prompt is skipped.
    :return: the result of ``diff.diff_collections``, or ``None`` when the
        prompt is not answered with ``"Y"``.
    """
    target_db = get_target_db()
    src_li = [(name, target_db[name].count(), 'mongodb')
              for name in sorted(target_db.collection_names())
              if name.startswith('genedoc')]

    es_idxer = ESIndexer()
    # NOTE(review): presumably cleared so that get_indices() is not limited
    # to a default index -- confirm against the ES client in use.
    es_idxer.conn.default_indices = []
    for es_idx in es_idxer.conn.indices.get_indices():
        if es_idx.startswith('genedoc'):
            es_idxer.ES_INDEX_NAME = es_idx
            src_li.append((es_idx, es_idxer.count()['count'], 'es'))

    # Single-argument print(...) prints the same text whether print is a
    # statement (Python 2) or a function (Python 3).
    print("Found {} sources:".format(len(src_li)))
    src_1 = _pick_one(src_li, "Pick first source above: ")
    # The first pick must not be offered again for the second choice.
    src_li.remove(src_1)
    # print("") emits the blank line on both Python 2 and 3; a bare `print`
    # does so only on Python 2.
    print("")
    src_2 = _pick_one(src_li, "Pick second source above: ")

    sync_li = []
    for src in (src_1, src_2):
        if src[2] == 'mongodb':
            b = backend.GeneDocMongoDBBackend(target_db[src[0]])
        elif src[2] == 'es':
            # A fresh indexer per ES source, so each backend keeps its own
            # index name.
            es_idxer = ESIndexer()
            es_idxer.ES_INDEX_NAME = src[0]
            es_idxer.step = 10000
            b = backend.GeneDocESBackend(es_idxer)
        sync_li.append(b)

    sync_src, sync_target = sync_li
    print('\tsync_src:\t{:<45}{}\t{}'.format(*src_1))
    print('\tsync_target\t{:<45}{}\t{}'.format(*src_2))
    if noconfirm or ask("Continue?") == "Y":
        return diff.diff_collections(sync_src, sync_target,
                                     use_parallel=use_parallel)
    return None