Esempio n. 1
0
    def run(self, mongo_collection_name, page_size):
        """Re-index one mongo collection into elastic, one page at a time.

        The collection is read through its resource service in pages using
        skip/limit, and every page is bulk-inserted into the search backend
        registered for the same name.

        :param mongo_collection_name: resource name; used for the resource
            service lookup, the search backend lookup and as the bulk
            insert target.
        :param page_size: number of items per page, converted with ``int``.
            A falsy value selects ``self.default_page_size``.
        :return: a completion message naming the collection.
        :raises BulkIndexError: when a bulk insert reports failed items.
        """
        if page_size:
            per_page = int(page_size)
        else:
            per_page = self.default_page_size

        print('Indexing data from mongo/{} to elastic/{}'.format(
            mongo_collection_name, mongo_collection_name))

        service = superdesk.get_resource_service(mongo_collection_name)
        total = service.get_from_mongo(None, {}).count()
        # One offset per page: 0, per_page, 2 * per_page, ...
        offsets = range(0, total, per_page)
        print('Number of items to index: {}, pages={}'.format(
            total, len(offsets)))

        for page_no, offset in enumerate(offsets, start=1):
            print('Page : {}, skip: {}'.format(page_no, offset))

            # A fresh cursor is requested for every page.
            page_cursor = service.get_from_mongo(None, {})
            page_cursor.skip(offset)
            page_cursor.limit(per_page)
            batch = list(page_cursor)

            print('Inserting {} items'.format(len(batch)))
            backend = superdesk.app.data._search_backend(
                mongo_collection_name)
            inserted, errors = backend.bulk_insert(
                mongo_collection_name, batch)
            print('Inserted {} items'.format(inserted))

            if not errors:
                continue

            # Any reported failure aborts the whole run.
            print(
                'Failed to do bulk insert of items {}. Errors: {}'.format(
                    len(errors), errors))
            raise BulkIndexError(resource=mongo_collection_name,
                                 errors=errors)

        return 'Finished indexing collection {}'.format(mongo_collection_name)
Esempio n. 2
0
    def copy_resource(cls, resource, page_size):
        """Copy every item of a resource into its search backend in batches.

        Batches come from ``cls.get_mongo_items(resource, page_size)``. Each
        batch is bulk-inserted with up to three attempts, sleeping 10 seconds
        after an attempt that raises. If every attempt raises, the batch is
        reported as 0 inserted items and processing moves on to the next
        batch (best effort).

        :param resource: resource name, used both to look up the search
            backend and as the bulk insert target.
        :param page_size: forwarded unchanged to ``cls.get_mongo_items``.
        :return: a completion message naming the resource.
        :raises BulkIndexError: when a bulk insert reports failed items.
        """
        for items in cls.get_mongo_items(resource, page_size):
            print('{} Inserting {} items'.format(time.strftime('%X %x %Z'),
                                                 len(items)))
            s = time.time()
            # Defaults used when all attempts below raise.
            success, failed = 0, 0

            for _attempt in range(3):
                try:
                    success, failed = superdesk.app.data._search_backend(
                        resource).bulk_insert(resource, items)
                except Exception as ex:
                    # The placeholder was never filled in before: the
                    # exception was passed as a second print() argument.
                    print('Exception thrown on insert to elastic {}'.format(
                        ex))
                    time.sleep(10)
                else:
                    break

            print('{} Inserted {} items in {:.3f} seconds'.format(
                time.strftime('%X %x %Z'), success,
                time.time() - s))
            if failed:
                print(
                    'Failed to do bulk insert of items {}. Errors: {}'.format(
                        len(failed), failed))
                raise BulkIndexError(resource=resource, errors=failed)

        return 'Finished indexing collection {}'.format(resource)
Esempio n. 3
0
def index_elastic_from_mongo(hours=None, collection=None):
    """Index items from mongo into elastic for one or all elastic resources.

    Batches come from ``_get_mongo_items(resource, hours)``. Each batch is
    bulk-inserted with up to three attempts, sleeping 10 seconds after an
    attempt that raises. If every attempt raises, the batch is reported as
    0 inserted items and processing moves on to the next batch.

    :param hours: forwarded unchanged to ``_get_mongo_items``.
    :param collection: when given, only this resource is indexed; otherwise
        every resource returned by ``app.data.get_elastic_resources()``.
    :raises SystemExit: when ``collection`` is not an elastic resource.
    :raises BulkIndexError: when a bulk insert reports failed items.
    """
    print('Starting indexing from mongodb for "{}" collection hours={}'.format(collection, hours))

    resources = app.data.get_elastic_resources()
    if collection:
        if collection not in resources:
            raise SystemExit('Cannot find collection: {}'.format(collection))
        resources = [collection]

    for resource in resources:
        print('Starting indexing collection {}'.format(resource))

        for items in _get_mongo_items(resource, hours):
            print('{} Inserting {} items'.format(time.strftime('%X %x %Z'), len(items)))
            s = time.time()
            # Reset per batch: without this, three failed attempts left the
            # names unbound (first batch) or holding the previous batch's
            # values (later batches).
            success, failed = 0, 0

            for _attempt in range(3):
                try:
                    success, failed = superdesk.app.data._search_backend(resource).bulk_insert(resource, items)
                except Exception as ex:
                    # The placeholder was never filled in before: the
                    # exception was passed as a second print() argument.
                    print('Exception thrown on insert to elastic {}'.format(ex))
                    time.sleep(10)
                else:
                    break

            print('{} Inserted {} items in {:.3f} seconds'.format(time.strftime('%X %x %Z'), success, time.time() - s))
            if failed:
                print('Failed to do bulk insert of items {}. Errors: {}'.format(len(failed), failed))
                raise BulkIndexError(resource=resource, errors=failed)

        print('Finished indexing collection {}'.format(resource))
Esempio n. 4
0
def index_elastic_from_mongo_from_timestamp(collection, timestamp_str, direction):
    """Index one collection from mongo into elastic relative to a timestamp.

    Batches come from
    ``_get_mongo_items_from_timestamp(collection, timestamp, direction)``.
    Each batch is bulk-inserted with up to three attempts, sleeping 10
    seconds after an attempt that raises. If every attempt raises, the batch
    is reported as 0 inserted items and processing moves on to the next one.

    :param collection: resource name; must be one of
        ``app.data.get_elastic_resources()``.
    :param timestamp_str: timestamp in ``%Y-%m-%dT%H:%M`` format,
        e.g. ``2019-05-20T05:00``.
    :param direction: either ``'older'`` or ``'newer'``; forwarded to
        ``_get_mongo_items_from_timestamp``.
    :raises SystemExit: when an argument is missing or invalid, or the
        collection is not an elastic resource.
    :raises BulkIndexError: when a bulk insert reports failed items.
    """
    if not collection:
        raise SystemExit('Collection not provided')
    elif not timestamp_str:
        raise SystemExit('Timestamp not provided')
    elif direction not in ['older', 'newer']:
        raise SystemExit('Direction can only be "older" or "newer", not {}'.format(direction))

    try:
        timestamp = datetime.strptime(timestamp_str, '%Y-%m-%dT%H:%M')
    except ValueError as e:
        raise SystemExit('Timestamp in incorrect format (e.g. 2019-05-20T05:00). {}'.format(e))

    print('Starting indexing from mongodb for "{}" collection, timestamp={}, direction={}'.format(
        collection,
        timestamp,
        direction
    ))

    resources = app.data.get_elastic_resources()
    if collection not in resources:
        raise SystemExit('Cannot find collection: {}'.format(collection))

    print('Starting indexing collection {}'.format(collection))

    for items in _get_mongo_items_from_timestamp(collection, timestamp, direction):
        print('{} Inserting {} items'.format(time.strftime('%X %x %Z'), len(items)))
        s = time.time()
        # Reset per batch: without this, three failed attempts left the
        # names unbound (first batch) or holding the previous batch's
        # values (later batches).
        success, failed = 0, 0

        for _attempt in range(3):
            try:
                success, failed = superdesk.app.data._search_backend(collection).bulk_insert(collection, items)
            except Exception as ex:
                # The placeholder was never filled in before: the exception
                # was passed as a second print() argument.
                print('Exception thrown on insert to elastic {}'.format(ex))
                time.sleep(10)
            else:
                break

        print('{} Inserted {} items in {:.3f} seconds'.format(time.strftime('%X %x %Z'), success, time.time() - s))
        if failed:
            print('Failed to do bulk insert of items {}. Errors: {}'.format(len(failed), failed))
            raise BulkIndexError(resource=collection, errors=failed)

    print('Finished indexing collection {}'.format(collection))