Example 1
    def _add_consent_response(self, rows):
        """
        Transforms the iterable to add user consent from the consent service.

        The consent lookup makes an external API call to return consent.
        For performance reasons the number of consent records per call is limited by
        consent_page_size, so the iterable is sliced into chunks, requesting consent
        for 100 rows at a time.
        """
        # Slice iterable into chunks
        row_chunks = slice_iterable_into_chunks(rows, self.consent_page_size)
        for chunk in row_chunks:
            """
            Loop over the chunks and extract the email and item.
            Save the item because the iterator cannot be used twice.
            """
            rows = list(chunk)
            # Perform a consent lookup on the emails via a POST request
            consent_lookups = consent.get_many([
                row['email']
                for row in rows if self._is_valid_email(row['email'])
            ])
            for row in rows:
                # Assign contact consent boolean to accepts_dit_email_marketing
                # and yield modified result.
                row['accepts_dit_email_marketing'] = consent_lookups.get(
                    row['email'], False)
                yield row
Example 2
def sync_app(search_app, batch_size=None, post_batch_callback=None):
    """Syncs objects for an app to ElasticSearch in batches of batch_size."""
    model_name = search_app.es_model.__name__
    batch_size = batch_size or search_app.bulk_batch_size
    logger.info(
        f'Processing {model_name} records, using batch size {batch_size}')

    read_indices, write_index = search_app.es_model.get_read_and_write_indices()

    num_source_rows_processed = 0
    num_objects_synced = 0
    total_rows = search_app.queryset.count()
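    # Stream only primary keys here; full objects are refetched per batch below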
    it = search_app.queryset.values_list(
        'pk', flat=True).iterator(chunk_size=batch_size)
    batches = slice_iterable_into_chunks(it, batch_size)
    for batch in batches:
        objs = search_app.queryset.filter(pk__in=batch)

        num_actions = sync_objects(
            search_app.es_model,
            objs,
            read_indices,
            write_index,
            post_batch_callback=post_batch_callback,
        )

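        # Emit progress only when this batch takes the running total past a
        # multiple of PROGRESS_INTERVAL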
        emit_progress = (
            (num_source_rows_processed + num_actions) // PROGRESS_INTERVAL -
            num_source_rows_processed // PROGRESS_INTERVAL > 0)

        num_source_rows_processed += len(batch)
        num_objects_synced += num_actions

        if emit_progress:
            logger.info(
                f'{model_name} rows processed: {num_source_rows_processed}/{total_rows} '
                f'{num_source_rows_processed*100//total_rows}%', )

    logger.info(
        f'{model_name} rows processed: {num_source_rows_processed}/{total_rows} 100%.'
    )
    if num_source_rows_processed != num_objects_synced:
        logger.warning(
            f'{num_source_rows_processed - num_objects_synced} deleted objects detected while '
            f'syncing model {model_name}', )
Example 3
    def run(self, tmp_file_creator, endpoint=None):
        """Runs the synchronisation operation."""
        logger.info('Starting CH load...')
        endpoint = endpoint or settings.COMPANIESHOUSE_DOWNLOAD_URL
        ch_csv_urls = get_ch_latest_dump_file_list(endpoint)
        logger.info('Found the following Companies House CSV URLs: %s',
                    ch_csv_urls)

        for csv_url in ch_csv_urls:
            ch_company_rows = iter_ch_csv_from_url(csv_url, tmp_file_creator)

            batch_iter = slice_iterable_into_chunks(
                ch_company_rows,
                settings.BULK_INSERT_BATCH_SIZE,
            )
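            # Reuse a single database cursor for all batches from this CSV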
            with connection.cursor() as cursor:
                for batch in batch_iter:
                    self._process_batch(cursor, batch)

        logger.info('Companies House load complete, %s records loaded',
                    self.count)
Example 4
def sync_app(search_app, batch_size=None, post_batch_callback=None):
    """Syncs objects for an app to ElasticSearch in batches of batch_size."""
    model_name = search_app.es_model.__name__
    batch_size = batch_size or search_app.bulk_batch_size
    logger.info(f'Processing {model_name} records, using batch size {batch_size}')

    read_indices, write_index = search_app.es_model.get_read_and_write_indices()

    rows_processed = 0
    total_rows = search_app.queryset.count()
    it = search_app.queryset.iterator(chunk_size=batch_size)
    batches = slice_iterable_into_chunks(it, batch_size)
    for batch in batches:
        num_actions = sync_objects(
            search_app.es_model,
            batch,
            read_indices,
            write_index,
            post_batch_callback=post_batch_callback,
        )

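        # Emit progress only when this batch takes the running total past a
        # multiple of PROGRESS_INTERVAL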
        emit_progress = (
            (rows_processed + num_actions) // PROGRESS_INTERVAL
            - rows_processed // PROGRESS_INTERVAL
            > 0
        )

        rows_processed += num_actions

        if emit_progress:
            logger.info(
                f'{model_name} rows processed: {rows_processed}/{total_rows} '
                f'{rows_processed*100//total_rows}%',
            )

    logger.info(f'{model_name} rows processed: {rows_processed}/{total_rows} 100%.')
Example 5
def test_slice_iterable_into_chunks():
    """Test slice iterable into chunks."""
    size = 2
    iterable = range(5)
    chunks = list(slice_iterable_into_chunks(iterable, size))
    assert chunks == [[0, 1], [2, 3], [4]]
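
None of the examples above include the helper itself. Based on the behaviour pinned by the test (a range of 5 sliced with size 2 yields [[0, 1], [2, 3], [4]]), a minimal sketch of slice_iterable_into_chunks could look like the following; the actual implementation in the source project may differ in details, such as whether chunks are lists or tuples.

from itertools import islice


def slice_iterable_into_chunks(iterable, batch_size):
    """Yield successive lists of up to batch_size items from iterable (sketch)."""
    iterator = iter(iterable)
    while True:
        # islice consumes at most batch_size items per pass
        chunk = list(islice(iterator, batch_size))
        if not chunk:
            # The iterable is exhausted
            return
        yield chunk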