def parallel_calculate_chunks(cutoff_time, chunk_size, feature_set, approximate,
                              training_window, save_progress, entityset, n_jobs,
                              no_unapproximated_aggs, cutoff_df_time_var, target_time,
                              pass_columns, progress_bar, dask_kwargs=None,
                              progress_callback=None):
    from distributed import Future, as_completed
    from dask.base import tokenize

    client = None
    cluster = None
    try:
        client, cluster = create_client_and_cluster(n_jobs=n_jobs,
                                                    dask_kwargs=dask_kwargs,
                                                    entityset_size=entityset.__sizeof__())
        # scatter the entityset
        # denote futures with a leading underscore
        start = time.time()
        es_token = "EntitySet-{}".format(tokenize(entityset))
        if es_token in client.list_datasets():
            msg = "Using EntitySet persisted on the cluster as dataset {}"
            progress_bar.write(msg.format(es_token))
            _es = client.get_dataset(es_token)
        else:
            _es = client.scatter([entityset])[0]
            client.publish_dataset(**{_es.key: _es})

        # pickle the feature set and scatter it to the workers
        pickled_feats = cloudpickle.dumps(feature_set)
        _saved_features = client.scatter(pickled_feats)
        client.replicate([_es, _saved_features])
        num_scattered_workers = len(client.who_has([Future(es_token)]).get(es_token, []))
        num_workers = len(client.scheduler_info()['workers'].values())

        # split the cutoff time dataframe into chunks, one task per chunk
        chunks = cutoff_time.groupby(cutoff_df_time_var)

        if not chunk_size:
            chunk_size = _handle_chunk_size(1.0 / num_workers, cutoff_time.shape[0])

        chunks = _chunk_dataframe_groups(chunks, chunk_size)
        chunks = [df for _, df in chunks]

        if len(chunks) < num_workers:
            chunk_warning = "Fewer chunks ({}) than workers ({}); consider reducing the chunk size"
            warning_string = chunk_warning.format(len(chunks), num_workers)
            progress_bar.write(warning_string)

        scatter_warning(num_scattered_workers, num_workers)
        end = time.time()
        scatter_time = round(end - start)

        # if enabled, reset the timer after scattering for better time-remaining estimates
        if not progress_bar.disable:
            progress_bar.reset()

        scatter_string = "EntitySet scattered to {} workers in {} seconds"
        progress_bar.write(scatter_string.format(num_scattered_workers, scatter_time))

        # map chunks
        # TODO: consider handling task submission dask kwargs
        _chunks = client.map(calculate_chunk,
                             chunks,
                             feature_set=_saved_features,
                             chunk_size=None,
                             entityset=_es,
                             approximate=approximate,
                             training_window=training_window,
                             save_progress=save_progress,
                             no_unapproximated_aggs=no_unapproximated_aggs,
                             cutoff_df_time_var=cutoff_df_time_var,
                             target_time=target_time,
                             pass_columns=pass_columns,
                             progress_bar=None,
                             progress_callback=progress_callback)

        # gather results as they complete and update progress
        feature_matrix = []
        iterator = as_completed(_chunks).batches()
        for batch in iterator:
            results = client.gather(batch)
            for result in results:
                feature_matrix.append(result)
                previous_progress = progress_bar.n
                progress_bar.update(result.shape[0])
                if progress_callback is not None:
                    update, progress_percent, time_elapsed = update_progress_callback_parameters(progress_bar, previous_progress)
                    progress_callback(update, progress_percent, time_elapsed)
    finally:
        if client is not None:
            client.close()
        # only close the cluster if it was not supplied by the caller
        if 'cluster' not in (dask_kwargs or {}) and cluster is not None:
            cluster.close()

    feature_matrix = pd.concat(feature_matrix)

    return feature_matrix
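
# ---------------------------------------------------------------------------
# The function above follows a scatter -> publish -> map -> gather-in-batches
# pattern on a Dask ``distributed`` cluster. Below is a minimal, self-contained
# sketch of that pattern for reference; the names used here (``scale``,
# ``base_future``, ``_scatter_map_gather_demo``) are hypothetical illustrations
# and not part of this module.
# ---------------------------------------------------------------------------
def _scatter_map_gather_demo():
    from distributed import Client, LocalCluster, as_completed

    def scale(x, base):
        return x * base

    cluster = LocalCluster(n_workers=2, threads_per_worker=1)
    client = Client(cluster)
    try:
        # scatter a shared object once; the resulting future is reused by every task
        [base_future] = client.scatter([10])
        futures = client.map(scale, range(8), base=base_future)

        results = []
        # consume completed tasks in batches, as the function above does
        for batch in as_completed(futures).batches():
            results.extend(client.gather(batch))
        return sorted(results)
    finally:
        client.close()
        cluster.close()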