コード例 #1
0
ファイル: methods_utils.py プロジェクト: HCVE/echo-clustering
async def random_search_worker(get_model, model_type, features, label,
                               configuration, output, parallel, fast,
                               n_jobs=5):
    """Evaluate one random-search configuration and publish the result.

    Builds a model from ``configuration``, cross-validates it, converts the
    outcome into the output format, stores the event payload with
    ``append_data`` and finally awaits ``output`` with the finished event.

    Args:
        get_model: Forwarded to ``make_model``.
        model_type: Recorded in the event and used as part of the storage
            identifier.
        features: Forwarded to ``make_model``; hashed with ``hash_features``
            for the storage identifier.
        label: Forwarded to ``make_model`` and ``get_result_for_comparison``;
            also part of the storage identifier.
        configuration: Mapping that provides at least the ``'pipeline'`` and
            ``'reduce_classes'`` entries; recorded in the event as
            ``parameters``.
        output: Callable returning an awaitable; it receives the event data.
        parallel: Forwarded to ``cross_validate_model_cached``.
        fast: Forwarded to ``cross_validate_model_cached``.
        n_jobs: Forwarded to ``cross_validate_model_cached``. Defaults to 5,
            the value that used to be hard-coded here.

    Raises:
        Exception: Any error raised while processing is reported through
            ``warning`` and then re-raised unchanged.
    """
    log("Random search starting", level=1)
    # noinspection PyBroadException
    try:
        timer = Timer()
        X, y, classifier = make_model(get_model, features, label,
                                      configuration['pipeline'],
                                      configuration['reduce_classes'])

        # NOTE(review): this call is synchronous inside a coroutine, so it
        # presumably blocks the event loop while it runs — confirm this is
        # acceptable for the caller.
        results: ModelCVResult = cross_validate_model_cached(X,
                                                             y,
                                                             classifier,
                                                             parallel=parallel,
                                                             fast=fast,
                                                             n_jobs=n_jobs)

        result_for_comparison, y_for_comparison = get_result_for_comparison(
            configuration['reduce_classes'], results, y, label)

        tab_data_output, records_output = output_all(result_for_comparison,
                                                     y_for_comparison)

        event_data = optimization_configuration_ready(
            results=tab_data_output,
            records=records_output,
            parameters=configuration,
            features=features,
            model_type=model_type,
            label=label,
            elapsed=timer.elapsed_cpu())

        # Results are stored per (feature set, label, model type).
        identifier = (
            hash_features(features),
            label,
            model_type,
        )

        append_data(identifier, event_data['payload'])
        await output(event_data)
        log("Optimization iteration finished")
    except Exception as e:
        # Report the failure, then let it propagate with its original
        # traceback.
        warning(e)
        raise
コード例 #2
0
def _final_estimator(classifier):
    """Return ``classifier[-1]``, or ``classifier`` itself if it cannot be indexed."""
    try:
        return classifier[-1]
    except TypeError:
        return classifier


def _clip_and_round_to_binary(value):
    """Map a continuous prediction to 0 below 0, 1 above 1, else ``round(value)``."""
    if value < 0:
        return 0
    if value > 1:
        return 1
    return round(value)


def cross_validate_model_fold(chunk_input: WorkerInput) -> ModelResult:
    """Fit the classifier on one fold and collect its predictions.

    Args:
        chunk_input: Mapping with the entries ``'classifier'``, ``'X_train'``,
            ``'y_train'``, ``'X_test'``, ``'return_model'`` and
            ``'fit_kwargs'``. An optional ``'feature_names'`` entry overrides
            the feature names, which otherwise come from ``X_train.columns``.
            ``X_train`` and ``X_test`` must expose ``.index`` (presumably
            pandas DataFrames — confirm against the caller).

    Returns:
        A ``ModelResult`` holding the test and train predictions, the
        test and train scores (class probabilities when the classifier has
        ``predict_proba``, otherwise the raw predictions), the feature
        importance (``None`` when the final estimator has no
        ``feature_importances_``), the final estimator when
        ``return_model`` is truthy, and the elapsed CPU time.
    """
    log("Execution fold", level=2)
    timer = Timer()
    classifier = chunk_input['classifier']
    X_train = chunk_input['X_train']
    y_train = chunk_input['y_train']
    X_test = chunk_input['X_test']
    return_model = chunk_input['return_model']

    # Progress marker: one dot per fold at log level 1.
    if get_log_level() == 1:
        print(".")

    feature_names = (chunk_input['feature_names']
                     if 'feature_names' in chunk_input else None)
    if feature_names is None:
        feature_names = list(X_train.columns)

    classifier.fit(X_train, y_train, **chunk_input['fit_kwargs'])

    y_predict = Series(classifier.predict(X_test), index=X_test.index)
    y_train_predict = Series(classifier.predict(X_train), index=X_train.index)

    try:
        y_predict_probabilities_raw = classifier.predict_proba(X_test)
        y_train_predict_probabilities_raw = classifier.predict_proba(X_train)
    except AttributeError:
        # No probability support: use the raw predictions as scores. This
        # binding happens before the clipping below, so the scores keep the
        # unclipped values.
        y_predict_probabilities = y_predict
        y_train_predict_probabilities = y_train_predict
    else:
        probability_columns = [
            f'y_predict_probabilities_{i}'
            for i in range(y_predict_probabilities_raw.shape[1])
        ]
        y_predict_probabilities = DataFrame(y_predict_probabilities_raw,
                                            index=X_test.index,
                                            columns=probability_columns)
        y_train_predict_probabilities = DataFrame(
            y_train_predict_probabilities_raw,
            index=X_train.index,
            columns=probability_columns)

    # ``np.float`` (removed in NumPy 1.24) was an alias for the builtin
    # ``float``, so the original comparison matched float64 only; keep that.
    if y_predict.dtype == np.float64:
        y_predict = y_predict.map(_clip_and_round_to_binary)

    # Resolve the last step once so a classifier that cannot be indexed is
    # handled the same way everywhere below.
    final_estimator = _final_estimator(classifier)

    try:
        feature_importance = Series(
            final_estimator.feature_importances_,
            index=feature_names,
        )
    except (TypeError, AttributeError):
        # Coefficients (``coef_``) are currently not reported as feature
        # importance; only note the case where neither attribute exists.
        feature_importance = None
        if not hasattr(final_estimator, 'coef_'):
            logging.debug("No feature importance in the result")

    if not return_model:
        # Best-effort cleanup for estimators exposing ``get_booster``
        # (presumably XGBoost models, to free booster memory — confirm);
        # estimators without it are left untouched.
        try:
            final_estimator.get_booster().__del__()
        except AttributeError:
            pass

    return ModelResult(y_test_score=y_predict_probabilities,
                       y_test_predict=y_predict,
                       y_train_predict=y_train_predict,
                       y_train_score=y_train_predict_probabilities,
                       feature_importance=feature_importance,
                       model=final_estimator if return_model else None,
                       elapsed=timer.elapsed_cpu())