async def random_search_worker(get_model, model_type, features, label, configuration, output, parallel, fast):
    """Run one random-search iteration: build a model for `configuration`,
    cross-validate it, persist the result, and emit it through `output`.

    Args:
        get_model: factory used by `make_model` to instantiate the estimator.
        model_type: identifier of the model family (part of the cache key).
        features: feature list used for training (hashed into the cache key).
        label: target column name.
        configuration: sampled hyper-parameter dict; must contain 'pipeline'
            and 'reduce_classes'.
        output: async callback that receives the finished event payload.
        parallel, fast: flags forwarded to `cross_validate_model_cached`.

    Raises:
        Exception: any failure is logged via `warning` and re-raised.
    """
    log("Random search starting", level=1)
    # noinspection PyBroadException
    try:
        timer = Timer()
        X, y, classifier = make_model(get_model, features, label,
                                      configuration['pipeline'],
                                      configuration['reduce_classes'])
        results: ModelCVResult = cross_validate_model_cached(
            X, y, classifier, parallel=parallel, fast=fast, n_jobs=5)
        result_for_comparison, y_for_comparison = get_result_for_comparison(
            configuration['reduce_classes'], results, y, label)
        tab_data_output, records_output = output_all(result_for_comparison, y_for_comparison)
        event_data = optimization_configuration_ready(
            results=tab_data_output,
            records=records_output,
            parameters=configuration,
            features=features,
            model_type=model_type,
            label=label,
            elapsed=timer.elapsed_cpu())
        # Cache key: same features/label/model family maps to the same record.
        identifier = (
            hash_features(features),
            label,
            model_type,
        )
        append_data(identifier, event_data['payload'])
        await output(event_data)
        # Fixed: log message previously ended with a stray backtick.
        log("Optimization iteration finished")
    except Exception as e:
        warning(e)
        # Bare `raise` preserves the original traceback; `raise e` would
        # re-anchor it at this line and lose the chain.
        raise
def cross_validate_model_fold(chunk_input: WorkerInput) -> ModelResult:
    """Fit the classifier on one cross-validation fold and score both splits.

    Args:
        chunk_input: fold payload with 'classifier', 'X_train', 'y_train',
            'X_test', 'return_model', 'fit_kwargs', and optionally
            'feature_names'.

    Returns:
        ModelResult with train/test predictions, probability scores (or hard
        predictions when the classifier has no `predict_proba`), feature
        importance (or None), the fitted model when requested, and elapsed
        CPU time.
    """
    log("Execution fold", level=2)
    timer = Timer()
    classifier = chunk_input['classifier']
    X_train = chunk_input['X_train']
    y_train = chunk_input['y_train']
    X_test = chunk_input['X_test']
    return_model = chunk_input['return_model']

    if get_log_level() == 1:
        print(".")

    # Prefer explicitly supplied feature names; fall back to the training
    # DataFrame's columns.
    if 'feature_names' in chunk_input and chunk_input['feature_names'] is not None:
        feature_names = chunk_input['feature_names']
    else:
        feature_names = list(X_train.columns)

    classifier.fit(X_train, y_train, **chunk_input['fit_kwargs'])

    y_predict = Series(classifier.predict(X_test), index=X_test.index)
    y_train_predict = Series(classifier.predict(X_train), index=X_train.index)

    try:
        y_predict_probabilities_raw = classifier.predict_proba(X_test)
        y_train_predict_probabilities_raw = classifier.predict_proba(X_train)
    except AttributeError:
        # Classifier exposes no predict_proba: use hard predictions as scores.
        y_predict_probabilities = y_predict
        y_train_predict_probabilities = y_train_predict
    else:
        probability_columns = [
            f'y_predict_probabilities_{i}'
            for i in range(y_predict_probabilities_raw.shape[1])
        ]
        y_predict_probabilities = DataFrame(
            y_predict_probabilities_raw, index=X_test.index, columns=probability_columns)
        y_train_predict_probabilities = DataFrame(
            y_train_predict_probabilities_raw, index=X_train.index, columns=probability_columns)

    # Regressor-style float output: clamp into [0, 1], then round to get a
    # class label. Fixed: `np.float` was removed in NumPy 1.24, so the old
    # `y_predict.dtype == np.float` check raises AttributeError on modern
    # NumPy; `np.floating` also matches float32, not just float64.
    if np.issubdtype(y_predict.dtype, np.floating):
        y_predict = y_predict \
            .map(lambda v: 0 if v < 0 else v) \
            .map(lambda v: 1 if v > 1 else v) \
            .map(lambda v: round(v))

    try:
        feature_importance = Series(
            classifier[-1].feature_importances_,
            index=feature_names,
        )
    except (TypeError, AttributeError):
        try:
            classifier[-1].coef_
        except AttributeError:
            feature_importance = None
            logging.debug("No feature importance in the result")
        else:
            # NOTE(review): coefficient-based importance is deliberately
            # disabled here — the linear-model branch also yields None.
            feature_importance = None
            # feature_importance = Series(classifier[-1].coef_[0], index=feature_names)

    if not return_model:
        try:
            # Release the XGBoost native booster's memory when the fitted
            # model is not returned to the caller.
            classifier[-1].get_booster().__del__()
        except AttributeError:
            pass

    return ModelResult(
        y_test_score=y_predict_probabilities,
        y_test_predict=y_predict,
        y_train_predict=y_train_predict,
        y_train_score=y_train_predict_probabilities,
        feature_importance=feature_importance,
        model=classifier[-1] if return_model else None,
        elapsed=timer.elapsed_cpu(),
    )