Example #1
                # (truncated excerpt: the tail of a study_prune.optimize(...) call)
                n_trials=500,
                n_jobs=4)

            space = study_prune.best_trial.user_attrs['space']

            for pre, _, node in RenderTree(space.parameter_tree):
                if node.status:
                    print("%s%s" % (pre, node.name))

            try:
                result, search = utils_run_AutoML(
                    study_prune.best_trial,
                    X_train=X_train_hold,
                    X_test=X_test_hold,
                    y_train=y_train_hold,
                    y_test=y_test_hold,
                    categorical_indicator=categorical_indicator_hold,
                    my_scorer=my_scorer,
                    search_time=search_time_frozen,
                    memory_limit=memory_budget,
                    pipeline_size_limit=pipeline_size)

                from fastsklearnfeature.declarative_automl.optuna_package.myautoml.utils_model import show_progress
                #show_progress(search, X_test_hold, y_test_hold, my_scorer)

                print("test result: " + str(result))
                current_dynamic.append(result)
            except Exception:
                # score the run as 0.0 if the AutoML search fails
                current_dynamic.append(0.0)

            print('dynamic: ' + str(current_dynamic))
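For context, the parameter-tree printout above relies on anytree's RenderTree. A minimal, self-contained sketch (the toy tree and its status attribute are hypothetical stand-ins for space.parameter_tree):

from anytree import Node, RenderTree

# Hypothetical toy tree; extra keyword arguments become node attributes,
# so `status` mimics the flag checked in Example #1.
root = Node('pipeline', status=True)
scaler = Node('scaler', parent=root, status=True)
Node('minmax', parent=scaler, status=False)
Node('standard', parent=scaler, status=True)

# Print only the nodes that are switched on, as the example above does.
for pre, _, node in RenderTree(root):
    if node.status:
        print("%s%s" % (pre, node.name))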
Example #2
import time
import pickle

import numpy as np
import optuna
from optuna.trial import FrozenTrial

# The remaining helpers (generate_parameters, get_data, data2features,
# merge_features, FeatureTransformations, ifNull, utils_run_AutoML,
# optimize_accuracy_under_constraints2) and globals (my_scorer, feature_names,
# feature_names_new, total_search_time, my_openml_datasets) come from the
# surrounding fastsklearnfeature.declarative_automl package.


def run_AutoML(trial,
               X_train=None,
               X_test=None,
               y_train=None,
               y_test=None,
               categorical_indicator=None):
    search_time = None
    if not isinstance(trial, FrozenTrial):
        search_time, _, memory_limit, privacy_limit, training_time_limit, inference_time_limit, pipeline_size_limit, _, _, _, _, dataset_id = generate_parameters(
            trial, total_search_time, my_openml_datasets)

        model_weight = 0
        if trial.suggest_categorical('use_model_weight', [True, False]):
            model_weight = trial.suggest_loguniform('model_weight', 0.0000001,
                                                    1000)

        number_trials = trial.suggest_int('number_trials', 10, 500, log=False)

    else:
        # make this a hyperparameter
        search_time = trial.params['global_search_time_constraint']

        memory_limit = 10
        if 'global_memory_constraint' in trial.params:
            memory_limit = trial.params['global_memory_constraint']

        privacy_limit = None
        if 'privacy_constraint' in trial.params:
            privacy_limit = trial.params['privacy_constraint']

        training_time_limit = search_time
        if 'training_time_constraint' in trial.params:
            training_time_limit = trial.params['training_time_constraint']

        inference_time_limit = 60
        if 'inference_time_constraint' in trial.params:
            inference_time_limit = trial.params['inference_time_constraint']

        pipeline_size_limit = 350000000
        if 'pipeline_size_constraint' in trial.params:
            pipeline_size_limit = trial.params['pipeline_size_constraint']

        model_weight = 0
        if 'model_weight' in trial.params:
            model_weight = trial.params['model_weight']

        number_trials = trial.params['number_trials']

        if 'dataset_id' in trial.params:
            dataset_id = trial.params['dataset_id']  # get the same random seed
        else:
            dataset_id = 31

    if X_train is None:

        my_random_seed = int(time.time())
        if 'data_random_seed' in trial.user_attrs:
            my_random_seed = trial.user_attrs['data_random_seed']

        X_train, X_test, y_train, y_test, categorical_indicator, attribute_names = get_data(
            dataset_id, randomstate=my_random_seed)

    my_list_constraints_values = [
        search_time, memory_limit,
        ifNull(privacy_limit, constant_value=1000), training_time_limit,
        inference_time_limit, pipeline_size_limit, model_weight, number_trials
    ]

    metafeature_values = data2features(X_train, y_train, categorical_indicator)
    features = merge_features(my_list_constraints_values, metafeature_values)
    features = FeatureTransformations().fit(features).transform(
        features, feature_names=feature_names)

    # debug output: assembled feature vector vs. expected feature names
    print(len(features[0]))
    print(len(feature_names_new))
    print(feature_names_new)

    assert len(feature_names_new) == features.shape[1], 'feature error'

    try:
        trial.set_user_attr('features', features)
    except Exception:
        # a FrozenTrial may not support setting user attributes
        pass

    try:
        # load the pretrained meta-models, trying the first machine's path first
        model_compare = pickle.load(
            open(
                '/home/felix/phd2/picture_progress/al_only/my_great_model_compare.p',
                "rb"))
        model_success = pickle.load(
            open(
                '/home/felix/phd2/picture_progress/al_only/my_great_model_success.p',
                "rb"))
    except Exception:
        # fall back to the alternate machine's model path
        model_compare = pickle.load(
            open('/home/neutatz/data/my_models/my_great_model_compare.p',
                 "rb"))
        model_success = pickle.load(
            open('/home/neutatz/data/my_models/my_great_model_success.p',
                 "rb"))

    dynamic_params = []
    static_params = []
    search = None  # keep the last successful search so the final return cannot raise NameError
    for random_i in range(5):

        # dynamic configuration: tuned trial budget and comparison weight
        study_prune = optuna.create_study(direction='maximize')
        study_prune.optimize(
            lambda trial: optimize_accuracy_under_constraints2(
                trial=trial,
                metafeature_values_hold=metafeature_values,
                search_time=search_time,
                model_compare=model_compare,
                model_success=model_success,
                memory_limit=memory_limit,
                privacy_limit=privacy_limit,
                comparison_weight=model_weight),
            n_trials=number_trials,
            n_jobs=1)

        result = 0
        try:
            result, search = utils_run_AutoML(
                study_prune.best_trial,
                X_train=X_train,
                X_test=X_test,
                y_train=y_train,
                y_test=y_test,
                categorical_indicator=categorical_indicator,
                my_scorer=my_scorer,
                search_time=search_time,
                memory_limit=memory_limit,
                privacy_limit=privacy_limit)
        except Exception:
            result = 0
        dynamic_params.append(result)

        # static baseline: fixed budget of 500 trials, no comparison weight
        study_prune = optuna.create_study(direction='maximize')
        study_prune.optimize(
            lambda trial: optimize_accuracy_under_constraints2(
                trial=trial,
                metafeature_values_hold=metafeature_values,
                search_time=search_time,
                model_compare=model_compare,
                model_success=model_success,
                memory_limit=memory_limit,
                privacy_limit=privacy_limit,
            ),
            n_trials=500,
            n_jobs=1)

        success_result = 0
        try:
            success_result, search = utils_run_AutoML(
                study_prune.best_trial,
                X_train=X_train,
                X_test=X_test,
                y_train=y_train,
                y_test=y_test,
                categorical_indicator=categorical_indicator,
                my_scorer=my_scorer,
                search_time=search_time,
                memory_limit=memory_limit,
                privacy_limit=privacy_limit)
        except Exception:
            success_result = 0
        static_params.append(success_result)

    comparison = np.mean(dynamic_params) - np.mean(static_params)

    return comparison, search
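A minimal usage sketch (an assumed driver, not part of the source): run_AutoML is itself shaped like an Optuna objective, so the comparison score it returns can be maximized directly.

import optuna

study = optuna.create_study(direction='maximize')
# run_AutoML returns (comparison, search); Optuna only needs the scalar.
study.optimize(lambda t: run_AutoML(t)[0], n_trials=10)
print(study.best_trial.params)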