Code example #1
def grid_search_AB():
    grid_search(data, lambda **kwargs: TimeCombiner(AvgModel(), TimeConcepts(concepts=concepts, **kwargs)),
                {"K": 0.25}, {
                    "alpha": np.arange(0.2, 0.7, 0.2),
                    "beta": np.arange(0.02, 0.1, 0.02),
                }, plot_axes=['alpha', 'beta'], time=True,
                )
Code example #2
def grid_search_AB2():
    grid_search(data, lambda **kwargs: TimeCombiner(AvgModel(), TimePriorCurrentModel(**kwargs)),
                {"KI": 0.3, 'KC': 0.3}, {
                    "alpha": np.arange(0.2, 1.1, 0.2),
                    "beta": np.arange(0.02, 0.2, 0.02),
                }, plot_axes=['alpha', 'beta'], time=True,
                )
Code example #3
File: mathgarden.py  Project: thran/experiments2.0
def grid_search_AB():
    grid_search(data, lambda **kwargs: TimeCombiner(AvgModel(), BasicTimeModel(**kwargs)),
                {"K": 0.2}, {
                    "alpha": np.arange(0.4, 1.3, 0.2),
                    "beta": np.arange(0.06, 0.2, 0.02),
                }, plot_axes=['alpha', 'beta'], time=True,
                )
Code example #4
def grid(data, model):
    utils.grid_search(data, model,
                      {"KC": 3, "KI": 0.5}, {
                      # {"alpha": 0.25, "beta": 0.02}, {
                          "alpha": np.arange(0.4, 1.7, 0.2),
                          "beta": np.arange(0., 0.2, 0.02),
                          # "KC": np.arange(1.5, 5.0, 0.25),
                          # "KI": np.arange(0, 2.5, 0.25),
                      # }, plot_axes=["KC", "KI"])
                      }, plot_axes=["alpha", "beta"])

    plt.show()
Code example #5
def grid_search_Ks():
    grid_search(data, lambda **kwargs: TimeCombiner(AvgModel(), TimePriorCurrentModel(**kwargs)),
                {"alpha": 0.6, "beta": 0.1},
                {"KC": np.arange(0.1, 0.7, 0.1),"KI": np.arange(0.1, 0.7, 0.1)},
                plot_axes=['KI', 'KC'], time=True,
                )
Code example #6
def grid_search_K():
    grid_search(data, lambda **kwargs: TimeCombiner(AvgModel(), TimeConcepts(concepts=concepts, **kwargs)),
                {"alpha": 0.4, "beta": 0.05},
                {"K": np.arange(0, 0.5, 0.05)},
                plot_axes='K', time=True,
                )
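
Examples #1 through #6 (and #11 below) appear to drive the same grid_search helper used in thran/experiments2.0: a model factory, a dict of fixed parameters, a dict of np.arange sweeps, and plot_axes naming the swept dimensions. The helper's own source is not reproduced on this page, so the following is a minimal sketch of that call contract, assuming it scores every point of the Cartesian product and plots a line for one axis or a heatmap for two. evaluate_model is a hypothetical stand-in, not the project's actual scorer.

import itertools

import matplotlib.pyplot as plt
import numpy as np


def evaluate_model(model, data, time=False):
    # Hypothetical stand-in: the real helper presumably fits `model` on
    # `data` and returns a goodness-of-fit metric such as RMSE.
    rng = np.random.default_rng(0)
    return float(rng.random())


def grid_search(data, model_factory, fixed_params, grid_params,
                plot_axes=None, time=False):
    """Score model_factory over the Cartesian product of grid_params."""
    names = list(grid_params)
    values = [np.atleast_1d(grid_params[n]) for n in names]
    results = {}
    for combo in itertools.product(*values):
        params = {**fixed_params, **dict(zip(names, combo))}
        results[combo] = evaluate_model(model_factory(**params), data,
                                        time=time)

    if isinstance(plot_axes, str):
        # One swept parameter (grid_params has a single key) -> line plot.
        xs = values[names.index(plot_axes)]
        plt.plot(xs, [results[(x,)] for x in xs], marker='o')
        plt.xlabel(plot_axes)
    elif plot_axes:
        # Two swept parameters -> heatmap over the two named axes.
        ix, iy = names.index(plot_axes[0]), names.index(plot_axes[1])
        xs, ys = values[ix], values[iy]
        xi = {v: i for i, v in enumerate(xs)}
        yi = {v: i for i, v in enumerate(ys)}
        heat = np.full((len(ys), len(xs)), np.nan)
        for combo, score in results.items():
            heat[yi[combo[iy]], xi[combo[ix]]] = score
        plt.imshow(heat, origin='lower', aspect='auto',
                   extent=[xs[0], xs[-1], ys[0], ys[-1]])
        plt.xlabel(plot_axes[0])
        plt.ylabel(plot_axes[1])
        plt.colorbar(label='score')
    return results
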
Code example #7
File: DecisionTree.py  Project: naesseth/platipus
def run_model(DecisionTree_params, category):
    """Full-scale training, validation and testing using all amines.
    Args:
        DecisionTree_params:         A dictionary of the parameters for the decision tree model.
                                        See initialize() for more information.
        category:                    A string representing the category the model is classified under.
    """

    # Feature names hard-coded for decision tree visualization
    features = [
        '_rxn_M_acid', '_rxn_M_inorganic', '_rxn_M_organic', '_solv_GBL',
        '_solv_DMSO', '_solv_DMF', '_stoich_mmol_org', '_stoich_mmol_inorg',
        '_stoich_mmol_acid', '_stoich_mmol_solv', '_stoich_org/solv',
        '_stoich_inorg/solv', '_stoich_acid/solv', '_stoich_org+inorg/solv',
        '_stoich_org+inorg+acid/solv', '_stoich_org/liq', '_stoich_inorg/liq',
        '_stoich_org+inorg/liq', '_stoich_org/inorg', '_stoich_acid/inorg',
        '_rxn_Temperature_C', '_rxn_Reactiontime_s', '_feat_AvgPol',
        '_feat_Refractivity', '_feat_MaximalProjectionArea',
        '_feat_MaximalProjectionRadius', '_feat_maximalprojectionsize',
        '_feat_MinimalProjectionArea', '_feat_MinimalProjectionRadius',
        '_feat_minimalprojectionsize', '_feat_MolPol',
        '_feat_VanderWaalsSurfaceArea', '_feat_ASA', '_feat_ASA_H',
        '_feat_ASA_P', '_feat_ASA-', '_feat_ASA+',
        '_feat_ProtPolarSurfaceArea', '_feat_Hacceptorcount',
        '_feat_Hdonorcount', '_feat_RotatableBondCount',
        '_raw_standard_molweight', '_feat_AtomCount_N', '_feat_BondCount',
        '_feat_ChainAtomCount', '_feat_RingAtomCount', '_feat_primaryAmine',
        '_feat_secondaryAmine', '_rxn_plateEdgeQ', '_feat_maxproj_per_N',
        '_raw_RelativeHumidity'
    ]

    # Unload common parameters
    config = (DecisionTree_params['configs'][category]
              if DecisionTree_params['configs'] else None)
    verbose = DecisionTree_params['verbose']
    warning = DecisionTree_params['warning']
    stats_path = DecisionTree_params['stats_path']
    result_dict = DecisionTree_params['result_dict']

    model_name = DecisionTree_params['model_name']
    print(f'Running model {model_name}')

    # Unload the training data specific parameters
    num_draws = DecisionTree_params['num_draws']
    train_size = DecisionTree_params['train_size']
    active_learning_iter = DecisionTree_params['active_learning_iter']
    cross_validation = DecisionTree_params['cross_validate']
    full = DecisionTree_params['full_dataset']
    active_learning = DecisionTree_params['active_learning']
    w_hx = DecisionTree_params['with_historical_data']
    w_k = DecisionTree_params['with_k']
    draw_success = DecisionTree_params['draw_success']

    # Specify the desired operation
    fine_tuning = DecisionTree_params['fine_tuning']
    save_model = DecisionTree_params['save_model']
    visualize = DecisionTree_params['visualize']
    to_file = True

    if fine_tuning:
        class_weights = [{
            0: i,
            1: 1.0 - i
        } for i in np.linspace(.05, .95, num=50)]
        class_weights.append('balanced')
        class_weights.append(None)

        max_depths = [i for i in range(9, 26)]
        max_depths.append(None)

        ft_params = {
            'criterion': ['gini', 'entropy'],
            'splitter': ['best', 'random'],
            'max_depth': max_depths,
            'min_samples_split': [i for i in range(2, 11)],
            'min_samples_leaf': [i for i in range(1, 4)],
            'class_weight': class_weights
        }

        result_path = './results/ft_{}.pkl'.format(model_name)

        grid_search(ActiveDecisionTree,
                    ft_params,
                    result_path,
                    num_draws,
                    train_size,
                    active_learning_iter,
                    active_learning=active_learning,
                    w_hx=w_hx,
                    w_k=w_k,
                    draw_success=draw_success,
                    result_dict=result_dict,
                    model_name=model_name)

    else:
        # Load the dataset of the desired size with the desired options
        dataset = process_dataset(num_draw=num_draws,
                                  train_size=train_size,
                                  active_learning_iter=active_learning_iter,
                                  verbose=verbose,
                                  cross_validation=cross_validation,
                                  full=full,
                                  active_learning=active_learning,
                                  w_hx=w_hx,
                                  w_k=w_k,
                                  success=draw_success)

        draws = list(dataset.keys())
        amine_list = list(dataset[0]['x_t'].keys())

        for amine in amine_list:
            # Create the decision tree model instance for the specific amine
            ADT = ActiveDecisionTree(amine=amine,
                                     config=config,
                                     verbose=verbose,
                                     stats_path=stats_path,
                                     result_dict=result_dict,
                                     model_name=model_name)
            for set_id in draws:
                # Unload the randomly drawn dataset values
                x_t, y_t, x_v, y_v, all_data, all_labels = dataset[set_id]['x_t'], \
                                                           dataset[set_id]['y_t'], \
                                                           dataset[set_id]['x_v'], \
                                                           dataset[set_id]['y_v'], \
                                                           dataset[set_id]['all_data'], \
                                                           dataset[set_id]['all_labels']

                # Load the training and validation set into the model
                ADT.load_dataset(set_id, x_t[amine], y_t[amine], x_v[amine],
                                 y_v[amine], all_data[amine],
                                 all_labels[amine])

                # Train the model on the training set
                ADT.train(warning=warning)

                # Conduct active learning with all the observations available in the pool
                if active_learning:
                    ADT.active_learning(num_iter=active_learning_iter,
                                        warning=warning)

                if visualize:
                    # Plot the decision tree
                    # To compile the graph, use the following command in terminal
                    # dot -Tpng "{dt_file_name}.dot" -o "{desired file name}.png"
                    # If using Jupyter Notebook, add ! in front to run command lines
                    file_name = './results/{0:s}_dt_{1:s}_{2:d}.dot'.format(
                        model_name, amine, set_id)
                    export_graphviz(ADT.model,
                                    feature_names=features,
                                    class_names=['FAILURE', 'SUCCESS'],
                                    out_file=file_name,
                                    filled=True,
                                    rounded=True,
                                    special_characters=True)

            if to_file:
                ADT.store_metrics_to_file()

            # Save the model for future reproducibility
            if save_model:
                ADT.save_model(model_name)
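
In the platipus examples (#7 above, and #8, #12, #13 below), grid_search receives the model class together with ft_params, a dict mapping each hyperparameter to a list of candidate values. How the project expands that dict is not shown here; the snippet below uses sklearn's ParameterGrid purely to illustrate the enumeration, and whether platipus relies on it internally is an assumption.

from sklearn.model_selection import ParameterGrid

# Illustration only: expand an ft_params-style dict into every concrete
# configuration (2 * 3 * 3 = 18 dicts for the toy grid below).
ft_params = {
    'criterion': ['gini', 'entropy'],
    'max_depth': [9, 10, None],
    'min_samples_leaf': [1, 2, 3],
}

for config in ParameterGrid(ft_params):
    # Each config is a plain dict, e.g.
    # {'criterion': 'gini', 'max_depth': 9, 'min_samples_leaf': 1},
    # ready to pass to a model constructor as **config.
    print(config)
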
Code example #8
def run_model(RandomForest_params, category):
    """Full-scale training, validation and testing using all amines.
    Args:
        RandomForest_params:         A dictionary of the parameters for the random forest model.
                                        See initialize() for more information.
        category:                    A string representing the category the model is classified under.
    """

    # Unload common parameters
    config = (RandomForest_params['config'][category]
              if RandomForest_params['config'] else None)
    verbose = RandomForest_params['verbose']
    warning = RandomForest_params['warning']
    stats_path = RandomForest_params['stats_path']
    result_dict = RandomForest_params['result_dict']

    model_name = RandomForest_params['model_name']
    print(f'Running model {model_name}')

    # Unload the training data specific parameters
    num_draws = RandomForest_params['num_draws']
    train_size = RandomForest_params['train_size']
    cross_validation = RandomForest_params['cross_validate']
    active_learning = RandomForest_params['active_learning']
    w_hx = RandomForest_params['with_historical_data']
    w_k = RandomForest_params['with_k']
    active_learning_iter = RandomForest_params['active_learning_iter']
    full = RandomForest_params['full_dataset']
    draw_success = RandomForest_params['draw_success']

    # Specify the desired operation
    fine_tuning = RandomForest_params['fine_tuning']
    save_model = RandomForest_params['save_model']
    to_file = True

    if fine_tuning:
        class_weights = [{
            0: i,
            1: 1.0 - i
        } for i in np.linspace(.05, .95, num=50)]
        class_weights.append('balanced')
        class_weights.append(None)

        ft_params = {
            'n_estimators': [100, 200, 500, 1000],
            'criterion': ['gini', 'entropy'],
            'max_depth': [i for i in range(1, 9)],
            'max_features': ['auto', 'sqrt', 'log2', None],
            'bootstrap': [True],
            'min_samples_leaf': [i for i in range(1, 6)],
            'min_samples_split': [i for i in range(2, 11)],
            'ccp_alpha': [.1 * i for i in range(1)],
            'class_weight': class_weights
        }

        result_path = './results/ft_{}.pkl'.format(model_name)

        grid_search(ActiveRandomForest,
                    ft_params,
                    result_path,
                    num_draws,
                    train_size,
                    active_learning_iter,
                    active_learning=active_learning,
                    w_hx=w_hx,
                    w_k=w_k,
                    draw_success=draw_success,
                    result_dict=result_dict,
                    model_name=model_name)

    else:
        # Load the dataset of the desired size with the desired options
        dataset = process_dataset(num_draw=num_draws,
                                  train_size=train_size,
                                  active_learning_iter=active_learning_iter,
                                  verbose=verbose,
                                  cross_validation=cross_validation,
                                  full=full,
                                  active_learning=active_learning,
                                  w_hx=w_hx,
                                  w_k=w_k,
                                  success=draw_success)

        draws = list(dataset.keys())
        amine_list = list(dataset[0]['x_t'].keys())

        for amine in amine_list:

            # Create the RandomForest model instance for the specific amine
            ARF = ActiveRandomForest(amine=amine,
                                     config=config,
                                     verbose=verbose,
                                     stats_path=stats_path,
                                     result_dict=result_dict,
                                     model_name=model_name)

            for set_id in draws:
                # Unload the randomly drawn dataset values
                x_t, y_t, x_v, y_v, all_data, all_labels = dataset[set_id]['x_t'], \
                                                           dataset[set_id]['y_t'], \
                                                           dataset[set_id]['x_v'], \
                                                           dataset[set_id]['y_v'], \
                                                           dataset[set_id]['all_data'], \
                                                           dataset[set_id]['all_labels']

                # Load the training and validation set into the model
                ARF.load_dataset(set_id, x_t[amine], y_t[amine], x_v[amine],
                                 y_v[amine], all_data[amine],
                                 all_labels[amine])

                # Train the model on the training set
                ARF.train(warning=warning)

                # Conduct active learning with all the observations available in the pool
                if active_learning:
                    ARF.active_learning(num_iter=active_learning_iter,
                                        warning=warning)

            if to_file:
                ARF.store_metrics_to_file()

            # Save the model for future reproducibility
            if save_model:
                ARF.save_model(model_name)
Code example #9
File: LinearSVM.py  Project: naesseth/platipus
def run_model(LinearSVM_params, category):
    """Full-scale training, validation and testing using all amines.

    Args:
        LinearSVM_params:         A dictionary of the parameters for the LinearSVM model.
                                See initialize() for more information.
        category:           A string representing the category the model is classified under.
     """

    # Unload common parameters
    config = (LinearSVM_params['configs'][category]
              if LinearSVM_params['configs'] else None)
    verbose = LinearSVM_params['verbose']
    warning = LinearSVM_params['warning']
    stats_path = LinearSVM_params['stats_path']

    model_name = LinearSVM_params['model_name']
    print(f'Running model {model_name}')

    # Unload the training data specific parameters
    train_size = LinearSVM_params['train_size']
    active_learning_iter = LinearSVM_params['active_learning_iter']
    cross_validation = LinearSVM_params['cross_validate']
    full = LinearSVM_params['full_dataset']
    active_learning = LinearSVM_params['active_learning']
    w_hx = LinearSVM_params['with_historical_data']
    w_k = LinearSVM_params['with_k']

    # Specify the desired operation
    fine_tuning = LinearSVM_params['fine_tuning']
    save_model = LinearSVM_params['save_model']
    to_params = True

    if fine_tuning:
        class_weights = [{
            0: i,
            1: 1.0 - i
        } for i in np.linspace(.1, .9, num=9)]
        class_weights.append('balanced')
        class_weights.append(None)

        ft_params = {
            # 'penalty': ['l1', 'l2'],
            'penalty': ['l1'],
            # 'loss': ['hinge', 'squared_hinge'],
            'loss': ['squared_hinge'],
            'dual': [False],
            # 'C': [.001, .01, .1, 1, 10],
            'C': [i for i in np.linspace(0.001, 0.01, num=10)],
            # 'tol': [.0001, .001, .01, .1, 1],
            'tol': [i for i in np.linspace(0.01, 0.1, num=10)],
            'fit_intercept': [True],
            'class_weight': class_weights,
        }

        _ = grid_search(ActiveLinearSVM,
                        ft_params,
                        train_size,
                        active_learning_iter,
                        active_learning=active_learning,
                        w_hx=w_hx,
                        w_k=w_k,
                        info=True)
    else:
        # Load the dataset of the desired size with the desired options
        amine_list, x_t, y_t, x_v, y_v, all_data, all_labels = process_dataset(
            train_size=train_size,
            active_learning_iter=active_learning_iter,
            verbose=verbose,
            cross_validation=cross_validation,
            full=full,
            active_learning=active_learning,
            w_hx=w_hx,
            w_k=w_k)

        # print(amine_list)
        for amine in amine_list:
            if cross_validation:
                # print("Training and cross validation on {} amine.".format(amine))

                # Create the LinearSVM model instance for the specific amine
                ALSVM = ActiveLinearSVM(amine=amine,
                                        config=config,
                                        verbose=verbose,
                                        stats_path=stats_path,
                                        model_name=model_name)

                # Load the training and validation set into the model
                ALSVM.load_dataset(x_t[amine], y_t[amine], x_v[amine],
                                   y_v[amine], all_data[amine],
                                   all_labels[amine])

                # Train the model on the training set
                ALSVM.train(warning=warning)

                # Conduct active learning with all the observations available in the pool
                if active_learning:
                    ALSVM.active_learning(num_iter=active_learning_iter,
                                          warning=warning,
                                          to_params=to_params)
                else:
                    ALSVM.store_metrics_to_params()

                # Save the model for future reproducibility
                if save_model:
                    ALSVM.save_model(model_name)
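
The class_weight sweep shared by examples #7, #8, and #9 builds one weight dict per grid point over the two-class weights, then appends sklearn's 'balanced' mode and None (uniform weights). A quick check of what the list in this example contains:

import numpy as np

class_weights = [{0: i, 1: 1.0 - i} for i in np.linspace(.1, .9, num=9)]
class_weights.append('balanced')
class_weights.append(None)

# The first candidate weights class 0 at 0.1 and class 1 at 0.9, so errors
# on class 1 (SUCCESS) cost nine times as much as errors on class 0.
print(class_weights[0])
print(len(class_weights))  # 11 candidates in total
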
Code example #10
    SkipHandler(EloHierarchicalModel(KC=1, KI=0.75, alpha=0.8, beta=0.02)),
    # EloHierarchicalModel(alpha=0.25, beta=0.02),
    # EloConcepts(),
], dont=0, force_evaluate=0, force_run=0, runs=5, hue_order=False, answer_filters={
    "long (50) student": data.filter_students_with_many_answers(),
    "long (30) student": data.filter_students_with_many_answers(number_of_answers=30),
    "long (11) student": data.filter_students_with_many_answers(number_of_answers=11),
    "response >5s-0.5": data.transform_response_by_time(((5, 0.5),))
},
               # palette=sns.color_palette()[:2] * 4
               )



# evaluator.Evaluator(d, EloHierarchicalModel(alpha=0.25, beta=0.02)).brier_graphs()
# evaluator.Evaluator(d, EloPriorCurrentModel()).brier_graphs()
# evaluator.Evaluator(d, ItemAvgModel()).brier_graphs()

if 0:
    utils.grid_search(d, EloHierarchicalModel,
                      # {"KC": 1, "KI": 0.75}, {
                      {"alpha": 0.25, "beta": 0.02}, {
        # "alpha": np.arange(0.2, 1.3, 0.2),
        # "beta": np.arange(0., 0.2, 0.02),
        "KC": np.arange(1.5, 5.0, 0.25),
        "KI": np.arange(1.25, 4.5, 0.25),
    }, plot_axes=["KC", "KI"])
    # }, plot_axes=["alpha", "beta"])

plt.show()
Code example #11
File: mathgarden.py  Project: thran/experiments2.0
def grid_search_K():
    grid_search(data, lambda **kwargs: TimeCombiner(AvgModel(), BasicTimeModel(**kwargs)),
                {"alpha": 0.6, "beta": 0.1},
                {"K": np.arange(0, 1, 0.05)},
                plot_axes='K', time=True,
                )
Code example #12
def run_model(GradientBoosting_params, category):
    """Full-scale training, validation and testing using all amines.
    Args:
        GradientBoosting_params:         A dictionary of the parameters for the Gradient Boosting model.
                                            See initialize() for more information.
        category:                        A string representing the category the model is classified under.
    """

    # Unload common parameters
    config = (GradientBoosting_params['config'][category]
              if GradientBoosting_params['config'] else None)
    verbose = GradientBoosting_params['verbose']
    warning = GradientBoosting_params['warning']
    stats_path = GradientBoosting_params['stats_path']
    result_dict = GradientBoosting_params['result_dict']

    model_name = GradientBoosting_params['model_name']
    print(f'Running model {model_name}')

    # Unload the training data specific parameters
    num_draws = GradientBoosting_params['num_draws']
    train_size = GradientBoosting_params['train_size']
    active_learning_iter = GradientBoosting_params['active_learning_iter']
    active_learning = GradientBoosting_params['active_learning']
    cross_validation = GradientBoosting_params['cross_validate']
    full = GradientBoosting_params['full_dataset']
    w_hx = GradientBoosting_params['with_historical_data']
    w_k = GradientBoosting_params['with_k']
    draw_success = GradientBoosting_params['draw_success']

    # Specify the desired operation
    fine_tuning = GradientBoosting_params['fine_tuning']
    save_model = GradientBoosting_params['save_model']
    to_file = True

    if fine_tuning:
        ft_params = {
            'loss': ['deviance', 'exponential'],
            'learning_rate': [0.1, 0.01, 0.001],
            'n_estimators': [100, 200, 500, 1000],
            'criterion': ['friedman_mse', 'mse', 'mae'],
            'max_depth': [i for i in range(1, 9)],
            'max_features': ['auto', 'sqrt', 'log2', None],
            'min_samples_leaf': [1, 2, 3],
            'min_samples_split': [2, 5, 10],
            'ccp_alpha': [.1 * i for i in range(1)]
        }

        result_path = './results/ft_{}.pkl'.format(model_name)

        grid_search(ActiveGradientBoosting,
                    ft_params,
                    result_path,
                    num_draws,
                    train_size,
                    active_learning_iter,
                    active_learning=active_learning,
                    w_hx=w_hx,
                    w_k=w_k,
                    draw_success=draw_success,
                    result_dict=result_dict,
                    model_name=model_name)

    else:
        # Load the dataset of the desired size with the desired options
        dataset = process_dataset(num_draw=num_draws,
                                  train_size=train_size,
                                  active_learning_iter=active_learning_iter,
                                  verbose=verbose,
                                  cross_validation=cross_validation,
                                  full=full,
                                  active_learning=active_learning,
                                  w_hx=w_hx,
                                  w_k=w_k,
                                  success=draw_success)

        draws = list(dataset.keys())
        amine_list = list(dataset[0]['x_t'].keys())

        # print(training_batches.keys())
        for amine in amine_list:
            if amine == 'XZUCBFLUEBDNSJ-UHFFFAOYSA-N' and draw_success:
                # Skipping the amine with only 1 successful experiment overall
                # Can't run 4-ii and 5-ii models on this amine
                continue
            else:
                # Create the GradientBoosting model instance for the specific amine
                AGB = ActiveGradientBoosting(amine=amine,
                                             config=config,
                                             verbose=verbose,
                                             stats_path=stats_path,
                                             result_dict=result_dict,
                                             model_name=model_name)
                for set_id in draws:
                    # Unload the randomly drawn dataset values
                    x_t, y_t, x_v, y_v, all_data, all_labels = dataset[set_id]['x_t'], \
                                                               dataset[set_id]['y_t'], \
                                                               dataset[set_id]['x_v'], \
                                                               dataset[set_id]['y_v'], \
                                                               dataset[set_id]['all_data'], \
                                                               dataset[set_id]['all_labels']
                    # Load the training and validation set into the model
                    AGB.load_dataset(set_id, x_t[amine], y_t[amine],
                                     x_v[amine], y_v[amine], all_data[amine],
                                     all_labels[amine])

                    # Train the model on the training set
                    AGB.train(warning=warning)

                    # Conduct active learning with all the observations available in the pool
                    if active_learning:
                        AGB.active_learning(num_iter=active_learning_iter,
                                            warning=warning)

                if to_file:
                    AGB.store_metrics_to_file()

                # Save the model for future reproducibility
                if save_model:
                    AGB.save_model(model_name)
Code example #13
File: KNN.py  Project: naesseth/platipus
def run_model(KNN_params, category):
    """Full-scale training, validation and testing using all amines.

    Args:
        KNN_params:         A dictionary of the parameters for the KNN model.
                                See initialize() for more information.
        category:           A string representing the category the model is classified under.
    """

    # Unload common parameters
    config = KNN_params['configs'][category] if KNN_params['configs'] else None
    verbose = KNN_params['verbose']
    warning = KNN_params['warning']
    stats_path = KNN_params['stats_path']
    result_dict = KNN_params['result_dict']

    model_name = KNN_params['model_name']
    print(f'Running model {model_name}')

    # Unload the training data specific parameters
    num_draws = KNN_params['num_draws']
    train_size = KNN_params['train_size']
    active_learning_iter = KNN_params['active_learning_iter']
    cross_validation = KNN_params['cross_validate']
    full = KNN_params['full_dataset']
    active_learning = KNN_params['active_learning']
    w_hx = KNN_params['with_historical_data']
    w_k = KNN_params['with_k']
    draw_success = KNN_params['draw_success']

    # Specify the desired operation
    fine_tuning = KNN_params['fine_tuning']
    save_model = KNN_params['save_model']
    to_file = True

    if fine_tuning:
        # Set all possible combinations
        ft_params = {
            'n_neighbors': [i for i in range(1, 10)],
            'leaf_size': [i for i in range(1, 51)],
            'p': [i for i in range(1, 4)]
        }

        result_path = './results/ft_{}.pkl'.format(model_name)

        grid_search(
            ActiveKNN,
            ft_params,
            result_path,
            num_draws,
            train_size,
            active_learning_iter,
            active_learning=active_learning,
            w_hx=w_hx,
            w_k=w_k,
            draw_success=draw_success,
            result_dict=result_dict,
            model_name=model_name,
        )

    else:
        # Load the dataset of the desired size with the desired options
        dataset = process_dataset(
            num_draw=num_draws,
            train_size=train_size,
            active_learning_iter=active_learning_iter,
            verbose=verbose,
            cross_validation=cross_validation,
            full=full,
            active_learning=active_learning,
            w_hx=w_hx,
            w_k=w_k,
            success=draw_success,
        )

        draws = list(dataset.keys())
        amine_list = list(dataset[0]['x_t'].keys())

        for amine in amine_list:
            # Create the KNN model instance for the specific amine
            KNN = ActiveKNN(amine=amine,
                            config=config,
                            verbose=verbose,
                            stats_path=stats_path,
                            result_dict=result_dict,
                            model_name=model_name)
            for set_id in draws:
                # Unload the randomly drawn dataset values
                x_t, y_t, x_v, y_v, all_data, all_labels = dataset[set_id]['x_t'], \
                                                           dataset[set_id]['y_t'], \
                                                           dataset[set_id]['x_v'], \
                                                           dataset[set_id]['y_v'], \
                                                           dataset[set_id]['all_data'], \
                                                           dataset[set_id]['all_labels']

                # Load the training and validation set into the model
                KNN.load_dataset(set_id, x_t[amine], y_t[amine], x_v[amine], y_v[amine], all_data[amine],
                                 all_labels[amine])

                # Train the model on the training set
                KNN.train(warning=warning)

                # Conduct active learning with all the observations available in the pool
                if active_learning:
                    KNN.active_learning(num_iter=active_learning_iter, warning=warning)

            if to_file:
                KNN.store_metrics_to_file()

            # Save the model for future reproducibility
            if save_model:
                KNN.save_model(model_name)
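
Across examples #7, #8, #12, and #13, process_dataset is always indexed the same way: dataset[set_id]['x_t'][amine] and so on. The nesting below is inferred from those access patterns; the amine key and array shapes are fabricated placeholders (the 51 feature columns only mirror the length of the hard-coded feature list in example #7), not data from the project.

import numpy as np

# Inferred structure of the process_dataset return value (placeholders).
dataset = {
    0: {  # one entry per random draw (set_id)
        'x_t': {'amine_A': np.zeros((10, 51))},       # training features
        'y_t': {'amine_A': np.zeros(10)},             # training labels
        'x_v': {'amine_A': np.zeros((40, 51))},       # validation/pool features
        'y_v': {'amine_A': np.zeros(40)},             # validation/pool labels
        'all_data': {'amine_A': np.zeros((50, 51))},  # full feature matrix
        'all_labels': {'amine_A': np.zeros(50)},      # full label vector
    },
}

draws = list(dataset.keys())                 # [0]
amine_list = list(dataset[0]['x_t'].keys())  # ['amine_A']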