def crossval_multilabel_dataset(path_to_data,
                                examples_count,
                                folds_count,
                                wl_iter_range,
                                k_L_range,
                                prediction_threshold_range,
                                output_dir,
                                window_size=None):
    """Cross-validate the multilabel data over a grid of settings.

    For every (wl_iterations, prediction_threshold) pair the matching
    SVM-light file is read, ``crossval.d_fold_crossval`` is called in
    multilabel mode, and the model it returns is printed.

    Args:
        path_to_data: Prefix prepended verbatim to the data file name
            (plain string concatenation, so a directory path needs its
            trailing separator).
        examples_count: Forwarded to ``crossval.d_fold_crossval``.
        folds_count: Forwarded to ``crossval.d_fold_crossval``.
        wl_iter_range: Iterable of values; each one is substituted into
            the data file name and stored under "wl_iterations".
        k_L_range: Forwarded to ``crossval.d_fold_crossval``.
        prediction_threshold_range: Iterable of thresholds. It is
            iterated afresh for every wl_iterations value, so a one-shot
            iterator would only serve the first one.
        output_dir: Forwarded to ``crossval.d_fold_crossval``.
        window_size: Optional; when truthy it is stored in the base model
            under the key "w".

    Returns:
        None. The result of each run is only printed.
    """
    data_file = path_to_data + "multilabel_svm_light_data_wl_{0}"
    for wl_iterations in wl_iter_range:
        for prediction_threshold in prediction_threshold_range:
            # Deliberately re-read for every combination rather than once
            # per wl_iterations: the reader may hand back a one-shot
            # iterator (TODO confirm against dataset_manager).
            data = dataset_manager.read_svm_light_bool_data(
                data_file.format(wl_iterations))
            base_model = {
                "wl_iterations": wl_iterations,
                "pred_threshold": prediction_threshold,
            }
            if window_size:
                base_model["w"] = window_size
            best_model = crossval.d_fold_crossval(
                data,
                examples_count,
                folds_count,
                k_L_range,
                output_dir,
                base_model=base_model,
                multilabel=True,
                multilabel_prediction_threshold=prediction_threshold)
            # Single-argument call form: prints the same text as the old
            # print statement on Python 2 and is also valid on Python 3.
            print("Best model: %s" % (best_model,))
# Example #2
# 0
def prepare_target_with_predictions(svm_light_val_file, predictions_file):
    """Pair the true targets of a validation set with predicted values.

    Args:
        svm_light_val_file: Path handed to
            ``dataset_manager.read_svm_light_bool_data``.
        predictions_file: Path of a text file; every line of it is parsed
            with ``float``.

    Returns:
        A ``(real_targets, pred_targets)`` tuple of lazy iterators. The
        first yields element 0 of each record produced by the reader, the
        second yields ``float(line)`` for each line of *predictions_file*.
        Parse errors therefore surface while iterating, not here.
    """
    val_data = dataset_manager.read_svm_light_bool_data(svm_light_val_file)

    # readlines() is eager, so the handle can be closed before returning
    # instead of being left open for the garbage collector.
    with open(predictions_file) as pred_f:
        pred_lines = pred_f.readlines()

    real_targets = (record[0] for record in val_data)
    pred_targets = (float(line) for line in pred_lines)

    return real_targets, pred_targets
def crossval_multilabel_dataset(path_to_data, examples_count, folds_count,
                                wl_iter_range, k_L_range,
                                prediction_threshold_range, output_dir,
                                window_size=None):
    """Run multilabel d-fold cross-validation for each parameter pair.

    Loops over ``wl_iter_range`` and, inside it, over
    ``prediction_threshold_range``. Each pass reads the SVM-light file for
    the current wl_iterations value, calls ``crossval.d_fold_crossval``
    with ``multilabel=True`` and prints what it returns.

    Args:
        path_to_data: String prefix joined to the data file name by plain
            concatenation (no path separator is inserted).
        examples_count: Passed through to ``crossval.d_fold_crossval``.
        folds_count: Passed through to ``crossval.d_fold_crossval``.
        wl_iter_range: Iterable of values formatted into the file name and
            recorded in the base model as "wl_iterations".
        k_L_range: Passed through to ``crossval.d_fold_crossval``.
        prediction_threshold_range: Iterable of thresholds, walked once
            per wl_iterations value; each is recorded as "pred_threshold"
            and also passed as ``multilabel_prediction_threshold``.
        output_dir: Passed through to ``crossval.d_fold_crossval``.
        window_size: Optional; added to the base model as "w" only when
            truthy.

    Returns:
        None. Each best model is printed, not collected.
    """
    file_template = path_to_data + "multilabel_svm_light_data_wl_{0}"
    for wl_iterations in wl_iter_range:
        for prediction_threshold in prediction_threshold_range:
            # Read again on every pass; the reader's result may be a
            # one-shot iterator (TODO confirm), so it is not hoisted.
            data = dataset_manager.read_svm_light_bool_data(
                file_template.format(wl_iterations))

            base_model = {"wl_iterations": wl_iterations,
                          "pred_threshold": prediction_threshold}
            if window_size:
                base_model["w"] = window_size

            best_model = crossval.d_fold_crossval(
                data, examples_count, folds_count, k_L_range, output_dir,
                base_model=base_model,
                multilabel=True,
                multilabel_prediction_threshold=prediction_threshold)

            # One pre-formatted argument keeps the output identical on
            # Python 2 while also parsing on Python 3.
            print("Best model: %s" % (best_model,))
# Example #4
# 0
def crossval_big_dataset():
    """Cross-validate the hard-coded A_vs_M data set for each WL depth.

    For every value in ``wl_iter_range`` the matching SVM-light file is
    read, its labels are remapped to +1/-1, and
    ``crossval.d_fold_crossval`` is run; the model it returns is printed.

    Returns:
        None. Results are only printed.
    """
    path_to_data = "/media/ivan/204C66C84C669874/Uni-Bonn/Thesis/Main/6_Results/svm/nci_hiv/data/A_vs_M/"
    examples_count = 1503
    folds_count = 10
    data_file = path_to_data + "svm_light_data_wl_{0}"

    # NOTE(review): wl_iter_range, k_L_range and output_dir are not defined
    # in this function; presumably they are module-level names -- confirm.
    for wl_iterations in wl_iter_range:
        records = dataset_manager.read_svm_light_bool_data(
            data_file.format(wl_iterations))
        # TODO: only for A_vs_M -- label 2 becomes +1, anything else -1.
        # Stays lazy, as before; only elements 0 and 1 of a record are kept.
        data = ((1 if rec[0] == 2 else -1, rec[1]) for rec in records)
        base_model = {"wl_iterations": wl_iterations}
        best_model = crossval.d_fold_crossval(
            data, examples_count, folds_count, k_L_range, output_dir,
            base_model=base_model)
        # Single-argument call form: same output as the old print
        # statement on Python 2, and valid syntax on Python 3.
        print("Best model: %s" % (best_model,))