Esempio n. 1
0
def get_positive_neighbors_models(in_svm_data_file_template, output_file, wl_iter_range,
                                       k_range=None, radius_range=None, override_tergets_function=None):
    """Yield one positive-neighbors model per (WL iteration, parameter) pair.

    For every ``w`` in ``wl_iter_range`` the data file named by
    ``in_svm_data_file_template.format(w)`` is loaded once, and
    ``get_positive_neighbors_counts`` is evaluated for every value of the
    active parameter range. Each model is written to ``output_file`` as a
    ``"<dict>,"`` line (flushed immediately), printed, and then yielded.

    Args:
        in_svm_data_file_template: format string taking the WL iteration as
            its single positional field.
        output_file: path of the log file; it is opened in "w" mode, so any
            existing content is truncated.
        wl_iter_range: iterable of WL iteration values.
        k_range: iterable of ``k`` values (mutually exclusive with
            ``radius_range``).
        radius_range: iterable of ``radius`` values (mutually exclusive with
            ``k_range``).
        override_tergets_function: optional callable applied to the loaded
            targets ``y`` before they are used. The misspelled name is kept
            because it is part of the public keyword interface.

    Yields:
        dict with the keys "wl_iterations", "avg_count", "avg_prop" and either
        "k" or "radius", depending on which range was supplied.

    Raises:
        AssertionError: if not exactly one of ``k_range`` / ``radius_range``
            is truthy. Because this function is a generator, the check runs
            on the first iteration, not at call time.
    """
    assert bool(k_range) ^ bool(radius_range)

    param_is_k = bool(k_range)
    param_range = k_range if param_is_k else radius_range

    def compute_model(X, y, wl_iteration, param):
        # The WL iteration is passed in explicitly rather than read from the
        # enclosing loop variable, so the helper has no hidden dependency.
        _, avg_count, avg_prop = get_positive_neighbors_counts(
            X, y,
            k=param if param_is_k else None,
            radius=None if param_is_k else param)
        model = {"wl_iterations": wl_iteration, "avg_count": avg_count, "avg_prop": avg_prop}
        model["k" if param_is_k else "radius"] = param
        return model

    with open(output_file, "w") as fl:
        for w in wl_iter_range:
            data_file = in_svm_data_file_template.format(w)
            X, y = dataset_manager.read_svm_light_bool_data_to_sparse(data_file)
            if override_tergets_function:
                y = override_tergets_function(y)
            for param in param_range:
                model = compute_model(X, y, w, param)
                fl.write("{0},\n".format(model))
                # Flush per model so partial results survive an interrupted run.
                fl.flush()
                # Single-argument parenthesised form: same output under the
                # Python 2 print statement, and valid Python 3 syntax.
                print(model)
                yield model
if __name__ == '__main__':
    # Script entry point: for every WL iteration, increase the neighbourhood
    # size n until the 10-fold cross-validation result reaches 0.95, logging
    # each (w, n, prediction) triple to the output file and to stdout.

    # Data directory; the log file is written here as well.
    # Previously used locations:
    #     path = "/media/ivan/204C66C84C669874/Uni-Bonn/Thesis/Main/6_Results/svm/mutagenicity/"
    #     path = "/media/ivan/204C66C84C669874/Uni-Bonn/Thesis/Main/6_Results/svm/nci_hiv/"
    path = "/home/stud/ivanovi/Thesis/svm/nci_hiv/"

    # Dataset description to use. Alternatives:
    #     wl_props = helpers.svm_light_format_datasets["mutagenicity"]
    #     wl_props = helpers.svm_light_format_datasets["nci-hiv"]["A-vs-M"]
    #     wl_props = helpers.svm_light_format_datasets["nci-hiv"]["A-vs-I"]
    wl_props = helpers.svm_light_format_datasets["nci-hiv"]["AM-vs-I"]

    output_file = path + "positive_neighbors_AM_vs_I"

    # Single-argument parenthesised prints give the same output under the
    # Python 2 print statement and are valid Python 3 syntax.
    print("Start")

    with open(output_file, "w") as fl:
        for w in range(0, 12):
            data_file = path + wl_props["file_template"].format(w)
            X, y = dataset_manager.read_svm_light_bool_data_to_sparse(data_file)
            # Label remapping depends on the selected task:
            #     y = np.vectorize(lambda t: 1 if t == 2 else -1)(y) # Only for A_vs_M
            y = np.vectorize(lambda t: 1 if t == 2 else t)(y) # Only for AM_vs_I and A_vs_I
            for n in range(1, 500):
                prediction = PositiveNeighbors.cross_validate(X, y, n_neighbors=n, folds_count=10, approximate=False)
                # "%s" applies str() to each value, matching what the original
                # comma-separated print statement emitted.
                print("%s %s %s" % (w, n, prediction))
                fl.write("{0}, {1}, {2}\n".format(w, n, prediction))
                # Flush per step so partial results survive an interrupted run.
                fl.flush()
                # Stop growing n for this w once the threshold is reached.
                if prediction >= 0.95:
                    break

    print("Done")
Esempio n. 3
0
    # Data directory; the log file is written here as well.
    # Previously used location:
    #     path = "/media/ivan/204C66C84C669874/Uni-Bonn/Thesis/Main/6_Results/svm/nci_hiv/"
    path = "/home/stud/ivanovi/Thesis/svm/nci_hiv/"

    # Dataset description to use. Alternatives:
    #     wl_props = helpers.svm_light_format_datasets["mutagenicity"]
    #     wl_props = helpers.svm_light_format_datasets["nci-hiv"]["A-vs-M"]
    #     wl_props = helpers.svm_light_format_datasets["nci-hiv"]["A-vs-I"]
    wl_props = helpers.svm_light_format_datasets["nci-hiv"]["AM-vs-I"]

    output_file = path + "positive_neighbors_AM_vs_I"

    # Single-argument parenthesised prints give the same output under the
    # Python 2 print statement and are valid Python 3 syntax.
    print("Start")

    with open(output_file, "w") as fl:
        for w in range(0, 12):
            data_file = path + wl_props["file_template"].format(w)
            X, y = dataset_manager.read_svm_light_bool_data_to_sparse(
                data_file)
            # Label remapping depends on the selected task:
            #     y = np.vectorize(lambda t: 1 if t == 2 else -1)(y) # Only for A_vs_M
            y = np.vectorize(lambda t: 1 if t == 2 else t)(
                y)  # Only for AM_vs_I and A_vs_I
            for n in range(1, 500):
                prediction = PositiveNeighbors.cross_validate(
                    X, y, n_neighbors=n, folds_count=10, approximate=False)
                # "%s" applies str() to each value, matching what the original
                # comma-separated print statement emitted.
                print("%s %s %s" % (w, n, prediction))
                fl.write("{0}, {1}, {2}\n".format(w, n, prediction))
                # Flush per step so partial results survive an interrupted run.
                fl.flush()
                # Stop growing n for this w once the threshold is reached.
                if prediction >= 0.95:
                    break

    print("Done")