Exemple #1
0
def prediction(pid, tasks, queue, results, model_type, lib_path_server,
               criterion):
    """Worker loop: train on each job from ``queue`` and score ``tasks``.

    For every non-``None`` item read from ``queue`` the model is re-trained
    with that item expanded as keyword arguments. Test tasks are then read
    from ``tasks`` until a ``None`` sentinel is seen; each task must provide
    the keys ``'X_test'`` and ``'y_test'``. The accumulated u65/u80 scores of
    that batch are pushed to ``results`` as ``{'u65': ..., 'u80': ...}`` and
    the training item is marked done on ``queue``.

    A ``None`` read from ``queue`` ends the worker.

    :param pid: identifier of this worker, only used in printed messages.
    :param tasks: queue of test tasks (dicts), ``None``-terminated per batch.
    :param queue: queue of training keyword dicts, ``None``-terminated.
    :param results: queue receiving one score dict per training item.
    :param model_type: model key forwarded to ``__factory_model``.
    :param lib_path_server: forwarded as ``add_path_matlab``.
    :param criterion: decision criterion forwarded to ``model.evaluate``.
    """
    classifier = __factory_model(model_type,
                                 init_matlab=True,
                                 add_path_matlab=lib_path_server,
                                 DEBUG=False)

    training = queue.get()
    while training is not None:
        classifier.learn(**training)
        total_u65 = total_u80 = 0

        # Drain the test tasks belonging to this training job.
        task = tasks.get()
        while task is not None:
            predicted, _ = classifier.evaluate(task['X_test'],
                                               criterion=criterion)
            print("(pid, prediction, ground-truth) (",
                  pid,
                  predicted,
                  task["y_test"],
                  ")",
                  flush=True)
            # Only predictions containing the true label earn a score.
            if task['y_test'] in predicted:
                total_u65 += u65(predicted)
                total_u80 += u80(predicted)
            task = tasks.get()

        results.put({'u65': total_u65, 'u80': total_u80})
        queue.task_done()
        training = queue.get()

    print("Worker PID finished", pid, flush=True)
def computing_precise_vs_imprecise(in_path=None,
                                   ell_optimal=0.1,
                                   cv_n_fold=10,
                                   seeds=None,
                                   lib_path_server=None,
                                   model_type_precise='lda',
                                   model_type_imprecise='ilda',
                                   scaling=True):
    """Compare a precise and an imprecise model on the "hard" test instances.

    The experiment is repeated ``cv_n_fold`` times; each repetition runs a
    shuffled ``KFold`` with ``cv_n_fold`` splits seeded by
    ``seeds[repetition]``. Within a fold, only the test instances for which
    the imprecise model returns more than one label are counted. For those
    instances the function records how often the ground truth belongs to the
    imprecise prediction and how often it equals the precise prediction.
    Per-fold ratios are averaged per repetition, then over repetitions, and
    the final figures are written to the debug log (nothing is returned).

    :param in_path: CSV file to load; when ``None`` the bundled
        ``'iris.data'`` set is loaded through ``export_data_set``.
    :param ell_optimal: ``ell`` value passed to the imprecise model's
        ``learn``.
    :param cv_n_fold: number of folds AND number of repetitions.
    :param seeds: one seed per repetition; generated with
        ``generate_seeds(cv_n_fold)`` when ``None``.
    :param lib_path_server: forwarded as ``add_path_matlab``.
    :param model_type_precise: key for ``__factory_model_precise``.
    :param model_type_imprecise: key for ``__factory_model``.
    :param scaling: apply ``normalize_minmax`` to the features when true.
    """
    data = export_data_set('iris.data') if in_path is None else pd.read_csv(
        in_path)
    logger = create_logger("computing_precise_vs_imprecise", True)
    logger.info('Training dataset and models (%s, %s, %s, %s)', in_path,
                model_type_precise, model_type_imprecise, ell_optimal)
    # Last column is the label, everything before it is a feature.
    X = data.iloc[:, :-1].values
    if scaling:
        X = normalize_minmax(X)
    y = np.array(data.iloc[:, -1].tolist())
    seeds = generate_seeds(cv_n_fold) if seeds is None else seeds
    model_impr = __factory_model(model_type_imprecise,
                                 init_matlab=True,
                                 add_path_matlab=lib_path_server,
                                 DEBUG=False)
    model_prec = __factory_model_precise(model_type_precise,
                                         store_covariance=True)
    avg_imprecise, avg_precise, n_real_times = 0, 0, 0
    # Named `repetition` (not `time`) so the `time` module is not shadowed.
    for repetition in range(cv_n_fold):
        kf = KFold(n_splits=cv_n_fold,
                   random_state=seeds[repetition],
                   shuffle=True)
        imprecise_mean, precise_mean, n_real_fold = 0, 0, 0
        for idx_train, idx_test in kf.split(y):
            X_cv_train, y_cv_train = X[idx_train], y[idx_train]
            X_cv_test, y_cv_test = X[idx_test], y[idx_test]
            model_impr.learn(X=X_cv_train, y=y_cv_train, ell=ell_optimal)
            model_prec.fit(X_cv_train, y_cv_train)
            n_real_tests, time_precise, time_imprecise = 0, 0, 0
            for i, test in enumerate(X_cv_test):
                evaluate_imp, _ = model_impr.evaluate(test)
                evaluate = model_prec.predict([test])
                # Only instances with a set-valued (indeterminate)
                # prediction take part in the comparison.
                if len(evaluate_imp) > 1:
                    n_real_tests += 1
                    if y_cv_test[i] in evaluate_imp:
                        time_imprecise += 1
                    if y_cv_test[i] in evaluate:
                        time_precise += 1
                logger.debug(
                    "(time, iTest, ellOptimal, cautious, prediction, ground-truth)(%s, %s, %s, %s, %s, %s)",
                    repetition, i, ell_optimal, evaluate_imp, evaluate,
                    y_cv_test[i])
            logger.debug(
                "(time, ellOptimal, nRealTests, timeImprecise, timePrecise) (%s, %s, %s, %s, %s)",
                repetition, ell_optimal, n_real_tests, time_imprecise,
                time_precise)
            # Folds without any indeterminate prediction are skipped so they
            # do not bias the mean (and to avoid dividing by zero).
            if n_real_tests > 0:
                n_real_fold += 1
                imprecise_mean += time_imprecise / n_real_tests
                precise_mean += time_precise / n_real_tests
        logger.debug("(time, nRealFold, imprecise, precise) (%s, %s, %s, %s)",
                     repetition, n_real_fold, imprecise_mean, precise_mean)
        if n_real_fold > 0:
            n_real_times += 1
            avg_imprecise += imprecise_mean / n_real_fold
            avg_precise += precise_mean / n_real_fold
    # If the imprecise model was never indeterminate there is nothing to
    # average; report NaN instead of raising ZeroDivisionError at the end.
    if n_real_times > 0:
        final_imprecise = avg_imprecise / n_real_times
        final_precise = avg_precise / n_real_times
    else:
        final_imprecise = final_precise = float('nan')
    logger.debug("(dataset, models, imprec, prec) (%s, %s, %s, %s, %s)",
                 in_path, model_type_imprecise, model_type_precise,
                 final_imprecise, final_precise)
def output_paper_result(model_type="ieda", ell=0.5, hgrid=0.1):
    """Run ``__test_imprecise_model`` on the ``'bin_normal_rnd.data'`` set.

    The model is built through ``__factory_model`` with ``DEBUG=True`` and
    evaluated on features ``[1, 2]`` with ``clazz=0``.

    :param model_type: model key forwarded to ``__factory_model``.
    :param ell: forwarded as ``ell``.
    :param hgrid: forwarded as ``hgrid``.
    """
    dataset = export_data_set('bin_normal_rnd.data')
    classifier = __factory_model(model_type, DEBUG=True)
    settings = dict(features=[1, 2], hgrid=hgrid, ell=ell, clazz=0)
    __test_imprecise_model(classifier, dataset, **settings)
def computing_best_imprecise_mean(in_path=None, out_path=None, cv_nfold=10, model_type="ieda", test_size=0.4,
                                  from_ell=0.1, to_ell=1.0, by_ell=0.1, seed=None, lib_path_server=None, scaling=False):
    """Grid-search ``ell`` by k-fold cross-validation and append scores to a CSV.

    The data set is split once into train/test with ``train_test_split``
    (seeded by ``seed``); only the training part is used afterwards. A single
    shuffled ``KFold`` partition of the training part is computed up front and
    re-used for every ``ell`` in ``np.arange(from_ell, to_ell, by_ell)``. For
    each ``ell`` the mean u65 and u80 over the folds are appended as one row
    ``[ell, u65, u80]`` to ``out_path``.

    :param in_path: CSV file with features in all but the last column and the
        label in the last column. Must exist.
    :param out_path: existing file to which result rows are appended.
    :param cv_nfold: number of cross-validation folds.
    :param model_type: model key forwarded to ``__factory_model``.
    :param test_size: hold-out fraction passed to ``train_test_split``.
    :param from_ell: first ``ell`` value of the grid.
    :param to_ell: end of the grid (exclusive, as in ``np.arange``).
    :param by_ell: grid step.
    :param seed: seed of the train/test split; drawn at random when ``None``.
        The ``KFold`` shuffle itself is not seeded.
    :param lib_path_server: forwarded as ``add_path_matlab``.
    :param scaling: apply ``normalize_minmax`` to the features when true.
    :raises AssertionError: if ``in_path`` or ``out_path`` does not exist.
    """
    assert os.path.exists(in_path), "Without training data, not testing"
    assert os.path.exists(out_path), "File for putting results does not exist"

    logger = create_logger("computing_best_imprecise_mean", True)
    logger.info('Training dataset %s', in_path)
    data = pd.read_csv(in_path)
    X = data.iloc[:, :-1].values
    if scaling:
        X = normalize_minmax(X)
    y = np.array(data.iloc[:, -1].tolist())

    ell_u65, ell_u80 = dict(), dict()
    seed = random.randrange(pow(2, 30)) if seed is None else seed
    logger.debug("MODEL: %s, SEED: %s", model_type, seed)
    # The hold-out part of the split is not used by the grid search.
    X_train, _, y_train, _ = train_test_split(X, y, test_size=test_size, random_state=seed)
    kf = KFold(n_splits=cv_nfold, random_state=None, shuffle=True)
    # Materialise the folds once so every ell is scored on the same partition.
    splits = []
    for idx_train, idx_test in kf.split(y_train):
        splits.append((idx_train, idx_test))
        logger.info("Splits %s train %s", len(splits), idx_train)
        logger.info("Splits %s test %s", len(splits), idx_test)

    # The context manager closes the file even when training or evaluation
    # raises; newline='' is what the csv module expects for writer targets.
    with open(out_path, 'a', newline='') as file_csv:
        writer = csv.writer(file_csv)

        model = __factory_model(model_type, solver_matlab=True, add_path_matlab=lib_path_server, DEBUG=True)
        for ell_current in np.arange(from_ell, to_ell, by_ell):
            ell_u65[ell_current], ell_u80[ell_current] = 0, 0
            logger.info("ELL_CURRENT %s", ell_current)
            for idx_train, idx_test in splits:
                logger.info("Splits train %s", idx_train)
                logger.info("Splits test %s", idx_test)
                X_cv_train, y_cv_train = X_train[idx_train], y_train[idx_train]
                X_cv_test, y_cv_test = X_train[idx_test], y_train[idx_test]
                model.learn(X=X_cv_train, y=y_cv_train, ell=ell_current)
                sum_u65, sum_u80 = 0, 0
                n_test = len(idx_test)
                for i, test in enumerate(X_cv_test):
                    evaluate = model.evaluate(test)
                    logger.debug("(testing, ell_current, prediction, ground-truth) (%s, %s, %s, %s)",
                                 i, ell_current, evaluate, y_cv_test[i])
                    # A prediction only scores when it contains the truth.
                    if y_cv_test[i] in evaluate:
                        sum_u65 += u65(evaluate)
                        sum_u80 += u80(evaluate)
                ell_u65[ell_current] += sum_u65 / n_test
                ell_u80[ell_current] += sum_u80 / n_test
                logger.debug("Partial-kfold (%s, %s, %s)", ell_current, ell_u65[ell_current], ell_u80[ell_current])
            ell_u65[ell_current] = ell_u65[ell_current] / cv_nfold
            ell_u80[ell_current] = ell_u80[ell_current] / cv_nfold
            writer.writerow([ell_current, ell_u65[ell_current], ell_u80[ell_current]])
            # Flush after every ell so partial results survive an interruption.
            file_csv.flush()
            logger.debug("Partial-ell (%s, %s, %s)", ell_current, ell_u65, ell_u80)
    logger.debug("Total-ell %s %s %s", in_path, ell_u65, ell_u80)
Exemple #5
0
def output_paper_zone_im_precise(is_imprecise=True,
                                 model_type="ieda",
                                 in_train=None,
                                 ell=2.0,
                                 hgrid=0.1,
                                 features=None,
                                 criterion="maximality",
                                 cmap_color=None):
    """Run ``__test_imprecise_model`` for a precise or an imprecise model.

    :param is_imprecise: when true a model is built via ``__factory_model``;
        otherwise ``None`` is passed as the model.
    :param model_type: model key forwarded to ``__factory_model``.
    :param in_train: CSV file to load; ``'iris.data'`` is used when ``None``.
    :param ell: forwarded as ``ell``.
    :param hgrid: forwarded as ``hgrid``.
    :param features: feature indices; defaults to ``[0, 1]``.
    :param criterion: forwarded as ``criterion``.
    :param cmap_color: colour map; defaults to ``plt.cm.gist_ncar``.
    """
    if in_train is None:
        data = export_data_set('iris.data')
    else:
        data = pd.read_csv(in_train)

    if features is None:
        features = [0, 1]

    model = None
    if is_imprecise:
        model = __factory_model(model_type, DEBUG=True, solver_matlab=False)

    if cmap_color is None:
        cmap_color = plt.cm.gist_ncar

    __test_imprecise_model(model, data,
                           features=features,
                           hgrid=hgrid,
                           ell=ell,
                           query=None,
                           is_imprecise=is_imprecise,
                           cmap_color=cmap_color,
                           criterion=criterion)
Exemple #6
0
def performance_accuracy_hold_out(in_path=None,
                                  model_type="ilda",
                                  ell_optimal=0.1,
                                  lib_path_server=None,
                                  seeds=None,
                                  DEBUG=False,
                                  scaling=False,
                                  cv_n_fold=10):
    """Estimate u65/u80 accuracy by repeated 60/40 hold-out validation.

    One hold-out split (``test_size=0.4``) is made per entry of ``seeds``.
    The model is trained on the training part with ``ell=ell_optimal`` and
    scored on the test part; per-split means are averaged over all splits and
    written to the debug log (nothing is returned).

    :param in_path: data set file, loaded through ``dataset_to_Xy``. Must
        exist.
    :param model_type: model key forwarded to ``__factory_model``.
    :param ell_optimal: ``ell`` value used for training.
    :param lib_path_server: forwarded as ``add_path_matlab``.
    :param seeds: one random seed per hold-out repetition; generated with
        ``generate_seeds(cv_n_fold)`` when ``None``.
    :param DEBUG: forwarded to ``__factory_model``.
    :param scaling: forwarded to ``dataset_to_Xy``.
    :param cv_n_fold: number of seeds (hence repetitions) generated when
        ``seeds`` is ``None``. Previously this name was referenced without
        being defined, which raised ``NameError``.
    :raises AssertionError: if ``in_path`` does not exist.
    """
    assert os.path.exists(
        in_path
    ), "Without training data, cannot performing cross hold-out accuracy"
    logger = create_logger("performance_accuracy_hold_out", True)
    logger.info('Training dataset (%s, %s, %s)', in_path, model_type,
                ell_optimal)
    X, y = dataset_to_Xy(in_path, scaling=scaling)

    seeds = generate_seeds(cv_n_fold) if seeds is None else seeds
    logger.info('Seeds used for accuracy %s', seeds)
    n_time = len(seeds)
    mean_u65, mean_u80 = 0, 0
    model = __factory_model(model_type,
                            solver_matlab=True,
                            add_path_matlab=lib_path_server,
                            DEBUG=DEBUG)
    for k in range(0, n_time):
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.4, random_state=seeds[k])
        # Train on this repetition's training part (the original referenced
        # the undefined X_cv_train / y_cv_train here).
        model.learn(X=X_train, y=y_train, ell=ell_optimal)
        sum_u65, sum_u80 = 0, 0
        n_test, _ = X_test.shape
        for i, test in enumerate(X_test):
            # Use the model built above (the original called an undefined
            # `lqa` object).
            evaluate = model.evaluate(test)
            logger.debug(
                "(testing, ell_current, prediction, ground-truth) (%s, %s, %s, %s)",
                i, ell_optimal, evaluate, y_test[i])
            # A prediction only scores when it contains the true label.
            if y_test[i] in evaluate:
                sum_u65 += u65(evaluate)
                sum_u80 += u80(evaluate)
        logger.debug("Partial-kfold (%s, %s, %s, %s)", ell_optimal, k,
                     sum_u65 / n_test, sum_u80 / n_test)
        mean_u65 += sum_u65 / n_test
        mean_u80 += sum_u80 / n_test
    mean_u65 = mean_u65 / n_time
    mean_u80 = mean_u80 / n_time
    logger.debug("Total-ell (%s, %s, %s, %s)", in_path, ell_optimal, mean_u65,
                 mean_u80)
Exemple #7
0
def computing_time_prediction(in_path=None,
                              ell_optimal=0.1,
                              lib_path_server=None,
                              model_type="ilda",
                              criterion="maximality",
                              k_repetition=10,
                              seeds=None):
    """Measure the average per-instance prediction time of a model.

    For each of ``k_repetition`` repetitions the data is split 80/20 with
    ``train_test_split`` (seeded by ``seeds[k]``), the model is trained on
    the training part and every test instance is evaluated while timing the
    ``model.evaluate`` call. The mean time per instance of each repetition is
    collected, and the overall mean and standard deviation are logged
    (nothing is returned).

    :param in_path: header-less CSV file; last column is the label. Must
        exist.
    :param ell_optimal: ``ell`` value used for training.
    :param lib_path_server: forwarded as ``add_path_matlab``.
    :param model_type: model key forwarded to ``__factory_model``.
    :param criterion: decision criterion forwarded to ``model.evaluate``.
    :param k_repetition: number of train/test repetitions.
    :param seeds: one seed per repetition; generated with
        ``generate_seeds(k_repetition)`` when ``None``.
    :raises AssertionError: if ``in_path`` does not exist.
    """
    assert os.path.exists(in_path), "Without training data, not testing"
    data = pd.read_csv(in_path, header=None)
    logger = create_logger("computing_time_prediction", True)
    X = data.iloc[:, :-1].values
    y = data.iloc[:, -1].tolist()
    seeds = generate_seeds(k_repetition) if seeds is None else seeds
    logger.info(
        'Training dataset %s with maximality version (%s) and model (%s), ell_optimal (%s) and seeds %s',
        in_path, criterion, model_type, ell_optimal, seeds)
    model = __factory_model(model_type,
                            solver_matlab=True,
                            add_path_matlab=lib_path_server,
                            DEBUG=False)
    avg = np.array([])
    # Bound up front so the final log line works even with k_repetition == 0.
    n = 0
    for k in range(k_repetition):
        logger.info("%s-fold repetition randomly, seed %s", k, seeds[k])
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=seeds[k])
        model.learn(X=X_train, y=y_train, ell=ell_optimal)
        n, _ = X_test.shape
        sum_time = 0
        for i, test in enumerate(X_test):
            # perf_counter is monotonic and high-resolution, unlike the wall
            # clock returned by time.time(), so short calls are measured
            # reliably and system clock adjustments cannot skew the result.
            start = time.perf_counter()
            evaluate = model.evaluate(test, criterion=criterion)
            elapsed = time.perf_counter() - start
            logger.info("Evaluate %s, Ground-truth %s, Time %s ", evaluate,
                        y_test[i], elapsed)
            sum_time += elapsed
        avg = np.append(avg, sum_time / n)
    logger.info("Total time (%s, %s) and average %s and sd %s of %s testing",
                in_path, avg, np.mean(avg), np.std(avg), n)