Example #1
def disc_validate(agent, valid_feed, config, sample_shape, batch_cnt=None):
    # Evaluate the entire validation set in a single pass.
    with torch.no_grad():
        agent.eval()
        valid_feed.epoch_init(config, shuffle=False, verbose=True)
        losses = LossManager()
        acc_feed = np.array([0, 0, 0, 0, 0.0, 0.0])
        if config.gan_type == 'wgan':
            acc_feed = np.array([0, 0])
        batch_num = 0
        while True:
            batch = valid_feed.next_batch()
            if batch is None:
                break
            loss, acc = agent.disc_train(sample_shape, batch)
            # wgan_reward.append(torch.stack(acc))
            acc_feed = acc_feed + acc
            losses.add_loss(loss)
            losses.add_backward_loss(
                agent.discriminator.model_sel_loss(loss, batch_cnt))
            batch_num += 1
    valid_loss = losses.avg_loss()
    logger.info(losses.pprint(valid_feed.name))
    logger.info("Total valid loss {}".format(valid_loss))
    if config.gan_type == 'gan':
        print_accuracy(acc_feed, batch_num, config)
    else:
        logger.info("Wgan Disc Real and Fake Score: {}, {}".format(
            acc_feed[0] / batch_num, acc_feed[1] / batch_num))
    return valid_loss
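
Example #1 leans on a LossManager whose interface is only visible through its call sites (add_loss, add_backward_loss, avg_loss, pprint). A minimal stand-in consistent with those calls, assuming scalar losses; the real class likely tracks named loss tensors:

import numpy as np

class LossManager:
    """Minimal stand-in mirroring the methods used above (assumed interface)."""

    def __init__(self):
        self.losses = []
        self.backward_losses = []

    def add_loss(self, loss):
        self.losses.append(float(loss))  # float() also works for scalar tensors

    def add_backward_loss(self, loss):
        self.backward_losses.append(float(loss))

    def avg_loss(self):
        return float(np.mean(self.losses))

    def pprint(self, name):
        return "%s avg loss: %.4f" % (name, self.avg_loss())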
Example #2
def evaluate(self, X_va=(), y_va=()):
    # Empty tuples as defaults avoid the mutable-default-argument pitfall.
    print('Training performance:', end='  ')
    acc1, acc5 = print_accuracy(self.y_tr, self.model.predict_proba(self.X_tr))
    print(f'Top-1 accuracy={acc1:.4f}, Top-5 accuracy={acc5:.4f}')
    d_accuracy = {'top-1': [acc1], 'top-5': [acc5]}
    if len(X_va) + len(y_va) > 0:
        print('Evaluation performance:', end='  ')
        acc1, acc5 = print_accuracy(y_va, self.model.predict_proba(X_va))
        print(f'Top-1 accuracy={acc1:.4f}, Top-5 accuracy={acc5:.4f}')
        d_accuracy['top-1'].append(acc1)
        d_accuracy['top-5'].append(acc5)
        pd.DataFrame(d_accuracy, index=['train', 'val']).to_csv(self.fn.replace('.pkl', '.csv'))
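
Example #2 unpacks two values from print_accuracy, so this variant evidently reports top-1 and top-5 accuracy from predict_proba output. A minimal sketch of a compatible helper, assuming integer labels that index the probability columns (the original implementation is not shown):

import numpy as np

def print_accuracy(y_true, proba, k=5):
    """Return (top-1, top-k) accuracy from predicted class probabilities (assumed interface)."""
    y_true = np.asarray(y_true)
    # Classes sorted by descending probability, one row per sample.
    topk = np.argsort(proba, axis=1)[:, ::-1][:, :k]
    acc1 = float(np.mean(topk[:, 0] == y_true))
    acck = float(np.mean([t in row for t, row in zip(y_true, topk)]))
    return acc1, acck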
Example #3
def test_model(sess_test, objData):

    # sess_test : TensorFlow session
    # objData   : test data
    total = objData.total_images
    mbatch = objData.minibatch

    # Ceiling division: one extra iteration covers a final partial batch.
    itertotal = (total + mbatch - 1) // mbatch

    count_success = 0
    count_by_class = np.zeros([num_class, num_class])
    prob_predicted = []

    # Iterate over the batches; on each iteration the TensorFlow session
    # processes 'n' inputs, where 'n' is the 'mini_batch_test' size.
    print('\n# PHASE: Test classification')
    for i in range(itertotal):

        # Generate the batch and its labels; the batch holds the next 'n' images.
        batch, label = objData.generate_batch()

        # Run the TensorFlow graph and keep the activation vector of the last layer.
        prob, layer = sess_test.run([vgg.prob, vgg.relu6],
                                    feed_dict={
                                        vgg_batch: batch,
                                        train_mode: False
                                    })

        # save output of a layer
        # utils.save_layer_output(layer, label, name='Train_SNC4_relu6')

        # Accumulate the per-iteration correct counts to average at the end.
        count, count_by_class, prob_predicted = utils.print_accuracy(
            label,
            prob,
            matrix_confusion=count_by_class,
            predicted=prob_predicted)
        count_success = count_success + count

        # Advance the batch pointer to the next group of 'n' images.
        objData.next_batch_test()

    # Average the success count to get the final accuracy.
    accuracy_final = count_success / total
    print('\n# STATUS: Confusion Matrix')
    print(count_by_class)
    print('    Success total: ', str(count_success))
    print('    Accuracy total: ', str(accuracy_final))

    # a = objData.labels.tolist()
    # b = prob_predicted
    # cm = confusion_matrix(a, b)
    return accuracy_final
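
Example #3 expects utils.print_accuracy to return a per-batch success count while accumulating a confusion matrix and a running list of predicted labels. A rough sketch consistent with that call, assuming integer labels and one softmax row per image in prob:

import numpy as np

def print_accuracy(labels, prob, matrix_confusion, predicted):
    """Count correct predictions in a batch; update confusion matrix and prediction list."""
    labels = np.asarray(labels).astype(int)
    preds = np.argmax(prob, axis=1)
    for t, p in zip(labels, preds):
        matrix_confusion[t, p] += 1
        predicted.append(int(p))
    count = int(np.sum(preds == labels))
    return count, matrix_confusion, predicted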
Example #4
####################### Feature Expansion ################################
if classifier != "nn" and classifier != "bow":
    X_tr = feature_exp(X_tr)
    X_te = feature_exp(X_te)
D = X_tr.shape[1]

print "After Feature Expansion: Training : [Inputs x Features ] = [%d x %d]" % (N_tr,D)
print "After Feature Expansion: Test     : [Inputs x Features ] = [%d x %d]" % (N_te,D)

###################### Normalizing data ##################################
scaler = preprocessing.StandardScaler().fit(X_tr)
X_tr_n = scaler.transform(X_tr)
X_te_n = scaler.transform(X_te)

end = time.time()
print "\nTime taken for Data preparation = %f sec" % (end-start)

start = time.time()
print(time.ctime())

y_te_p = models(X_tr_n, y_tr, X_te_n, classifier)

if isinstance(y_te_p, np.ndarray):
    if submission != 1:
        print_accuracy(y_te, y_te_p, "Test")
    else:
        save_out(y_te_p, labels_string, sorted_files_te, submission_fname)

end = time.time()
print "\nTime taken by classifier = %f sec" % (end-start)
Example #5
            os.makedirs(res_dir_w)
        acc_list = []
        word_acc = 0
        fs = "+".join(featsset)
        for fold in range(1, K+1):
            #sys.stderr.write("[INFO] Fold %s\n" %str(fold))
            reader = LexSampReader()
            dataset = data_dir + "/" + word + "/xval/fold"+str(fold)
            trainInstances = reader.getInstances(dataset+"/"+word+".train.ls.utf8.xml")
            testInstances = reader.getInstances(dataset+"/"+word+".test.ls.utf8.xml")
            wsd.setTrain(trainInstances)
            wsd.setTest(testInstances)

            wsd.learn()
            preds = wsd.predict()
            gold = [insLabel for (insId, insLabel, offset, tokens) in testInstances]
            acc = wsd.accuracy(preds, gold)
            acc_list.append(acc)
            word_acc += acc
            sys.stderr.write("[INFO] %s fold %s: %s\n" % (word, str(fold), str(acc)))

            pred_filename = word + ".f" + str(fold) + "." + class_name + "." + fs + ".out"
            res_file = res_dir_w + "/" + pred_filename
            utils.print_predictions(preds, testInstances, res_file)
        acc_filename = word + "." + class_name + "." + fs + ".acc"
        acc_file = res_dir_w + "/" + acc_filename
        utils.print_accuracy(acc_list, acc_file)
        word_acc = float(word_acc) / K
        sys.stderr.write("[INFO] %s avg accuracy: %s\n" % (word, str(word_acc)))
        sys.stderr.write("[INFO] Results stored in %s\n\n" % res_dir_w)
Example #6
resname = get_parent_path(resfile, 1)[1]
res = [pd.read_csv(f) for f in resfile]

for ii, rr in enumerate(res):
    sujid = []
    for ff in rr.subject_id:
        dd = ff.split('/')
        if dd[-1] == '': dd.pop()  # '==' rather than 'is' for string comparison
        nn = len(dd)
        sujid.append(dd[nn - 3] + '+' + dd[nn - 2] + '+' + dd[nn - 1])
    rr.index = sujid
    res[ii] = rr.loc[labelsujid]  # rr.loc[sujid[::-1]]

print_accuracy(res,
               resname,
               ytrue,
               prediction_name='prob_y',
               inverse_prediction=False)
print_accuracy_all(res[0:1],
                   resname[0:1],
                   ytrue,
                   prediction_name='prob_y',
                   inverse_prediction=False)
# CAT12
rescat = pd.read_csv(
    '/home/romain.valabregue/datal/QCcnn/CATI_datasets/res_cat12_suj18999.csv')
rescat.index = [sss.replace(';', '+')
                for sss in rescat.sujid]  # .values.replace(";","+")
rescat = rescat.loc[labelsujid]
print_accuracy_df(rescat, ytrue)
print_accuracy([rescat], ['IQR'],
Example #7
def main():
    ## parse flags
    config = Options().parse()
    utils.print_opts(config)

    ## set up folders
    exp_dir = os.path.join(config.exp_dir, config.exp_name)
    model_dir = os.path.join(exp_dir, 'models')
    img_dir = os.path.join(exp_dir, 'images')
    for d in (exp_dir, model_dir, img_dir):
        os.makedirs(d, exist_ok=True)

    if config.solver == 'none':
        model = None
    else:
        if config.use_tbx:
            # remove old tensorboardX logs
            for log in glob.glob(os.path.join(exp_dir, 'events.out.tfevents.*')):
                os.remove(log)
            tbx_writer = SummaryWriter(exp_dir)
        else:
            tbx_writer = None

        ## initialize data loaders/generators & model
        r_loader, z_loader = get_loader(config)
        if config.solver == 'w1':
            model = W1(config, r_loader, z_loader)
        elif config.solver == 'w2':
            model = W2(config, r_loader, z_loader)
        elif config.solver == 'bary_ot':
            model = BaryOT(config, r_loader, z_loader)
        cudnn.benchmark = True
        networks = model.get_networks()
        utils.print_networks(networks)

        ## training
        ## stage 1 (dual stage) of bary_ot
        start_time = time.time()
        if config.solver == 'bary_ot':
            print("Starting: dual stage for %d iters." % config.dual_iters)
            for step in range(config.dual_iters):
                model.train_diter_only(config)
                if ((step + 1) % 100) == 0:
                    stats = model.get_stats(config)
                    end_time = time.time()
                    stats['disp_time'] = (end_time - start_time) / 60.
                    start_time = end_time
                    utils.print_out(stats, step + 1, config.dual_iters,
                                    tbx_writer)
            print("dual stage iterations complete.")

        ## main training loop of w1 / w2 or stage 2 (map stage) of bary-ot
        map_iters = config.map_iters if config.solver == 'bary_ot' else config.train_iters
        if config.solver == 'bary_ot':
            print("Starting: map stage for %d iters." % map_iters)
        else:
            print("Starting training...")
        for step in range(map_iters):
            model.train_iter(config)
            if ((step + 1) % 100) == 0:
                stats = model.get_stats(config)
                end_time = time.time()
                stats['disp_time'] = (end_time - start_time) / 60.
                start_time = end_time
                utils.print_out(stats, step + 1, map_iters, tbx_writer)
            if ((step + 1) % 500) == 0:
                images = model.get_visuals(config)
                utils.visualize_iter(images, img_dir, step + 1, config)
        print("Training complete.")
        networks = model.get_networks()
        utils.save_networks(networks, model_dir)

    ## testing
    ## 1) classification accuracy
    print("Calculating domain adaptation accuracy...")
    utils.print_accuracy(config, model)

    ## 2) visualization
    if config.solver != 'none':
        root = "./usps_test" if config.direction == 'usps-mnist' else "./mnist_test"
        with open(os.path.join(root, "data.pkl"), "rb") as f:
            fixed_z = pickle.load(f)
        fixed_z = utils.to_var(fixed_z)
        fixed_gz = model.g(fixed_z).view(*fixed_z.size())
        utils.visualize_single(fixed_gz, os.path.join(img_dir, 'test.png'),
                               config)
Example #8
def models(X_tr_n, y_tr, X_te_n, classifier):
    if classifier == "c_svm":
        ###################### C SVM - Accuracy - 0.44503 #############################
        model = SVC()
        model.fit(X_tr_n, y_tr)
        y_tr_p = model.predict(X_tr_n)
        y_te_p = model.predict(X_te_n)
        # save_out(y_te_p,labels_string,sorted_files_te,'submission/testLabels_CSVM.csv')

    elif classifier == "c_svm_l1":
        ###################### C SVM L1 - Accuracy - 0.44503 #############################
        model = LinearSVC(penalty='l1', dual=False)
        model.fit(X_tr_n, y_tr)
        y_tr_p = model.predict(X_tr_n)
        y_te_p = model.predict(X_te_n)

    elif classifier == "log_reg":
        ###################### Logistic regression #############################
        model = linear_model.LogisticRegression()
        model.fit(X_tr_n, y_tr)
        y_tr_p = model.predict(X_tr_n)
        y_te_p = model.predict(X_te_n)

    elif classifier == "c_svm_param":
        ###################### C SVM Param - Accuracy - 0.50164 #############################
        model = grid_search(X_tr_n, y_tr)
        print("Best params =")
        print(model.best_params_)

        # model = SVC(C=10,kernel='rbf',gamma=0.001)
        # model.fit(X_tr_n, y_tr)
        y_tr_p = model.predict(X_tr_n)
        y_te_p = model.predict(X_te_n)

    elif(classifier == "knn"):
        ###################### KNN - Accuracy -  #############################
        model = KNeighborsClassifier(n_neighbors=20)
        model.fit(X_tr_n, y_tr)
        y_tr_p = model.predict(X_tr_n)
        y_te_p = model.predict(X_te_n)

    elif(classifier == "naive_bayes"):
        ###################### Naive Bayes - Accuracy -  #############################
        model = GaussianNB()
        model.fit(X_tr_n, y_tr)
        y_tr_p = model.predict(X_tr_n)
        y_te_p = model.predict(X_te_n)

    elif(classifier == "ols"):
        ###################### OLS - Accuracy -  #############################
        model = linear_model.LinearRegression()
        model.fit(X_tr_n,y_tr)
        y_tr_p = model.predict(X_tr_n)
        y_tr_p = np.round(y_tr_p)
        y_te_p = model.predict(X_te_n)
        y_te_p = np.round(y_te_p)

    elif(classifier == "ridge_reg"):
        ###################### Ridge Regression - Accuracy -  #############################
        model = linear_model.Ridge(alpha=0.001)
        model.fit(X_tr_n,y_tr)
        y_tr_p = model.predict(X_tr_n)
        y_tr_p = np.round(y_tr_p)
        y_te_p = model.predict(X_te_n)
        y_te_p = np.round(y_te_p)

    elif(classifier == "lasso"):
        ###################### Lasso - Accuracy -  #############################
        model = linear_model.Lasso(alpha=.15,max_iter=-1)
        model.fit(X_tr_n,y_tr)
        y_tr_p = model.predict(X_tr_n)
        y_tr_p = np.round(y_tr_p)
        y_te_p = model.predict(X_te_n)
        y_te_p = np.round(y_te_p)

    elif(classifier == "adaboost"):
        ###################### AdaBoost ###########################################
        # model = AdaBoostClassifier(RandomForestClassifier(max_features=50, n_estimators=10, max_depth=20),
        #                            n_estimators=100,learning_rate=2)
        model = AdaBoostClassifier(linear_model.SGDClassifier(n_iter=50),n_estimators=100,learning_rate=1, algorithm="SAMME")
        # model = AdaBoostClassifier(n_estimators=100,learning_rate=2)
        model.fit(X_tr_n,y_tr)
        y_tr_p = model.predict(X_tr_n)
        y_te_p = model.predict(X_te_n)

    # elif(classifier == "voting"):
        # clf1 = DecisionTreeClassifier(max_depth=4)
        # clf2 = KNeighborsClassifier(n_neighbors=7)
        # clf3 = SVC(kernel='rbf', probability=True)
        # model = VotingClassifier(estimators=[('dt', clf1), ('knn', clf2), ('svc', clf3)], voting='soft', weights=[2,1,2])
        # model.fit(X_tr_n,y_tr)
        # y_tr_p = model.predict(X_tr_n)
        # y_te_p = model.predict(X_te_n)

    elif(classifier == "random_forest"):
        ###################### Random Forest ###########################################
        # model =  RandomForestClassifier(n_estimators=100,n_jobs=4)

        # Grid search
        clf =  RandomForestClassifier(n_jobs=3)
        param_grid = {"max_depth": [10, 20, 30],
                      "max_features": [50, 100, 200],
                      "n_estimators": [10,50,100]}

        # run grid search
        model = GridSearchCV(clf, param_grid=param_grid)
        model.fit(X_tr_n,y_tr)

        print model.best_params_

        y_tr_p = model.predict(X_tr_n)
        y_te_p = model.predict(X_te_n)

    elif(classifier == "nn"):
        ############################### NN ###################################
        # tensorFlowNN(X_tr,y_tr,X_te,y_te)
        y_tr_p, y_te_p = keras_CNN(X_tr, y_tr, X_te)

    elif(classifier == "bow"):
        ############################### BOW ###################################
        X_tr_full_res, s = read_X_full_res('data/train')
        X_te_full_res, s = read_X_full_res('data/test')

        bow_obj = bow(kmeans_K = 100)
        X_bow_tr = bow_obj.fit_predict(X_tr_full_res)
        X_bow_te = bow_obj.predict(X_te_full_res)

        model = SVC()
        model.fit(X_bow_tr, y_tr)
        y_tr_p = model.predict(X_bow_tr)
        y_te_p = model.predict(X_bow_te)

    else:
        print "No Classifier selected"
        return False


    print_accuracy(y_tr, y_tr_p, "Training")

    return y_te_p
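
Examples #4 and #8 call print_accuracy(y_true, y_pred, tag) with hard label vectors and a split name. A matching sketch, assuming it simply prints the fraction of exact label matches under the given tag:

import numpy as np

def print_accuracy(y_true, y_pred, tag):
    """Print the share of exactly matching labels for the given split (assumed behavior)."""
    acc = float(np.mean(np.asarray(y_true) == np.asarray(y_pred)))
    print("%s accuracy: %.5f" % (tag, acc))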