Example no. 1
    def compute_acc_and_nmi_conflicted_data(self, x, y, centers_emb, beta1, beta2):
        features = self.predict_encoder(x)
        unconf_indices, conf_indices = self.generate_unconflicted_data_index(x, centers_emb, beta1, beta2)
        
        if unconf_indices.size == 0:
            print(' '*8 + "Empty list of unconflicted data")
            acc_unconf = 0
            nmi_unconf = 0
        else:
            x_emb_unconf = self.predict_encoder(x[unconf_indices])
            y_unconf = y[unconf_indices]
            y_pred_unconf = q_mat(x_emb_unconf, centers_emb, alpha=1.0).argmax(axis=1)
            acc_unconf = metrics.acc(y_unconf, y_pred_unconf)
            nmi_unconf = metrics.nmi(y_unconf, y_pred_unconf)
            print(' '*8 + '|==>  acc unconflicted data: %.4f,  nmi unconflicted data: %.4f  <==|'% (acc_unconf, nmi_unconf))

        if conf_indices.size == 0:
            print(' '*8 + "Empty list of conflicted data")
            acc_conf = 0
            nmi_conf = 0
        else:
            x_emb_conf = self.predict_encoder(x[conf_indices])
            y_conf = y[conf_indices]
            y_pred_conf = q_mat(x_emb_conf, centers_emb, alpha=1.0).argmax(axis=1)
            acc_conf = metrics.acc(y_conf, y_pred_conf)
            nmi_conf = metrics.nmi(y_conf, y_pred_conf)
            print(' '*8 + '|==>  acc conflicted data: %.4f,  nmi conflicted data: %.4f  <==|' % (acc_conf, nmi_conf))
        return acc_unconf, nmi_unconf, acc_conf, nmi_conf
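
Example no. 1 calls a q_mat helper that is not shown. A minimal sketch, assuming it computes the DEC-style Student's t soft assignment of each embedded sample to the cluster centers (the repo's actual helper may differ):

import numpy as np

def q_mat(X, centers, alpha=1.0):
    # Soft assignment q_ij of sample i to center j via a Student's t kernel,
    # as in DEC; each row sums to 1.  Sketch only, not the repo's code.
    dist = np.sum((X[:, None, :] - centers[None, :, :]) ** 2, axis=2)
    q = 1.0 / (1.0 + dist / alpha)
    q = q ** ((alpha + 1.0) / 2.0)
    q = q / q.sum(axis=1, keepdims=True)
    return q
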
Example no. 2
def test_accuracy(classifier, Xt, Yt, Xv, Yv, filename=None):
    # Apparently, arrays don't work here as they try to access the second dimension's size...
    Yv = mat(Yv).transpose()
    Yt = mat(Yt).transpose()

    predictions = classifier.predict(Xt)
    print "Neural Net Train Accuracy:", acc(Yt, predictions), "%"
    predictions = classifier.predict(Xv)
    print "Neural Net Test Accuracy:", acc(Yv, predictions), "%"
Example no. 4
def train_model(net,
                optimizer,
                criterion,
                trainloader,
                num_ens=1,
                beta_type=0.1,
                epoch=None,
                num_epochs=None,
                layer_type='bbb'):
    net.train()
    training_loss = 0.0
    accs = []
    kl_list = []
    for i, (inputs, labels) in enumerate(trainloader, 1):

        optimizer.zero_grad()

        inputs, labels = inputs.to(device), labels.to(device)
        outputs = torch.zeros(inputs.shape[0], net.num_classes,
                              num_ens).to(device)

        if layer_type == 'mgp':
            log_priors = torch.zeros(num_ens).to(device)
            log_variational_posteriors = torch.zeros(num_ens).to(device)
            for j in range(num_ens):
                net_out, log_prior, log_variational_posterior = net(inputs)
                outputs[:, :, j] = F.log_softmax(net_out, dim=1)
                log_priors[j] = log_prior
                log_variational_posteriors[j] = log_variational_posterior

            log_prior = log_priors.mean()
            log_variational_posterior = log_variational_posteriors.mean()
            kl_list.append((log_variational_posterior - log_prior).item())
            log_outputs = utils.logmeanexp(outputs, dim=2)

            beta = metrics.get_beta(i - 1, len(trainloader), beta_type, epoch,
                                    num_epochs)
            loss = criterion(log_outputs, labels, log_prior,
                             log_variational_posterior, beta)
        else:
            kl = 0.0
            for j in range(num_ens):
                net_out, _kl = net(inputs)
                kl += _kl
                outputs[:, :, j] = F.log_softmax(net_out, dim=1)

            kl = kl / num_ens
            kl_list.append(kl.item())
            log_outputs = utils.logmeanexp(outputs, dim=2)

            beta = metrics.get_beta(i - 1, len(trainloader), beta_type, epoch,
                                    num_epochs)
            loss = criterion(log_outputs, labels, kl, beta)

        loss.backward()
        optimizer.step()

        accs.append(metrics.acc(log_outputs.data, labels))
        training_loss += loss.cpu().data.numpy()
    return training_loss / len(trainloader), np.mean(accs), np.mean(kl_list)
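
Example no. 4 combines the num_ens ensemble members in log space with utils.logmeanexp(outputs, dim=2). The helper is not shown; a plausible, numerically stable stand-in (an assumption, not the repo's code):

import math
import torch

def logmeanexp(x, dim):
    # log(mean(exp(x))) along `dim`, computed as logsumexp(x) - log(n)
    # so that large negative log-probabilities do not underflow.
    return torch.logsumexp(x, dim=dim) - math.log(x.size(dim))
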
Example no. 5
def match(y,cl):
    cl=np.array(cl)
    y=np.array(y)
    acc = np.round(metrics.acc(y, cl), 5)
    nmi = np.round(metrics.nmi(y, cl), 5)
    ari = np.round(metrics.ari(y, cl), 5)
    return acc,nmi,ari
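
The match helper above wraps metrics.acc, metrics.nmi and metrics.ari. In clustering code of this kind, nmi and ari are typically thin aliases for scikit-learn's scores, while acc is unsupervised clustering accuracy with the cluster-to-label mapping resolved by the Hungarian algorithm. A sketch under that assumption (note that in the Bayesian-network examples further down, metrics.acc is instead ordinary top-1 accuracy on the model outputs):

import numpy as np
from scipy.optimize import linear_sum_assignment
from sklearn.metrics import normalized_mutual_info_score as nmi
from sklearn.metrics import adjusted_rand_score as ari

def acc(y_true, y_pred):
    # Unsupervised clustering accuracy: count co-occurrences of predicted
    # cluster ids and true labels, then take the best one-to-one mapping.
    y_true = np.asarray(y_true).astype(np.int64)
    y_pred = np.asarray(y_pred).astype(np.int64)
    D = max(y_pred.max(), y_true.max()) + 1
    w = np.zeros((D, D), dtype=np.int64)
    for i in range(y_pred.size):
        w[y_pred[i], y_true[i]] += 1
    row_ind, col_ind = linear_sum_assignment(w.max() - w)
    return w[row_ind, col_ind].sum() / y_pred.size
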
def train_and_validate(train_paths, val_paths):
    ############################## Training Process ###########################################
    X_train, Y_train = load_split(train_paths[0], train_paths[1])
    print(X_train.var())
    print("Started Training")
    clf = svm.SVC(gamma='scale',
                  kernel="rbf",
                  C=1.,
                  degree=2,
                  verbose=False,
                  probability=True)
    #clf = svm.SVC(gamma="scale", kernel="rbf", C=2., verbose=False, probability=True)
    clf.fit(X_train, Y_train)
    joblib.dump(clf, 'data/model_repo/svm_test.pkl')

    # ############################## Validation Process ###########################################
    print("Preparing validation documents")
    X_val, Y_val = load_split(val_paths[0], val_paths[1])

    predictions = clf.predict(X_val)

    np.savetxt("data/errors/system_predictions_bow.txt",
               clf.predict_proba(X_val))
    np.savetxt("data/errors/gt.txt", Y_val)

    val_accuracy = metrics.acc(predictions, Y_val)

    return val_accuracy
Example no. 7
    def validate(self, validate_dl):
        iterator = tqdm(validate_dl, leave=True, dynamic_ncols=True)
        iter_len = len(iterator)
        errs = []
        accs = []

        for i, (_, data, label) in enumerate(iterator):
            iterator.set_description(
                f'validate:[{self.epoch}/{self.opt.epochs}|{self.global_steps}]'
            )
            err, pred = self.validate_step(data, label)

            pred = self.get_pred_number(pred)
            label = self.get_pred_number(label.to(self.opt.device))
            acc = metrics.acc(pred, label)

            accs.append(acc)
            errs.append(err)

        curr_acc = torch.tensor(accs).mean().item()
        self.writer.add_scalar('validate/error',
                               torch.tensor(errs).mean().item(),
                               self.global_steps)
        self.writer.add_scalar('validate/acc', curr_acc, self.global_steps)
        if curr_acc > self.best_acc:
            self.best_acc = curr_acc
            self.test()
Example no. 8
def validate_model(net,
                   criterion,
                   validloader,
                   num_ens=1,
                   beta_type=0.1,
                   epoch=None,
                   num_epochs=None):
    """Calculate ensemble accuracy and NLL Loss"""
    net.train()
    valid_loss = 0.0
    accs = []

    for i, (inputs, labels) in enumerate(validloader):
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = torch.zeros(inputs.shape[0], net.num_classes,
                              num_ens).to(device)
        kl = 0.0
        for j in range(num_ens):
            net_out, _kl = net(inputs)
            kl += _kl
            outputs[:, :, j] = F.log_softmax(net_out, dim=1).data

        log_outputs = utils.logmeanexp(outputs, dim=2)

        beta = metrics.get_beta(i - 1, len(validloader), beta_type, epoch,
                                num_epochs)
        valid_loss += criterion(log_outputs, labels, kl, beta).item()
        accs.append(metrics.acc(log_outputs, labels))

    return valid_loss / len(validloader), np.mean(accs)
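
Examples no. 4 and no. 8 scale the KL term with metrics.get_beta(i - 1, len(loader), beta_type, epoch, num_epochs). The helper is not shown; a hedged sketch of the weighting schedules commonly used for this (a constant, the "Blundell" minibatch weighting from Bayes by Backprop, or a linear warm-up), which may differ from the repo's version:

def get_beta(batch_idx, m, beta_type, epoch=None, num_epochs=None):
    # KL weight for minibatch `batch_idx` of `m` per epoch (sketch only).
    if isinstance(beta_type, (int, float)):            # constant, e.g. 0.1
        return beta_type
    if beta_type == "Blundell":                        # 2^(M-i) / (2^M - 1)
        return 2 ** (m - (batch_idx + 1)) / (2 ** m - 1)
    if beta_type == "Soenderby":                       # linear warm-up over epochs
        return min(epoch / max(1, num_epochs // 4), 1.0)
    return 1.0 / m                                     # uniform split across batches
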
Example no. 9
def train_model(net, optimizer, criterion, trainloader, num_ens=1):
    net.train()
    training_loss = 0.0
    accs = []
    kl_list = []
    for i, (inputs, labels) in enumerate(trainloader, 0):
        optimizer.zero_grad()

        inputs, labels = inputs.to(device), labels.to(device)
        outputs = torch.zeros(inputs.shape[0], net.num_classes,
                              num_ens).to(device)

        kl = 0.0
        for j in range(num_ens):
            net_out, _kl = net(inputs)
            kl += _kl
            outputs[:, :, j] = F.log_softmax(net_out, dim=1)

        kl = kl / num_ens
        kl_list.append(kl.item())
        log_outputs = utils.logmeanexp(outputs, dim=2)

        loss = criterion(log_outputs, labels, kl)
        loss.backward()
        optimizer.step()

        accs.append(metrics.acc(log_outputs.data, labels))
        training_loss += loss.cpu().data.numpy()
    return training_loss / len(trainloader), np.mean(accs), np.mean(kl_list)
Example no. 10
 def metric(self, y, y_pred):
     acc = np.round(metrics.acc(y, y_pred), 5)
     nmi = np.round(metrics.nmi(y, y_pred), 5)
     ari = np.round(metrics.ari(y, y_pred), 5)
     print('acc:', acc)
     print('nmi:', nmi)
     print('ari:', ari)
Example no. 11
    def fit(self,
            x,
            y=None,
            maxiter=2e4,
            batch_size=256,
            tol=1e-3,
            update_interval=140,
            save_dir='./results/temp',
            rand_seed=None):

        print('Update interval', update_interval)
        save_interval = int(x.shape[0] / batch_size) * 5  # 5 epochs
        print('Save interval', save_interval)

        # Step 1: initialize cluster centers using k-means
        print('Initializing cluster centers with k-means.')
        kmeans = KMeans(n_clusters=self.n_clusters, n_init=100)
        y_pred = kmeans.fit_predict(self.encoder.predict(x))
        y_pred_last = np.copy(y_pred)
        self.model.get_layer(name='clustering').set_weights(
            [kmeans.cluster_centers_])

        loss = 0
        index = 0
        index_array = np.arange(x.shape[0])
        for ite in range(int(maxiter)):
            if ite % update_interval == 0:
                q = self.model.predict(x, verbose=0)
                p = self.target_distribution(q)

                y_pred = q.argmax(1)
                if y is not None:
                    acc = np.round(metrics.acc(y, y_pred), 5)
                    nmi = np.round(metrics.nmi(y, y_pred), 5)
                    loss = np.round(loss, 5)
                    print('Iter %d: acc = %.5f, nmi = %.5f' % (ite, acc, nmi),
                          ' ; loss=', loss)

                # check stop criterion
                delta_label = np.sum(y_pred != y_pred_last).astype(
                    np.float32) / y_pred.shape[0]
                y_pred_last = np.copy(y_pred)
                if ite > 0 and delta_label < tol:
                    print('delta_label ', delta_label, '< tol ', tol)
                    print('Reached tolerance threshold. Stopping training.')
                    break

            idx = index_array[index * batch_size:min((index + 1) *
                                                     batch_size, x.shape[0])]
            loss = self.model.train_on_batch(x=x[idx], y=p[idx])
            index = index + 1 if (index + 1) * batch_size <= x.shape[0] else 0

            ite += 1

        # save the trained model
        print('saving model to:', save_dir + 'STC_model_final.h5')
        self.model.save_weights(save_dir + 'STC_model_final.h5')

        return y_pred
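
The fit loop in Example no. 11 sharpens the soft assignments with self.target_distribution(q). In DEC-style training this is the auxiliary target p_ij = (q_ij^2 / f_j) / sum_k (q_ik^2 / f_k) with cluster frequencies f_j = sum_i q_ij; a sketch assuming that definition:

import numpy as np

def target_distribution(q):
    # Square the soft assignments, normalize by cluster frequency f_j,
    # then renormalize each row so it remains a probability distribution.
    weight = q ** 2 / q.sum(axis=0)
    return (weight.T / weight.sum(axis=1)).T
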
def significance_test_holdout():
    system_a_pred = np.loadtxt(
        "data/trained_models/predictions_old/no_fold/unigram_false_bigram_true_laplace_true.txt"
    )
    system_b_pred = np.loadtxt(
        "data/trained_models/predictions_old/no_fold/unigram_false_bigram_true_laplace_true.txt"
    )

    gt = [1] * 100 + [0] * 100

    sign_test_result = metrics.sign_test_precision(system_a_pred,
                                                   system_b_pred, gt)
    system_a_acc = metrics.acc(system_a_pred, gt)
    system_b_acc = metrics.acc(system_b_pred, gt)

    print("The accuracy of system A is: ", system_a_acc)
    print("The accuracy of system B is: ", system_b_acc)
    print("The result of the sign test is: ", sign_test_result)
Example no. 13
def train(args):
    # get data and model
    (x, y), model = _get_data_and_model(args)

    # split train validation data
    if y is None:
        x_train, x_val = train_test_split(x, test_size=0.1)
        y_val = None
        y_train = None
    else:
        x_train, x_val, y_train, y_val = train_test_split(x,
                                                          y,
                                                          stratify=y,
                                                          test_size=0.1)

    model.model.summary()

    # pretraining
    t0 = time()
    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)
    if args.pretrained_weights is not None and os.path.exists(
            args.pretrained_weights):  # load pretrained weights
        model.autoencoder.load_weights(args.pretrained_weights)
    else:  # train
        pretrain_optimizer = SGD(1.0, 0.9) if args.method in [
            'FcDEC', 'FcIDEC', 'FcDEC-DA', 'FcIDEC-DA'
        ] else 'adam'
        model.pretrain(x_train,
                       y_train,
                       x_val,
                       y_val,
                       optimizer=pretrain_optimizer,
                       epochs=args.pretrain_epochs,
                       batch_size=args.batch_size,
                       save_dir=args.save_dir,
                       verbose=args.verbose,
                       aug_pretrain=args.aug_pretrain)
    t1 = time()
    print("Time for pretraining: %ds" % (t1 - t0))

    # clustering
    y_pred = model.fit(x,
                       y,
                       maxiter=args.maxiter,
                       batch_size=args.batch_size,
                       update_interval=args.update_interval,
                       save_dir=args.save_dir,
                       aug_cluster=args.aug_cluster)
    if y is not None:
        print('Final: acc=%.4f, nmi=%.4f, ari=%.4f' % (metrics.acc(
            y, y_pred), metrics.nmi(y, y_pred), metrics.ari(y, y_pred)))
    t2 = time()
    print("Time for pretaining, clustering and total: (%ds, %ds, %ds)" %
          (t1 - t0, t2 - t1, t2 - t0))
    print('=' * 60)
Example no. 14
def validate_model(net, criterion, valid_loader):
    valid_loss = 0.0
    net.eval()
    accs = []
    for data, target in valid_loader:
        data, target = data.to(device), target.to(device)
        output = net(data)
        loss = criterion(output, target)
        valid_loss += loss.item() * data.size(0)
        accs.append(metrics.acc(output.detach(), target))
    return valid_loss, np.mean(accs)
Example no. 15
 def on_epoch_end(self, epoch, logs=None):
     if int(epochs / 10) != 0 and epoch % int(epochs / 10) != 0:
         return
     feature_model = tf.keras.models.Model(self.model.input,
                                           self.model.get_layer('encoder_3').output)
     features = feature_model.predict(self.x)
     km = KMeans(n_clusters=len(np.unique(self.y)), n_init=20, n_jobs=4)
     y_pred = km.fit_predict(features)
     # print()
     print(' ' * 8 + '|==>  acc: %.4f,  nmi: %.4f  <==|'
           % (metrics.acc(self.y, y_pred), metrics.nmi(self.y, y_pred)))
Example no. 16
def test(args):
    assert args.weights is not None
    (x, y), model = _get_data_and_model(args)
    model.model.summary()

    print('Begin testing:', '-' * 60)
    model.load_weights(args.weights)
    y_pred = model.predict_labels(x)
    print('acc=%.4f, nmi=%.4f, ari=%.4f' % (metrics.acc(
        y, y_pred), metrics.nmi(y, y_pred), metrics.ari(y, y_pred)))
    print('End testing:', '-' * 60)
Example no. 17
def predict_regular(net, validloader, bayesian=True, num_ens=10):
    """
    For both bayesian and Frequentist models
    """
    net.eval()
    accs = []

    for i, (inputs, labels) in enumerate(validloader):
        inputs, labels = inputs.to(device), labels.to(device)
        if bayesian:
            outputs = torch.zeros(inputs.shape[0], net.num_classes, num_ens).to(device)
            for j in range(num_ens):
                net_out, _ = net(inputs)
                outputs[:, :, j] = F.log_softmax(net_out, dim=1).data

            log_outputs = utils.logmeanexp(outputs, dim=2)
            accs.append(metrics.acc(log_outputs, labels))
        else:
            output = net(inputs)
            accs.append(metrics.acc(output.detach(), labels))

    return np.mean(accs)
Example no. 18
def kmeans_():

    # use features for clustering
    from sklearn.cluster import KMeans
    km = KMeans(n_clusters=N, init='k-means++')
    #features = np.reshape(x_train, newshape=(features.shape[0], -1))
    km_trans = km.fit_transform(x_train)
    pred = km.predict(x_train)
    print pred.shape
    print('acc=', met.acc(y_train, pred), 'nmi=', met.nmi(y_train,
                                                          pred), 'ari=',
          met.ari(y_train, pred))
    return km_trans, pred
Example no. 19
    def fit(self, x, y=None, save_dir='./results/temp'):
        # print('Begin training:', '-' * 60)

        t1 = time()
        print(
            '******************** Use Denpeak to Cluster ************************'
        )

        features = self.encoder.predict(x)
        print("features shape:", features.shape)
        features = TSNE(n_components=2).fit_transform(features)
        # np.savetxt("features.txt", features)
        print("features shape:", features.shape)
        y_pred, y_border, center_num, dc_percent, dc = DenPeakCluster(features)
        print('saving picture to:', save_dir + '/2D.png')
        plt.cla()
        plt.scatter(features[:, 0], features[:, 1], c=y_pred, s=0.5, alpha=0.5)
        plt.savefig(save_dir + '/2D.png')
        np.savetxt(save_dir + '/dc_coeff.txt', [dc_percent, dc])

        # logging file
        import csv, os
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)
        logfile = open(save_dir + '/log.csv', 'w')
        logwriter = csv.DictWriter(
            logfile,
            fieldnames=['iter', 'acc', 'nmi', 'ari', 'loss', 'center_num'])
        logwriter.writeheader()

        acc = np.round(metrics.acc(y, y_pred), 5)
        nmi = np.round(metrics.nmi(y, y_pred), 5)
        ari = np.round(metrics.ari(y, y_pred), 5)
        # if acc>=0.95:
        np.savetxt(save_dir + '/features.txt', features)
        np.savetxt(save_dir + '/labels.txt', y_pred)
        np.savetxt(save_dir + '/border.txt', y_border)
        from Draw_border import draw
        draw(save_dir)
        logdict = dict(iter=0,
                       acc=acc,
                       nmi=nmi,
                       ari=ari,
                       center_num=center_num)
        logwriter.writerow(logdict)
        logfile.flush()
        print('Iter %d: acc=%.5f, nmi=%.5f, ari=%.5f; center_num=%d' %
              (0, acc, nmi, ari, center_num))
        logfile.close()

        return y_pred
def init_center_with_kmeans(n_clusters, X, dims, y_real=None, device="cpu"):
    model = Encoder(dims)
    enc_dec_model = {k[2:]: v for k, v in torch.load("enc_dec_model").items()}
    model.load_state_dict(enc_dec_model, strict=False)
    model = model.to(device)
    model.eval()
    with torch.no_grad():
        X_encoded = model(torch.from_numpy(X).to(device))

    kmeans = KMeans(n_clusters=n_clusters, n_init=20)
    y_pred = kmeans.fit_predict(X_encoded.cpu().numpy())
    if y_real is not None:
        print("kmeans acc: {}".format(metrics.acc(y_real, y_pred)))
    np.save("cluster_centers.npy", kmeans.cluster_centers_)
Example no. 21
def train_model(net, optimizer, criterion, train_loader):
    train_loss = 0.0
    net.train()
    accs = []
    for data, target in train_loader:
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = net(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        train_loss += loss.item() * data.size(0)
        accs.append(metrics.acc(output.detach(), target))
    return train_loss, np.mean(accs)
Example no. 22
def train_feature(net1, train_data):
    map_dict = read_pkl()
    if torch.cuda.is_available():
        net1 = torch.nn.DataParallel(net1, device_ids=[0])
        net1 = net1.cuda()
    prev_time = datetime.now()

    for i_dir in range(classnum):
        if not os.path.isdir('./data/' + str(i_dir)):
            os.makedirs('./data/' + str(i_dir))
    label_np = np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0] * 10).reshape(10, 10)
    # label_np = np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    #                      0, 0, 0, 0, 0, 0, 0, 0, 0, 0] * 20).reshape(20, 20)

    label2 = []
    idx2 = []
    for im, label in tqdm(train_data, desc="Processing train data: "):
        im = im.cuda()
        feat = net1(im)
        for i in range(feat.size(0)):
            distance_list = list()
            for ui_50D_label in map_dict.values():
                distance = sum(sum((ui_50D_label.float().cuda() - feat[i])**2))
                distance_list.append(distance.item())
            idx = distance_list.index(min(distance_list))
            save_image(
                inver_transform2(im[i]), './data/' + str(idx) + '/' +
                str(random.randint(1, 10000000)) + '.png')
            label_np[idx][label[i].item()] += 1
            label2.append(idx)
        label1 = label.numpy()
        # for _,i in enumerate(label):
        #     idx2.append(i)
        for i in label1:
            idx2.append(i)

    t2 = np.array(idx2)
    t1 = np.array(label2)
    # print(t2.shape)
    # t2 = t2.reshape([t1.size,-1]).squeeze(0)
    print('acc=%.4f, nmi=%.4f, ari=%.4f' %
          (metrics.acc(t1, t2), metrics.nmi(t1, t2), metrics.ari(t1, t2)))

    corr_num = 0
    for item in label_np:
        corr_num += item.max()
    corr = corr_num / label_np.sum()
    print(corr)
    np.save('./model/MNIST/feature/' + str(feat.size(1)) + '_' + '.npy',
            label_np)
Example no. 23
    def load_pretrain_weights(self, vade, weights_path, dataset, inputs, Y):
        vade.load_weights(weights_path + dataset + '.h5')
        sample = self.sample_output.predict(inputs, batch_size=self.batch_size)

        kmeans = KMeans(n_clusters=self.n_centroid, n_init=20)
        kmeans.fit(sample)
        self.u_p.set_value(self.floatX(kmeans.cluster_centers_.T))
        y_pred = kmeans.predict(sample)

        gam = self.gamma_output.predict(inputs, batch_size=self.batch_size)
        gam_acc = metrics.acc(np.argmax(gam, axis=1), Y)
        print('pretrain weights loaded!')
        print('Initial_acc:', gam_acc)
        return vade
Example no. 24
    def gmm_kmeans_cluster(self, dataloader):
        use_cuda = torch.cuda.is_available()
        if use_cuda:
            self.cuda()

        self.eval()
        data = []
        Y = []
        for batch_idx, (inputs, y) in enumerate(dataloader):
            inputs = inputs.view(inputs.size(0), -1).float()
            if use_cuda:
                inputs = inputs.cuda()
            inputs = Variable(inputs)
            _, _, _, mu, _ = self.forward(inputs)
            data.append(mu.data.cpu().numpy())
            Y.append(y.numpy())
        data = np.concatenate(data)
        Y = np.concatenate(Y)
        gmm = GaussianMixture(n_components=self.n_centroids,
                              covariance_type='full')
        gmm.fit(data)
        y_pred_gmm = gmm.predict(data)
        acc = np.round(metrics.acc(Y, y_pred_gmm), 5)
        nmi = np.round(metrics.nmi(Y, y_pred_gmm), 5)
        ari = np.round(metrics.ari(Y, y_pred_gmm), 5)
        print(
            'GMM fit of AutoEncoder embedding: acc = %.5f, nmi = %.5f, ari = %.5f'
            % (acc, nmi, ari))

        km = KMeans(n_clusters=self.n_centroids, n_init=20)
        y_pred_kmeans = km.fit_predict(data)
        acc = np.round(metrics.acc(Y, y_pred_kmeans), 5)
        nmi = np.round(metrics.nmi(Y, y_pred_kmeans), 5)
        ari = np.round(metrics.ari(Y, y_pred_kmeans), 5)
        print(
            'Kmeans clustering of AutoEncoder embedding: acc = %.5f, nmi = %.5f, ari = %.5f'
            % (acc, nmi, ari))
Example no. 25
 def on_epoch_end(self, epoch, logs=None):
     if epochs < 10 or epoch % int(epochs / 10) != 0:
         return
     feature_model = Model(
         self.model.input,
         self.model.get_layer(
             index=int(len(self.model.layers) / 2)).output)
     features = feature_model.predict(self.x)
     km = KMeans(n_clusters=len(np.unique(self.y)),
                 n_init=20,
                 n_jobs=4)
     y_pred = km.fit_predict(features)
     print(' ' * 8 + '|==>  acc: %.4f,  nmi: %.4f  <==|' %
           (metrics.acc(self.y, y_pred),
            metrics.nmi(self.y, y_pred)))
Example no. 26
def generate_predictions(vocab_path, vocab_pos_freq_path, vocab_neg_freq_path,
                         prior_pos_path, prior_neg_path, pos_test, neg_test):
    with open(vocab_path, 'r') as f:
        vocabulary = f.read().splitlines()

    with open(vocab_pos_freq_path, 'r') as f:
        vocab_pos_freq = []
        pos_freq = f.read().splitlines()

        for item in pos_freq:
            vocab_pos_freq.append(float(item))

    with open(vocab_neg_freq_path, 'r') as f:
        vocab_neg_freq = []
        neg_freq = f.read().splitlines()

        for item in neg_freq:
            vocab_neg_freq.append(float(item))

    with open(prior_pos_path, 'r') as f:
        prior_pos = float(f.read().splitlines()[0])

    with open(prior_neg_path, 'r') as f:
        prior_neg = float(f.read().splitlines()[0])

    # Generate the predictions by using a saved model
    m = multiprocessing.Manager()
    preds = m.list()
    with multiprocessing.Pool(processes=multiprocessing.cpu_count() -
                              45) as pool:
        pool.map(
            partial(classifiers.apply_multinomial_NB, vocabulary, prior_pos,
                    prior_neg, vocab_pos_freq, vocab_neg_freq, 1, preds),
            pos_test)

    with multiprocessing.Pool(processes=multiprocessing.cpu_count() -
                              45) as pool:
        pool.map(
            partial(classifiers.apply_multinomial_NB, vocabulary, prior_pos,
                    prior_neg, vocab_pos_freq, vocab_neg_freq, 0, preds),
            neg_test)

    all_gt = np.array(preds)[:, 0]
    all_preds = np.array(preds)[:, 1]

    overall_accuracy = metrics.acc(all_preds, all_gt)
    print("The overall accuracy of the model is: ", overall_accuracy)
Example no. 27
def predict_using_uncertainty_separate_models(
        net1, net2, valid_loader, uncertainty_type='epistemic_softmax', T=25):
    """
    For Bayesian models
    """
    accs = []
    total_u1 = 0.0
    total_u2 = 0.0
    set1_selected = 0
    set2_selected = 0

    epi_or_ale, soft_or_norm = uncertainty_type.split('_')
    soft_or_norm = True if soft_or_norm == 'normalized' else False

    for i, (inputs, labels) in enumerate(valid_loader):
        inputs, labels = inputs.to(device), labels.to(device)
        pred1, epi1, ale1 = ue.get_uncertainty_per_batch(
            net1, inputs, T=T, normalized=soft_or_norm)
        pred2, epi2, ale2 = ue.get_uncertainty_per_batch(
            net2, inputs, T=T, normalized=soft_or_norm)

        if epi_or_ale == 'epistemic':
            u1 = np.sum(epi1, axis=1)
            u2 = np.sum(epi2, axis=1)
        elif epi_or_ale == 'aleatoric':
            u1 = np.sum(ale1, axis=1)
            u2 = np.sum(ale2, axis=1)
        elif epi_or_ale == 'both':
            u1 = np.sum(epi1, axis=1) + np.sum(ale1, axis=1)
            u2 = np.sum(epi2, axis=1) + np.sum(ale2, axis=1)
        else:
            raise ValueError("Not correct uncertainty type")

        total_u1 += np.sum(u1).item()
        total_u2 += np.sum(u2).item()

        set1_preferred = u2 > u1  # idx where set1 has less uncertainty
        set1_preferred = np.expand_dims(set1_preferred, 1)
        preds = np.where(set1_preferred, pred1, pred2)

        set1_selected += np.sum(set1_preferred)
        set2_selected += np.sum(~set1_preferred)

        accs.append(metrics.acc(torch.tensor(preds), labels))

    return np.mean(accs), set1_selected/(set1_selected + set2_selected), \
        set2_selected/(set1_selected + set2_selected), total_u1, total_u2
Example no. 28
def train_model(net,
                optimizer,
                criterion,
                trainloader,
                num_ens=1,
                beta_type=0.1):
    net.train()
    training_loss = 0.0
    accs = []
    kl_list = []
    freq = cfg.recording_freq_per_epoch
    freq = len(trainloader) // freq
    for i, (inputs, labels) in enumerate(trainloader, 1):
        cfg.curr_batch_no = i
        if i % freq == 0:
            cfg.record_now = True
        else:
            cfg.record_now = False

        optimizer.zero_grad()

        inputs, labels = inputs.to(device), labels.to(device)
        outputs = torch.zeros(inputs.shape[0], net.num_classes,
                              num_ens).to(device)

        kl = 0.0
        for j in range(num_ens):
            net_out, _kl = net(inputs)
            kl += _kl
            outputs[:, :, j] = F.log_softmax(net_out, dim=1)

        kl = kl / num_ens
        kl_list.append(kl.item())
        log_outputs = utils.logmeanexp(outputs, dim=2)

        beta = metrics.get_beta(i - 1, len(trainloader), beta_type)
        loss = criterion(log_outputs, labels, kl, beta)
        loss.backward()
        optimizer.step()

        accs.append(metrics.acc(log_outputs.data, labels))
        training_loss += loss.cpu().data.numpy()
    return training_loss / len(trainloader), np.mean(accs), np.mean(kl_list)
Example no. 29
def test_model(net, criterion, testloader, num_ens=10):
    """Calculate ensemble accuracy and NLL Loss"""
    net.eval()
    test_loss = 0.0
    accs = []

    for i, (inputs, labels) in enumerate(testloader):
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = torch.zeros(inputs.shape[0], net.num_classes,
                              num_ens).to(device)
        kl = 0.0
        for j in range(num_ens):
            net_out, _kl = net(inputs)
            kl += _kl
            outputs[:, :, j] = F.log_softmax(net_out, dim=1).data

        log_outputs = utils.logmeanexp(outputs, dim=2)
        test_loss += criterion(log_outputs, labels, kl).item()
        accs.append(metrics.acc(log_outputs, labels))

    return test_loss / len(testloader), np.mean(accs)
def fold_acc():
    gt = [1] * 100 + [0] * 100
    #system_a_pred = np.loadtxt("data/trained_models/predictions/cross_fold/0/unigram_false_bigram_true_laplace_false_stopwords_false.txt")
    #system_b_pred = np.loadtxt("data/trained_models/predictions/cross_fold/0/unigram_true_bigram_false_laplace_false_stopwords_false.txt")

    overall_acc = []
    for i in range(0, 10):
        system_a_pred = np.loadtxt(
            "data/trained_models_new/predictions/cross_fold/" + str(i) +
            "/unigram_true_bigram_true_laplace_true_stopwords_false.txt")
        fold_acc = metrics.acc(system_a_pred, gt)
        print("Accuracy for fold: ", str(i), " is ", fold_acc)
        overall_acc.append(fold_acc)

    overall_acc = np.asarray(overall_acc)
    all_acc = overall_acc.sum() / 10.

    print("The overall accuracy is: ", all_acc)

    print("The mean is: ", np.mean(overall_acc))
    print("The standard deviation is: ",
          math.sqrt(np.mean(abs(overall_acc - np.mean(overall_acc)) ** 2)))
Example no. 31
def test(net1, test_data):
    #
    if torch.cuda.is_available():
        net1 = torch.nn.DataParallel(net1, device_ids=[0])
        net1 = net1.cuda()
    #
    label2 = []
    idx2 = []
    for im, label in tqdm(test_data, desc="Processing train data: "):
        im = im.cuda()
        _, feat = net1(im)
        for i in range(feat.size(0)):
            distance = feat[i].cpu().numpy().tolist()
            idx = distance.index(max(distance))
            label2.append(idx)
        label1 = label.numpy()
        for i in label1:
            idx2.append(i)
    t2 = np.array(idx2)
    t1 = np.array(label2)

    return metrics.acc(t2, t1), metrics.nmi(t2, t1)
 def score(self, X, y):
     """Returns a score of how well the classifier is trained to predict the data."""
     return acc(y, self.trainer.testOnClassData(dataset=convert_to_pybrain_dataset(X,y)))
        classifiers = [
            adaboost.train(Xt1, Yt1),
            extra_randomized_trees.train(Xt1, Yt1),
            gradient_boost.train(Xt1, Yt1),
            random_forest.train(Xt1, Yt1),
            logistic_regression.train(Xt1, Yt1),
            ]

        # Train another classifier on the ensemble's output training predictions
        # for each sample in the training data
        training_predictions = np.mat([[c.predict(sample)[0] for c in classifiers] for sample in Xt1])

        meta_classifier = logistic_regression.train(training_predictions, Yt1)

        # Check results on training data
        print "Accuracy for individual classifiers:", [acc(Yt2, c.predict(Xt2)) for c in classifiers]
        predictions = np.mat([c.predict(Xt2) for c in classifiers]).transpose()
        print "Accuracy for ensemble classifier:", acc(Yt2, meta_classifier.predict(predictions))

    else:
        # Now, we train each classifier on the training data
        classifiers = [
            adaboost.train(Xt, Yt),
            extra_randomized_trees.train(Xt, Yt),
            gradient_boost.train(Xt, Yt),
            random_forest.train(Xt, Yt),
            logistic_regression.train(Xt, Yt),
            ]
        # Train another classifier on the ensemble's output training predictions
        # for each sample in the training data
        training_predictions = np.mat([[c.predict(sample)[0] for c in classifiers] for sample in Xt])
import sys

from sklearn import tree

def classify(Xtrain, Ytrain):
    """ Use entirety of provided X, Y to predict

    Arguments
    Xtrain -- Training data
    Ytrain -- Training prediction

    Returns
    ready_tree -- a tree fitted to Xtrain and Ytrain
    """
    ready_tree = tree.DecisionTreeClassifier()
    ready_tree.fit(Xtrain, Ytrain)
    return ready_tree

if __name__ == "__main__":
    # Let's take our training data and train a decision tree
    # on a subset. Scikit-learn provides a good module for cross-
    # validation.

    if len(sys.argv) < 2:
        print "Usage: $ python decision-tree.py /path/to/data/file/"
    else:
        training = sys.argv[1]
        X,Y,n,f = load_data(training)
        Xt, Xv, Yt, Yv = shuffle_split(X,Y)
        tree = classify(Xt, Yt)
        print "Decision Tree Accuracy:",acc(Yv, tree.predict(Xv)),"%"