Example #1
def train(snapshotroot, ensembleType, numTrees, depth, seed=0):
    xtrain, ytrain, xtest, ytest = datasets.load_har()

    # Labels
    ytrain = ytrain.astype(np.int32)
    ytest = ytest.astype(np.int32)

    xtrain, ytrain, xval, yval = balanced_shuffle(xtrain, ytrain, 5514)

    metric = "mlogloss"

    earlyStop = max(1, int(0.1 * numTrees))  # stop if the validation metric fails to improve for 10% of the boosting rounds

    clf = ensembleType(max_depth=depth,
                       use_label_encoder=False,
                       tree_method="exact",
                       n_estimators=numTrees,
                       random_state=seed)
    clf.fit(xtrain,
            ytrain,
            eval_set=[(xtrain, ytrain), (xval, yval)],
            eval_metric=metric,
            verbose=False,
            early_stopping_rounds=earlyStop)

    print(
        f"best iteration = {clf.best_iteration}, best_score = {clf.best_score}, best_ntree_limit = {clf.best_ntree_limit}"
    )

    results = clf.evals_result()
    ypred = clf.predict(xtest)

    acc = (ypred == ytest).mean()

    return acc, np.array(results["validation_1"][metric])
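
A minimal usage sketch for the function above, assuming xgboost's XGBClassifier is passed as ensembleType; the snapshotroot argument is accepted but never used in this variant, so None is passed here. The argument values are illustrative only.

# Hypothetical call, assuming xgboost is installed and the datasets / balanced_shuffle
# helpers used inside train() are importable in this module.
import xgboost as xgb

acc, val_curve = train(snapshotroot=None, ensembleType=xgb.XGBClassifier,
                       numTrees=100, depth=6, seed=0)
print(f"test accuracy = {acc}, validation-loss curve length = {val_curve.size}")
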
Example #2
def train(snapshotroot, ensembleType, numTrees, depth, seed=0):
    xtrain, ytrain, xtest, ytest = datasets.load_har()
    
    # Labels
    ytrain = ytrain.astype(np.int32)
    ytest = ytest.astype(np.int32)

    clf = ensembleType(random_state=seed, n_estimators=numTrees, max_features="sqrt", max_depth=depth)
    clf.fit(xtrain, ytrain)

    acc = clf.score(xtest, ytest)

    return acc
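
The same pattern with a scikit-learn ensemble; a sketch assuming RandomForestClassifier as the ensembleType (any estimator that accepts n_estimators, max_features, max_depth, and random_state would fit the slot), with snapshotroot again unused. The hyperparameter values are placeholders.

# Hypothetical call, assuming scikit-learn is installed and the datasets helper is importable.
from sklearn.ensemble import RandomForestClassifier

acc = train(snapshotroot=None, ensembleType=RandomForestClassifier,
            numTrees=100, depth=10)
print(f"test accuracy = {acc}")
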
Example #3
File: n2d.py  Project: snazari/n2d
    optimizer = 'adam'
    from datasets import load_mnist, load_mnist_test, load_usps, load_pendigits, load_fashion, load_har

    label_names = None
    if args.dataset == 'mnist':
        x, y = load_mnist()
    elif args.dataset == 'mnist-test':
        x, y = load_mnist_test()
    elif args.dataset == 'usps':
        x, y = load_usps()
    elif args.dataset == 'pendigits':
        x, y = load_pendigits()
    elif args.dataset == 'fashion':
        x, y, label_names = load_fashion()
    elif args.dataset == 'har':
        x, y, label_names = load_har()

    shape = [x.shape[-1], 500, 500, 2000, args.n_clusters]
    autoencoder = autoencoder(shape)

    hidden = autoencoder.get_layer(name='encoder_%d' % (len(shape) - 2)).output
    encoder = Model(inputs=autoencoder.input, outputs=hidden)

    pretrain_time = time()

    # Pretrain autoencoders before clustering
    if args.ae_weights is None:
        autoencoder.compile(loss='mse', optimizer=optimizer)
        batch_size = 256
        autoencoder.fit(
            x,
Example #4
def train(snapshotroot, device, forestType, numTrees, depth):
    xtrain, ytrain, xtest, ytest = datasets.load_har()

    # Labels
    ytrain = ytrain.astype(np.int32)
    ytest = ytest.astype(np.int32)

    xtrain, ytrain, xval, yval = balanced_shuffle(xtrain, ytrain, 5514)

    net = Net(forestType, numTrees, depth).to(device)
    criterion = nn.CrossEntropyLoss().to(device)

    # Transfer this data to the device
    xtrain = torch.from_numpy(xtrain).type(torch.float32).to(device)
    ytrain = torch.from_numpy(ytrain).type(torch.long).to(device)
    xval = torch.from_numpy(xval).type(torch.float32).to(device)
    yval = torch.from_numpy(yval).type(torch.long).to(device)
    xtest = torch.from_numpy(xtest).type(torch.float32).to(device)
    ytest = torch.from_numpy(ytest).type(torch.long).to(device)

    optimizer = optim.Adam(net.parameters(), lr=0.001)

    numEpochs = 500
    #batchSize=50
    batchSize = 500

    indices = [i for i in range(xtrain.shape[0])]

    bestEpoch = numEpochs - 1
    bestLoss = 1000.0

    valLosses = np.zeros([numEpochs])

    for epoch in range(numEpochs):
        random.shuffle(indices)

        xtrain = xtrain[indices, :]
        ytrain = ytrain[indices]

        runningLoss = 0.0
        count = 0
        for xbatch, ybatch in batches(xtrain, ytrain, batchSize):
            optimizer.zero_grad()

            outputs = net(xbatch)
            loss = criterion(outputs, ybatch)

            loss.backward()

            optimizer.step()

            runningLoss += loss.item()  # take the Python float so the autograd graph is freed each step
            count += 1

        meanLoss = runningLoss / count

        snapshotFile = os.path.join(snapshotroot, f"epoch_{epoch}")
        torch.save(net.state_dict(), snapshotFile)

        runningLoss = 0.0
        count = 0

        with torch.no_grad():
            net.train(False)
            #for xbatch, ybatch in batches(xval, yval, batchSize):
            for xbatch, ybatch in zip([xval], [yval]):
                outputs = net(xbatch)
                loss = criterion(outputs, ybatch)

                runningLoss += loss.item()
                count += 1

            net.train(True)

        valLoss = runningLoss / count

        if valLoss < bestLoss:
            bestLoss = valLoss
            bestEpoch = epoch

        valLosses[epoch] = valLoss

        #print(f"Info: epoch = {epoch}, loss = {meanLoss}, validation loss = {valLoss}")

    snapshotFile = os.path.join(snapshotroot, f"epoch_{bestEpoch}")

    net = Net(forestType, numTrees, depth)
    net.load_state_dict(torch.load(snapshotFile, map_location="cpu"))
    net = net.to(device)

    totalCorrect = 0
    count = 0

    with torch.no_grad():
        net.train(False)
        #for xbatch, ybatch in batches(xtest, ytest, batchSize):
        for xbatch, ybatch in zip([xtest], [ytest]):
            outputs = net(xbatch)
            outputs = torch.argmax(outputs, dim=1)

            tmpCorrect = torch.sum(outputs == ybatch)

            totalCorrect += tmpCorrect
            count += xbatch.shape[0]

    accuracy = float(totalCorrect) / float(count)
    print(
        f"Info: Best epoch = {bestEpoch}, test accuracy = {accuracy}, misclassification rate = {1.0 - accuracy}",
        flush=True)

    return accuracy, valLosses
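
A hedged usage sketch for this training loop; Net and forestType come from the surrounding project (a differentiable decision-forest module), so the placeholder class name below is purely illustrative, and snapshotroot must be an existing directory because a checkpoint is written every epoch.

# Hypothetical call; SomeForestModule stands in for the project's forest layer class
# and is not defined in this snippet. numTrees and depth are placeholder values.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
accuracy, valLosses = train(snapshotroot="snapshots", device=device,
                            forestType=SomeForestModule, numTrees=100, depth=7)
print(f"test accuracy = {accuracy}")
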
Example #5
    optimizer = 'adam'
    from datasets import load_mnist, load_mnist_test, load_usps, load_pendigits, load_fashion, load_har

    if args.dataset == 'mnist':
        x, y = load_mnist()
    elif args.dataset == 'mnist-test':
        x, y = load_mnist_test()
    elif args.dataset == 'usps':
        x, y = load_usps('data/usps')
    elif args.dataset == 'pendigits':
        x, y = load_pendigits('data/pendigits')
    elif args.dataset == 'fashion':
        x, y = load_fashion()
    elif args.dataset == 'har':
        x, y = load_har()

    shape = [x.shape[-1], 500, 500, 2000, args.n_clusters]
    autoencoder = autoencoder(shape)

    hidden = autoencoder.get_layer(name='encoder_%d' % (len(shape) - 2)).output
    encoder = Model(inputs=autoencoder.input, outputs=hidden)

    pretrain_time = time()

    # Pretrain autoencoders before clustering
    if args.ae_weights is None:
        autoencoder.compile(loss='mse', optimizer=optimizer)
        batch_size = 256
        autoencoder.fit(x,
                        x,
Example #6
File: main.py  Project: rymc/N2D-OOP
import n2d as nd
import random as rn
import numpy as np

import matplotlib
matplotlib.use('agg')  # select the non-interactive Agg backend before pyplot is imported
import matplotlib.pyplot as plt
import seaborn as sns
plt.style.use(['seaborn-white', 'seaborn-paper'])
sns.set_context("paper", font_scale=1.3)

import tensorflow as tf
from keras import backend as K

import datasets as data
x, y, y_names = data.load_har()

n_clusters = 6
harcluster = nd.n2d(x, nclust=n_clusters)

harcluster.preTrainEncoder(weights="har-1000-ae_weights.h5")

manifoldGMM = nd.UmapGMM(n_clusters)

harcluster.predict(manifoldGMM)

harcluster.visualize(y, y_names, dataset="har", nclust=n_clusters)
print(harcluster.assess(y))

from sklearn.cluster import SpectralClustering
import umap