コード例 #1
0
def train():
    """Train an LGBM regressor on autoencoder-compressed Numerai data.

    Loads the train/test datasets (downloading the current round first if
    they are missing locally), compresses the features with an autoencoder,
    tunes an LGBMRegressor with randomized search, reports per-era
    correlations, and saves the fitted model and validation correlations.
    """
    # Load local data; on any failure fall back to downloading the current
    # Numerai round and retry once.
    try:
        train_data = utilities.get_data(TRAIN_PATH)
        test_data = utilities.get_data(TEST_PATH)
    except Exception as e:
        print(e)
        num_api = numerapi.NumerAPI(PUBLIC_KEY, SECRET_GUY, verbosity="info")
        num_api.download_current_dataset(dest_path='../data/')
        train_data = utilities.get_data(TRAIN_PATH)
        test_data = utilities.get_data(TEST_PATH)

    feature_names = utilities.get_feature_names(train_data)
    x_train = train_data[feature_names]
    x_test = test_data[feature_names]

    # Autoencoder for dimensionality reduction.
    ae = AutoEncoder(x_train.shape, N_COMPONENTS)
    model = ae.build()
    model.compile(optimizer=OPT, loss=LOSS)
    model.fit(x_train, x_train, epochs=EPOCHS, batch_size=BATCH_SIZE,
              verbose=2, validation_data=(x_test, x_test))

    # Get the autoencoder representation of both splits.
    x_train_ae = model.predict(x_train)
    x_test_ae = model.predict(x_test)

    # Train an LGBMRegressor - randomized search with cross validation.
    # NOTE(review): only the first 100 rows are used for tuning/fitting,
    # presumably to keep the search fast - confirm this is intentional.
    lgb = LGBMRegressor()
    lgb_randomsearch = RandomizedSearchCV(estimator=lgb, cv=CV,
                                          param_distributions=params,
                                          n_iter=100)
    lgb_model = lgb_randomsearch.fit(x_train_ae[:100],
                                     train_data['target'][:100])
    # Bug fix: the refit best estimator was previously created and then
    # ignored (predictions and pickling used the search object instead).
    lgb_model_best = lgb_model.best_estimator_
    lgb_model_best = lgb_model_best.fit(x_train_ae[:100],
                                        train_data['target'][:100])

    print("Generating all predictions...")
    train_data['prediction'] = lgb_model_best.predict(x_train_ae)
    test_data['prediction'] = lgb_model_best.predict(x_test_ae)

    train_corrs = evaluation.per_era_score(train_data)
    print('train correlations mean: {}, std: {}'.format(
        train_corrs.mean(), train_corrs.std(ddof=0)))

    valid_data = test_data[test_data.data_type == 'validation']
    valid_corrs = evaluation.per_era_score(valid_data)
    print('valid correlations mean: {}, std: {}'.format(
        valid_corrs.mean(), valid_corrs.std(ddof=0)))

    # Pickle and save the best tuned model.
    with open('lgbm_model_round_253.pkl', 'wb') as f:
        pickle.dump(lgb_model_best, f)

    # Persist per-era validation correlations.
    # NOTE(review): despite the file name this saves correlations, not raw
    # predictions - confirm which artifact downstream consumers expect.
    valid_corrs.to_csv('valid_predictions.csv')
コード例 #2
0
    def __init__(self, **args):
        """Set up the DEAP toolbox, statistics and GA hyper-parameters.

        Required keyword arguments:
            x: numpy array of shape (n x N).
            num_features: number of features to select.

        Optional keyword arguments (defaults in parentheses): mu (0.5),
        sigma (0.5), alpha (0.9), indpb (0.1), tournsize (2), debug (0),
        pop_size (300), number_generations (100), cxpb (0.9), mutpb (0.1).
        """
        args = Namespace(**args)

        self.toolbox = base.Toolbox()

        # Track per-generation fitness statistics of the population.
        self.stats = tools.Statistics(key=lambda ind: ind.fitness.values[0])
        for stat_name, stat_fn in (("avg", np.mean), ("std", np.std),
                                   ("min", np.min), ("max", np.max)):
            self.stats.register(stat_name, stat_fn)

        # Sequential evaluation (a pool.map could be plugged in here).
        self.map_func = map

        # Mandatory arguments - fail fast when missing.
        if not hasattr(args, 'x'):
            raise ValueError(
                "variable 'x' must be given as numpy array of shape (n x N)")
        x = args.x

        if not hasattr(args, 'num_features'):
            raise ValueError("variable 'num_features' must be given")
        num_features = args.num_features

        # Optional GA operator parameters, defaulted when absent.
        args.mu = getattr(args, 'mu', 0.5)
        args.sigma = getattr(args, 'sigma', 0.5)
        args.alpha = getattr(args, 'alpha', 0.9)
        args.indpb = getattr(args, 'indpb', 0.1)
        args.tournsize = getattr(args, 'tournsize', 2)

        # Optional run configuration, defaulted when absent.
        self.debug = getattr(args, 'debug', 0)
        self.pop_size = getattr(args, 'pop_size', 300)
        self.num_gen = getattr(args, 'number_generations', 100)
        self.cxpb = getattr(args, 'cxpb', 0.9)
        self.mutpb = getattr(args, 'mutpb', 0.1)

        self.ae = AutoEncoder(x, num_features, random_seed=1234, use_gpu=True)
        self.w_shape = (x.shape[0], num_features)

        # Individuals are flat gaussian-initialised vectors of length
        # num_features * n.
        self.toolbox.register("attr_x", np.random.normal, 0, 1)
        self.toolbox.register("individual", tools.initRepeat,
                              creator.Individual, self.toolbox.attr_x,
                              num_features * x.shape[0])
        self.toolbox.register("population", tools.initRepeat, list,
                              self.toolbox.individual)

        # Genetic operators: blend crossover, gaussian mutation,
        # tournament selection.
        self.toolbox.register("mate", tools.cxBlend, alpha=args.alpha)
        self.toolbox.register("mutate",
                              tools.mutGaussian,
                              mu=args.mu,
                              sigma=args.sigma,
                              indpb=args.indpb)
        self.toolbox.register("select",
                              tools.selTournament,
                              tournsize=args.tournsize)
コード例 #3
0
ファイル: mnist_dem.py プロジェクト: hengyuan-hu/dem

if __name__ == '__main__':
    import keras.backend as K
    import numpy as np
    from autoencoder import AutoEncoder
    from dataset_wrapper import MnistWrapper
    import mnist_ae
    import dem_trainer
    import hmc
    import utils

    # Create a TF session and register it with Keras so both share state.
    sess = utils.create_session()
    K.set_session(sess)
    dataset = MnistWrapper.load_default()
    # Build the autoencoder around the mnist_ae encode/decode nets and
    # reload previously trained weights from the checkpoint folder.
    ae = AutoEncoder(dataset, mnist_ae.encode, mnist_ae.decode,
                     mnist_ae.RELU_MAX, 'test/mnist_dem/ae')
    ae.build_models('test/mnist_dem/ae') # load weights

    # Sanity check: print weight/bias sums of the first encoder layer
    # (Python 2 print statement).
    l1_weights = ae.encoder.layers[1].get_weights()
    print 'l1 weights sum: %s, bias sum: %s' % (
        l1_weights[0].sum(), l1_weights[1].sum())

    # Flip to True to (re)train the autoencoder instead of only loading it.
    train_autoencoder = False
    if train_autoencoder:
        num_epoch = 10
        # Decaying learning-rate schedule starting at 0.1.
        lr_schedule = utils.generate_decay_lr_schedule(num_epoch, 0.1, 1)
        ae.train(128, num_epoch, lr_schedule)
        ae.save_models()
        ae.test_models(utils.vis_mnist)
        ae.log()
コード例 #4
0
def startLearning(bs, me, f, p, l):
    """Train the hourglass autoencoder on landmarks and expressions.

    Args:
        bs: batch size.
        me: maximum number of epochs.
        f: ReduceLROnPlateau factor.
        p: ReduceLROnPlateau patience.
        l: initial Adam learning rate.

    Side effects: logs to TensorBoard and text files, saves checkpoints /
    best model, and periodically sends a remote progress report.

    NOTE(review): relies on a module-level `device` plus the Dataset,
    AutoEncoder, save_checkpoint and send helpers - confirm they are in
    scope at call time.
    """
    # Init Tensorboard
    writer = SummaryWriter()
    batch_size = bs
    max_epochs = me
    factor = f
    patience = p
    lr = l

    print("load dataset")
    # Training / validation datasets packed in HDF5 with per-split
    # normalisation images.
    training_loader = torch.utils.data.DataLoader(dataset=Dataset(
        'training_dataset_pack.h5', "std_training.png", "mean_training.png"),
                                                  batch_size=batch_size,
                                                  shuffle=True,
                                                  num_workers=0)
    validation_loader = torch.utils.data.DataLoader(dataset=Dataset(
        'validation_dataset_pack.h5', "std_validation.png",
        "mean_validation.png"),
                                                    batch_size=batch_size,
                                                    shuffle=True,
                                                    num_workers=0)

    print("Done")

    # Make model
    model = AutoEncoder(training_loader.dataset.getInputSize()).cuda()

    # Losses: cross-entropy for expression classes, MSE for landmarks.
    criterion_expressions = nn.CrossEntropyLoss().cuda()
    criterion_landmarks = nn.MSELoss().cuda()

    # Define the optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, betas=(0.9, 0.999))

    # Lower the learning rate when the validation loss plateaus.
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           mode='min',
                                                           factor=factor,
                                                           patience=patience)

    best_loss = None

    # Main loop (epoch)
    for epoch in range(1, max_epochs + 1):

        is_best = False
        print("Training...")
        # Progress bar for the training phase.
        pbart = tqdm.tqdm(total=int(len(training_loader.dataset) / batch_size),
                          postfix={
                              "loss": None,
                              "accuracy": None
                          },
                          desc="Epoch: {}/{}".format(epoch, max_epochs))

        # Running metrics for this epoch.
        loss = 0.
        loss_lm = 0.
        loss_expr = 0.
        acc = 0.
        val_loss = 0.
        val_acc = 0.
        val_loss_lm = 0.
        val_loss_expr = 0.

        # Training loop
        for i, data in enumerate(training_loader, 0):
            # Zero the parameter gradients
            optimizer.zero_grad()

            # Get the inputs
            images, landmarks, expressions = data
            images = images.to(device)
            landmarks = landmarks.to(device).float()
            expressions = expressions.to(device).long()

            # Get the outputs
            outputs = model(images)

            # Weighted multi-task loss: landmarks dominate, expressions
            # contribute with a 1e-4 weight.
            loss_landmarks = criterion_landmarks(outputs[0], landmarks.float())
            loss_expressions = criterion_expressions(outputs[1], expressions)
            current_loss = loss_landmarks + 0.0001 * loss_expressions
            loss_expr += loss_expressions.item()
            loss_lm += loss_landmarks.item()
            loss += current_loss.item()
            # Expression accuracy
            _, predicted_expressions = torch.max(outputs[1], 1)
            acc += (predicted_expressions
                    == expressions).sum().float() / batch_size

            # Backpropagation
            current_loss.backward()
            optimizer.step()

            # Update the progress bar with running averages.
            pbart.update(1)
            pbart.set_postfix({
                "loss": loss / (i + 1),
                "e_loss": loss_expr / (i + 1),
                "l_loss": loss_lm / (i + 1),
                "acc_e": acc.item() / (i + 1)
            })
        pbart.close()

        # Average the training metrics over the epoch.
        loss /= (len(training_loader.dataset) / batch_size)
        acc /= (len(training_loader.dataset) / batch_size)

        # Append training metrics to the log file; the `with` block closes
        # the handle, so no explicit close() is needed (the original
        # called f.close() on an already-closed file).
        with open("log/training_hourglass5.0.4.log", "a") as log_file:
            log_file.write(
                "epoch: {} / {} loss: {} e_loss:{} l_loss: {} accuracy: {}\n".
                format(epoch, max_epochs, loss, loss_expr, loss_lm, acc))

        print("Validation...")
        # Progress bar for the validation phase.
        pbarv = tqdm.tqdm(total=int(
            len(validation_loader.dataset) / batch_size),
                          postfix={
                              "loss": None,
                              "accuracy": None
                          },
                          desc="Epoch: {}/{}".format(epoch, max_epochs))

        # Validation loop (no gradients needed).
        with torch.no_grad():
            for i, data in enumerate(validation_loader, 0):
                # Get the inputs
                images, landmarks, expressions = data
                images = images.to(device)
                landmarks = landmarks.to(device).float()
                expressions = expressions.to(device).long()

                # Get the outputs
                outputs = model(images)

                # Same weighted loss as in training. Use a dedicated
                # variable so the epoch-averaged training `loss` logged to
                # TensorBoard below is not clobbered (bug fix: the original
                # reused `loss` here, so 'data/Loss training' actually
                # recorded the last validation batch loss).
                loss_landmarks = criterion_landmarks(outputs[0],
                                                     landmarks.float())
                loss_expressions = criterion_expressions(
                    outputs[1], expressions)
                batch_loss = loss_landmarks + 0.0001 * loss_expressions
                val_loss += batch_loss.item()
                val_loss_expr += loss_expressions.item()
                val_loss_lm += loss_landmarks.item()

                # Expression accuracy
                _, predicted_expressions = torch.max(outputs[1], 1)
                val_acc += (predicted_expressions
                            == expressions).sum().float() / batch_size

                # Update validation progress bar
                pbarv.update(1)
                pbarv.set_postfix({
                    "loss": val_loss / (i + 1),
                    "e_loss": val_loss_expr / (i + 1),
                    "l_loss": val_loss_lm / (i + 1),
                    "acc_e": val_acc.item() / (i + 1)
                })

        pbarv.close()

        # Average the validation metrics over the epoch.
        val_loss /= (len(validation_loader.dataset) / batch_size)
        val_acc /= (len(validation_loader.dataset) / batch_size)

        # Checkpoint every epoch; flag the best validation loss so far.
        if best_loss is None or val_loss < best_loss:
            best_loss = val_loss
            is_best = True
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'loss': val_loss,
                'loss_expressions': val_loss_expr,
                'loss_landmarks': val_loss_lm,
                'accuracy': val_acc,
                'optimizer': optimizer.state_dict()
            }, is_best, "save_model/checkpoint_hourglass5.0.4.pth",
            "save_model/best_model_validation5.0.4.pth")
        is_best = False

        scheduler.step(val_loss)

        # Append validation metrics to the log file.
        with open("log/validation_hourglass5.0.4.log", "a") as log_file:
            log_file.write(
                "epoch: {} / {} loss: {} e_loss:{} l_loss: {} accuracy: {}\n".
                format(epoch, max_epochs, val_loss, val_loss_expr, val_loss_lm,
                       val_acc))

        # TensorBoard scalars. NOTE(review): loss_lm/loss_expr and their
        # val_* counterparts are epoch sums, not averages - confirm.
        writer.add_scalar('data/Loss training', loss, epoch)
        writer.add_scalar('data/Loss landmarks training', loss_lm, epoch)
        writer.add_scalar('data/Loss expressions training', loss_expr, epoch)

        writer.add_scalar('data/Loss validation', val_loss, epoch)
        writer.add_scalar('data/Loss landmarks validation', val_loss_lm, epoch)
        writer.add_scalar('data/Loss expressions validation', val_loss_expr,
                          epoch)

        writer.add_scalar('data/Accuracy training', acc, epoch)
        writer.add_scalar('data/Accuracy validation', val_acc, epoch)

        # Send a remote progress report on epochs 1, 6, 11, ... and the last.
        if (epoch % 5 == 1 or epoch == max_epochs):
            desc = dict()
            desc["bs"] = batch_size
            desc["lr"] = lr
            desc["f"] = factor
            desc["p"] = patience
            desc["d"] = 0
            desc["weights"] = [1, 0.0001]
            desc["epoch"] = epoch
            desc["nbepochs"] = max_epochs

            try:
                send.sendInfos("4 (Hourglass 5.0.4)", desc, loss, acc, loss_lm,
                               "...", loss_expr, "...", val_loss, val_acc,
                               val_loss_lm, "...", val_loss_expr, val_acc)
            except Exception:
                # Best-effort reporting: never let a failed report kill
                # the training run.
                pass
コード例 #5
0
# Wrap the unlabeled dataset in a shuffling DataLoader; pin_memory speeds
# up host-to-GPU transfers.
dataloader = DataLoader(unlabeled_dataset,
                        batch_size=batch_size,
                        shuffle=True,
                        num_workers=num_workers,
                        pin_memory=True)

# Define model with hyperparameters
if model_architecture == "Res34":
    # ResNet-34 encoder variant with a 3-stage decoder.
    model = AutoEncoder_ResEncoder(n_channels=n_channels,
                                   n_decoder_filters=[64, 32, 16],
                                   trainable=True).to(device)
    model.apply(AutoEncoder_ResEncoder.init_weights
                )  # initialize model parameters with normal distribution
else:  # noRes
    # Plain conv autoencoder.
    # NOTE(review): trainable=False here vs trainable=True in the Res34
    # branch - confirm the asymmetry is intentional.
    model = AutoEncoder(n_channels=n_channels,
                        n_encoder_filters=[32, 64, 64, 16],
                        n_decoder_filters=[64, 64, 32],
                        trainable=False).to(device)
    model.apply(AutoEncoder.init_weights)  # Initialize weights

# Use data parallelism when more than one GPU is visible.
if torch.cuda.device_count() > 1:
    model = nn.DataParallel(model)
print("Now using", torch.cuda.device_count(), "GPU(s) \n")
model.to(device)

# Select loss
if loss_type == "MSE":
    criterion = nn.MSELoss().to(device)
elif loss_type == "L1":
    criterion = nn.L1Loss().to(device)
else:  # loss_type == "SSIM":
    criterion = SSIM_Loss(data_range=1.0).to(device)
コード例 #6
0
# Training hyper-parameters.
num_epochs = 100
batch_size = 20
learning_rate = 1e-3

if args.fit:

    print("Autoencoder ccs item handler has started...")
    # Load MovieLens and mark items with few ratings as cold-start (ccs).
    mv = MovieLens()
    mv.create_cold_start_items(n_ratings_threshold=5)

    dataloader = DataLoader(mv,
                            batch_size=batch_size,
                            shuffle=True,
                            drop_last=True)
    # 21 input features compressed to a 5-d latent code.
    model = AutoEncoder(input_dim=21, latent_dim=5)
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=learning_rate,
                                 weight_decay=1e-5)
    AutoEncoder.fit(model, num_epochs, dataloader, criterion, optimizer)

    # For each cold-start item, repeatedly sample a user and encode that
    # user's rated non-cold-start items (loop continues beyond this view).
    for ccs_item in tqdm(mv.ccs_items()):
        print('ccs item:', ccs_item)
        while mv.is_ccs(ccs_item):
            u = mv.pick_random_user()
            print('user:', u)
            rated_ncs_items_by_u = mv.rated_ncs_items(u)
            u_rated_latents = [
                model.encode(mv.features(m)) for m in rated_ncs_items_by_u
            ]
コード例 #7
0
from dataset_wrapper import Cifar10Wrapper
import keras.backend as K


def compare_dataset():
    """Load the two encoded CIFAR-10 datasets and return them as a pair."""
    path_a = 'prod/test_relu6/encoded_cifar10.h5'
    path_b = 'prod/cifar10_ae2_relu_6/encoded_cifar10.h5'
    return (Cifar10Wrapper.load_from_h5(path_a),
            Cifar10Wrapper.load_from_h5(path_b))


if __name__ == '__main__':
    # Register a fresh TF session with Keras, load CIFAR-10 and a
    # previously trained autoencoder, then encode the dataset.
    K.set_session(utils.create_session())
    cifar10_dataset = Cifar10Wrapper.load_default()

    folder = 'prod/test_relu6'
    ae = AutoEncoder(cifar10_dataset, encode, decode, RELU_MAX, folder)
    ae.build_models(folder)  # load previously trained ae

    # num_epoch = 2
    # lr_schedule = utils.generate_decay_lr_schedule(num_epoch, 0.1, 1)
    # ae.train(128, num_epoch, lr_schedule)
    # ae.save_models()
    # ae.test_models(utils.vis_cifar10)
    # ae.log()

    # NOTE(review): the Cifar10Wrapper CLASS is passed here, not a dataset
    # instance - confirm ae.encode really expects the wrapper class.
    encoded_dataset = ae.encode(Cifar10Wrapper)
    # encoded_dataset.dump_to_h5(os.path.join(folder, 'encoded_cifar10.h5'))
    # encoded_dataset.plot_data_dist(os.path.join(folder, 'encoded_plot.png'))
コード例 #8
0
ファイル: stl_ae.py プロジェクト: hengyuan-hu/dem
    return x


if __name__ == '__main__':
    import keras.backend as K
    from autoencoder import AutoEncoder
    from dataset_wrapper import STL10Wrapper
    import utils

    K.set_session(utils.create_session())
    stl10_dataset = STL10Wrapper.load_from_h5('data/stl10.h5')

    # ----------normal relu pretraining----------
    # Stage 1: pretrain with an unbounded relu (relu_max=None).
    # (Python 2 print statements throughout this script.)
    print 'Training model with normal relu'
    folder = 'prod/stl10_ae_%d_inf' % LATENT_DIM
    ae = AutoEncoder(stl10_dataset, encode, decode, None, folder)
    ae.build_models()

    num_epoch = 150
    # Decaying learning-rate schedule starting at 0.1.
    lr_schedule = utils.generate_decay_lr_schedule(num_epoch, 0.1, 1)
    ae.train(128, num_epoch, lr_schedule)
    ae.save_models()
    ae.test_models(utils.vis_stl10)
    ae.log()

    # Encode the dataset with the trained model and persist it.
    encoded_dataset = ae.encode(STL10Wrapper)
    encoded_dataset.dump_to_h5(os.path.join(folder, 'encoded_stl10.h5'))
    # encoded_dataset.plot_data_dist(os.path.join(folder, 'encoded_plot.png'))

    # ----------truncate relu and fine-tune----------
    # Stage 2 (continues beyond this excerpt).
    print 'Training model with relu-%d' % RELU_MAX
コード例 #9
0
    return img


def to_img(img):
    '''
    Undo the per-channel normalisation: scale by the channel std and add
    back the channel mean. `img` is expected to broadcast against a
    3-element channel axis (e.g. a numpy array).
    '''
    channel_std = [46, 52, 51]
    channel_mean = [114, 108, 100]
    return img * channel_std + channel_mean


CUDA = torch.cuda.is_available()

model = AutoEncoder()

# Optionally resume: restore model weights and optimizer state from disk.
if args.load_model == 1:
    model.load_state_dict(torch.load('conv_autoencoder_weight.pt'))
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.learning_rate,
                                 weight_decay=1e-5)
    optimizer.load_state_dict(torch.load('conv_autoencoder_optimizer.pt'))

if CUDA:
    # Move the EXISTING model to the GPU. Bug fix: the original built a
    # brand-new AutoEncoder() here, silently discarding any weights loaded
    # via load_state_dict above.
    model = model.cuda()
criterion = nn.MSELoss()
if args.load_model == 0:
    # NOTE(review): this branch uses the module-level `learning_rate`
    # while the resume branch uses args.learning_rate - confirm intended.
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=learning_rate,
                                 weight_decay=1e-5)
コード例 #10
0
from autoencoder import AutoEncoder 
import torch	
# Module-level encoders used by convert(): ae1 maps 10-d layer-1 tensors
# to 2-d codes, ae2 maps 5-d layer-2 tensors to 2-d codes.
ae1 = AutoEncoder(10, 2)
ae2 = AutoEncoder(5, 2)
def convert(tensor_from_layer1, tensor_from_layer2, tensor_labels):
    """Convert batches of tensors into flat lists of 2-d encoded points.

    Each tensor from layer 1 (resp. layer 2) is encoded with the
    module-level ae1 (resp. ae2) into a 2-d code; the coordinates are
    returned both as separate x/y lists and as [x, y] pairs. Labels are
    unwrapped to scalars.

    Returns:
        (a1, b1, all_lst1, a2, b2, all_lst2, each_label) where a*/b* hold
        the first/second latent coordinate and all_lst* the paired lists.
    """
    a1, b1, all_lst1 = [], [], []
    a2, b2, all_lst2 = [], [], []
    each_label = []

    for tensor1 in tensor_from_layer1:
        # Encode and convert once per tensor (the original re-ran
        # detach().numpy() three times for the same values).
        code = ae1.encode(tensor1).detach().numpy()[0]
        a1.append(code[0])
        b1.append(code[1])
        all_lst1.append([code[0], code[1]])

    for tensor2 in tensor_from_layer2:
        code = ae2.encode(tensor2).detach().numpy()[0]
        a2.append(code[0])
        b2.append(code[1])
        all_lst2.append([code[0], code[1]])

    for tensor_l in tensor_labels:
        each_label.append(tensor_l.numpy()[0][0])

    return a1, b1, all_lst1, a2, b2, all_lst2, each_label

コード例 #11
0
# Strip directory components, keeping only the file names.
QueryName = [os.path.split(img_path)[1] for img_path in QueryName]
GalleryName = [os.path.split(img_path)[1] for img_path in GalleryName]

# Normalize all images
print("Normalizing query images")
QueryImgs = normalize_img(QueryImgs)
print("Normalizing gallery images")
GalleryImgs = normalize_img(GalleryImgs)

if args.model == 'convAE':

    # Build models
    # NOTE(review): "autoecoder" looks like a typo, but renaming the file
    # would break loading of existing checkpoints - confirm before fixing.
    autoencoderFile = os.path.join(OutputDir, "ConvAE_autoecoder.h5")
    print("autoencoder file", autoencoderFile)
    encoderFile = os.path.join(OutputDir, "ConvAE_encoder.h5")
    model = AutoEncoder(shape_img, autoencoderFile, encoderFile)
    model.set_arch()

    # Input/output tensor shapes of the encoder (without the batch dim).
    input_shape_model = tuple([int(x) for x in model.encoder.input.shape[1:]])
    output_shape_model = tuple(
        [int(x) for x in model.encoder.output.shape[1:]])

    # Loading model
    model.load_models(loss='mse', optimizer="adam")

    # Convert images to numpy array of right dimensions
    print("\nConverting to numpy array of right dimensions")
    X_query = np.array(QueryImgs).reshape((-1, ) + input_shape_model)
    X_gallery = np.array(GalleryImgs).reshape((-1, ) + input_shape_model)
    print(">>> X_query.shape = " + str(X_query.shape))
    print(">>> X_gallery.shape = " + str(X_gallery.shape))
コード例 #12
0
def makePlot(X_raw, Xs, ys, idx=None):
    """Plot raw features with ground-truth and predicted event markers.

    Args:
        X_raw: raw signal, 1-D or (time, features) array.
        Xs: model inputs of shape (n, 2 * input_dim).
        ys: ground-truth labels, indexed as ys[i, 0].
        idx: optional index appended to the output file names.

    Saves an .eps visualization plus .npy dumps of X_raw, truths and the
    (cleaned, binarized) predictions. Relies on module-level configuration:
    exp_name, args, device, checkpoint_path, threshold, tau, data_name.
    """
    print("# Xs: {}".format(Xs.shape))

    # Rebuild the model matching the experiment name, then load weights.
    if exp_name.startswith("FLOW"):
        model = NICEModel(input_dim=Xs.shape[1] // 2,
                          hidden_sizes=args['hidden_sizes'],
                          device=device).to(device)
    elif exp_name.startswith("AE"):
        model = AutoEncoder(input_dim=Xs.shape[1] // 2,
                            hidden_sizes=args['hidden_sizes'],
                            latent_dim=args['latent_dim']).to(device)
    elif exp_name.startswith("VAE"):
        model = VariationalAutoEncoder(input_dim=Xs.shape[1] // 2,
                                       hidden_sizes=args['hidden_sizes'],
                                       latent_dim=args['latent_dim'],
                                       device=device).to(device)
    else:
        # Fail loudly on an unknown experiment name (the original
        # `assert (False)` is stripped under `python -O`).
        raise ValueError("unknown exp_name: {}".format(exp_name))
    model.load_state_dict(torch.load(checkpoint_path, map_location=device))

    y_truths = ys
    Xs = Variable(torch.Tensor(Xs), requires_grad=False).to(device)
    print("Xs: {}".format(Xs.shape))

    y_preds = model.predict(Xs).detach().numpy()

    plt.clf()
    min_val = 1e30
    max_val = 0
    if len(X_raw.shape) > 1:
        # Multivariate: stack up to 14 feature traces with vertical offsets.
        plt.plot([], [], 'k-', linewidth='0.3', label='feature')
        for k in range(min(14, X_raw.shape[1])):
            offset = 400 * k
            plt.plot(range(X_raw.shape[0]),
                     X_raw[:, k] + offset,
                     'k-',
                     linewidth='0.3')
            mi, ma = min(X_raw[:, k] + offset), max(X_raw[:, k] + offset)
            min_val = min(min_val, mi)
            max_val = max(max_val, ma)
    else:
        plt.plot(range(X_raw.shape[0]), X_raw, 'b-', label="feature")
        min_val, max_val = min(X_raw), max(X_raw)

    prefix = 0  # args['window_size'] - 1
    # Vertical extent for ground-truth marker lines.
    truthLB = (2 * min_val + max_val) / 3
    truthUB = (-min_val + 9 * max_val) / 8
    plt.plot([], [], 'r--', label="truth")
    for i in range(y_truths.shape[0]):
        if np.sum(y_truths[i]) > 0:
            plt.plot([prefix + i, prefix + i], [truthLB, truthUB],
                     'r--',
                     linewidth='0.5')

    print("# threshold: %.6lf" % (threshold))
    y_preds = y_preds.reshape([-1])
    # Collapse prediction clusters around each true event into one spike.
    for i in range(y_truths.shape[0]):
        if y_truths[i, 0] > 0:
            exist = np.sum(y_preds[max(0, i - tau):i + tau + 1])
            if exist > 0:
                # Bug fix: clamp the window start at 0. With a raw
                # negative start (i < tau) the slice was empty and the
                # zeroing was silently skipped.
                y_preds[max(0, i - tau):i + tau + 1] *= 0
                y_preds[i] = 1
    # Suppress repeated false positives inside a tau-window with no truth.
    for i in range(y_preds.shape[0]):
        # Bug fix: same negative-start clamp when reading y_truths.
        if y_preds[i] >= threshold and np.sum(
                y_truths[max(0, i - tau):i + tau + 1]) == 0:
            pre = [
                k for k in range(max(0, i - tau), i) if y_preds[k] >= threshold
            ]
            if len(pre) > 1:
                y_preds[i] = 0

    # Vertical extent for prediction marker lines.
    predLB = (9 * min_val - max_val) / 8
    predUB = (min_val + 2 * max_val) / 3
    print("y_preds: {}".format(y_preds.shape))
    plt.plot([], [], 'c--', label="prediction")
    tot = 0
    for i in range(y_preds.shape[0]):
        if y_preds[i] >= threshold:
            y_preds[i] = 1
            tot += 1
            plt.plot([prefix + i, prefix + i], [predLB, predUB],
                     'c--',
                     linewidth='0.5')
    print("# tot = {}".format(tot))

    plt.xlabel('time')
    plt.ylabel('amplitude')
    plt.legend(loc='upper center', bbox_to_anchor=(0.5, -0.05), ncol=4)
    plt.title("{}-{}".format(data_name.upper(), exp_name.split("-")[0]))
    filename = "img/visualization-{}-{}{}.eps".format(
        data_name,
        exp_name.split("-")[0], "" if idx is None else "-{}".format(idx))
    plt.savefig(filename, bbox_inches='tight')

    # save preds, truths, and Xraw
    idxStr = "-{}".format(idx) if idx is not None else ""
    np.save("./log/{}-Xraw{}.npy".format(data_name, idxStr), X_raw)
    np.save("./log/{}-truths{}.npy".format(data_name, idxStr), y_truths)
    np.save(
        "./log/{}-preds-{}{}.npy".format(data_name,
                                         exp_name.split("-")[0], idxStr),
        y_preds)
コード例 #13
0
ファイル: run.py プロジェクト: wangzhupi/kuangjia021tinynn
def main(args):
    """Train (or load) a denoising autoencoder on MNIST.

    When args.load_model is set, loads the saved model and renders a
    latent-space transition between a few test digits instead of training.
    Otherwise trains on gaussian-noise-corrupted inputs against clean
    targets, saving sample images per epoch and the final model.
    """
    if args.seed >= 0:
        random_seed(args.seed)

    # create output directory for saving result images
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    # prepare and read dataset
    train_set, _, test_set = mnist(args.data_dir)
    train_x, train_y = train_set
    test_x, test_y = test_set

    # specify the encoder and decoder net structure
    encoder_net = Net([
        Dense(256),
        ReLU(),
        Dense(64)
    ])
    decoder_net = Net([
        ReLU(),
        Dense(256),
        Tanh(),
        Dense(784),
        Tanh()
    ])
    nets = (encoder_net, decoder_net)
    optimizers = (Adam(args.lr), Adam(args.lr))
    model = AutoEncoder(nets, loss=MSE(), optimizer=optimizers)

    # for pre-trained model, test generated images from latent space
    if args.load_model is not None:
        # load pre-trained model
        model.load(os.path.join(args.output_dir, args.load_model))
        print("Loaded model from %s" % args.load_model)  # typo fix: "fom"

        # transition from test[from_idx] to test[to_idx] in n steps
        idx_arr, n = [2, 4, 32, 12, 82], 160
        print("Transition in numbers", [test_y[i] for i in idx_arr],
              "in %d steps ..." % n)
        stops = [model.en_net.forward(test_x[i]) for i in idx_arr]
        k = int(n / (len(idx_arr) - 1))  # number of code per transition
        # generate all transition codes
        code_arr = []
        for i in range(len(stops) - 1):
            code_arr += [c.copy() for c in transition(stops[i], stops[i + 1], k)]
        # decode every latent code, then build the batch with a single
        # concatenate (the original concatenated inside the loop, which
        # is quadratic in the number of codes)
        decoded = [model.de_net.forward(code) for code in code_arr]
        batch = np.concatenate(decoded) if decoded else None
        output_path = os.path.join(args.output_dir, "genn-latent.png")
        save_batch_as_images(output_path, batch)
        quit()

    # train the auto-encoder on noisy inputs vs clean targets
    iterator = BatchIterator(batch_size=args.batch_size)
    for epoch in range(args.num_ep):
        for batch in iterator(train_x, train_y):
            origin_in = batch.inputs

            # corrupt the clean inputs with gaussian noise
            m = origin_in.shape[0]  # batch size
            mu = args.gaussian_mean  # mean
            sigma = args.gaussian_std  # standard deviation
            noises = np.random.normal(mu, sigma, (m, 784))
            noises_in = origin_in + noises  # noisy inputs

            # forward on noisy inputs, back-propagate against clean ones
            genn = model.forward(noises_in)
            loss, grads = model.backward(genn, origin_in)

            # apply gradients
            model.apply_grads(grads)
        print("Epoch: %d Loss: %.3f" % (epoch, loss))

        # save the generated images and noisy inputs of the last batch
        noises_in_path = os.path.join(
            args.output_dir, "ep%d-input.png" % epoch)
        genn_path = os.path.join(
            args.output_dir, "ep%d-genn.png" % epoch)
        save_batch_as_images(noises_in_path, noises_in, titles=batch.targets)
        save_batch_as_images(genn_path, genn, titles=batch.targets)

    # save the model after training
    model.save(os.path.join(args.output_dir, args.save_model))
コード例 #14
0
device = 'cpu'  # predict on CPU

# load data and obtain latent representations z
if data_name == "eeg":
    X_raw, y_raw = load_eeg_raw("{}data/raw/".format(base_path))
elif data_name == "syn":
    X_raw, y_raw = load_one_syn_raw("{}data/raw/".format(base_path), data_idx)
elif data_name == "har":
    X_raw, y_raw = load_har_raw("{}data/raw/".format(base_path))
else:
    assert(False)  # unknown data_name
print("y_raw: {}".format(y_raw.shape))
# Build overlapping windows, then encode each window to a latent vector
# with a pretrained autoencoder restored from checkpoint_path.
X_sliding = sliding_window(X_raw, args["window_size"])
X_variable = Variable(torch.Tensor(X_sliding), requires_grad=False).to(device)
auto_encoder = AutoEncoder(input_dim=X_sliding.shape[1],
                           hidden_sizes=args["hidden_sizes"],
                           latent_dim=args["latent_dim"],
                           ).to(device)
auto_encoder.load_state_dict(torch.load(checkpoint_path, map_location=device))
z = auto_encoder.encode(X_variable).detach().numpy()
print(z.shape)


def find_peaks(z):
    dists = np.sqrt(np.sum(np.diff(z, axis=0) ** 2, axis=1))
    print(dists.shape)

    def mean(xs):
        return sum(xs) * 1. / len(xs)

    # inspect width, i.e. for t we inspect [t-d, t+d]
    d = 50
コード例 #15
0
def train(block=200,
          data_name="bookcorpus",
          downsample=-1,
          dropout_rate=0.2,
          history=None,
          device="cuda:0",
          params=None):
    """Train an AutoEncoder mapping block-level TF-IDF vectors to target
    TF-IDF vectors, with 1 - cosine similarity as the loss.

    Args:
        block: TF-IDF block size used when the features were built.
        data_name: "bookcorpus" or "coda19"; anything else quits.
        downsample: if != -1, keep only this many training rows (seeded
            permutation, so the subset is reproducible).
        dropout_rate: dropout rate inside the AutoEncoder.
        history: when not None, load the "long" TF-IDF variant that carries
            history context.
        device: torch device string for model and batches.
        params: optional namespace overriding the default hyperparameters
            (hidden_size, epoch_num, batch_size, layer_num, learning_rate,
            early_stop).

    Side effects: saves the best checkpoint under ``model_dir``, appends a
    metric record via ``print_tfidf_metric`` and writes test predictions via
    ``save_prediction``.
    """
    # version 1 - tfidf as feature
    if data_name == "bookcorpus":
        if history is None:
            x_train, y_train = load_tfidf("train",
                                          block,
                                          verbose=True,
                                          redo=False)
            x_test, y_test = load_tfidf("test",
                                        block,
                                        verbose=True,
                                        redo=False)
            x_valid, y_valid = load_tfidf("valid",
                                          block,
                                          verbose=True,
                                          redo=False)
        else:
            x_train, y_train = load_tfidf_long("train",
                                               block,
                                               verbose=True,
                                               redo=False,
                                               history=history)
            x_test, y_test = load_tfidf_long("test",
                                             block,
                                             verbose=True,
                                             redo=False,
                                             history=history)
            x_valid, y_valid = load_tfidf_long("valid",
                                               block,
                                               verbose=True,
                                               redo=False,
                                               history=history)

    elif data_name == "coda19":
        x_train, y_train = coda_load_tfidf("train",
                                           block,
                                           verbose=True,
                                           redo=False)
        x_test, y_test = coda_load_tfidf("test",
                                         block,
                                         verbose=True,
                                         redo=False)
        x_valid, y_valid = coda_load_tfidf("valid",
                                           block,
                                           verbose=True,
                                           redo=False)
    else:
        print("Not supported yet")
        quit()

    # Optional reproducible downsampling of the training split (seed 5516).
    if downsample != -1:
        random_index = np.random.RandomState(5516).permutation(
            x_train.shape[0])[:downsample]
        x_train, y_train = x_train[random_index], y_train[random_index]

    # parameter setting
    # Defaults apply unless an explicit params namespace overrides them.
    vocab_size = x_train.shape[1]
    output_size = y_train.shape[1]
    hidden_size = 512 if params is None else params.hidden_size
    epoch_num = 2000 if params is None else params.epoch_num
    batch_size = 512 if params is None else params.batch_size
    layer_num = 5 if params is None else params.layer_num
    learning_rate = 1e-4 if params is None else params.learning_rate
    early_stop_epoch = 20 if params is None else params.early_stop
    device = device  # no-op self-assignment (kept as-is)

    # "note" tags the run configuration; it becomes part of file names below.
    if downsample == -1:
        note = f"cosine - auto2 - {dropout_rate}"
    else:
        note = f"cosine - auto2 - {dropout_rate} - downsample"

    # build dataset
    training_dataset = TFIDF_Dataset(x_train, y_train)
    training = data.DataLoader(training_dataset,
                               batch_size=batch_size,
                               shuffle=True,
                               num_workers=2)

    testing_dataset = TFIDF_Dataset(x_test, y_test)
    testing = data.DataLoader(testing_dataset,
                              batch_size=batch_size,
                              shuffle=False,
                              num_workers=2)

    valid_dataset = TFIDF_Dataset(x_valid, y_valid)
    valid = data.DataLoader(valid_dataset,
                            batch_size=batch_size,
                            shuffle=False,
                            num_workers=2)

    # build model
    model = AutoEncoder(
        vocab_size=vocab_size,
        hidden_size=hidden_size,
        output_size=output_size,
        dropout_rate=dropout_rate,
        device=device,
        layer_num=layer_num,
    ).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    # Loss: 1 - mean cosine similarity (maximizing similarity).
    loss_function = lambda y_pred, y_batch: 1 - F.cosine_similarity(
        y_pred, y_batch).mean()

    # first evaluation
    # NOTE(review): result is discarded here — presumably a warm-up/sanity
    # check of the untrained model; confirm with evaluate()'s side effects.
    evaluate(model, valid, loss_function=loss_function)

    best_epoch = 0
    best_cosine = 0
    best_model = copy.deepcopy(model.state_dict())
    stopper = EarlyStop(mode="max", history=early_stop_epoch)

    # train model
    for epoch in range(1, epoch_num + 1):
        # train
        model.train()
        total_loss = 0
        total_count = np.ceil(x_train.shape[0] / batch_size)
        total_cosine = 0

        for count, (x_batch, y_batch) in enumerate(training, 1):
            x_batch = x_batch.squeeze()
            y_batch = y_batch.squeeze()

            x_batch = x_batch.to(device)
            y_batch = y_batch.to(device)

            # standard step: zero grads -> forward -> loss -> backward -> step
            optimizer.zero_grad()
            y_pred = model(x_batch)
            loss = loss_function(y_pred, y_batch)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

            # track the raw cosine similarity alongside the loss
            cosine = F.cosine_similarity(y_batch, y_pred)
            total_cosine += cosine.mean().item()

            # \x1b[2K\r clears the line for an in-place progress display
            print(
                "\x1b[2K\rEpoch: {} / {} [{:.2f}%] Loss: {:.4f} Cosine: {:.4f}"
                .format(epoch, epoch_num, 100.0 * count / total_count,
                        total_loss / count, total_cosine / count),
                end="")
        print()

        # valid
        # `epoch % 1 == 0` is always true: validation runs every epoch.
        if epoch % 1 == 0 or epoch == epoch_num:
            cosine, _ = evaluate(model, valid, loss_function=loss_function)
            # keep a deep copy of the best-so-far weights
            if cosine > best_cosine:
                best_model = copy.deepcopy(model.state_dict())
                best_epoch = epoch
                best_cosine = cosine

            # check early stopping
            if stopper.check(cosine):
                print("Early Stopping at Epoch = ", epoch)
                break

    # load best model & test & save
    print("loading model from epoch {}".format(best_epoch))
    torch.save(
        best_model,
        os.path.join(model_dir, data_name,
                     "{}_autoencoder_{}.pt".format(note, block)))
    model.load_state_dict(best_model)
    cosine, y_pred = evaluate(model,
                              testing,
                              device=device,
                              loss_function=loss_function)
    print("testing cosine:", cosine)

    # config filename
    # History runs get distinct result/prediction file names.
    if history is None:
        filename = os.path.join(result_dir, f"{data_name}_dl_baseline.json")
        prediction_filename = os.path.join(
            predict_dir, "bookcorpus",
            f"block{block}_autoencoder_{note.replace(' ', '')}.h5")
    else:
        filename = os.path.join(result_dir,
                                f"history_exp_{data_name}_dl_baseline.json")
        prediction_filename = os.path.join(
            predict_dir, "bookcorpus",
            f"history_block{block}_autoencoder_{note.replace(' ', '')}.h5")

    print_tfidf_metric(
        {
            "cosine": float(cosine),
            "block": block,
            "model": "autoencoder",
            "note": "clean - autoencoder - tfidf - deep - {}".format(note)
        },
        filename=filename)

    save_prediction(prediction_filename, y_pred)
コード例 #16
0
def main(options):
    """Pre-train a sparse autoencoder on random 3D patches of ADNI scans.

    Each batch's reconstruction (MSE) loss is combined with a KL-divergence
    sparsity penalty pushing the mean hidden activation toward ``sparsity``.
    Per-batch losses are logged and appended to the "autoencoder_loss" file;
    the model is checkpointed every 20 epochs or when the average loss
    plateaus (change <= options.estop).
    """
    # NOTE(review): both branches select the same file — the non-3-class
    # case probably should point at a different training list; confirm.
    if options.num_classes == 3:
        TRAINING_PATH = '/media/ailab/Backup Plus/ADNI/data/train_3classes.txt'
    else:
        TRAINING_PATH = '/media/ailab/Backup Plus/ADNI/data/train_3classes.txt'
    IMG_PATH = '/media/ailab/Backup Plus/ADNI/data/image'

    dset_train = AD_3DRandomPatch(IMG_PATH, TRAINING_PATH)

    train_loader = DataLoader(dset_train,
                              batch_size=options.batch_size,
                              shuffle=True,
                              num_workers=0,
                              drop_last=True)
    sparsity = 0.05  # target mean activation of the hidden units
    beta = 0.5       # weight of the sparsity (KL) penalty

    mean_square_loss = nn.MSELoss()

    use_gpu = len(options.gpuid) >= 1
    autoencoder = AutoEncoder()

    if (use_gpu):
        autoencoder = autoencoder.cuda()
    else:
        autoencoder = autoencoder.cpu()

    #autoencoder.load_state_dict(torch.load("./autoencoder_pretrained_model19"))

    optimizer = torch.optim.Adam(autoencoder.parameters(),
                                 lr=options.learning_rate,
                                 weight_decay=options.weight_decay)

    last_train_loss = 1e-4
    # "with" guarantees the loss log is closed even if training raises.
    with open("autoencoder_loss", 'a') as f:
        for epoch in range(options.epochs):
            train_loss = 0.
            print("At {0}-th epoch.".format(epoch))
            for i, patches in enumerate(train_loader):
                patch = patches['patch']
                for b, batch in enumerate(patch):
                    # BUG FIX: the original called .cuda() unconditionally,
                    # crashing CPU-only runs despite computing use_gpu above.
                    batch = Variable(batch)
                    if use_gpu:
                        batch = batch.cuda()
                    output, s_ = autoencoder(batch)
                    batch = batch.view(-1, 343)  # flatten patches to 343-dim vectors
                    loss1 = mean_square_loss(output, batch)
                    # Sparsity target on the same device as the activations
                    # (ones_like replaces the unconditional .cuda() target).
                    s = torch.ones_like(s_) * sparsity
                    # KL(s || s_) with 1e-8 guards against log(0)
                    loss2 = (s * torch.log(s / (s_ + 1e-8)) + (1 - s) * torch.log(
                        (1 - s) / ((1 - s_ + 1e-8)))).sum() / options.batch_size
                    loss = loss1 + beta * loss2
                    # BUG FIX: accumulate the Python float via .item() —
                    # `train_loss += loss` retained the autograd graph for
                    # the whole epoch, and `.data[0]` fails on 0-dim tensors
                    # in PyTorch >= 0.5.
                    train_loss += loss.item()
                    logging.info(
                        "batch {0} training loss is : {1:.5f}, {2:.5f}".format(
                            i * 1000 + b, loss1.item(), loss2.item()))
                    f.write("batch {0} training loss is : {1:.3f}\n".format(
                        i * 1000 + b, loss.item()))
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()
            train_avg_loss = train_loss / (len(train_loader) * 1000)
            print(
                "Average training loss is {0:.5f} at the end of epoch {1}".format(
                    train_avg_loss, epoch))
            # Checkpoint on loss plateau or every 20th epoch.
            if (abs(train_avg_loss - last_train_loss) <=
                    options.estop) or ((epoch + 1) % 20 == 0):
                torch.save(autoencoder.state_dict(),
                           open("autoencoder_pretrained_model" + str(epoch), 'wb'))
            last_train_loss = train_avg_loss
コード例 #17
0
ファイル: run_cross.py プロジェクト: pa1511/REPDX
                    data = [
                        'TCA+', accuracy, precision, recall, f1_score, source,
                        target
                    ]
                    performance_data.append(data)
                except:
                    print("Error while running TCA+")
                    continue
                #============================================================
                try:
                    X_m1_fit, X_m2_fit, y_m1_fit, y_m2_fit = train_test_split(
                        X_train_join, y_train_join, test_size=0.8)
                    #============================================================
                    #REPD
                    #print("REPD")
                    autoencoder = AutoEncoder([48, 24], 0.01, 100, 50)
                    classifer = REPD(autoencoder)
                    classifer.fit(X_m1_fit, y_m1_fit)
                    y_p = classifer.predict(X_test)
                    accuracy, precision, recall, f1_score = calculate_results(
                        y_test, y_p)

                    #Store results
                    data = [
                        'REPD', accuracy, precision, recall, f1_score, source,
                        target
                    ]
                    performance_data.append(data)

                    #REPD_EX
                    #print("REPDX")
コード例 #18
0
def main(args):
    """Train an AutoEncoder on the dogs_cats images, checkpointing into
    ``logs/`` every time the validation loss improves.

    5000 images are held out for validation via random samplers; SGD with
    momentum minimizes an MSE reconstruction loss for ``args.n_epochs``.
    """
    ## load the dataset once; train/valid views come from disjoint samplers
    dataset = dataloader('dogs_cats', 'train')

    ## carve out a fixed-size validation split
    n_total = len(dataset)
    all_idx = list(range(n_total))
    n_holdout = 5000

    valid_idx = np.random.choice(all_idx, size=n_holdout, replace=False)
    remaining_idx = list(set(all_idx) - set(valid_idx))

    sampler_train = SubsetRandomSampler(remaining_idx)
    sampler_valid = SubsetRandomSampler(valid_idx)

    ## train and validation loader (same dataset, disjoint indices)
    train_loader = torch.utils.data.DataLoader(
        dataset,
        batch_size=args.batch_size,
        sampler=sampler_train)
    valid_loader = torch.utils.data.DataLoader(
        dataset,
        batch_size=args.batch_size,
        sampler=sampler_valid)

    ## debug: show a 5x5 grid of samples from each split
    if args.debug:
        for loader, title in ((train_loader, 'train'), (valid_loader, 'valid')):
            images, _ = next(iter(loader))
            grid = torchvision.utils.make_grid(images[:25], nrow=5)
            imshow(grid, title)

    ## model, moved to GPU when available
    model = AutoEncoder()
    use_gpu = torch.cuda.is_available()
    if use_gpu:
        print('cuda is available!')
        model.cuda()

    ## reconstruction loss + SGD with momentum and weight decay
    criterion = nn.MSELoss()
    optimizer = torch.optim.SGD(
        model.parameters(), lr=0.001, momentum=0.9, weight_decay=1e-5)

    ## checkpoint directory
    log_dir = 'logs'
    if not os.path.isdir('logs'):
        os.mkdir('logs')

    ## train / validate, keeping the best-validation checkpoint
    best_val = 5
    loss_list = []
    val_loss_list = []
    for epoch in range(args.n_epochs):
        loss = train(model, criterion, optimizer, train_loader, use_gpu)
        val_loss = valid(model, criterion, valid_loader, use_gpu)

        print('epoch {:d}, loss: {:.4f} val_loss: {:.4f}'.format(epoch, loss, val_loss))

        if val_loss < best_val:
            print('val_loss improved from {:.5f} to {:.5f}!'.format(best_val, val_loss))
            best_val = val_loss
            model_file = 'epoch{:03d}-{:.3f}.pth'.format(epoch, val_loss)
            torch.save(model.state_dict(), os.path.join(log_dir, model_file))

        loss_list.append(loss)
        val_loss_list.append(val_loss)
コード例 #19
0
    print("accuracy %.6f" % metrics.accuracy_score(y_true, y_pred))
    print("Precision %.6f" % metrics.precision_score(y_true, y_pred))
    print("Recall %.6f" % metrics.recall_score(y_true, y_pred))
    print("f1_score %.6f" % metrics.f1_score(y_true, y_pred))
    fpr, tpr, threshold = metrics.roc_curve(y_true, y_scores)
    print("auc_socre %.6f" % metrics.auc(fpr, tpr))


epoch_num = 50  # number of passes over the training set

batch_size = 256  # rows of train_X consumed per step

keep_pro = 0.9  # presumably a dropout keep probability — confirm in AutoEncoder

loader = DataLoader()    # project loader exposing train_X / train_size / shuffle()
model = AutoEncoder()    # TF graph exposing loss, train_op and the X placeholder

# Let TensorFlow grow GPU memory on demand instead of grabbing it all upfront.
config = tf.ConfigProto()
config.gpu_options.allow_growth = True

with tf.Session(config=config) as sess:
    sess.run(tf.global_variables_initializer())
    print('begin training:')
    for epoch in range(epoch_num):
        loader.shuffle()
        for iter, indices in enumerate(range(0, loader.train_size,
                                             batch_size)):
            batch_X = loader.train_X[indices:indices + batch_size]
            loss, _ = sess.run([model.loss, model.train_op],
                               feed_dict={
                                   model.X: batch_X,
コード例 #20
0
                    default = 0,
                    type    = int,
                    help    = 'GPU device ids (CUDA_VISIBLE_DEVICES)')

global args
args = parser.parse_args()

'''set the training gpu''' 
os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpuid)

'''load_data'''
Load_data = load_data()
test_data,test_gt = Load_data.test()

'''init model'''
autoencoder = AutoEncoder()
autoencoder.load_state_dict(torch.load(args.load_weight_dir)) #load pre-train
autoencoder.cuda()
autoencoder.eval()

loss_func = nn.L1Loss() 
loss = 0

with torch.no_grad(): #it can save the memory,prevent it allocated,we dont need to keep the grad during the evualation
    for index in range(0,test_data.size()[0],50):
        x_in = torch.tensor(test_data[index:index+49,:,:,:], dtype=torch.float32).cuda() 
        decoded = autoencoder(x_in)
        loss = loss+loss_func(decoded, x_in)   # L1 loss 
        '''pick some sample to check vision performance'''
        plt.title('autoencoder input')
        plt.imshow(x_in[0,0,:,:].data.cpu().numpy(), cmap='gray')
コード例 #21
0
from dataset_wrapper import MnistWrapper, Cifar10Wrapper
from rbm import RBM, GibbsSampler
from autoencoder import AutoEncoder
from rbm_pretrain import RBMPretrainer
import cifar10_ae
import utils
import os

if __name__ == '__main__':

    sess = utils.create_session()
    K.set_session(sess)

    ae_folder = 'relu_deep_model1_relu_6_reprod'
    ae = AutoEncoder(Cifar10Wrapper.load_default(), cifar10_ae.encode,
                     cifar10_ae.decode, cifar10_ae.RELU_MAX, ae_folder)
    ae.build_models(ae_folder)  # load model
    # ae.test_models(utils.vis_cifar10)
    # encoded_dataset = ae.encode(Cifar10Wrapper)
    # encoded_dataset.plot_data_dist(os.path.join(ae_folder, 'data_dist.png'))

    # dataset = MnistWrapper.load_from_h5('test/mnist_ae_relu_6/encoded_mnist.h5')
    encoded_dataset = Cifar10Wrapper.load_from_h5(
        os.path.join(ae_folder, 'encoded_cifar10.h5'))

    encoded_dataset.reshape((1024, ))
    encoded_dataset.train_xs = encoded_dataset.train_xs / 6.0
    encoded_dataset.test_xs = encoded_dataset.test_xs / 6.0

    assert len(encoded_dataset.x_shape) == 1
    num_vis = encoded_dataset.x_shape[0]
コード例 #22
0
ファイル: train.py プロジェクト: Adamits/autoextendVN
# Chunk into batches of dims
print(all_embeddings.size()[0])
embed_batches = [all_embeddings[:, i:i + batch_size]\
    for i in range(0, all_embeddings.size()[1], batch_size)]

#embed_batch1 = all_embeddings[:, 260:280]
#embed_batch2 = all_embeddings[:, 280:300]
#embed_batches = [embed_batch1, embed_batch2]

for d, embeds in enumerate(embed_batches):
    dim_start = d * batch_size
    dim_end = (d+1) * batch_size
    
    print("Initializing AutoEncoder for dims %i to %i"\
          % (dim_start, dim_end))
    model = AutoEncoder(embeds, word2class, class2word, USE_CUDA)
    model = model.cuda() if USE_CUDA else model
    
    # Ignore any parameters with requires_grad = False
    # aka the pretrained embeddings
    params = filter(lambda x: x.requires_grad, model.parameters())
    optimizer = torch.optim.SGD(params, lr=lr)

    num_dims = model.embedding_dims
    num_words = model.num_words
    num_classes = model.num_classes
    last_loss = float("inf")

    for i in range(epochs):
        print("Epoch %i" % i)
コード例 #23
0

if __name__ == '__main__':
    #dataset processing:
    dir_name1 = "/Users/dean/python_stuff_mac/machine_learning/autoencoder/data/v2vassignment2"
    csv_file = "img314_77-128maxpool.csv"
    csv_file_small = "img314_27-45maxpool.csv"
    # model_file = os.path.join(dir_name1,"model_DSimg314_77-128maxpool_epoch550.pkl")
    #========Dataset stuff ==========
    # Load the CSV and wrap it in the project's Dataset abstraction.
    shared = load_csv(csv_file, dir_name1)
    dataset = Dataset(shared, csv_file)
    dim = dataset.vector_size
    #========Trainer building========
    # Theano symbolic input plus a fixed seed for reproducible weight init.
    x = T.matrix('x')
    rng = np.random.RandomState(1234)
    # Autoencoder with a single 500-unit bottleneck: dim -> 500 -> dim.
    auto = AutoEncoder(x, [dim, 500, dim], rng)
    # auto.save_params('test.pkl')
    # auto.load_set_params('model_DSimg314_27-45maxpool_epoch450.pkl')
    sgd = SGDAutoEncoder(auto, dataset)
    # sgd.model.cur_epoch = 450
    # for i in xrange(300):
    # 	sgd.plot_result((27,45),i)
    # NOTE(review): save=False means no checkpoints are written even though
    # save_rate=10 is set — confirm this is intentional.
    sgd.train_model(n_epochs=1500,
                    save_rate=10,
                    epoch_offset=0,
                    minibatch_size=10,
                    lr=0.001,
                    plot=True,
                    save=False)
    # print(auto.params)
    # print(auto.layers[0].n_out,auto.layers[1].n_out)
コード例 #24
0
ファイル: extract_ae.py プロジェクト: flucoma/FluidCorpusMap
def get_stats(ftr):
    """Return per-column (mean, std, min, max) summaries of a 2-D array."""
    reducers = (np.mean, np.std, np.min, np.max)
    return tuple(reduce(ftr, 0) for reduce in reducers)


def get_learnt_features(spectrogram):
    """Encode a spectrogram with the global autoencoder ``ae`` and return a
    104-dim summary: (mean, std, min, max) of the hidden activations plus the
    same four statistics of their first-order temporal differences."""
    frames = Variable(torch.from_numpy(spectrogram.astype(np.float32)))
    hidden = ae.get_hidden(frames).detach().numpy()
    hidden_delta = np.diff(hidden, 1, 0)
    stats = get_stats(hidden) + get_stats(hidden_delta)
    return np.concatenate(stats)


# CLI: input dataset file and output feature-matrix file.
dsFile = sys.argv[1]
outFile = sys.argv[2]

eps = np.spacing(1)  # machine epsilon
batch_size = 400
num_iterations = 10
spectrograms = []                   # populated by extract_spectrograms()
input_matrix = np.empty((0, 513))   # accumulated spectrogram frames
output_matrix = np.empty((0, 104))  # one 104-dim summary row per spectrogram
ae = AutoEncoder(513, 13)           # 513 spectrum bins -> 13 hidden units
extract_spectrograms(dsFile)
train_ae()
# Summarize each spectrogram via the trained autoencoder's hidden layer.
for s in spectrograms:
    x = get_learnt_features(s)
    output_matrix = np.append(output_matrix, [x], axis=0)
np.savetxt(outFile, output_matrix)
コード例 #25
0
def feature_test_mnist(verbose=True):
    """Train a denoising autoencoder on MNIST, use its hidden layer as
    features for a neural-net classifier, and evaluate with 3-fold CV.

    Also saves a tiled image of hidden activations and a gzipped pickle
    containing the CV scores and the hidden-layer features.

    Args:
        verbose: if True, print the per-fold cross-validation scores.
    """
    print("... loading date")
    # load train data
    # NOTE(review): fetch_mldata was removed from modern scikit-learn
    # (replaced by fetch_openml) — this targets an older sklearn version.
    mnist = fetch_mldata('MNIST original')
    X_origin = mnist.data
    y = mnist.target
    target_names = np.unique(y)
    # standardize
    X_origin = X_origin.astype(np.float64)
    X_origin /= X_origin.max()  # scale pixel values into [0, 1]
    print("--- done")

    print("... encoding with denoising auto-encoder")
    # get feature & create input
    ae = AutoEncoder(X=X_origin,
                     hidden_size=22 * 22,
                     activation_function=T.nnet.sigmoid,
                     output_function=T.nnet.sigmoid)
    ae.train(n_epochs=5, mini_batch_size=20)
    # hidden-layer activations become the classifier's input features
    X = ae.get_hidden(data=X_origin)[0]
    print("--- done")

    # get classifier
    clf = nn.NN(ni=X.shape[1],
                nh=int(0.16 * X.shape[1]),
                no=len(target_names),
                learning_rate=0.3,
                inertia_rate=0.12,
                corruption_level=0.0,
                epochs=150000)

    # cross validation
    skf = StratifiedKFold(y, n_folds=3)
    scores = np.zeros(len(skf))
    for i, (train_index, test_index) in enumerate(skf):
        # train the model
        clf.fit(X[train_index], y[train_index])
        # get score
        score = clf.score(X[test_index], y[test_index])
        scores[i] = score

    # stdout of the score
    if verbose is True:
        print(scores)

    print("... plotting the autoencoder hidden layer")
    # get tiled image
    # sample 400 random rows; tile their 22x22 activations into a 20x20 grid
    p = np.random.randint(0, len(X), 400)
    tile = tile_raster_images(X[p], (22, 22), (20, 20),
                              scale_rows_to_unit_interval=True,
                              output_pixel_vals=True,
                              tile_spacing=(1, 1))
    # save tiled data's image
    plt.axis('off')
    plt.title('MNIST dataset')
    plt.imshow(tile, cmap=plt.cm.gray_r)
    plt.savefig('../output/tiled_autoencoder_hidden_mnist.png')
    print("--- done")

    print("... saving the results")
    data = {
        'scores': scores,
        'hidden layer': X,
    }
    with gzip.open('../output/feature_test_mnist.pkl.gz', 'wb') as f:
        cPickle.dump(data, f)
    print("--- done")
コード例 #26
0
def achieve_reduced_features_data(input_file, epochs=1):
    """Select feature columns from a CICIDS/ISCX CSV, normalize them, train
    an AutoEncoder, and save the encoded (dimension-reduced) data.

    Args:
        input_file: path to the raw ISCX/CICIDS CSV file.
        epochs: number of autoencoder training epochs.

    Returns:
        Path of the CSV containing the normalized, autoencoder-reduced data.
    """
    start_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())
    st = time.time()
    print('It starts at ', start_time)
    # ----be careful with the ' ' in items
    # selected_features is the full column list minus identifying columns
    # (Flow ID, IPs, Timestamp) and the Label; leading/embedded spaces must
    # match the CSV header exactly, so the string is kept verbatim.
    selected_features = " Source Port, Destination Port, Protocol, Flow Duration," \
                        " Total Fwd Packets, Total Backward Packets,Total Length of Fwd Packets, Total Length of Bwd Packets," \
                        " Fwd Packet Length Max, Fwd Packet Length Min, Fwd Packet Length Mean, Fwd Packet Length Std," \
                        "Bwd Packet Length Max, Bwd Packet Length Min, Bwd Packet Length Mean, Bwd Packet Length Std,Flow Bytes/s," \
                        " Flow Packets/s, Flow IAT Mean, Flow IAT Std, Flow IAT Max, Flow IAT Min,Fwd IAT Total, Fwd IAT Mean," \
                        " Fwd IAT Std, Fwd IAT Max, Fwd IAT Min,Bwd IAT Total, Bwd IAT Mean, Bwd IAT Std, Bwd IAT Max, Bwd IAT Min," \
                        "Fwd PSH Flags, Bwd PSH Flags, Fwd URG Flags, Bwd URG Flags, Fwd Header Length, Bwd Header Length," \
                        "Fwd Packets/s, Bwd Packets/s, Min Packet Length, Max Packet Length, Packet Length Mean, Packet Length Std," \
                        " Packet Length Variance,FIN Flag Count, SYN Flag Count, RST Flag Count, PSH Flag Count, ACK Flag Count," \
                        " URG Flag Count, CWE Flag Count, ECE Flag Count, Down/Up Ratio, Average Packet Size, Avg Fwd Segment Size," \
                        " Avg Bwd Segment Size, Fwd Header Length,Fwd Avg Bytes/Bulk, Fwd Avg Packets/Bulk, Fwd Avg Bulk Rate," \
                        " Bwd Avg Bytes/Bulk, Bwd Avg Packets/Bulk,Bwd Avg Bulk Rate,Subflow Fwd Packets, Subflow Fwd Bytes," \
                        " Subflow Bwd Packets, Subflow Bwd Bytes,Init_Win_bytes_forward, Init_Win_bytes_backward, act_data_pkt_fwd," \
                        " min_seg_size_forward,Active Mean, Active Std, Active Max, Active Min,Idle Mean, Idle Std, Idle Max, Idle Min"

    # input_file = '../original_data_no_sample/Wednesday-workingHours.pcap_ISCX_demo.csv'
    output_file = '../original_data_no_sample/features_selected_Wednesday-workingHours.pcap_ISCX.csv'
    invalid_file = '../original_data_no_sample/invalid_data_Wednesday-workingHours.pcap_ISCX.csv'
    selected_features_list = selected_features.split(',')
    _, _, output_file = select_features_from_file(input_file, selected_features_list, output_file, invalid_file)
    X, Y = load_data_from_file(output_file)
    new_X = normalize_data(X, axis=0, low=-1, high=1, eps=1e-5)
    new_Y = change_labels(Y, labels=[1, 0])  # 'BENIGN=1, others=0'
    output_file = '../original_data_no_sample/features_selected_Normalized_Wednesday-workingHours.pcap_ISCX.csv'
    save_data_in_autoencoder(new_X, new_Y, output_file)

    model = AutoEncoder(new_X, new_Y, epochs)
    # 1. train proposed_algorithms
    model.train()
    # torch.save(proposed_algorithms.state_dict(), './sim_autoencoder.pth')

    # 2. encoding input_data and save the encoding input_data
    reduced_output_file = '../original_data_no_sample/features_selected_Normalized_Reduced_data_Wednesday-workingHours.pcap_ISCX.csv'
    reduced_features_data = model.encoder(torch.Tensor(new_X))
    reduced_features_data = normalize_data(reduced_features_data.tolist(), axis=0, low=0, high=1, eps=1e-5)
    save_data_in_autoencoder(reduced_features_data, new_Y, reduced_output_file)

    # BUG FIX: original used '%Y-%h-%d' — %h is a non-portable strftime code,
    # inconsistent with the '%Y-%m-%d' used for the start timestamp.
    end_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())
    print('It ends at ', end_time)
    # BUG FIX: original was print('All takes %.4f s', time.time() - st),
    # which printed the literal format string and the float as two separate
    # arguments instead of %-formatting the elapsed time.
    print('All takes %.4f s' % (time.time() - st))

    return reduced_output_file
コード例 #27
0
ファイル: runoncsv.py プロジェクト: wmcars/ML
        accuracy.append(acc)
    average = np.mean(accuracy)
    std = np.std(accuracy)
    ret_acc = []
    for i in range(len(test_y)-1):
        if test_y[i] != 0:
            acc = 100 - (np.abs(predicted_data[i] - test_y[i]))/test_y[i] * 100
            ret_acc.append(acc)
    ret_avg = np.mean(ret_acc)
    ret_std = np.std(ret_acc)
    pd.DataFrame(np.reshape(ret_acc, (len(ret_acc, )))).to_csv(return_acc)
    prediction = np.exp(model.predict(np.reshape(test_data[-2], (1, 20))))*price[-2]
    print(prediction)

    return dataset, average, std



# if __name__ == "__main__":
# Stage 1: preprocessing — wavelet-transform the raw stock data and write the
# RBM train/test CSVs plus a log-return training file.
preprocess = PreProcessing(0.8, 0.25,"stock_data.csv","preprocessing/rbm_train.csv","preprocessing/rbm_test.csv","preprocessing/log_train.csv")
preprocess.make_wavelet_train()
preprocess.make_test_data()

# if __name__ == "__main__":
# Stage 2: feature extraction — train an autoencoder on the preprocessed data
# (layer parameters 55, 40, 30, 30, 40) and write the encoded features.
autoencoder = AutoEncoder(20,True,"preprocessing/rbm_train.csv","preprocessing/rbm_test.csv","features/autoencoded_data.csv","preprocessing/log_train.csv")
autoencoder.build_train_model(55, 40, 30, 30, 40)

# if __name__ == "__main__":
# Stage 3: prediction — fit the NN model on the encoded features and report
# price-accuracy statistics.
dataset, average, std = nnmodel(500, 0.05, 0.01,"features/autoencoded_data.csv","60_return_forex/encoded_return_test_data.csv","preprocessing/log_train.csv","forex_y/log_test_y.csv","forex_y/test_price.csv","60_return_forex/predicted_price.csv","60_return_forex/price.csv","60_return_forex/ret_acc.csv")
print(f"Price Accuracy Average = {average} \nPrice Accuracy Standard Deviation = {std}")
コード例 #28
0
    print("loading word embeddings ... ", end='')
    sys.stdout.flush()
    vecs = json.load(open(args.vectors, 'r'))
    print("done")

    vec_dim = -1
    for w, vec in vecs.items():
        if vec is not None:
            vec_dim = len(vec)
            break
    if vec_dim is None:
        raise RuntimeError("couln'\t set embeddings dimensionality")

    X, Y = {}, {}

    feat_extractor = AutoEncoder(exp=args.exp, weights_path=args.weights_path)
    print("loading features ... ", end='')
    sys.stdout.flush()
    for set_ in [k for k in anno.keys() if k != "tags"]:
        imid_list = sorted(list(anno[set_].keys()))  # only for reproducibility
        X[set_] = None
        Y[set_] = [None for _ in range(len(imid_list))]

        if args.exp == 1:
            for i, imid in tqdm(enumerate(imid_list), total=len(imid_list)):
                # set image features
                fname = splitext(anno[set_][imid]["file_name"])[0] + ".dat"
                x = load(join(args.features_path, set_, fname))
                x = normalize_rows(x.reshape(1, -1)).squeeze()
                x = feat_extractor.predict(kind="img", x=x)
                if i == 0:
コード例 #29
0
def main():
    """Entry point for the muon-pulse pipeline.

    Dispatches on a single CLI argument:
      * ``autoencoder`` — train the pulse autoencoder and checkpoint it,
      * ``cluster``     — fit/refine a clustering head on the encoder features,
      * ``lifetime``    — restore both checkpoints and compute lifetimes.
    """
    if len(sys.argv) != 2 or sys.argv[1] not in {
            "autoencoder", "cluster", "lifetime"
    }:
        print("USAGE: python main.py <Model Type>")
        print("<Model Type>: [autoencoder/cluster/lifetime]")
        return
    '''
    Read numpy array type data which has n_events x 2700 (time slices) x 4 (number of channels) and desolves it into (1) pulse data, n_pulses x 2700 (time slices), 
    (2) label, n_pulses dimension which represents how many channels detect signal per events, (3) event index, n_pulses dimension which represents where desolved pulses come from,
    (4) channel index, n_pulses dimension which represents which channels(scintillator ind.) desolved pulses come from. 
    Without specification, the number of test dataset is 0.01 of that of training dataset. 
    Depending on modes (autoencoder, cluster and lifetime), the main function runs autoencoder training, cluster training, lifetime calculation respectively, taken previously saved checkpoint.
    '''
    #pulse_data, label, test_data, test_label, train_evt_ind, test_evt_ind, train_ch_ind, test_ch_ind = preprocess.get_data("../testData11_14bit_100mV.npy")
    # Load five raw .npy shards; each call returns train/test pulses, labels,
    # event indices, and channel indices for that shard.
    pulse_data1, label1, test_data1, test_label1, train_evt_ind1, test_evt_ind1, train_ch_ind1, test_ch_ind1 = preprocess.get_data(
        filename="../DL_additional_data/muon_data_deep_learning_0_1.npy",
        make_delta_t=False)
    print('first data loaded')
    pulse_data2, label2, test_data2, test_label2, train_evt_ind2, test_evt_ind2, train_ch_ind2, test_ch_ind2 = preprocess.get_data(
        filename="../DL_additional_data/muon_data_deep_learning_0_2.npy",
        make_delta_t=False)
    print('second data loaded')
    pulse_data3, label3, test_data3, test_label3, train_evt_ind3, test_evt_ind3, train_ch_ind3, test_ch_ind3 = preprocess.get_data(
        filename="../DL_additional_data/muon_data_deep_learning_1_1.npy",
        make_delta_t=False)
    print('third data loaded')
    pulse_data4, label4, test_data4, test_label4, train_evt_ind4, test_evt_ind4, train_ch_ind4, test_ch_ind4 = preprocess.get_data(
        filename="../DL_additional_data/muon_data_deep_learning_1_2.npy",
        make_delta_t=False)
    print('fourth data loaded')
    pulse_data5, label5, test_data5, test_label5, train_evt_ind5, test_evt_ind5, train_ch_ind5, test_ch_ind5 = preprocess.get_data(
        filename="../DL_additional_data/muon_data_deep_learning_2_1.npy",
        make_delta_t=False)
    print('data loading finished')

    # Concatenate the five shards into single arrays, freeing each shard
    # immediately afterwards to keep peak memory down.
    pulse_data = np.concatenate(
        [pulse_data1, pulse_data2, pulse_data3, pulse_data4, pulse_data5])
    del pulse_data1, pulse_data2, pulse_data3, pulse_data4, pulse_data5

    label = np.concatenate([label1, label2, label3, label4, label5])
    del label1, label2, label3, label4, label5

    test_data = np.concatenate(
        [test_data1, test_data2, test_data3, test_data4, test_data5])
    del test_data1, test_data2, test_data3, test_data4, test_data5

    test_label = np.concatenate(
        [test_label1, test_label2, test_label3, test_label4, test_label5])
    del test_label1, test_label2, test_label3, test_label4, test_label5

    train_evt_ind = np.concatenate([
        train_evt_ind1, train_evt_ind2, train_evt_ind3, train_evt_ind4,
        train_evt_ind5
    ])
    del train_evt_ind1, train_evt_ind2, train_evt_ind3, train_evt_ind4, train_evt_ind5

    # Test-side event/channel indices are unused in this flow; dropped unmerged.
    #test_evt_ind = np.concat([test_evt_ind1, test_evt_ind2, test_evt_ind3, test_evt_ind4, test_evt_ind5])
    del test_evt_ind1, test_evt_ind2, test_evt_ind3, test_evt_ind4, test_evt_ind5

    train_ch_ind = np.concatenate([
        train_ch_ind1, train_ch_ind2, train_ch_ind3, train_ch_ind4,
        train_ch_ind5
    ])
    del train_ch_ind1, train_ch_ind2, train_ch_ind3, train_ch_ind4, train_ch_ind5

    #test_ch_ind = np.concat([test_ch_ind1, test_ch_ind2, test_ch_ind3, test_ch_ind4, test_ch_ind5])
    del test_ch_ind1, test_ch_ind2, test_ch_ind3, test_ch_ind4, test_ch_ind5

    # Per-pulse time offsets (delta t) from the companion .npz files; the
    # test-side halves are discarded below.
    #delta_t_origin = preprocess.get_delta_t("../testData11_14bit_100mV.npz", train_evt_ind)
    train_delta_t1, test_delta_t1 = preprocess.get_delta_t2(
        "../DL_additional_data/muon_data_deep_learning_0_1.npz"
    )  #delta_t_origin[train_evt_ind, train_ch_ind]
    train_delta_t2, test_delta_t2 = preprocess.get_delta_t2(
        "../DL_additional_data/muon_data_deep_learning_0_2.npz")
    train_delta_t3, test_delta_t3 = preprocess.get_delta_t2(
        "../DL_additional_data/muon_data_deep_learning_1_1.npz")
    train_delta_t4, test_delta_t4 = preprocess.get_delta_t2(
        "../DL_additional_data/muon_data_deep_learning_1_2.npz")
    train_delta_t5, test_delta_t5 = preprocess.get_delta_t2(
        "../DL_additional_data/muon_data_deep_learning_2_1.npz")

    train_delta_t = np.concatenate([
        train_delta_t1, train_delta_t2, train_delta_t3, train_delta_t4,
        train_delta_t5
    ])
    del train_delta_t1, train_delta_t2, train_delta_t3, train_delta_t4, train_delta_t5

    #test_delta_t = np.concat([test_delta_t1, test_delta_t2, test_delta_t3, test_delta_t4, test_delta_t5])
    del test_delta_t1, test_delta_t2, test_delta_t3, test_delta_t4, test_delta_t5

    # Autoencoder model plus a rolling 3-deep checkpoint manager.
    model = AutoEncoder()
    checkpoint_dir = './checkpoint'
    checkpoint = tf.train.Checkpoint(model=model)
    manager = tf.train.CheckpointManager(checkpoint,
                                         checkpoint_dir,
                                         max_to_keep=3)

    if sys.argv[1] == "autoencoder":
        start = time.time()

        # num_epochs is 1 here, so the loop runs a single epoch; curr_loss
        # accumulates but is never read afterwards.
        num_epochs = 1
        curr_loss = 0
        epoch = 0
        for i in range(num_epochs):
            print(epoch + 1, 'th epoch:')
            tot_loss = train(model, pulse_data)
            curr_loss += tot_loss
            epoch += 1

        print("Test loss:", test(model, test_data))
        print("Process time : {} s".format(int(time.time() - start)))
        print("Saving Checkpoint...")
        manager.save()

        # Spot-check reconstructions on a few arbitrary test pulses.
        # NOTE(review): pulses are reshaped to 1300 samples here, while the
        # description above says 2700 time slices — confirm which is current.
        visualization.plot_1ch(
            test_data[7],
            tf.squeeze(model.call(tf.reshape(test_data[7],
                                             (1, 1300, 1)))).numpy())
        visualization.plot_1ch(
            test_data[33],
            tf.squeeze(model.call(tf.reshape(test_data[33],
                                             (1, 1300, 1)))).numpy())
        visualization.plot_1ch(
            test_data[46],
            tf.squeeze(model.call(tf.reshape(test_data[46],
                                             (1, 1300, 1)))).numpy())
        visualization.plot_1ch(
            test_data[25],
            tf.squeeze(model.call(tf.reshape(test_data[25],
                                             (1, 1300, 1)))).numpy())
        visualization.feature_v_proj(model.encoder, test_data, test_label)

    elif sys.argv[1] == "cluster":
        # Cluster mode reuses the previously trained autoencoder weights.
        checkpoint.restore(manager.latest_checkpoint)
        visualization.feature_v_proj(model.encoder, test_data, test_label)

    # Clustering head built on top of the (possibly restored) encoder, with
    # its own checkpoint manager.  NOTE(review): this runs in every mode,
    # including "autoencoder"/"lifetime" — confirm that is intended.
    model_cluster = clustering(model.encoder)
    checkpoint_dir_cluster = './checkpoint_cluster'
    checkpoint_cluster = tf.train.Checkpoint(model=model_cluster)
    manager_cluster = tf.train.CheckpointManager(checkpoint_cluster,
                                                 checkpoint_dir_cluster,
                                                 max_to_keep=3)

    if sys.argv[1] == "cluster":
        #checkpoint_cluster.restore(manager_cluster.latest_checkpoint)

        # Initialise the cluster layer's centroids with k-means run on the
        # encoder features of (up to) the first 10k pulses.
        kmeans = KMeans(n_clusters=3,
                        init='k-means++',
                        n_init=20,
                        max_iter=400)
        cluster_pred = kmeans.fit_predict(
            model.encoder(
                tf.reshape(pulse_data[:min(len(pulse_data), 10000)],
                           (-1, 1300, 1))))
        model_cluster.cluster.set_weights([kmeans.cluster_centers_])

        num_iter = 30
        cnt_iter = 0

        # p is the target distribution carried across train_cluster iterations.
        p = None

        for i in range(num_iter):
            print(cnt_iter + 1, 'th iteration:')
            tot_loss, p = train_cluster(model_cluster, model, pulse_data,
                                        cnt_iter, p, train_ch_ind,
                                        train_delta_t)
            cnt_iter += 1
            # Visualise the current hard assignments on the first 10k pulses.
            prbs = model_cluster.call(
                tf.cast(tf.reshape(pulse_data[:10000], (-1, 1300, 1)),
                        dtype=tf.float32))
            ind = tf.argmax(prbs, axis=1)
            visualization.feature_v_proj(model.encoder, pulse_data[:10000],
                                         ind)

            # "Accuracy" proxy: fraction of non-channel-0 (background?) pulses
            # assigned to a non-zero cluster — hedged; verify the convention
            # that channel 0 / cluster 0 means signal.
            num_bkgcluster = tf.reduce_sum(
                tf.cast(tf.logical_and(tf.not_equal(train_ch_ind[:10000], 0),
                                       tf.not_equal(ind, 0)),
                        dtype=tf.float32))
            num_bkg = tf.reduce_sum(
                tf.cast(tf.not_equal(train_ch_ind[:10000], 0),
                        dtype=tf.float32))

            print('%dth epochs, \tAccuracy: %f' %
                  (cnt_iter + 1,
                   tf.cast(num_bkgcluster / num_bkg, dtype=tf.float32)))
            # Periodic checkpoint every 10 iterations.
            if cnt_iter % 10 == 0 and cnt_iter != 0:
                print("Saving Checkpoint...")
                manager_cluster.save()

        visualization.feature_v_proj(model.encoder, test_data, test_label)
        # NOTE(review): this saves the *autoencoder* manager, not
        # manager_cluster — confirm whether the cluster checkpoint should be
        # saved here instead.
        manager.save()

    elif sys.argv[1] == "lifetime":
        # Restore both models from their latest checkpoints, then compute
        # lifetimes from the cluster assignments and per-pulse delta t.
        checkpoint.restore(manager.latest_checkpoint)
        checkpoint_cluster.restore(manager_cluster.latest_checkpoint)
        lifetime_calc(model_cluster, model.encoder, pulse_data, train_delta_t,
                      train_evt_ind, train_ch_ind)
コード例 #30
0
# Optimisation hyper-parameters.
parser.add_argument("--learning_rate", type=float, default=1e-1)
parser.add_argument("--reg", type=float, default=0)

# Model architecture: sliding-window size, encoder hidden layer widths, and
# latent bottleneck dimension (window_size/hidden_sizes/latent_dim have no
# defaults, so they must be supplied on the command line).
parser.add_argument("--window_size", type=int)
parser.add_argument("--hidden_sizes", type=int, nargs="+")
parser.add_argument("--latent_dim", type=int)

args = parser.parse_args()

# Prefer GPU when available.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Build train/dev tensors from the data file; input_dim is the flattened
# feature size fed to the autoencoder — confirm against load_and_build_tensors.
input_dim, tensors = load_and_build_tensors(args.data, args, device)

auto_encoder = AutoEncoder(input_dim=input_dim,
                           hidden_sizes=args.hidden_sizes,
                           latent_dim=args.latent_dim).to(device)
print(auto_encoder)

# Create a timestamped experiment folder and persist the CLI arguments.
# NOTE(review): str(datetime.now()) contains spaces and colons, which are
# awkward/invalid in paths on some filesystems — consider strftime. Unchanged.
exp_folder = os.path.join(REPO_DIR,
                          "experiments/baseline-" + str(datetime.now()))
os.mkdir(exp_folder)
with open(os.path.join(exp_folder, "args.txt"), "w") as fp:
    fp.write(json.dumps(vars(args)))

# Aliases for the training loop that follows (continues past this chunk).
model = auto_encoder
X_train = tensors['X_train']
y_train = tensors['y_train']
X_dev = tensors['X_dev']
y_dev = tensors['y_dev']
iter_max = args.iter_max