Example 1
    def test(self, checkpoint_path):
        self.session = tf.Session()
        with self.session as sess:
            # Initialize all variables
            sess.run(tf.global_variables_initializer())
            # Load the saved model
            self.saver.restore(sess, checkpoint_path)
            print('Restored from: {}'.format(checkpoint_path))
            # test
            new_state = sess.run(self.initial_state)
            #pre_output = np.zeros(shape=(self.lstm_size, 1), dtype=tf.float32)
            # for x, y in batch_generator:
            arrX = batch_generator("sz_maxmin_a_test.xlsx", self.batch_size,
                                   self.seq_len)

            predictions = []
            for i in range(len(arrX)):
                # Take one batch of test data at a time
                x = arrX[i:i + self.batch_size, :, :-1]
                y = arrX[i:i + self.batch_size, -1:, -1:]
                # print(x)
                # print(y)
                feed = {
                    self.inputs: x,
                    self.keep_prob: 1,
                    self.initial_state: new_state
                }
                preds, output, new_state = sess.run(
                    [self.prediction, self.lstm_outputs, self.final_state],
                    feed_dict=feed)

                print(y, preds)
                predictions.append(preds)

        out_excel(predictions, "predictions.xlsx")
Example 2
    def train(self, max_steps, save_path, save_every_n, log_every_n):
        self.session = tf.Session()
        with self.session as sess:
            sess.run(tf.global_variables_initializer())
            # Train network
            step = 0
            new_state = sess.run(self.initial_state)
            print(123)
            # for x, y in batch_generator:
            arrX = batch_generator("sz_maxmin_a_train.xlsx", self.batch_size,
                                   self.seq_len)
            total_loss = []
            while True:
                step += 1
                n = random.randint(0, len(arrX) - self.batch_size)
                x = arrX[n:n + self.batch_size, :, :-1]
                y = arrX[n:n + self.batch_size, -1:, -1:]

                start = time.time()
                feed = {
                    self.inputs: x,
                    self.targets: y,
                    self.keep_prob: self.train_keep_prob,
                    self.initial_state: new_state
                }
                batch_loss, new_state, _ = sess.run(
                    [self.loss, self.final_state, self.optimizer],
                    feed_dict=feed)

                end = time.time()
                # control the print lines
                if step % log_every_n == 0:
                    print('step: {}/{}... '.format(step, max_steps),
                          'loss: {:.4f}... '.format(batch_loss),
                          '{:.4f} sec/batch'.format((end - start)))
                    # Record the loss
                    total_loss.append(batch_loss)
                if (step % save_every_n == 0):
                    self.saver.save(sess,
                                    os.path.join(save_path, 'model'),
                                    global_step=step)
                if step >= max_steps:
                    break
            self.saver.save(sess,
                            os.path.join(save_path, 'model'),
                            global_step=step)

        out_excel(total_loss, "total_loss.xlsx")
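Examples 1 and 2 slice the value returned by batch_generator as a 3-D array: arrX[n:n+batch_size, :, :-1] for inputs and arrX[n:n+batch_size, -1:, -1:] for targets, so the result has shape (num_windows, seq_len, num_columns) with the last column as the value to predict. The helper itself is not shown in these examples; the sketch below is one plausible implementation under those assumptions (file layout and behaviour are guesses, not the original code).

import numpy as np
import pandas as pd

def batch_generator(xlsx_path, batch_size, seq_len):
    # Read one row per time step; the last column is assumed to hold the target.
    values = pd.read_excel(xlsx_path).values.astype(np.float32)
    # Stack overlapping windows of length seq_len: shape (num_windows, seq_len, num_columns).
    # batch_size is unused here because the callers above slice batches out of the array themselves.
    windows = np.stack([values[i:i + seq_len]
                        for i in range(len(values) - seq_len + 1)])
    return windows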
Example 3
args = parser.parse_args()

torch.manual_seed(args.seed)
np.random.seed(args.seed)

assert torch.cuda.is_available()
device = torch.device('cuda')
torch.set_default_tensor_type('torch.cuda.FloatTensor')

# create data
train_dataset = data_.load_dataset(args.dataset_name, split='train')
train_loader = data.DataLoader(train_dataset,
                               batch_size=args.train_batch_size,
                               shuffle=True,
                               drop_last=True)
train_generator = data_.batch_generator(train_loader)
test_batch = next(iter(train_loader)).to(device)

# validation set
val_dataset = data_.load_dataset(args.dataset_name,
                                 split='val',
                                 frac=args.val_frac)
val_loader = data.DataLoader(dataset=val_dataset,
                             batch_size=args.val_batch_size,
                             shuffle=True,
                             drop_last=True)

# test set
test_dataset = data_.load_dataset(args.dataset_name, split='test')
test_loader = data.DataLoader(dataset=test_dataset,
                              batch_size=args.val_batch_size,
                              shuffle=False,    # the call is truncated in the source;
                              drop_last=False)  # shuffle/drop_last values here are assumptions
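Here data_.batch_generator wraps a DataLoader so that training code can keep drawing batches with next(train_generator) (Example 7 later uses it exactly that way). A minimal sketch of such a wrapper, assuming this cycling behaviour; the actual helper may differ:

def batch_generator(loader, num_batches=int(1e10)):
    # Cycle through the DataLoader endlessly (up to num_batches), yielding one batch at a time.
    batch_counter = 0
    while True:
        for batch in loader:
            yield batch
            batch_counter += 1
            if batch_counter == num_batches:
                return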
Example 4
def train(test_subject, parameters):
    # Load parameters
    learning_rate = parameters['learning_rate']
    metrics = parameters['metrics']
    batch_size = parameters['batch_size']
    nb_epoch = parameters['nb_epoch']
    saved_weights_file = (parameters['saved_weights_file_path'] +
                            '_{}.h5'.format(test_subject))
    plot_folder = parameters['plots_folder']
    classes_file = parameters['classes_file']
    
    # Create any necessary folder
    if not os.path.exists(plot_folder):
        os.makedirs(plot_folder)
    
    # Load the network
    model = two_stream_network(parameters)
    
    # Load the optimizer and compile the model
    adam = Adam(lr=learning_rate, beta_1=0.9, beta_2=0.999,
                epsilon=1e-08, decay=0.0005)
    model.compile(optimizer=adam, loss='categorical_crossentropy',
                  metrics=metrics[1:])
    
    # Load the dataset
    training_set, validation_set = load_train_val_image_dataset(
                                                parameters, test_subject
                                    )
    nb_inputs_train = training_set['inputs_per_video'][-1]
    nb_batches_train = nb_inputs_train // batch_size
    if nb_inputs_train % batch_size > 0:
        nb_batches_train += 1
    nb_inputs_val = validation_set['inputs_per_video'][-1]
    nb_batches_val = nb_inputs_val // batch_size
    if nb_inputs_val % batch_size > 0:
        nb_batches_val += 1
    
    # Train the model (validate with validation set in each epoch) 
    best_loss, best_epoch = 1e8, 0
    losses = {'train': [], 'val': []}
    accuracies = {'train': [], 'val': []}
    for e in range(nb_epoch):
        next_batch_train = batch_generator('train', parameters, training_set)
        next_batch_val = batch_generator('val', parameters, validation_set)
        train_acc, train_loss = 0, 0
        # Training
        train_time = time.time()
        for b in range(nb_batches_train):
            image, ofstack, label = next(next_batch_train)
            loss, accuracy = model.train_on_batch([image, ofstack], label)
            train_acc += accuracy
            train_loss += loss
        
        losses['train'].append(float(train_loss)/float(nb_batches_train))
        accuracies['train'].append(float(train_acc)/float(nb_batches_train)) 
        train_time = time.time() - train_time
        
        preds, gt = np.zeros((nb_inputs_val)), np.zeros((nb_inputs_val))
        val_loss = 0
        #Validation
        val_time = time.time()
        for b in range(nb_batches_val):
            image, ofstack, label = next(next_batch_val)
            pred = model.predict([image, ofstack], batch_size=batch_size)
            gt[b*batch_size:b*batch_size+label.shape[0]] = np.argmax(label,1)
            preds[b*batch_size:b*batch_size+pred.shape[0]] = np.argmax(pred,1)
            loss, _ = model.test_on_batch([image, ofstack], label)
            val_loss += loss
        
        val_acc = accuracy_score(gt, preds)
        losses['val'].append(float(val_loss)/float(nb_batches_val))
        accuracies['val'].append(val_acc) 
        val_f1 = f1_score(gt, preds, average='macro')
        val_time = time.time() - val_time
        
        print('Epoch {} - Train Loss: {}, Train Acc: {}, Train time: {} s |||'
              'Val Acc: {}, Val F1: {}, Val time: {} s'.format(
                      e, np.mean(losses['train']), 
                      np.mean(accuracies['train']),
                      train_time, val_acc, val_f1, val_time
                  )
              )
              
        # Plot training and validation loss and accuracy
        plot_training_info(
            test_subject, parameters, metrics, True, losses, accuracies
        )
        # Save weights of the model if a better loss is found
        if losses['val'][-1] < best_loss:
            best_epoch = e
            best_loss = losses['val'][-1]
            model.save_weights(saved_weights_file)
    
    del training_set, validation_set
    gc.collect()
    
    print('Best validation loss: {} (epoch {})'.format(best_loss, best_epoch))

    # Load best model
    model.load_weights(saved_weights_file)
    print('Best weights loaded')

    # Load the test set
    test_set = load_test_image_dataset(parameters, test_subject)
    nb_inputs_test = test_set['inputs_per_video'][-1]
    nb_batches_test = nb_inputs_test // batch_size
    if nb_inputs_test % batch_size > 0:
        nb_batches_test += 1
        
    next_batch_test = batch_generator('test', parameters, test_set)
    preds, gt = np.zeros(nb_inputs_test), np.zeros(nb_inputs_test)
    # Test
    test_time = time.time()
    for b in range(nb_batches_test):
        image, ofstack, label = next(next_batch_test)
        pred = model.predict([image, ofstack], batch_size=batch_size)
        gt[b*batch_size:b*batch_size+label.shape[0]] = np.argmax(label, 1)
        preds[b*batch_size:b*batch_size+pred.shape[0]] = np.argmax(pred, 1)
    test_time = time.time() - test_time
    print('Time to complete the test: {} seconds'.format(test_time))
    cm = confusion_matrix(gt, preds)
    title = 'Normalized confusion matrix in test set ({} fold)'.format(
        test_subject
    )
    cm_path = '{}cm_{}.pdf'.format(plot_folder, test_subject)
    classes = get_classes(classes_file)
    # Save the confusion matrix
    plot_confusion_matrix(
        cm, classes, cm_path, normalize=True, title=title, cmap='coolwarm',
        font_size=5
    )    
    
    metrics = calculate_evaluation_metrics(gt, preds)
    print "Scikit metrics"
    print 'accuracy: ', metrics['acc']
    print 'precision:', metrics['precision']
    print 'recall:', metrics['recall']
    print 'f1:', metrics['f1'] 
Example 5
def train(param, args):
    source = os.path.basename(args.source).split('.')[0]
    target = os.path.basename(args.target).split('.')[0]
    # setup model
    inp_shape = (param["inp_dims"], 1)
    embsz = param['embsz']
    inp, embedding = model.build_embedding(inp_shape, embsz)

    classifier = model.build_classifier_conv(param, embedding)
    discriminator = model.build_discriminator_conv(param, embedding)

    combined_classifier = model.build_combined_classifier(inp, classifier)
    combined_discriminator = model.build_combined_discriminator(inp, discriminator)
    combined_model = model.build_combined_model(inp, [classifier, discriminator])

    combined_classifier.compile(optimizer=optimizer.opt_classifier(param),
                                loss='categorical_crossentropy',
                                metrics=['accuracy'])
    combined_discriminator.compile(optimizer=optimizer.opt_discriminator(param),
                                   loss='binary_crossentropy',
                                   metrics=['accuracy'])

    loss_dict = {}
    loss_dict['class_act_last'] = 'categorical_crossentropy'
    loss_dict['dis_act_last'] = 'binary_crossentropy'

    loss_weight_dict = {}
    loss_weight_dict['class_act_last'] = param["class_loss_weight"],
    loss_weight_dict['dis_act_last'] = param["dis_loss_weight"]

    combined_model.compile(optimizer=optimizer.opt_combined(param),
                           loss=loss_dict,
                           loss_weights=loss_weight_dict,
                           metrics=['accuracy'])

    if args.plotModel:
        from keras.utils import plot_model
        plot_model(combined_model, to_file='multi_model_{}.png'.format(inp_shape[0]), dpi=200)
        sys.exit(1)

    # load the data
    Xs1, ys1 = param["source_data"], param["source_label"]
    Xt, yt = param["target_data"], param["target_label"]

    # Source domain is represented by label 0 and Target by 1
    ys_adv1 = np.array(([0.] * param["batch_size"]))
    yt_adv = np.array(([1.] * param["batch_size"]))

    y_advb_1 = np.array(([0] * param["batch_size"] + [1] * param["batch_size"]))  # For gradient reversal
    y_advb_2 = np.array(([1] * param["batch_size"] + [0] * param["batch_size"]))

    weight_class = np.array(([1] * param["batch_size"] + [0] * param["batch_size"]))
    weight_adv = np.ones((param["batch_size"] * 2,))

    S_1_batches = data.batch_generator([Xs1, ys1], param["batch_size"])
    T_batches = data.batch_generator([Xt, np.zeros(shape=(len(Xt),))], param["batch_size"])

    # start the training
    start = time.time()
    logs = []
    for i in range(param["num_iterations"]):
        Xsb1, ysb1 = next(S_1_batches)
        Xtb, ytb = next(T_batches)
        X_adv = np.concatenate([Xsb1, Xtb])
        y_class1 = np.concatenate([ysb1, np.zeros_like(ysb1)])

        # 'Epoch {}: train the classifier'.format(i)
        adv_weights = []
        for layer in combined_model.layers:
            if (layer.name.startswith("dis_")):
                adv_weights.append(layer.get_weights())
        stats1 = combined_model.train_on_batch(X_adv, [y_class1, y_advb_1], sample_weight=[weight_class, weight_adv])

        k = 0
        for layer in combined_model.layers:
            if (layer.name.startswith("dis_")):
                layer.set_weights(adv_weights[k])
                k += 1

        # 'Epoch {}: train the discriminator'.format(i)
        class_weights = []
        for layer in combined_model.layers:
            if (not layer.name.startswith("dis_")):
                class_weights.append(layer.get_weights())
        stats2 = combined_discriminator.train_on_batch(X_adv, y_advb_2)

        k = 0
        for layer in combined_model.layers:
            if (not layer.name.startswith("dis_")):
                layer.set_weights(class_weights[k])
                k += 1

        # show the intermediate results
        if ((i + 1) % param["test_interval"] == 0):
            ys1_pred = combined_classifier.predict(Xsb1)
            # yt_pred = combined_classifier.predict(Xt)
            ys1_adv_pred = combined_discriminator.predict(Xsb1)
            yt_adv_pred = combined_discriminator.predict(Xtb)

            source1_accuracy = accuracy_score(ysb1.argmax(1), ys1_pred.argmax(1))
            source_domain1_accuracy = accuracy_score(ys_adv1, np.argmax(ys1_adv_pred, axis=1))
            target_domain_accuracy = accuracy_score(yt_adv, np.argmax(yt_adv_pred, axis=1))

            log_str = ["iter: {:05d}:".format(i),
                       "LABEL CLASSIFICATION: source_1_acc: {:.5f}".format(source1_accuracy * 100),
                       "DOMAIN DISCRIMINATION: source_domain1_accuracy: {:.5f}, target_domain_accuracy: {:.5f} \n".format(source_domain1_accuracy * 100, target_domain_accuracy * 100)]
            log_str = '\n'.join(log_str)
            print(log_str + '\n')
            logs.append(log_str)

    last = time.time() - start
    tmpLine = 'total training time is: {:f} sec\n'.format(last)
    logs.append(tmpLine)
    contents = '\n'.join(logs)
    reportPath = os.path.join(ResDir, 'trainReport_oneClassifer_source_{}_target_{}.txt'.format(source, target))
    with open(reportPath, 'w') as f:
        f.write(contents)
    classifier_path = os.path.join(modelDir, "oneClassifier_source_{}_target_{}.h5".format(source, target))
    combined_classifier.save(classifier_path)

    return classifier_path, last
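The alternating loop above freezes one sub-network during each update by saving its layer weights before train_on_batch and writing them back afterwards. A small pair of helpers (hypothetical names, same behaviour as the inline loops) makes that pattern reusable:

def snapshot_weights(model, name_predicate):
    # Collect the current weights of every layer whose name matches the predicate.
    return [layer.get_weights() for layer in model.layers if name_predicate(layer.name)]

def restore_weights(model, name_predicate, snapshot):
    # Write the saved weights back into the matching layers, in the order they were collected.
    saved = iter(snapshot)
    for layer in model.layers:
        if name_predicate(layer.name):
            layer.set_weights(next(saved))

# Usage mirroring the loop above:
#   is_dis = lambda name: name.startswith("dis_")
#   frozen = snapshot_weights(combined_model, is_dis)
#   combined_model.train_on_batch(X_adv, [y_class1, y_advb_1],
#                                 sample_weight=[weight_class, weight_adv])
#   restore_weights(combined_model, is_dis, frozen)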
Example 6
        pairs_label = np.load('pairs_label.npy')

        print('pairs shape is:', pairs.shape)
        print('pairs label shape is:', pairs_label.shape)
        time.sleep(1)
        # np.save('pairs.npy',pairs)
        # np.save('pairs_label.npy',pairs_label)
        # steps_each_epoch = int(math.ceil(float(len(pairs)) / batch_size))  # steps per epoch
        for epoch in range(epoches):
            print('The next epoch is ....', epoch)
            time.sleep(1)
            # shuffle the pairs each epoch
            shuffle = np.random.permutation(pairs.shape[0])
            pairs = pairs[shuffle]
            pairs_label = pairs_label[shuffle]
            data_generator = batch_generator(pairs, pairs_label, batch_size)
            steps = 0
            # This for loop just pulls batches from the data generator.
            for ([
                    batch_x1, batch_x2
            ], y_true) in data_generator:  # get batch data from data generator
                x1 = batch_x1
                x2 = batch_x2
                y_true = y_true

                summary, _, losses, s = sess.run(
                    [merged, train_step, siam.loss, siam.similarity],
                    feed_dict={
                        siam.x1: x1,
                        siam.x2: x2,
                        siam.y_true: y_true,
Example 7
def run(seed):

    assert torch.cuda.is_available()
    device = torch.device('cuda')
    torch.set_default_tensor_type('torch.cuda.FloatTensor')

    np.random.seed(seed)
    torch.manual_seed(seed)

    # Create training data.
    data_transform = tvtransforms.Compose(
        [tvtransforms.ToTensor(),
         tvtransforms.Lambda(torch.bernoulli)])

    if args.dataset_name == 'mnist':
        dataset = datasets.MNIST(root=os.path.join(utils.get_data_root(),
                                                   'mnist'),
                                 train=True,
                                 download=True,
                                 transform=data_transform)
        test_dataset = datasets.MNIST(root=os.path.join(
            utils.get_data_root(), 'mnist'),
                                      train=False,
                                      download=True,
                                      transform=data_transform)
    elif args.dataset_name == 'fashion-mnist':
        dataset = datasets.FashionMNIST(root=os.path.join(
            utils.get_data_root(), 'fashion-mnist'),
                                        train=True,
                                        download=True,
                                        transform=data_transform)
        test_dataset = datasets.FashionMNIST(root=os.path.join(
            utils.get_data_root(), 'fashion-mnist'),
                                             train=False,
                                             download=True,
                                             transform=data_transform)
    elif args.dataset_name == 'omniglot':
        dataset = data_.OmniglotDataset(split='train',
                                        transform=data_transform)
        test_dataset = data_.OmniglotDataset(split='test',
                                             transform=data_transform)
    elif args.dataset_name == 'emnist':
        rotate = partial(tvF.rotate, angle=-90)
        hflip = tvF.hflip
        data_transform = tvtransforms.Compose([
            tvtransforms.Lambda(rotate),
            tvtransforms.Lambda(hflip),
            tvtransforms.ToTensor(),
            tvtransforms.Lambda(torch.bernoulli)
        ])
        dataset = datasets.EMNIST(root=os.path.join(utils.get_data_root(),
                                                    'emnist'),
                                  split='letters',
                                  train=True,
                                  transform=data_transform,
                                  download=True)
        test_dataset = datasets.EMNIST(root=os.path.join(
            utils.get_data_root(), 'emnist'),
                                       split='letters',
                                       train=False,
                                       transform=data_transform,
                                       download=True)
    else:
        raise ValueError

    if args.dataset_name == 'omniglot':
        split = -1345
    elif args.dataset_name == 'emnist':
        split = -20000
    else:
        split = -10000
    indices = np.arange(len(dataset))
    np.random.shuffle(indices)
    train_indices, val_indices = indices[:split], indices[split:]
    train_sampler = SubsetRandomSampler(train_indices)
    val_sampler = SubsetRandomSampler(val_indices)
    train_loader = data.DataLoader(
        dataset=dataset,
        batch_size=args.batch_size,
        sampler=train_sampler,
        num_workers=4 if args.dataset_name == 'emnist' else 0)
    train_generator = data_.batch_generator(train_loader)
    val_loader = data.DataLoader(dataset=dataset,
                                 batch_size=1024,
                                 sampler=val_sampler,
                                 shuffle=False,
                                 drop_last=False)
    val_batch = next(iter(val_loader))[0]
    test_loader = data.DataLoader(
        test_dataset,
        batch_size=16,
        shuffle=False,
        drop_last=False,
    )

    # from matplotlib import pyplot as plt
    # from experiments import cutils
    # from torchvision.utils import make_grid
    # fig, ax = plt.subplots(1, 1, figsize=(5, 5))
    # cutils.gridimshow(make_grid(val_batch[:64], nrow=8), ax)
    # plt.show()
    # quit()

    def create_linear_transform():
        if args.linear_type == 'lu':
            return transforms.CompositeTransform([
                transforms.RandomPermutation(args.latent_features),
                transforms.LULinear(args.latent_features, identity_init=True)
            ])
        elif args.linear_type == 'svd':
            return transforms.SVDLinear(args.latent_features,
                                        num_householder=4,
                                        identity_init=True)
        elif args.linear_type == 'perm':
            return transforms.RandomPermutation(args.latent_features)
        else:
            raise ValueError

    def create_base_transform(i, context_features=None):
        if args.prior_type == 'affine-coupling':
            return transforms.AffineCouplingTransform(
                mask=utils.create_alternating_binary_mask(
                    features=args.latent_features, even=(i % 2 == 0)),
                transform_net_create_fn=lambda in_features, out_features: nn_.
                ResidualNet(in_features=in_features,
                            out_features=out_features,
                            hidden_features=args.hidden_features,
                            context_features=context_features,
                            num_blocks=args.num_transform_blocks,
                            activation=F.relu,
                            dropout_probability=args.dropout_probability,
                            use_batch_norm=args.use_batch_norm))
        elif args.prior_type == 'rq-coupling':
            return transforms.PiecewiseRationalQuadraticCouplingTransform(
                mask=utils.create_alternating_binary_mask(
                    features=args.latent_features, even=(i % 2 == 0)),
                transform_net_create_fn=lambda in_features, out_features: nn_.
                ResidualNet(in_features=in_features,
                            out_features=out_features,
                            hidden_features=args.hidden_features,
                            context_features=context_features,
                            num_blocks=args.num_transform_blocks,
                            activation=F.relu,
                            dropout_probability=args.dropout_probability,
                            use_batch_norm=args.use_batch_norm),
                num_bins=args.num_bins,
                tails='linear',
                tail_bound=args.tail_bound,
                apply_unconditional_transform=args.
                apply_unconditional_transform,
            )
        elif args.prior_type == 'affine-autoregressive':
            return transforms.MaskedAffineAutoregressiveTransform(
                features=args.latent_features,
                hidden_features=args.hidden_features,
                context_features=context_features,
                num_blocks=args.num_transform_blocks,
                use_residual_blocks=True,
                random_mask=False,
                activation=F.relu,
                dropout_probability=args.dropout_probability,
                use_batch_norm=args.use_batch_norm)
        elif args.prior_type == 'rq-autoregressive':
            return transforms.MaskedPiecewiseRationalQuadraticAutoregressiveTransform(
                features=args.latent_features,
                hidden_features=args.hidden_features,
                context_features=context_features,
                num_bins=args.num_bins,
                tails='linear',
                tail_bound=args.tail_bound,
                num_blocks=args.num_transform_blocks,
                use_residual_blocks=True,
                random_mask=False,
                activation=F.relu,
                dropout_probability=args.dropout_probability,
                use_batch_norm=args.use_batch_norm)
        else:
            raise ValueError

    # ---------------
    # prior
    # ---------------
    def create_prior():
        if args.prior_type == 'standard-normal':
            prior = distributions_.StandardNormal((args.latent_features, ))

        else:
            distribution = distributions_.StandardNormal(
                (args.latent_features, ))
            transform = transforms.CompositeTransform([
                transforms.CompositeTransform(
                    [create_linear_transform(),
                     create_base_transform(i)])
                for i in range(args.num_flow_steps)
            ])
            transform = transforms.CompositeTransform(
                [transform, create_linear_transform()])
            prior = flows.Flow(transform, distribution)

        return prior

    # ---------------
    # inputs encoder
    # ---------------
    def create_inputs_encoder():
        if args.approximate_posterior_type == 'diagonal-normal':
            inputs_encoder = None
        else:
            inputs_encoder = nn_.ConvEncoder(
                context_features=args.context_features,
                channels_multiplier=16,
                dropout_probability=args.dropout_probability_encoder_decoder)
        return inputs_encoder

    # ---------------
    # approximate posterior
    # ---------------
    def create_approximate_posterior():
        if args.approximate_posterior_type == 'diagonal-normal':
            context_encoder = nn_.ConvEncoder(
                context_features=args.context_features,
                channels_multiplier=16,
                dropout_probability=args.dropout_probability_encoder_decoder)
            approximate_posterior = distributions_.ConditionalDiagonalNormal(
                shape=[args.latent_features], context_encoder=context_encoder)

        else:
            context_encoder = nn.Linear(args.context_features,
                                        2 * args.latent_features)
            distribution = distributions_.ConditionalDiagonalNormal(
                shape=[args.latent_features], context_encoder=context_encoder)

            transform = transforms.CompositeTransform([
                transforms.CompositeTransform([
                    create_linear_transform(),
                    create_base_transform(
                        i, context_features=args.context_features)
                ]) for i in range(args.num_flow_steps)
            ])
            transform = transforms.CompositeTransform(
                [transform, create_linear_transform()])
            approximate_posterior = flows.Flow(
                transforms.InverseTransform(transform), distribution)

        return approximate_posterior

    # ---------------
    # likelihood
    # ---------------
    def create_likelihood():
        latent_decoder = nn_.ConvDecoder(
            latent_features=args.latent_features,
            channels_multiplier=16,
            dropout_probability=args.dropout_probability_encoder_decoder)

        likelihood = distributions_.ConditionalIndependentBernoulli(
            shape=[1, 28, 28], context_encoder=latent_decoder)

        return likelihood

    prior = create_prior()
    approximate_posterior = create_approximate_posterior()
    likelihood = create_likelihood()
    inputs_encoder = create_inputs_encoder()

    model = vae.VariationalAutoencoder(
        prior=prior,
        approximate_posterior=approximate_posterior,
        likelihood=likelihood,
        inputs_encoder=inputs_encoder)

    # with torch.no_grad():
    #     # elbo = model.stochastic_elbo(val_batch[:16].to(device)).mean()
    #     # print(elbo)
    #     elbo = model.stochastic_elbo(val_batch[:16].to(device), num_samples=100).mean()
    #     print(elbo)
    #     log_prob = model.log_prob_lower_bound(val_batch[:16].to(device), num_samples=1200).mean()
    #     print(log_prob)
    # quit()

    n_params = utils.get_num_parameters(model)
    print('There are {} trainable parameters in this model.'.format(n_params))

    optimizer = optim.Adam(model.parameters(), lr=args.learning_rate)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(
        optimizer=optimizer, T_max=args.num_training_steps, eta_min=0)

    def get_kl_multiplier(step):
        if args.kl_multiplier_schedule == 'constant':
            return args.kl_multiplier_initial
        elif args.kl_multiplier_schedule == 'linear':
            multiplier = min(
                step / (args.num_training_steps * args.kl_warmup_fraction), 1.)
            return args.kl_multiplier_initial * (1. + multiplier)

    # create summary writer and write to log directory
    timestamp = cutils.get_timestamp()
    if cutils.on_cluster():
        timestamp += '||{}'.format(os.environ['SLURM_JOB_ID'])
    log_dir = os.path.join(cutils.get_log_root(), args.dataset_name, timestamp)
    while True:
        try:
            writer = SummaryWriter(log_dir=log_dir, max_queue=20)
            break
        except FileExistsError:
            sleep(5)
    filename = os.path.join(log_dir, 'config.json')
    with open(filename, 'w') as file:
        json.dump(vars(args), file)

    best_val_elbo = -np.inf
    tbar = tqdm(range(args.num_training_steps))
    for step in tbar:
        model.train()
        optimizer.zero_grad()
        scheduler.step(step)

        batch = next(train_generator)[0].to(device)
        elbo = model.stochastic_elbo(batch,
                                     kl_multiplier=get_kl_multiplier(step))
        loss = -torch.mean(elbo)
        loss.backward()
        optimizer.step()

        if (step + 1) % args.monitor_interval == 0:
            model.eval()
            with torch.no_grad():
                elbo = model.stochastic_elbo(val_batch.to(device))
                mean_val_elbo = elbo.mean()

            if mean_val_elbo > best_val_elbo:
                best_val_elbo = mean_val_elbo
                path = os.path.join(
                    cutils.get_checkpoint_root(),
                    '{}-best-val-{}.t'.format(args.dataset_name, timestamp))
                torch.save(model.state_dict(), path)

            writer.add_scalar(tag='val-elbo',
                              scalar_value=mean_val_elbo,
                              global_step=step)

            writer.add_scalar(tag='best-val-elbo',
                              scalar_value=best_val_elbo,
                              global_step=step)

            with torch.no_grad():
                samples = model.sample(64)
            fig, ax = plt.subplots(figsize=(10, 10))
            cutils.gridimshow(make_grid(samples.view(64, 1, 28, 28), nrow=8),
                              ax)
            writer.add_figure(tag='vae-samples', figure=fig, global_step=step)
            plt.close()

    # load best val model
    path = os.path.join(
        cutils.get_checkpoint_root(),
        '{}-best-val-{}.t'.format(args.dataset_name, timestamp))
    model.load_state_dict(torch.load(path))
    model.eval()

    np.random.seed(5)
    torch.manual_seed(5)

    # compute elbo on test set
    with torch.no_grad():
        elbo = torch.Tensor([])
        log_prob_lower_bound = torch.Tensor([])
        for batch in tqdm(test_loader):
            elbo_ = model.stochastic_elbo(batch[0].to(device))
            elbo = torch.cat([elbo, elbo_])
            log_prob_lower_bound_ = model.log_prob_lower_bound(
                batch[0].to(device), num_samples=1000)
            log_prob_lower_bound = torch.cat(
                [log_prob_lower_bound, log_prob_lower_bound_])
    path = os.path.join(
        log_dir, '{}-prior-{}-posterior-{}-elbo.npy'.format(
            args.dataset_name, args.prior_type,
            args.approximate_posterior_type))
    np.save(path, utils.tensor2numpy(elbo))
    path = os.path.join(
        log_dir, '{}-prior-{}-posterior-{}-log-prob-lower-bound.npy'.format(
            args.dataset_name, args.prior_type,
            args.approximate_posterior_type))
    np.save(path, utils.tensor2numpy(log_prob_lower_bound))

    # save elbo and log prob lower bound
    mean_elbo = elbo.mean()
    std_elbo = elbo.std()
    mean_log_prob_lower_bound = log_prob_lower_bound.mean()
    std_log_prob_lower_bound = log_prob_lower_bound.std()
    s = 'ELBO: {:.2f} +- {:.2f}, LOG PROB LOWER BOUND: {:.2f} +- {:.2f}'.format(
        mean_elbo.item(), 2 * std_elbo.item() / np.sqrt(len(test_dataset)),
        mean_log_prob_lower_bound.item(),
        2 * std_log_prob_lower_bound.item() / np.sqrt(len(test_dataset)))
    filename = os.path.join(log_dir, 'test-results.txt')
    with open(filename, 'w') as file:
        file.write(s)
Example 8
from model import SegNet
from data import batch_generator

net = SegNet()
net.fit_generator(batch_generator(), samples_per_epoch=13434, nb_epoch=5)
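Example 8 relies entirely on batch_generator() from data, which is not shown. With the Keras 1.x fit_generator API used here, the generator must yield (inputs, targets) batches forever; the sketch below is one plausible shape for it, with the image size, number of classes, and use of random placeholder data all assumed rather than taken from the original project:

import numpy as np

def batch_generator(batch_size=8, image_shape=(360, 480, 3), num_classes=12):
    # Endless generator of (images, one-hot masks); random data stands in for real files.
    while True:
        images = np.random.rand(batch_size, *image_shape).astype(np.float32)
        masks = np.eye(num_classes)[
            np.random.randint(num_classes, size=(batch_size,) + image_shape[:2])
        ].astype(np.float32)
        yield images, masks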
Example 9
test_images = data.flatten_images(test_images)
print(train_images.shape, val_images.shape)
with tf.Session() as sess:
    # Here is how you initialize weights of the model according to their
    # Initialization parameters.
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    val_feed = {input: val_images}
    true_val = np.argmax(val_labels, axis=1)

    val_res = sess.run([output], feed_dict=val_feed)
    val_acc = np.sum(true_val.dot(np.argmax(val_res[0], axis=1)))/np.sum(true_val)
    print("Validation accuracy:", val_acc)
    saver.save(sess, experiment)
    for i in range(num_epochs):
        batch_iter = data.batch_generator(train_images, train_labels, batch_size)
        for (images, labels) in batch_iter:
            feed = {input: images, onehot_labels: labels}
            train_loss, op = sess.run([loss, train_op], feed_dict=feed)
        print("Epoch:", i)
        feed = {input: val_images, onehot_labels: val_labels}
        val_res = sess.run([output], feed_dict=val_feed)
        val_acc = np.sum(true_val.dot(np.argmax(val_res[0], axis=1)))/np.sum(true_val)
        print("Validation accuracy:", val_acc)
        saver.save(sess, experiment)


    # Evaluate on the test set (test_images was flattened above; test_labels is assumed
    # to be available alongside it).
    feed = {input: test_images}
    res = sess.run([output], feed_dict=feed)
    true_class = np.argmax(test_labels, axis=1)
    print("Test accuracy:", np.sum(true_class.dot(np.argmax(res[0], axis=1)))/np.sum(true_class))
Example 10
cont_data = scipy.io.loadmat(
    r'C:\Users\justjo\Downloads\public_datasets\SVHN.mat')
cont_data = np.moveaxis(cont_data['X'], 3, 0)
cont_data = np.reshape(cont_data, (cont_data.shape[0], -1)) / 128. - 1.
# cont_data = cont_data[np.random.choice(cont_data.shape[0],10000, False), :]
#################################################

train_dataset = torch.utils.data.TensorDataset(
    torch.from_numpy(
        dtrain[train_idx[:-int(args.val_frac * dtrain.shape[0])]]).float())
# train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=args.train_batch_size, shuffle=True)
train_loader = data.DataLoader(train_dataset,
                               batch_size=args.train_batch_size,
                               shuffle=True,
                               drop_last=True)
train_generator = data_.batch_generator(train_loader)
test_batch = next(iter(train_loader))[0].to(device)

val_dataset = torch.utils.data.TensorDataset(
    torch.from_numpy(
        dtrain[train_idx[-int(args.val_frac * dtrain.shape[0]):]]).float())
# val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=args.val_batch_size, shuffle=True)
val_loader = data.DataLoader(dataset=val_dataset,
                             batch_size=args.val_batch_size,
                             shuffle=True,
                             drop_last=True)
val_generator = data_.batch_generator(val_loader)

test_dataset = torch.utils.data.TensorDataset(torch.from_numpy(dtest).float())
# test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=args.val_batch_size, shuffle=False)
test_loader = data.DataLoader(dataset=test_dataset,
                              batch_size=args.val_batch_size,
                              shuffle=False,    # the call is truncated in the source; these values
                              drop_last=False)  # follow the commented-out loader above and are assumed
Example 11
        print("Model created. Compiling ...")

    # Regression network, so we use mean squared error (cross-entropy would be for a classification network)
    model.compile(loss=parameter['loss_function'], optimizer=parameter['optimizer'])

    print("Model compiled. Training ...")

    #history_object = model.fit(X_train, y_train,
                                # validation_split=0.2,
                                # shuffle=True,
                                # nb_epoch=parameter['epochs'],
                                # callbacks=[earlyStopping],
                                # verbose=1)

    history_object = model.fit_generator(batch_generator(image_paths_train,
                                                        steering_angles_train,
                                                        parameter, True),
                                        parameter['samples_per_epochs'],
                                        nb_epoch=parameter['epochs'],
                                        max_q_size=1,
                                        validation_data=batch_generator(image_paths_valid,
                                                                        steering_angles_valid,
                                                                        parameter, False),
                                        nb_val_samples=image_paths_train.shape[0],
                                        callbacks=[earlyStopping, tbCallBack],
                                        verbose=1)


    model.save(parameter['saved_model'])
    print("Model saved to file : " + parameter['saved_model'])
Example 12
def train(train_data, train_labels, mod, params=None):
    """
    """

    batch_size = 25
    lam = .234
    eta = 5.59
    num_iterations = 20
    a = 28.0
    b = 33.0
    A = 74.1
    gamma = 0.882
    t = 0.658
    s = 4.13

    # n = len(train_data[0])
    # print("n: %d" % n)

    # Initialize parameters randomly if none were provided
    if params is None:
        print("No params")
        params = np.random.normal(loc=0.0, scale=1.0, size=mod.count)
    else:
        print("Params provided")
        #2*np.random.random(mod.count)-1

        #2*math.pi*np.random.rand(mod.count)
    dim = len(params)
    print("Number of parameters in circuit: %d " % dim)

    v = np.zeros(params.shape)
    print("Number of training samples =", len(train_data))
    print("Batch size =", batch_size)
    for k in tqdm(range(num_iterations)):
        # A good choice for delta is the Rademacher distribution (according to Wikipedia);
        # here uniform samples in [-1, 1) are used instead.
        delta = 2 * np.random.random(dim) - 1  # alternative: np.random.binomial(n=1, p=0.5, size=dim)
        alpha = a / (k + 1 + A)**s
        beta = b / (k + 1)**t
        # perturb = params + alpha*delta
        batch_iter = data.batch_generator(train_data, train_labels, batch_size)
        j = 0
        for (images, labels) in batch_iter:
            print("Epoch ", k, " batch ", j)
            j += 1
            perturb = params + alpha * delta
            # start = time.time()
            L1 = mod.get_loss(perturb, images, labels, lam, eta, len(images))
            perturb = params - alpha * delta
            L2 = mod.get_loss(perturb, images, labels, lam, eta, len(images))
            # end = time.time()
            # print("Time for update for a single batch =", end-start)
            g = (L1 - L2) / (2 * alpha)
            v = gamma * v - g * beta * delta
            params = params + v
            utils.save_params(params)
        # L1 = mod.get_loss(perturb, train_data, train_labels, lam, eta, batch_size)
        # perturb = params - alpha*delta
        # L2 = mod.get_loss(perturb, train_data, train_labels, lam, eta, batch_size)
        # g = (L1-L2)/(2*alpha)
        # v = gamma*v - g*beta*delta
        # params = params + v
        # utils.save_params(params)

    print(params)

    print("number of training circuit runs = ", mod.num_runs)
    return params, mod
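Example 12 is essentially simultaneous-perturbation stochastic approximation (SPSA) with momentum: two loss evaluations at params ± alpha*delta give a directional gradient estimate g, which drives the velocity update v. A self-contained toy version of the same estimator on a quadratic loss; all names and constants here are illustrative, not from the original module:

import numpy as np

def spsa_step(params, loss_fn, v, alpha=0.1, beta=0.05, gamma=0.9, rng=None):
    if rng is None:
        rng = np.random.default_rng()
    delta = rng.choice([-1.0, 1.0], size=params.shape)      # Rademacher perturbation direction
    g = (loss_fn(params + alpha * delta)
         - loss_fn(params - alpha * delta)) / (2 * alpha)    # directional gradient estimate
    v = gamma * v - beta * g * delta                         # momentum update
    return params + v, v

rng = np.random.default_rng(0)
params, v = rng.normal(size=5), np.zeros(5)
loss = lambda p: np.sum(p ** 2)
for _ in range(300):
    params, v = spsa_step(params, loss, v, rng=rng)
print(np.round(params, 3))  # should be close to the minimizer at zero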