Example #1
                print('Training model...', flush=True)
                for (batch_i, batch) in islice(enumerate(train_loader.get_batches(batch_size, dev)), train_params['batches_per_epoch']):
                    (train_stats['loss'][batch_i], train_stats['acc'][batch_i],
                     train_stats['sat'][batch_i], train_stats['pred'][batch_i],
                     train_stats['TP'][batch_i], train_stats['FP'][batch_i],
                     train_stats['TN'][batch_i], train_stats['FN'][batch_i]) = run_batch(
                         sess, GNN, batch, batch_i, epoch_i, time_steps, train=True, verbose=True)
                #end
                summarize_epoch(epoch_i, train_stats['loss'], train_stats['acc'], train_stats['sat'], train_stats['pred'], train=True)

                print('Testing model...', flush=True)
                for (batch_i, batch) in islice(enumerate(test_loader.get_batches(batch_size, dev)), test_params['batches_per_epoch']):
                    (test_stats['loss'][batch_i], test_stats['acc'][batch_i],
                     test_stats['sat'][batch_i], test_stats['pred'][batch_i],
                     test_stats['TP'][batch_i], test_stats['FP'][batch_i],
                     test_stats['TN'][batch_i], test_stats['FN'][batch_i]) = run_batch(
                         sess, GNN, batch, batch_i, epoch_i, time_steps, train=False, verbose=True)
                #end
                summarize_epoch(epoch_i, test_stats['loss'], test_stats['acc'], test_stats['sat'], test_stats['pred'], train=False)

                # Save weights (checkpoint directory is bucketed to the next multiple of
                # 100 epochs, e.g. epoch_i = 149 -> epoch=200)
                savepath = 'training/dev={dev}/checkpoints/epoch={epoch}'.format(
                    dev=dev, epoch=int(100 * np.ceil((epoch_i + 1) / 100)))
                os.makedirs(savepath, exist_ok=True)
                if save_checkpoints:
                    save_weights(sess, savepath)

                logfile.write('{epoch_i} {trloss} {tracc} {trsat} {trpred} {trTP} {trFP} {trTN} {trFN} {tstloss} {tstacc} {tstsat} {tstpred} {tstTP} {tstFP} {tstTN} {tstFN}\n'.format(
                    
                    epoch_i = epoch_i,

                    trloss = np.mean(train_stats['loss']),
                    tracc = np.mean(train_stats['acc']),
                    trsat = np.mean(train_stats['sat']),
                    trpred = np.mean(train_stats['pred']),
                    trTP = np.mean(train_stats['TP']),
                    trFP = np.mean(train_stats['FP']),
                    trTN = np.mean(train_stats['TN']),
                    trFN = np.mean(train_stats['FN']),

                    tstloss = np.mean(test_stats['loss']),
Example #2
                            n=n),
                    flush=True)

            #end

            M_C, M_G, is_given = create_batch(puzzles[0:1], 1)

            prediction = sess.run(GNN["prediction"],
                                  feed_dict={
                                      GNN["gnn"].matrix_placeholders["M_C"]:
                                      M_C,
                                      GNN["gnn"].matrix_placeholders["M_G"]:
                                      M_G,
                                      GNN["gnn"].time_steps: time_steps,
                                      GNN["is_given"]: is_given
                                  })

            print("Puzzle:\t\t{}".format(puzzles[0]))
            print("Prediction:\t{}".format(''.join(
                [str(x + 1) for x in np.argmax(prediction, axis=1)])))
            print("Diff:\t\t{}\n".format(''.join([
                str(x)
                for x in ((np.argmax(prediction, axis=1) +
                           1) != [int(x) for x in puzzles[0]]).astype(int)
            ])))

            save_weights(sess, "./sudoku-checkpoints")

        #end
    #end
#end
def main(args):
    data = DataLoader(pca=args.PCA, norm=args.norm)

    train_captions, train_feature, train_url, train_len = data.get_Training_data(
        args.training)
    test_captions, test_feature, test_url, test_len = data.get_val_data(
        args.testing)
    f, c, _ = data.eval_data()

    writer = SummaryWriter()

    encoder = Encoder(input_size=train_feature.shape[1],
                      hidden_size=args.hidden_size) \
        .to(device)

    decoder = Decoder(embed_size=args.embed_size,
                      hidden_size=args.hidden_size, attention_dim=args.attention_size,
                      vocab_size=len(data.word_to_idx)) \
        .to(device)

    if args.load_weight:
        load_weights(encoder, args.model_path + "Jul28_10-04-57encoder")
        load_weights(decoder, args.model_path + "Jul28_10-04-57decoder")

    for epoch in range(args.num_epochs):
        # the optimizer is re-created every epoch so the learning-rate decay applied
        # at the end of the loop takes effect
        params = list(decoder.parameters()) + list(encoder.parameters())
        criterion = nn.CrossEntropyLoss()
        optimizer = torch.optim.Adam(params=params, lr=args.learning_rate)

        # if epoch >= 100:
        training_loss = step(encoder=encoder,
                             decoder=decoder,
                             criterion=criterion,
                             data=(train_captions, train_feature, train_len),
                             optimizer=optimizer)
        # if epoch + 1 % 5 == 0:
        #     a = evaluate(encoder, decoder, train_feature[0:2], train_captions[0:2], 5, data.word_to_idx)
        #     print("bleu4 ", a)

        with torch.no_grad():
            test_loss = step(encoder=encoder,
                             decoder=decoder,
                             criterion=criterion,
                             data=(test_captions, test_feature, test_len))

        # if epoch > 1:
        b1, b2, b3, b4 = evaluate(encoder, decoder, f, c, 5, data.word_to_idx,
                                  data.idx_to_word)
        writer.add_scalars('BLEU', {
            'BLEU1': b1,
            'BLEU2': b2,
            'BLEU3': b3,
            'BLEU4': b4
        }, epoch + 1)
        if (epoch % 30) == 0:
            save_weights(encoder, args.model_path + "encoder" + str(epoch))
            save_weights(decoder, args.model_path + "decoder" + str(epoch))

        writer.add_scalars('loss', {
            'train': training_loss,
            'val': test_loss
        }, epoch + 1)

        print(
            'Epoch [{}/{}], Loss: {:.4f}, Perplexity: {:5.4f}, TestLoss: {:.4f}, TestPerplexity: {:5.4f}'
            .format(epoch + 1, args.num_epochs, training_loss,
                    np.exp(training_loss), test_loss, np.exp(test_loss)))

        args.learning_rate *= 0.995
        if args.save_weight:
            save_weights(encoder, args.model_path + "encoder" + str(epoch))
            save_weights(decoder, args.model_path + "decoder" + str(epoch))

    if args.save_weight:
        save_weights(encoder, args.model_path + "encoder")
        save_weights(decoder, args.model_path + "decoder")

    if args.predict:

        sample = Sample(encoder=encoder, decoder=decoder, device=device)

        train_mask = [
            random.randint(0, train_captions.shape[0] - 1)
            for _ in range(args.numOfpredection)
        ]
        test_mask = [
            random.randint(0, test_captions.shape[0] - 1)
            for _ in range(args.numOfpredection)
        ]

        train_feat = torch.from_numpy(train_feature[train_mask])
        train_feat = train_feat.to(device)
        train_encoder_out = encoder(train_feat)

        test_feat = torch.from_numpy(test_feature[test_mask])
        test_feat = test_feat.to(device)
        test_encoder_out = encoder(test_feat)

        train_output = []
        test_output = []

        for i in range(len(test_mask)):
            print(i)
            pre = sample.caption_image_beam_search(
                train_encoder_out[i].reshape(1, args.embed_size),
                data.word_to_idx, 2)
            train_output.append(pre)
            pre = sample.caption_image_beam_search(
                test_encoder_out[i].reshape(1, args.embed_size),
                data.word_to_idx, 50)
            test_output.append(pre)

        print_output(output=test_output,
                     sample=0,
                     gt=test_captions[test_mask],
                     img=test_url[test_mask],
                     title="val",
                     show_image=args.show_image,
                     idx_to_word=data.idx_to_word)

        print("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX")
        print("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX")
        print("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX")
        print("")

        print_output(output=train_output,
                     sample=0,
                     gt=train_captions[train_mask],
                     img=train_url[train_mask],
                     title="traning",
                     show_image=args.show_image,
                     idx_to_word=data.idx_to_word)
Example #4
                    GNN["prob_degree_predict_acc"], GNN["rank10_labels"],
                    GNN["rank10_predicted"]
                ],
                feed_dict={
                    GNN["gnn"].matrix_placeholders["M"]: sparse_to_dense(M),
                    GNN["gnn"].time_steps: time_steps,
                    GNN["labels"]: sparse_to_dense(labels),
                    GNN["nodes_n"]: nodesPerProblem
                })
            test_degprecision10 = precisionAt10(test_rank10_labels,
                                                test_rank10_predicted)
            print(
                "{timestamp}\t{memory}\tEpoch {epoch}\tBatch {batch} (n,m,i): ({n},{m},{i})\t| Loss(T:{loss:.5f},D:{degree_cost:.5f}) Acc(D:{degree_acc:.5f}) PrecAt10(D:{degree_prec10:.5f}))"
                .format(timestamp=timestamp(),
                        memory=memory_usage(),
                        epoch=epoch,
                        batch="rnd",
                        loss=test_loss,
                        degree_cost=test_degc,
                        degree_acc=test_degacc,
                        degree_prec10=test_degprecision10,
                        n=test_n,
                        m=test_m,
                        i=instances),
                flush=True)

            #end if
            save_weights(sess, "degreecentrality-checkpoints")
        #end for(epochs)
    #end Session
Example #5
def main():
    if feature_actions == 'load-from-wav':
        (X_train,
         y_train), (X_test,
                    y_test), (X_valid,
                              y_valid) = get_dataset(class_type='speaker')

        x_audio_training = get_mfccs(X_train)
        x_audio_validation = get_mfccs(X_valid)
        x_audio_testing = get_mfccs(X_test)

        if feature_store:
            save_to_pkl(x_audio_training, 'training-speaker-x.pkl')
            save_to_pkl(x_audio_validation, 'validation-speaker-x.pkl')
            save_to_pkl(x_audio_testing, 'testing-speaker-x.pkl')
            save_to_pkl(y_train, 'training-speaker-y.pkl')
            save_to_pkl(y_valid, 'validation-speaker-y.pkl')
            save_to_pkl(y_test, 'testing-speaker-y.pkl')

    elif feature_actions == 'load-from-pkl':
        x_audio_training = get_mfccs(pickle_file='training-speaker-x.pkl')
        x_audio_validation = get_mfccs(pickle_file='validation-speaker-x.pkl')
        x_audio_testing = get_mfccs(pickle_file='testing-speaker-x.pkl')
        y_train = load_from_pkl('training-speaker-y.pkl')
        y_valid = load_from_pkl('validation-speaker-y.pkl')
        y_test = load_from_pkl('testing-speaker-y.pkl')

    else:
        print("Error in 'feature_actions'")
        return

    print("Training length: {}".format(len(x_audio_training)))
    print("Validation length: {}".format(len(x_audio_validation)))
    print("Testing length: {}".format(len(x_audio_testing)))

    model = Sequential()

    model.add(
        TimeDistributed(Conv1D(filters=16,
                               kernel_size=4,
                               padding='same',
                               activation=tf.nn.relu,
                               data_format='channels_last'),
                        input_shape=(NUM_MFCC, NUM_FRAMES, 1)))

    model.add(
        TimeDistributed(
            Conv1D(filters=8,
                   kernel_size=2,
                   padding='same',
                   activation=tf.nn.relu)))
    model.add(TimeDistributed(MaxPooling1D(pool_size=2)))
    model.add(TimeDistributed(Flatten()))
    model.add(LSTM(50, return_sequences=True))
    model.add(Dropout(0.3))
    model.add(Flatten())
    model.add(Dense(units=512, activation=tf.nn.tanh))
    model.add(Dense(units=256, activation=tf.nn.tanh))
    model.add(
        Dense(units=y_train.shape[1],
              activation=tf.nn.softmax,
              name='top_layer'))

    model.compile(loss=tf.keras.losses.CategoricalCrossentropy(),
                  optimizer=tf.keras.optimizers.SGD(lr=1e-4,
                                                    decay=1e-6,
                                                    momentum=0.9,
                                                    nesterov=True),
                  metrics=['accuracy'])  # optimizer was 'Adam'

    model.summary()

    # model.load_weights('Libri_Speaker_v1.1.h5')

    x_train = np.reshape(x_audio_training,
                         [len(x_audio_training), NUM_MFCC, NUM_FRAMES, 1])
    x_valid = np.reshape(x_audio_validation,
                         [len(x_audio_validation), NUM_MFCC, NUM_FRAMES, 1])

    print("Start Fitting")
    history = model.fit(x_train,
                        y_train,
                        batch_size=16,
                        epochs=200,
                        verbose=1,
                        validation_data=(x_valid, y_valid))

    model_name = 'Libri_Speaker_v1.3'
    print("Saving model as {}".format(model_name))
    model.save_weights(model_name + '.h5')
    model.save(model_name + '-model.h5')

    save_weights(model, model_name)

    write_history(history, filename='history-' + model_name + '.csv')

    test(x_audio_testing, y_test, model)
Example #6
def main():
    # Set variables.
    img_dim = [64, 64]
    latent_dim = 32
    hidden_dim = 1024
    num_epochs = 100
    save_freq = 25
    batch_size = 64
    shuffle = True
    num_loader_workers = 2
    std_dev = 1.
    mu = 0.
    cuda = True
    learning_rate = 0.001
    save_dir = os.path.dirname(os.path.realpath(__file__))

    # fix seed for experiment.
    util.fix_seed()

    # Load Encoder, Decoder.
    aae_net = aae.AAE(latent_dim, hidden_dim)
    if cuda:
        aae_net.cuda()

    # Set loss fn.
    loss_fn = aae.loss_fn

    # Load optimizer.
    optimizer = optim.Adam(aae_net.parameters(), lr=learning_rate)

    # Load Dataset.
    anime_data = data_util.AnimeFaceData(img_dim, batch_size, shuffle,
                                         num_loader_workers)

    # Epoch loop
    ones = torch.Tensor(np.ones(batch_size))
    if cuda:
        ones = ones.cuda()
    zeroes = torch.Tensor(np.zeros(batch_size))
    if cuda:
        zeroes = zeroes.cuda()

    for epoch in range(num_epochs):
        print('Epoch {} of {}'.format(epoch + 1, num_epochs))
        start = time.time()

        # Batch loop.
        for i_batch, batch_data in enumerate(anime_data.data_loader, 0):
            print('Batch {}'.format(i_batch + 1))

            # Load batch.
            x, _ = batch_data
            if cuda:
                x = x.cuda()

            # Reset gradient.
            optimizer.zero_grad()

            # Run batch, calculate loss, and backprop.
            # Train autoencoder and gan on real batch.
            x_reconst, real_critic = aae_net.forward(x)
            loss = loss_fn(x, x_reconst, real_critic, ones)
            loss.backward()
            optimizer.step()

            # Train gan on fake batch.
            fake_z = torch.Tensor(std_dev *
                                  np.random.randn(batch_size, latent_dim) + mu)
            if cuda:
                fake_z = fake_z.cuda()
            fake_critic = aae_net.gan_fake_forward(fake_z)
            loss = F.binary_cross_entropy(fake_critic, zeroes, reduction='sum')
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        if epoch % save_freq == 0:
            util.save_weights(
                aae_net, os.path.join(save_dir, 'aae_{}.pth'.format(epoch)))

        end = time.time()
        print('loss: ', loss)
        print('Took {}'.format(end - start))
Example #7
def train(data_df,
          agent,
          num_episodes,
          limit_iterations,
          num_warmup_iterations,
          volatility_lookback,
          log_interval_steps,
          log_comet,
          comet_log_level,
          experiment,
          checkpoints_interval,
          checkpoints_dir,
          save_checkpoints,
          is_test=False,
          results_dir=None):

    data = data_df.to_numpy()

    agent.is_training = not is_test
    phase = 'test' if is_test else 'train'

    num_days = data.shape[0]

    # init custom OpenAI gym env for stocks portfolio
    env = PortfolioEnv(data, volatiltiy_lookback=volatility_lookback)

    output = []

    # training
    total_iterations_counter = 0  # counter for total iterations. num_episodes * num_days
    for episode in range(num_episodes):
        agent.reset_action_noise_process(
        )  # init random process for new episode
        current_state = env.reset()  # get initial state s(t)

        results = defaultdict(list)  # for logging
        for t in range(num_days):
            if limit_iterations is not None and total_iterations_counter >= limit_iterations:
                # option for hard limit on iterations for debugging
                break

            if not is_test and total_iterations_counter < num_warmup_iterations:
                # warmup to fill up the buffer with random actions
                current_action = agent.select_random_action()
            else:
                # regular training. Let agent select action based on observation
                current_action = agent.select_action(current_state)

            if is_test:
                output.append(current_action)

            # execute action on environment, observe new state and reward
            next_state, current_reward, done, _ = env.step(current_action)

            # logging
            results['reward'].append(current_reward)
            results['current_volatility'].append(env.current_volatility)
            results['current_gains'].append(env.current_gains)
            if t % log_interval_steps == 0:
                avg_reward = util.avg_results(results,
                                              'reward',
                                              lookback=log_interval_steps)
                avg_vol = util.avg_results(results,
                                           'current_volatility',
                                           lookback=log_interval_steps)
                avg_gains = util.avg_results(results,
                                             'current_gains',
                                             lookback=log_interval_steps)
                total_gains = env.total_gains

                print(
                    '{} episode: {} | step: {} | avg_reward: {:.5f} | avg_vol: {:.2f} | avg_step_gains: {:.2f} | total_gains: {:.2f}'
                    .format(phase, episode, t, avg_reward, avg_vol, avg_gains,
                            total_gains))

                env.render()
                if log_comet and comet_log_level in ['interval']:
                    experiment.log_metric('{}_interval_reward'.format(phase),
                                          avg_reward,
                                          step=total_iterations_counter)
                    experiment.log_metric('{}_interval_avg_vol'.format(phase),
                                          avg_vol,
                                          step=total_iterations_counter)
                    experiment.log_metric(
                        '{}_interval_avg_gains'.format(phase),
                        avg_gains,
                        step=total_iterations_counter)
                    experiment.log_metric(
                        '{}_interval_total_gains'.format(phase),
                        total_gains,
                        step=total_iterations_counter)

            # TODO: might need to add episode done states to limit batches not to cross over episodes

            if total_iterations_counter >= num_warmup_iterations:
                # we only want to update the policy after the random state warmup

                # store transition in R (s(t), a(t), r(t), s(t+1))
                agent.append_observation(current_state, current_action,
                                         current_reward, next_state)

                # update policy
                critic_loss_val, actor_loss_val = agent.update_policy()

                # logging
                results['critic'].append(critic_loss_val)
                results['actor'].append(actor_loss_val)
                if log_comet and comet_log_level in ['interval']:
                    avg_critic_loss = util.avg_results(
                        results, 'critic', lookback=log_interval_steps)
                    avg_actor_loss = util.avg_results(
                        results, 'actor', lookback=log_interval_steps)
                    experiment.log_metric(
                        '{}_interval_critic_loss'.format(phase),
                        avg_critic_loss,
                        step=total_iterations_counter)
                    experiment.log_metric(
                        '{}_interval_actor_loss'.format(phase),
                        avg_actor_loss,
                        step=total_iterations_counter)

            current_state = next_state
            total_iterations_counter += 1

        if limit_iterations is not None and total_iterations_counter >= limit_iterations:
            # option for hard limit on iterations for debugging
            break

        if save_checkpoints and (episode + 1) % checkpoints_interval == 0:
            agent.save_model(checkpoints_dir, identifier=episode + 1)

        # logging
        avg_reward = util.avg_results(results, 'reward')
        avg_vol = util.avg_results(results, 'current_volatility')
        avg_gains = util.avg_results(results, 'current_gains')
        total_gains = env.total_gains
        avg_critic_loss = util.avg_results(results, 'critic')
        avg_actor_loss = util.avg_results(results, 'actor')

        print(
            '{} episode {} results - reward: {:.2f} | avg_vol: {:.2f} | avg_gains: {:.2f} | total_gains: {:.2f}'
            .format(phase, episode, avg_reward, avg_vol, avg_gains, total_gains))
        if log_comet and comet_log_level in ['episode', 'interval']:

            experiment.log_metric('{}_avg_episode_reward'.format(phase),
                                  avg_reward,
                                  step=episode)
            experiment.log_metric('{}_avg_episode_critic_loss'.format(phase),
                                  avg_critic_loss,
                                  step=episode)
            experiment.log_metric('{}_avg_episode_actor_loss'.format(phase),
                                  avg_actor_loss,
                                  step=episode)
            experiment.log_metric('{}_final_episode_avg_vol'.format(phase),
                                  avg_vol,
                                  step=episode)
            experiment.log_metric('{}_final_episode_avg_gains'.format(phase),
                                  avg_gains,
                                  step=episode)
            experiment.log_metric('{}_final_episode_total_gains'.format(phase),
                                  total_gains,
                                  step=episode)

        env.render()

    if save_checkpoints:
        agent.save_model(checkpoints_dir, identifier='final')

    if is_test:
        util.save_weights(output,
                          columns=data_df.keys(),
                          results_dir=results_dir)
Example #8
def main():
    # Set variables.
    img_dim = [64, 64]
    codebook_size = 256
    latent_dim = 32
    hidden_dim = 1024
    num_epochs = 100
    save_freq = 25
    batch_size = 64
    shuffle = True
    num_loader_workers = 4
    beta = 1.0
    cuda = True
    learning_rate = 0.001
    save_dir = os.path.dirname(os.path.realpath(__file__))

    # fix seed for experiment.
    util.fix_seed()

    # Load Encoder, Decoder.
    model_net = vqvae.VQVAE(latent_dim, hidden_dim, codebook_size)
    if cuda:
        model_net.cuda()

    # Set loss fn.
    loss_fn = vqvae.loss_fn

    # Load optimizer.
    optimizer = optim.Adam(model_net.parameters(), lr=learning_rate)

    # Load Dataset.
    anime_data = data_util.AnimeFaceData(img_dim, batch_size, shuffle, num_loader_workers)

    # Epoch loop
    for epoch in range(num_epochs):
        print('Epoch {} of {}'.format(epoch, num_epochs))
        start = time.time()
        train_loss = 0

        # Batch loop.
        for i_batch, batch_data in enumerate(anime_data.data_loader, 0):
            print('Batch {}'.format(i_batch+1))

            # Load batch.
            x, _ = batch_data
            if cuda:
                x = x.cuda()

            # Reset gradient.
            optimizer.zero_grad()

            # Run batch, calculate loss, and backprop.
            x_reconst, embed_loss, _ = model_net.forward(x)
            loss = loss_fn(x, x_reconst, embed_loss)
            train_loss += loss.item()
            loss.backward()
            optimizer.step()

        if epoch % save_freq == 0:
            util.save_weights(model_net, os.path.join(save_dir, 'vqvae_{}.pth'.format(epoch)))

        end = time.time()
        print('loss: ', train_loss / len(anime_data.img_folder))
        print('Took {}'.format(end - start))
Example #9
def main():
    # Set variables.
    img_dim = [64, 64]
    latent_dim = 32
    hidden_dim = 1024
    num_epochs = 20
    save_freq = 5
    batch_size = 128
    shuffle = True
    num_loader_workers = 3
    beta = 1.
    cuda = True
    learning_rate = 0.001
    adaptive = False  # True
    save_dir = os.path.dirname(os.path.realpath(__file__))

    # fix seed for experiment.
    util.fix_seed()

    # Load Encoder, Decoder.
    vae_net = vae.VAE(latent_dim, hidden_dim)
    if cuda:
        vae_net.cuda()

    # Set loss fn.
    loss_fn = vae.loss_fn

    # Load Dataset.
    anime_data = data_util.AnimeFaceData(img_dim, batch_size, shuffle,
                                         num_loader_workers)

    # Load optimizer.
    if adaptive:
        optimizer = optim.Adam(vae_net.parameters(), lr=learning_rate)
    else:
        optimizer = optim.SGD(vae_net.parameters(), lr=learning_rate)
        scheduler = optim.lr_scheduler.OneCycleLR(optimizer,
                                                  max_lr=1e-1,
                                                  epochs=num_epochs,
                                                  steps_per_epoch=10)
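        # caveat: OneCycleLR budgets num_epochs * steps_per_epoch = 200 optimizer steps in
        # total; scheduler.step() is called once per batch below, so steps_per_epoch should
        # match the real number of batches per epoch or the scheduler raises once the step
        # budget is exceeded.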

    # Epoch loop
    for epoch in range(1, num_epochs + 1):
        print('Epoch {} of {}'.format(epoch, num_epochs))
        start = time.time()
        train_loss = 0

        # Batch loop.
        for i_batch, batch_data in enumerate(anime_data.data_loader, 0):
            # print('Batch {}'.format(i_batch+1))

            # Load batch.
            x, _ = batch_data
            if cuda:
                x = x.cuda()

            # Reset gradient.
            optimizer.zero_grad()

            # Run batch, calculate loss, and backprop.
            x_reconst, mu, logvar = vae_net.forward(x)
            loss = loss_fn(x, x_reconst, mu, logvar, beta)
            train_loss += loss.item()
            loss.backward()
            optimizer.step()
            if not adaptive:
                scheduler.step()

        if epoch % save_freq == 0:
            if adaptive:
                o = 'adaptive'
            else:
                o = 'cyclic'
            util.save_weights(
                vae_net,
                os.path.join(save_dir, 'vae_{}_{}.pth'.format(o, epoch)))

        end = time.time()
        print('loss: ', train_loss / len(anime_data.img_folder))
        print('Took {}'.format(end - start))

        if adaptive:
            o = 'adaptive'
        else:
            o = 'cyclic'
        util.save_weights(
            vae_net, os.path.join(save_dir, 'vae_{}_{}.pth'.format(o, epoch)))
Example #10
                    GNN["labels_eig"]:
                    sparse_to_dense(test_batch["eigenvector_compare"]),
                    GNN["nodes_n"]:
                    test_batch["problem_n"]
                })

            print(
                "{timestamp}\t{memory}\tEpoch {epoch}\tBatch {batch} (n,m,i): ({n},{m},{i})\t| Loss(T:{loss:.5f},D:{degree_cost:.5f}, B:{bet_cost:.5f}, C:{clo_cost:.5f}, E:{eig_cost:.5f}) Acc(T:{acc:.5f}, D:{degree_acc:.5f}, B:{bet_acc:.5f}, C:{clo_acc:.5f}, E:{eig_acc:.5f}) "
                .format(timestamp=timestamp(),
                        memory=memory_usage(),
                        epoch=epoch,
                        batch="rnd",
                        loss=test_loss,
                        acc=test_acc,
                        degree_cost=test_degc,
                        degree_acc=test_degacc,
                        bet_cost=test_betc,
                        bet_acc=test_betacc,
                        clo_cost=test_cloc,
                        clo_acc=test_cloacc,
                        eig_cost=test_eigc,
                        eig_acc=test_eigacc,
                        n=n,
                        m=m,
                        i=len(batch["problem_n"])),
                flush=True)

            save_weights(sess, "rank-centrality-checkpoints")
        #end for(epochs)
    #end Session
Example #11
def sgd_minibatches(iters, delta_0, w, minibatches=[], parses=[], batch_size=20,
                    sparse=False, log=False, bar=True, 
                    prob_log=False, log_last=False,
                    check_convergence=False,
                    scale_weight=False,
                    regularizer=False,
                    lmbda=2.0,
                    savepath=False,
                    prediction=False,
                    shuffle=False,
                    prediction_length=10):
    """
    Performs stochastic gradient descent on the weights vector w on
    minibatches = [minibatch_1, minibatch_2,....,minibatch_N].

    We are decaying the learning rate after each minibatch. We follow the following rule
    from http://cilvr.cs.nyu.edu/diglib/lsml/bottou-sgd-tricks-2012.pdf section 5.2:

    delta_k = delta_0 * (1 + delta_0*lmbda*k)**(−1)

    where k is the index of the minibatch and delta_0 is the initial learning rate,
    and lmbda is another hyperparameter that controls the rate of decay.
    """ 
    likelihoods = list()
    avg_likelihoods = list()
    ws = []
    delta_ws = []
    for i in range(iters):
        
        print('Iteration {0}/{1}'.format(i+1, iters))

        learning_rates = list()
        if bar and not (i==iters-1 and log_last): bar = progressbar.ProgressBar(max_value=len(minibatches))
            
        if shuffle:
            minibatches = partition(random.sample(parses, len(parses)), batch_size)

        for k, minibatch in enumerate(minibatches):
            delta_w = 0.0
            w_new = defaultdict(float)
            
            delta_k = delta_0 * (1 + delta_0*(lmbda*(i*len(minibatches)+k)))**(-1) # this is delta_k = delta_0 when k=0 and i=0
            
            learning_rates.append(delta_k)

            if bar and not (i==iters-1 and log_last): bar.update(k)

            for m, parse in enumerate(minibatch):
                # unpack parse

                target_forest, ref_forest, src_fsa, tgt_sent = parse
                
                ### D_n(x) ###

                tgt_edge2fmap, _ = featurize_edges(target_forest, src_fsa, tgt_sent=tgt_sent,
                                                   sparse_del=sparse, sparse_ins=sparse, sparse_trans=sparse)

                # recompute edge weights
                tgt_edge_weights = {edge: np.exp(weight_function(edge, tgt_edge2fmap[edge], w)) for edge in target_forest}
                # compute inside and outside
                tgt_tsort = top_sort(target_forest)
                root_tgt = Nonterminal("D_n(x)")
                I_tgt = inside_algorithm(target_forest, tgt_tsort, tgt_edge_weights)
                O_tgt = outside_algorithm(target_forest, tgt_tsort, tgt_edge_weights, I_tgt, root_tgt)
                # compute expected features
                expected_features_Dn_x = expected_feature_vector(target_forest, I_tgt, O_tgt, tgt_edge2fmap)

                ### D(x,y) ###

                ref_edge2fmap, _ = featurize_edges(ref_forest, src_fsa, tgt_sent=tgt_sent,
                                                   sparse_del=sparse, sparse_ins=sparse, sparse_trans=sparse)
                # recompute edge weights
                ref_edge_weights = {edge: np.exp(weight_function(edge, ref_edge2fmap[edge], w)) for edge in ref_forest}

                # compute inside and outside
                tsort = top_sort(ref_forest)
                root_ref = Nonterminal("D(x,y)")
                I_ref = inside_algorithm(ref_forest, tsort, ref_edge_weights)
                O_ref = outside_algorithm(ref_forest, tsort, ref_edge_weights, I_ref, root_ref)
                # compute expected features
                expected_features_D_xy = expected_feature_vector(ref_forest, I_ref, O_ref, ref_edge2fmap)
                # update w
                w_step, d_w = update_w(w, expected_features_D_xy, expected_features_Dn_x, delta=delta_k, regularizer=regularizer)
                
                # store likelihoods
                if I_ref and I_tgt: # for the case of an empty forest! since log(0) = -inf
                    # compute the likelihood of the target sentence
                    l = np.log(I_ref[root_ref]) - np.log(I_tgt[root_tgt])
                    if np.isfinite(l):
                        likelihoods.append(l)
                    else:
                        likelihoods.append(likelihoods[-1])
                else:
                    likelihoods.append(likelihoods[-1])
                avg_likelihood = sum(likelihoods) / len(likelihoods)
                avg_likelihoods.append(avg_likelihood)

                # the update is averaged over the minibatch
                delta_w += d_w / len(minibatch)
                for feature, value in w_step.items():
                    w_new[feature] += value / len(minibatch)

                if log or (i==iters-1 and log_last):
                    print("x = '{}'".format(src_fsa.sent))
                    print("y = '{}'".format(tgt_sent))
                    
                    print('Viterbi')
                    d = viterbi(target_forest, tgt_tsort, tgt_edge_weights, I_tgt, root_tgt) # use exp!
                    candidates = write_derrivation(d)
                    print("Best y = '{}'".format(candidates.pop()))
                    print('P(y,d|x) = {}'.format(joint_prob(d, tgt_edge_weights, I_tgt, root_tgt, log=prob_log)))
                    
                    n = 100
                    d, count = ancestral_sample(n, target_forest, tgt_tsort, tgt_edge_weights, I_tgt, root_tgt) # use exp!
                    candidates = write_derrivation(d)
                    print('Most sampled: {0}/{1}'.format(count, n))
                    print("Best y = '{}'".format(candidates.pop()))
                    print('P(y,d|x) = {}\n'.format(joint_prob(d, tgt_edge_weights, I_tgt, root_tgt, log=prob_log)))

            if bar and not (i==iters-1 and log_last): bar.update(k+1)

            # hack: scale weights so that they are at most of the scale 10**scale_weight
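            # e.g. with scale_weight=1 and abs_max=3400: int(log10(3400)) = 3, so every
            # weight is divided by 10**(3+1-1) = 1000 and the largest magnitude becomes 3.4 (< 10**1)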
            if scale_weight:
                abs_max = max(map(abs, w_new.values()))
                if np.isfinite(abs_max):
                    for feat, val in w_new.items():
                        w_new[feat] = val / 10**(int(np.log10(abs_max))+1 - scale_weight)
                    # update
                    w = w_new
                else:
                    # return to previous weight
                    print('inf or nan')
                    w = ws[-2]
                    print(tgt_sent)


            # update after each minibatch
            # w = w_new        
            ws.append(w)
            delta_ws.append(delta_w)

        if bar and not (i==iters-1 and log_last): bar.finish()

        if savepath:
            save_weights(w, savepath + 'trained-{}-'.format(i+1))

        if check_convergence:
            print('delta w: {}\n'.format([ds / len(w.keys()) for ds in delta_ws]))
            print('Learning rates: {}'.format(learning_rates))

        # if prediction and i%5==0: # save every 5 iterations
        predict(parses[0:prediction_length], w, i+1, prediction)

    return ws, delta_ws, avg_likelihoods