print('Training model...', flush=True) for (batch_i, batch) in islice(enumerate(train_loader.get_batches(batch_size, dev)), train_params['batches_per_epoch']): train_stats['loss'][batch_i], train_stats['acc'][batch_i], train_stats['sat'][batch_i], train_stats['pred'][batch_i], train_stats['TP'][batch_i], train_stats['FP'][batch_i], train_stats['TN'][batch_i], train_stats['FN'][batch_i] = run_batch(sess, GNN, batch, batch_i, epoch_i, time_steps, train=True, verbose=True) #end summarize_epoch(epoch_i,train_stats['loss'],train_stats['acc'],train_stats['sat'],train_stats['pred'],train=True) print('Testing model...', flush=True) for (batch_i, batch) in islice(enumerate(test_loader.get_batches(batch_size, dev)), test_params['batches_per_epoch']): test_stats['loss'][batch_i], test_stats['acc'][batch_i], test_stats['sat'][batch_i], test_stats['pred'][batch_i], test_stats['TP'][batch_i], test_stats['FP'][batch_i], test_stats['TN'][batch_i], test_stats['FN'][batch_i] = run_batch(sess, GNN, batch, batch_i, epoch_i, time_steps, train=False, verbose=True) #end summarize_epoch(epoch_i,test_stats['loss'],test_stats['acc'],test_stats['sat'],test_stats['pred'],train=False) # Save weights savepath = 'training/dev={dev}/checkpoints/epoch={epoch}'.format(dev=dev,epoch=int(round(100*np.ceil((epoch_i+1)/100)))) os.makedirs(savepath, exist_ok=True) if save_checkpoints: save_weights(sess, savepath); logfile.write('{epoch_i} {trloss} {tracc} {trsat} {trpred} {trTP} {trFP} {trTN} {trFN} {tstloss} {tstacc} {tstsat} {tstpred} {tstTP} {tstFP} {tstTN} {tstFN}\n'.format( epoch_i = epoch_i, trloss = np.mean(train_stats['loss']), tracc = np.mean(train_stats['acc']), trsat = np.mean(train_stats['sat']), trpred = np.mean(train_stats['pred']), trTP = np.mean(train_stats['TP']), trFP = np.mean(train_stats['FP']), trTN = np.mean(train_stats['TN']), trFN = np.mean(train_stats['FN']), tstloss = np.mean(test_stats['loss']),
n=n), flush=True) #end M_C, M_G, is_given = create_batch(puzzles[0:1], 1) prediction = sess.run(GNN["prediction"], feed_dict={ GNN["gnn"].matrix_placeholders["M_C"]: M_C, GNN["gnn"].matrix_placeholders["M_G"]: M_G, GNN["gnn"].time_steps: time_steps, GNN["is_given"]: is_given }) print("Puzzle:\t\t{}".format(puzzles[0])) print("Prediction:\t{}".format(''.join( [str(x + 1) for x in np.argmax(prediction, axis=1)]))) print("Diff:\t\t{}\n".format(''.join([ str(x) for x in ((np.argmax(prediction, axis=1) + 1) != [int(x) for x in puzzles[0]]).astype(int) ]))) save_weights(sess, "./sudoku-checkpoints") #end #end #end
def main(args):
    """Train an attention-based image-captioning model (Encoder + Decoder).

    Loads pre-extracted image features and captions, runs the train /
    validation loop for ``args.num_epochs`` epochs while logging losses and
    BLEU scores to TensorBoard, periodically checkpoints weights, and
    (optionally) prints beam-search caption predictions for random samples.

    NOTE(review): relies on module-level names (DataLoader, Encoder, Decoder,
    step, evaluate, save_weights, load_weights, Sample, print_output, device)
    defined elsewhere in this file.
    """
    # Feature/caption loading; PCA reduction and normalisation are optional.
    data = DataLoader(pca=args.PCA, norm=args.norm)
    train_captions, train_feature, train_url, train_len = data.get_Training_data(
        args.training)
    test_captions, test_feature, test_url, test_len = data.get_val_data(
        args.testing)
    # Held-out features/captions used only for BLEU evaluation below.
    f, c, _ = data.eval_data()
    writer = SummaryWriter()  # TensorBoard logger
    encoder = Encoder(input_size=train_feature.shape[1],
                      hidden_size=args.hidden_size) \
        .to(device)
    decoder = Decoder(embed_size=args.embed_size,
                      hidden_size=args.hidden_size,
                      attention_dim=args.attention_size,
                      vocab_size=len(data.word_to_idx)) \
        .to(device)
    if args.load_weight:
        # NOTE(review): checkpoint prefix is hard-coded to one training run.
        load_weights(encoder, args.model_path + "Jul28_10-04-57encoder")
        load_weights(decoder, args.model_path + "Jul28_10-04-57decoder")
    for epoch in range(args.num_epochs):
        # Optimizer is re-created every epoch so it picks up the learning
        # rate decayed at the end of the previous epoch (see *= 0.995 below).
        params = list(decoder.parameters()) + list(encoder.parameters())
        criterion = nn.CrossEntropyLoss()
        optimizer = torch.optim.Adam(params=params, lr=args.learning_rate)
        # if epoch >= 100:
        # One pass over the training data (step() performs the updates).
        training_loss = step(encoder=encoder,
                             decoder=decoder,
                             criterion=criterion,
                             data=(train_captions, train_feature, train_len),
                             optimizer=optimizer)
        # if epoch + 1 % 5 == 0:
        # a = evaluate(encoder, decoder, train_feature[0:2], train_captions[0:2], 5, data.word_to_idx)
        # print("bleu4 ", a)
        # Validation pass: no optimizer passed, gradients disabled.
        with torch.no_grad():
            test_loss = step(encoder=encoder,
                             decoder=decoder,
                             criterion=criterion,
                             data=(test_captions, test_feature, test_len))
        # if epoch > 1:
        # BLEU-1..4 on the evaluation split, logged to TensorBoard.
        b1, b2, b3, b4 = evaluate(encoder, decoder, f, c, 5, data.word_to_idx,
                                  data.idx_to_word)
        writer.add_scalars('BLEU', {
            'BLEU1': b1,
            'BLEU2': b2,
            'BLEU3': b3,
            'BLEU4': b4
        }, epoch + 1)
        if (epoch % 30) == 0:
            # Periodic epoch-tagged checkpoint.
            save_weights(encoder, args.model_path + "encoder" + str(epoch))
            save_weights(decoder, args.model_path + "decoder" + str(epoch))
        writer.add_scalars('loss', {
            'train': training_loss,
            'val': test_loss
        }, epoch + 1)
        print(
            'Epoch [{}/{}], Loss: {:.4f}, Perplexity: {:5.4f}, TestLoss: {:.4f}, TestPerplexity: {:5.4f}'
            .format(epoch + 1, args.num_epochs, training_loss,
                    np.exp(training_loss), test_loss, np.exp(test_loss)))
        # Exponential LR decay, applied through the next epoch's optimizer.
        args.learning_rate *= 0.995
        if args.save_weight:
            save_weights(encoder, args.model_path + "encoder" + str(epoch))
            save_weights(decoder, args.model_path + "decoder" + str(epoch))
    if args.save_weight:
        # Final (untagged) weights after the last epoch.
        save_weights(encoder, args.model_path + "encoder")
        save_weights(decoder, args.model_path + "decoder")
    if args.predict:
        # Beam-search caption predictions on random train/val samples.
        sample = Sample(encoder=encoder, decoder=decoder, device=device)
        train_mask = [
            random.randint(0, train_captions.shape[0] - 1)
            for _ in range(args.numOfpredection)
        ]
        test_mask = [
            random.randint(0, test_captions.shape[0] - 1)
            for _ in range(args.numOfpredection)
        ]
        train_featur = torch.from_numpy(train_feature[train_mask])
        train_featur = train_featur.to(device)
        train_encoder_out = encoder(train_featur)
        test_featur = torch.from_numpy(test_feature[test_mask])
        test_featur = test_featur.to(device)
        test_encoder_out = encoder(test_featur)
        train_output = []
        test_output = []
        for i in range(len(test_mask)):
            print(i)
            # Beam width 2 for train samples, 50 for validation samples.
            pre = sample.caption_image_beam_search(
                train_encoder_out[i].reshape(1, args.embed_size),
                data.word_to_idx, 2)
            train_output.append(pre)
            pre = sample.caption_image_beam_search(
                test_encoder_out[i].reshape(1, args.embed_size),
                data.word_to_idx, 50)
            test_output.append(pre)
        print_output(output=test_output,
                     sample=0,
                     gt=test_captions[test_mask],
                     img=test_url[test_mask],
                     title="val",
                     show_image=args.show_image,
                     idx_to_word=data.idx_to_word)
        # Visual separator between validation and training predictions.
        print("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX")
        print("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX")
        print("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX")
        print("")
        print_output(output=train_output,
                     sample=0,
                     gt=train_captions[train_mask],
                     img=train_url[train_mask],
                     title="traning",
                     show_image=args.show_image,
                     idx_to_word=data.idx_to_word)
GNN["prob_degree_predict_acc"], GNN["rank10_labels"], GNN["rank10_predicted"] ], feed_dict={ GNN["gnn"].matrix_placeholders["M"]: sparse_to_dense(M), GNN["gnn"].time_steps: time_steps, GNN["labels"]: sparse_to_dense(labels), GNN["nodes_n"]: nodesPerProblem }) test_degprecision10 = precisionAt10(test_rank10_labels, test_rank10_predicted) print( "{timestamp}\t{memory}\tEpoch {epoch}\tBatch {batch} (n,m,i): ({n},{m},{i})\t| Loss(T:{loss:.5f},D:{degree_cost:.5f}) Acc(D:{degree_acc:.5f}) PrecAt10(D:{degree_prec10:.5f}))" .format(timestamp=timestamp(), memory=memory_usage(), epoch=epoch, batch="rnd", loss=test_loss, degree_cost=test_degc, degree_acc=test_degacc, degree_prec10=test_degprecision10, n=test_n, m=test_m, i=instances), flush=True) #end if save_weights(sess, "degreecentrality-checkpoints") #end for(epochs) #end Session
def main():
    """Train a CNN+LSTM speaker-identification model on MFCC features.

    Features are either extracted from wav files ('load-from-wav', optionally
    cached to pickle) or loaded from previously cached pickles
    ('load-from-pkl'); `feature_actions` and `feature_store` are module-level
    configuration flags. Trains the model, saves weights/model/history and
    evaluates on the test split.
    """
    if feature_actions == 'load-from-wav':
        # Extract MFCCs directly from the audio dataset.
        (X_train, y_train), (X_test, y_test), (X_valid, y_valid) = get_dataset(class_type='speaker')
        x_audio_training = get_mfccs(X_train)
        x_audio_validation = get_mfccs(X_valid)
        x_audio_testing = get_mfccs(X_test)
        if feature_store:
            # Cache features and labels so later runs can use 'load-from-pkl'.
            save_to_pkl(x_audio_training, 'training-speaker-x.pkl')
            save_to_pkl(x_audio_validation, 'validation-speaker-x.pkl')
            save_to_pkl(x_audio_testing, 'testing-speaker-x.pkl')
            save_to_pkl(y_train, 'training-speaker-y.pkl')
            save_to_pkl(y_valid, 'validation-speaker-y.pkl')
            save_to_pkl(y_test, 'testing-speaker-y.pkl')
    elif feature_actions == 'load-from-pkl':
        # Re-use cached features/labels from a previous run.
        x_audio_training = get_mfccs(pickle_file='training-speaker-x.pkl')
        x_audio_validation = get_mfccs(pickle_file='validation-speaker-x.pkl')
        x_audio_testing = get_mfccs(pickle_file='testing-speaker-x.pkl')
        y_train = load_from_pkl('training-speaker-y.pkl')
        y_valid = load_from_pkl('validation-speaker-y.pkl')
        y_test = load_from_pkl('testing-speaker-y.pkl')
    else:
        print("Error in 'feature_actions'")
        return
    # Report split sizes.
    print("Training length: {}".format(len(x_audio_training)))
    print("Validation length: {}".format(len(x_audio_validation)))
    print("Testing length: {}".format(len(x_audio_testing)))

    # Per-frame Conv1D feature extraction wrapped in TimeDistributed,
    # followed by an LSTM over frames and dense classification layers.
    model = Sequential()
    model.add(
        TimeDistributed(Conv1D(filters=16,
                               kernel_size=4,
                               padding='same',
                               activation=tf.nn.relu,
                               data_format='channels_last'),
                        input_shape=(NUM_MFCC, NUM_FRAMES, 1)))
    model.add(
        TimeDistributed(
            Conv1D(filters=8,
                   kernel_size=2,
                   padding='same',
                   activation=tf.nn.relu)))
    model.add(TimeDistributed(MaxPooling1D(pool_size=2)))
    model.add(TimeDistributed(Flatten()))
    model.add(LSTM(50, return_sequences=True))
    model.add(Dropout(0.3))
    model.add(Flatten())
    model.add(Dense(units=512, activation=tf.nn.tanh))
    model.add(Dense(units=256, activation=tf.nn.tanh))
    # One softmax output unit per speaker class.
    model.add(
        Dense(units=y_train.shape[1],
              activation=tf.nn.softmax,
              name='top_layer'))
    model.compile(loss=tf.keras.losses.CategoricalCrossentropy(),
                  optimizer=tf.keras.optimizers.SGD(lr=1e-4,
                                                    decay=1e-6,
                                                    momentum=0.9,
                                                    nesterov=True),
                  metrics=['accuracy'])  # optimizer was 'Adam'
    model.summary()
    # model.load_weights('Libri_Speaker_v1.1.h5')

    # Shape features to (samples, NUM_MFCC, NUM_FRAMES, 1) for the model.
    x_train = np.reshape(x_audio_training,
                         [len(x_audio_training), NUM_MFCC, NUM_FRAMES, 1])
    x_valid = np.reshape(x_audio_validation,
                         [len(x_audio_validation), NUM_MFCC, NUM_FRAMES, 1])
    print("Start Fitting")
    history = model.fit(x_train,
                        y_train,
                        batch_size=16,
                        epochs=200,
                        verbose=1,
                        validation_data=(x_valid, y_valid))

    model_name = 'Libri_Speaker_v1.3'
    print("Saving model as {}".format(model_name))
    model.save_weights(model_name + '.h5')  # weights only
    model.save(model_name + '-model.h5')    # full model (architecture + weights)
    save_weights(model, model_name)         # project-local helper
    write_history(history, filename='history-' + model_name + '.csv')
    test(x_audio_testing, y_test, model)
def main():
    """Train an adversarial autoencoder (AAE) on the AnimeFace dataset.

    Per batch it alternates between (1) a reconstruction + "real" critic
    pass on encoded data and (2) a "fake" critic pass on latents sampled
    from a Gaussian prior. Checkpoints weights every `save_freq` epochs
    into this file's directory and prints per-epoch timing.
    """
    # Set variables.
    img_dim = [64, 64]
    latent_dim = 32
    hidden_dim = 1024
    num_epochs = 100
    save_freq = 25  # checkpoint every N epochs
    batch_size = 64
    shuffle = True
    num_loader_workers = 2
    std_dev = 1.  # prior N(mu, std_dev^2) used for fake latents
    mu = 0.
    cuda = True
    learning_rate = 0.001
    save_dir = os.path.dirname(os.path.realpath(__file__))

    # fix seed for experiment.
    util.fix_seed()

    # Load Encoder, Decoder.
    aae_net = aae.AAE(latent_dim, hidden_dim)
    if cuda:
        aae_net.cuda()

    # Set loss fn.
    loss_fn = aae.loss_fn

    # Load optimizer.
    optimizer = optim.Adam(aae_net.parameters(), lr=learning_rate)

    # Load Dataset.
    anime_data = data_util.AnimeFaceData(img_dim, batch_size, shuffle,
                                         num_loader_workers)

    # Critic targets: ones for real latents, zeroes for sampled ones.
    ones = torch.Tensor(np.ones(batch_size))
    if cuda:
        ones = ones.cuda()
    zeroes = torch.Tensor(np.zeros(batch_size))
    if cuda:
        zeroes = zeroes.cuda()

    # Epoch loop
    for epoch in range(num_epochs):
        print('Epoch {} of {}'.format(epoch + 1, num_epochs))
        # BUG FIX: `start` was never assigned, so the 'Took {}' print below
        # raised NameError on the first epoch.
        start = time.time()

        # Batch loop.
        for i_batch, batch_data in enumerate(anime_data.data_loader, 0):
            print('Batch {}'.format(i_batch + 1))

            # Load batch.
            x, _ = batch_data
            if cuda:
                x = x.cuda()

            # Reset gradient.
            optimizer.zero_grad()

            # Run batch, calculate loss, and backprop.
            # Train autoencoder and gan on real batch.
            x_reconst, real_critic = aae_net.forward(x)
            loss = loss_fn(x, x_reconst, real_critic, ones)
            loss.backward()
            optimizer.step()

            # Train gan on fake batch (latents drawn from the prior).
            fake_z = torch.Tensor(std_dev * np.random.randn(batch_size, latent_dim) + mu)
            if cuda:
                fake_z = fake_z.cuda()
            fake_critic = aae_net.gan_fake_forward(fake_z)
            loss = F.binary_cross_entropy(fake_critic, zeroes, reduction='sum')
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        if epoch % save_freq == 0:
            # BUG FIX: referenced undefined `vae_net`; checkpoint this model.
            util.save_weights(
                aae_net, os.path.join(save_dir, 'aae_{}.pth'.format(epoch)))
        end = time.time()
        print('loss: ', loss)  # last batch's fake-critic loss
        print('Took {}'.format(end - start))
def train(data_df,
          agent,
          num_episodes,
          limit_iterations,
          num_warmup_iterations,
          volatility_lookback,
          log_interval_steps,
          log_comet,
          comet_log_level,
          experiment,
          checkpoints_interval,
          checkpoints_dir,
          save_checkpoints,
          is_test=False,
          results_dir=None):
    """Run DDPG-style training (or evaluation) of `agent` on a stock-portfolio
    environment built from `data_df`.

    Each episode walks once through all rows ("days") of the data. During the
    first `num_warmup_iterations` total steps of training, random actions are
    taken to fill the replay buffer; afterwards the agent both acts and
    updates its policy on every step.

    Args:
        data_df: DataFrame of market data, one row per day.
        agent: RL agent exposing select_action / select_random_action /
            append_observation / update_policy / save_model / is_training.
        num_episodes: number of passes over the data.
        limit_iterations: optional hard cap on total steps (debugging), or None.
        num_warmup_iterations: random-action steps before policy updates start.
        volatility_lookback: window length forwarded to the environment.
        log_interval_steps: console/comet logging period, in steps.
        log_comet, comet_log_level, experiment: comet.ml logging controls.
        checkpoints_interval, checkpoints_dir, save_checkpoints: checkpointing.
        is_test: when True, run in evaluation mode and record actions.
        results_dir: where recorded test actions are saved (test mode only).
    """
    data = data_df.to_numpy()
    agent.is_training = not is_test
    phase = 'test' if is_test else 'train'
    num_days = data.shape[0]
    # init custom OpenAI gym env for stocks portfolio
    # NOTE(review): keyword is spelled 'volatiltiy_lookback' — presumably it
    # matches a same-typo parameter on PortfolioEnv; confirm before renaming.
    env = PortfolioEnv(data, volatiltiy_lookback=volatility_lookback)

    output = []  # actions taken; only populated in test mode

    # training
    total_iterations_counter = 0  # counter for total iterations. num_episodes * num_days
    for episode in range(num_episodes):
        agent.reset_action_noise_process(
        )  # init random process for new episode
        current_state = env.reset()  # get initial state s(t)
        results = defaultdict(list)  # for logging (reset every episode)
        for t in range(num_days):
            if limit_iterations is not None and total_iterations_counter >= limit_iterations:
                # option for hard limit on iterations for debugging
                break
            if not is_test and total_iterations_counter < num_warmup_iterations:
                # warmup to fill up the buffer with random actions
                current_action = agent.select_random_action()
            else:
                # regular training: let agent select action based on observation
                current_action = agent.select_action(current_state)
            if is_test:
                output.append(current_action)

            # execute action on environment, observe new state and reward
            # ('done' is ignored: an episode ends only when the data runs out)
            next_state, current_reward, done, _ = env.step(current_action)

            # logging
            results['reward'].append(current_reward)
            results['current_volatility'].append(env.current_volatility)
            results['current_gains'].append(env.current_gains)
            if t % log_interval_steps == 0:
                avg_reward = util.avg_results(results,
                                              'reward',
                                              lookback=log_interval_steps)
                avg_vol = util.avg_results(results,
                                           'current_volatility',
                                           lookback=log_interval_steps)
                avg_gains = util.avg_results(results,
                                             'current_gains',
                                             lookback=log_interval_steps)
                total_gains = env.total_gains
                print(
                    '{} episode: {} | step: {} | avg_reward: {:.5f} | avg_vol: {:.2f} | avg_step_gains: {:.2f} | total_gains: {:.2f}'
                    .format(phase, episode, t, avg_reward, avg_vol, avg_gains,
                            total_gains))
                env.render()
                if log_comet and comet_log_level in ['interval']:
                    experiment.log_metric('{}_interval_reward'.format(phase),
                                          avg_reward,
                                          step=total_iterations_counter)
                    experiment.log_metric('{}_interval_avg_vol'.format(phase),
                                          avg_vol,
                                          step=total_iterations_counter)
                    experiment.log_metric(
                        '{}_interval_avg_gains'.format(phase),
                        avg_gains,
                        step=total_iterations_counter)
                    experiment.log_metric(
                        '{}_interval_total_gains'.format(phase),
                        total_gains,
                        step=total_iterations_counter)

            # TODO: might need to add episode done states to limit batches not to cross over episodes
            if total_iterations_counter >= num_warmup_iterations:
                # we only want to update the policy after the random state warmup
                # store transition in R (s(t), a(t), r(t), s(t+1))
                agent.append_observation(current_state, current_action,
                                         current_reward, next_state)
                # update policy
                critic_loss_val, actor_loss_val = agent.update_policy()

                # logging
                results['critic'].append(critic_loss_val)
                results['actor'].append(actor_loss_val)
                if log_comet and comet_log_level in ['interval']:
                    avg_critic_loss = util.avg_results(
                        results, 'critic', lookback=log_interval_steps)
                    avg_actor_loss = util.avg_results(
                        results, 'actor', lookback=log_interval_steps)
                    experiment.log_metric(
                        '{}_interval_critic_loss'.format(phase),
                        avg_critic_loss,
                        step=total_iterations_counter)
                    experiment.log_metric(
                        '{}_interval_actor_loss'.format(phase),
                        avg_actor_loss,
                        step=total_iterations_counter)

            current_state = next_state
            total_iterations_counter += 1

        if limit_iterations is not None and total_iterations_counter >= limit_iterations:
            # option for hard limit on iterations for debugging
            break

        if save_checkpoints and (episode + 1) % checkpoints_interval == 0:
            agent.save_model(checkpoints_dir, identifier=episode + 1)

        # logging — whole-episode averages
        # NOTE(review): message says 'Train episode' even when phase == 'test'.
        avg_reward = util.avg_results(results, 'reward')
        avg_vol = util.avg_results(results, 'current_volatility')
        avg_gains = util.avg_results(results, 'current_gains')
        total_gains = env.total_gains
        avg_critic_loss = util.avg_results(results, 'critic')
        avg_actor_loss = util.avg_results(results, 'actor')
        print(
            'Train episode {} results - reward: {:.2f} | avg_vol: {:.2f} | avg_gains: {:.2f} | total_gains: {:.2f}'
            .format(episode, avg_reward, avg_vol, avg_gains, total_gains))
        if log_comet and comet_log_level in ['episode', 'interval']:
            experiment.log_metric('{}_avg_episode_reward'.format(phase),
                                  avg_reward,
                                  step=episode)
            experiment.log_metric('{}_avg_episode_critic_loss'.format(phase),
                                  avg_critic_loss,
                                  step=episode)
            experiment.log_metric('{}_avg_episode_actor_loss'.format(phase),
                                  avg_actor_loss,
                                  step=episode)
            experiment.log_metric('{}_final_episode_avg_vol'.format(phase),
                                  avg_vol,
                                  step=episode)
            experiment.log_metric('{}_final_episode_avg_gains'.format(phase),
                                  avg_gains,
                                  step=episode)
            experiment.log_metric('{}_final_episode_total_gains'.format(phase),
                                  total_gains,
                                  step=episode)
        env.render()

    if save_checkpoints:
        agent.save_model(checkpoints_dir, identifier='final')
    if is_test:
        # Persist the sequence of actions (portfolio weights) chosen in test.
        util.save_weights(output, columns=data_df.keys(), results_dir=results_dir)
def main():
    """Train a VQ-VAE on the AnimeFace dataset.

    Runs `num_epochs` epochs of reconstruction training, prints the average
    per-sample loss and wall-clock time for each epoch, and checkpoints the
    model weights every `save_freq` epochs into this file's directory.
    """
    # Set variables.
    img_dim = [64, 64]
    codebook_size = 256
    latent_dim = 32
    hidden_dim = 1024
    num_epochs = 100
    save_freq = 25  # checkpoint every N epochs
    batch_size = 64
    shuffle = True
    num_loader_workers = 4
    beta = 1.0  # NOTE(review): unused in this function — confirm loss_fn handles weighting
    cuda = True
    learning_rate = 0.001
    save_dir = os.path.dirname(os.path.realpath(__file__))

    # fix seed for experiment.
    util.fix_seed()

    # Load Encoder, Decoder.
    model_net = vqvae.VQVAE(latent_dim, hidden_dim, codebook_size)
    if cuda:
        model_net.cuda()

    # Set loss fn.
    loss_fn = vqvae.loss_fn

    # Load optimizer.
    optimizer = optim.Adam(model_net.parameters(), lr=learning_rate)

    # Load Dataset.
    anime_data = data_util.AnimeFaceData(img_dim, batch_size, shuffle, num_loader_workers)

    # Epoch loop
    for epoch in range(num_epochs):
        print('Epoch {} of {}'.format(epoch, num_epochs))
        start = time.time()
        # BUG FIX: `train_loss` was never initialised or accumulated, so the
        # per-epoch loss print below raised NameError.
        train_loss = 0

        # Batch loop.
        for i_batch, batch_data in enumerate(anime_data.data_loader, 0):
            print('Batch {}'.format(i_batch+1))

            # Load batch.
            x, _ = batch_data
            if cuda:
                x = x.cuda()

            # Reset gradient.
            optimizer.zero_grad()

            # Run batch, calculate loss, and backprop.
            x_reconst, embed_loss, _ = model_net.forward(x)
            loss = loss_fn(x, x_reconst, embed_loss)
            train_loss += loss.item()
            loss.backward()
            optimizer.step()

        if epoch % save_freq == 0:
            # BUG FIX: referenced undefined `vae_net`; checkpoint this model.
            util.save_weights(model_net, os.path.join(save_dir, 'vqvae_{}.pth'.format(epoch)))
        end = time.time()
        print('loss: ', train_loss / len(anime_data.img_folder))
        print('Took {}'.format(end - start))
def main():
    """Train a VAE on the AnimeFace dataset.

    Optimisation is either adaptive (Adam) or plain SGD driven by a
    one-cycle learning-rate schedule, selected by the `adaptive` flag.
    Checkpoints are written every `save_freq` epochs and once more after
    the final epoch.
    """
    # Experiment configuration.
    img_dim = [64, 64]
    latent_dim = 32
    hidden_dim = 1024
    num_epochs = 20
    save_freq = 5
    batch_size = 128
    shuffle = True
    num_loader_workers = 3
    beta = 1.
    cuda = True
    learning_rate = 0.001
    adaptive = False  # True  (flip to use Adam instead of SGD + one-cycle)
    save_dir = os.path.dirname(os.path.realpath(__file__))
    mode = 'adaptive' if adaptive else 'cyclic'  # tag used in checkpoint names

    # Reproducibility.
    util.fix_seed()

    # Model and loss.
    net = vae.VAE(latent_dim, hidden_dim)
    if cuda:
        net.cuda()
    loss_fn = vae.loss_fn

    # Dataset.
    faces = data_util.AnimeFaceData(img_dim, batch_size, shuffle,
                                    num_loader_workers)

    # Optimiser: Adam when adaptive, otherwise SGD with a one-cycle schedule.
    if adaptive:
        optimizer = optim.Adam(net.parameters(), lr=learning_rate)
    else:
        optimizer = optim.SGD(net.parameters(), lr=learning_rate)
        scheduler = optim.lr_scheduler.OneCycleLR(optimizer,
                                                  max_lr=1e-1,
                                                  epochs=num_epochs,
                                                  steps_per_epoch=10)

    for epoch in range(1, num_epochs + 1):
        print('Epoch {} of {}'.format(epoch, num_epochs))
        start = time.time()
        train_loss = 0

        # One pass over the data.
        for batch_idx, (x, _) in enumerate(faces.data_loader):
            if cuda:
                x = x.cuda()
            optimizer.zero_grad()
            x_reconst, mu, logvar = net.forward(x)
            loss = loss_fn(x, x_reconst, mu, logvar, beta)
            train_loss += loss.item()
            loss.backward()
            optimizer.step()

        # The LR schedule is advanced once per epoch (not per batch).
        if not adaptive:
            scheduler.step()

        if epoch % save_freq == 0:
            util.save_weights(
                net, os.path.join(save_dir, 'vae_{}_{}.pth'.format(mode, epoch)))

        end = time.time()
        print('loss: ', train_loss / len(faces.img_folder))
        print('Took {}'.format(end - start))

    # Final checkpoint after the last epoch.
    util.save_weights(
        net, os.path.join(save_dir, 'vae_{}_{}.pth'.format(mode, epoch)))
GNN["labels_eig"]: sparse_to_dense(test_batch["eigenvector_compare"]), GNN["nodes_n"]: test_batch["problem_n"] }) print( "{timestamp}\t{memory}\tEpoch {epoch}\tBatch {batch} (n,m,i): ({n},{m},{i})\t| Loss(T:{loss:.5f},D:{degree_cost:.5f}, B:{bet_cost:.5f}, C:{clo_cost:.5f}, E:{eig_cost:.5f}) Acc(T:{acc:.5f}, D:{degree_acc:.5f}, B:{bet_acc:.5f}, C:{clo_acc:.5f}, E:{eig_acc:.5f}) " .format(timestamp=timestamp(), memory=memory_usage(), epoch=epoch, batch="rnd", loss=test_loss, acc=test_acc, degree_cost=test_degc, degree_acc=test_degacc, bet_cost=test_betc, bet_acc=test_betacc, clo_cost=test_cloc, clo_acc=test_cloacc, eig_cost=test_eigc, eig_acc=test_eigacc, n=n, m=m, i=len(batch["problem_n"])), flush=True) save_weights(sess, "rank-centrality-checkpoints") #end for(epochs) #end Session
def sgd_minibatches(iters, delta_0, w, minibatches=[], parses=[], batch_size=20,
                    sparse=False, log=False, bar=True, prob_log=False, log_last=False,
                    check_convergence=False, scale_weight=False, regularizer=False,
                    lmbda=2.0, savepath=False, prediction=False, shuffle=False,
                    prediction_length=10):
    """ Performs stochastic gradient descent on the weights vector w on
    minibatches = [minibatch_1, minibatch_2,....,minibatch_N].

    We are decaying the learning rate after each minibatch. We follow the
    following rule from http://cilvr.cs.nyu.edu/diglib/lsml/bottou-sgd-tricks-2012.pdf
    section 5.2:

        delta_k = delta_0 * (1 + delta_0*lmbda*k)**(−1)

    where k is the index of the minibatch and delta_0 is the initial learning
    rate, and lmbda is another hyperparameter that controls the rate of decay.

    Returns:
        ws: list of weight vectors, one appended per minibatch.
        delta_ws: per-minibatch averaged weight deltas.
        avg_likelihoods: running average log-likelihood per parse.

    NOTE(review): `minibatches=[]` and `parses=[]` are mutable default
    arguments (shared across calls) — confirm callers always pass them.
    NOTE(review): `w` is only reassigned inside the `scale_weight` branch;
    with scale_weight falsy the per-minibatch update `w = w_new` stays
    commented out below — confirm this is intended.
    """
    likelihoods = list()
    avg_likelihoods = list()
    ws = []        # weight vector snapshot after each minibatch
    delta_ws = []  # averaged update magnitude per minibatch
    for i in range(iters):
        print('Iteration {0}/{1}'.format(i+1, iters))
        learning_rates = list()
        if bar and not (i==iters-1 and log_last):
            # NOTE(review): `bar` (a bool parameter) is reassigned to the
            # ProgressBar instance here and stays truthy afterwards.
            bar = progressbar.ProgressBar(max_value=len(minibatches))
        if shuffle:
            # Re-partition a fresh shuffle of the parses into minibatches.
            minibatches = partition(random.sample(parses, len(parses)), batch_size)
        for k, minibatch in enumerate(minibatches):
            delta_w = 0.0
            w_new = defaultdict(float)
            # Decayed learning rate; equals delta_0 when k=0 and i=0.
            delta_k = delta_0 * (1 + delta_0*(lmbda*(i*len(minibatches)+k)))**(-1) # this is delta_k = delta_0 when k=0 and i=0
            learning_rates.append(delta_k)
            if bar and not (i==iters-1 and log_last):
                bar.update(k)
            for m, parse in enumerate(minibatch):
                # unpack parse
                target_forest, ref_forest, src_fsa, tgt_sent = parse

                ### D_n(x) ###
                tgt_edge2fmap, _ = featurize_edges(target_forest, src_fsa,
                                                   tgt_sent=tgt_sent,
                                                   sparse_del=sparse,
                                                   sparse_ins=sparse,
                                                   sparse_trans=sparse)

                # recompute edge weights
                tgt_edge_weights = {edge: np.exp(weight_function(edge, tgt_edge2fmap[edge], w))
                                    for edge in target_forest}

                # compute inside and outside
                tgt_tsort = top_sort(target_forest)
                root_tgt = Nonterminal("D_n(x)")
                I_tgt = inside_algorithm(target_forest, tgt_tsort, tgt_edge_weights)
                O_tgt = outside_algorithm(target_forest, tgt_tsort, tgt_edge_weights, I_tgt, root_tgt)

                # compute expected features
                expected_features_Dn_x = expected_feature_vector(target_forest, I_tgt, O_tgt, tgt_edge2fmap)

                ### D(x,y) ###
                ref_edge2fmap, _ = featurize_edges(ref_forest, src_fsa,
                                                   tgt_sent=tgt_sent,
                                                   sparse_del=sparse,
                                                   sparse_ins=sparse,
                                                   sparse_trans=sparse)

                # recompute edge weights
                ref_edge_weights = {edge: np.exp(weight_function(edge, ref_edge2fmap[edge], w))
                                    for edge in ref_forest}

                # compute inside and outside
                tsort = top_sort(ref_forest)
                root_ref = Nonterminal("D(x,y)")
                I_ref = inside_algorithm(ref_forest, tsort, ref_edge_weights)
                O_ref = outside_algorithm(ref_forest, tsort, ref_edge_weights, I_ref, root_ref)

                # compute expected features
                expected_features_D_xy = expected_feature_vector(ref_forest, I_ref, O_ref, ref_edge2fmap)

                # update w
                w_step, d_w = update_w(w, expected_features_D_xy, expected_features_Dn_x,
                                       delta=delta_k, regularizer=regularizer)

                # store likelihoods
                if I_ref and I_tgt: # for the case of an empty forest! since log(0) = -inf
                    # compute the likelihood of the target sentence
                    l = np.log(I_ref[root_ref]) - np.log(I_tgt[root_tgt])
                    if np.isfinite(l):
                        likelihoods.append(l)
                    else:
                        # carry previous value forward when l is inf/nan
                        # NOTE(review): IndexError if this happens on the very first parse.
                        likelihoods.append(likelihoods[-1])
                else:
                    likelihoods.append(likelihoods[-1])
                avg_likelihood = sum(likelihoods) / len(likelihoods)
                avg_likelihoods.append(avg_likelihood)

                # the update is averaged over the minibatch
                delta_w += d_w / len(minibatch)
                for feature, value in w_step.items():
                    w_new[feature] += value / len(minibatch)

                if log or (i==iters-1 and log_last):
                    # Verbose per-parse diagnostics: Viterbi best derivation
                    # and an ancestral-sampling estimate.
                    print("x = '{}'".format(src_fsa.sent))
                    print("y = '{}'".format(tgt_sent))
                    print('Viterbi')
                    d = viterbi(target_forest, tgt_tsort, tgt_edge_weights, I_tgt, root_tgt) # use exp!
                    candidates = write_derrivation(d)
                    print("Best y = '{}'".format(candidates.pop()))
                    print('P(y,d|x) = {}'.format(joint_prob(d, tgt_edge_weights, I_tgt, root_tgt, log=prob_log)))
                    n = 100
                    d, count = ancestral_sample(n, target_forest, tgt_tsort, tgt_edge_weights, I_tgt, root_tgt) # use exp!
                    candidates = write_derrivation(d)
                    print('Most sampled: {0}/{1}'.format(count, n))
                    print("Best y = '{}'".format(candidates.pop()))
                    print('P(y,d|x) = {}\n'.format(joint_prob(d, tgt_edge_weights, I_tgt, root_tgt, log=prob_log)))

            if bar and not (i==iters-1 and log_last):
                bar.update(k+1)

            # hack: scale weights so that they are at most of the scale 10**scale_weight
            if scale_weight:
                abs_max = max(map(abs, w_new.values()))
                if np.isfinite(abs_max):
                    # NOTE(review): this inner `k` shadows the minibatch index.
                    for k, v in w_new.items():
                        w_new[k] = v / 10**(int(np.log10(abs_max))+1 - scale_weight)
                    # update
                    w = w_new
                else:
                    # return to previous weight
                    print('inf or nan')
                    w = ws[-2]
                    print(tgt_sent)

            # update after each minibatch
            # w = w_new
            ws.append(w)
            delta_ws.append(delta_w)

        if bar and not (i==iters-1 and log_last):
            bar.finish()

        if savepath:
            # Checkpoint the weights for this iteration.
            save_weights(w, savepath + 'trained-{}-'.format(i+1))

        if check_convergence:
            print('delta w: {}\n'.format([ds / len(w.keys()) for ds in delta_ws]))
            print('Learning rates: {}'.format(learning_rates))

        # if prediction and i%5==0: # save every 5 iterations
        # NOTE(review): the guard above is commented out, so predict() runs
        # after every iteration — confirm this is intended.
        predict(parses[0:prediction_length], w, i+1, prediction)

    return ws, delta_ws, avg_likelihoods