def single_test(bin_id, model, sess, nprint, batch_size, dev, p,
                print_out=True, offset=None, beam_model=None):
    """Test model on test data from the given bin using the given session."""
    if not dev[p][bin_id]:
        data.print_out("  bin %d (%d)\t%s\tppl NA errors NA seq-errors NA"
                       % (bin_id, data.bins[bin_id], p))
        return 1.0, 1.0, 0.0
    inpt, target = data.get_batch(
        bin_id, batch_size, dev[p], FLAGS.height, offset)
    if FLAGS.beam_size > 1 and beam_model:
        loss, res, new_tgt, scores = m_step(
            model, beam_model, sess, batch_size, inpt, target, bin_id,
            FLAGS.eval_beam_steps, p)
        score_avgs = [sum(s) / float(len(s)) for s in scores]
        score_maxs = [max(s) for s in scores]
        score_str = ["(%.2f, %.2f)" % (score_avgs[i], score_maxs[i])
                     for i in xrange(FLAGS.eval_beam_steps)]
        data.print_out("  == scores (avg, max): %s" % "; ".join(score_str))
        errors, total, seq_err = data.accuracy(inpt, res, target, batch_size,
                                               nprint, new_tgt, scores[-1])
    else:
        loss, res, _, _ = model.step(sess, inpt, target, False)
        errors, total, seq_err = data.accuracy(inpt, res, target,
                                               batch_size, nprint)
    seq_err = float(seq_err) / batch_size
    if total > 0:
        errors = float(errors) / total
    if print_out:
        data.print_out("  bin %d (%d)\t%s\tppl %.2f errors %.2f seq-errors %.2f"
                       % (bin_id, data.bins[bin_id], p, data.safe_exp(loss),
                          100 * errors, 100 * seq_err))
    return (errors, seq_err, loss)
def single_test(l, model, sess, task, nprint, batch_size, print_out=True,
                offset=None, ensemble=None, get_steps=False):
    """Test model on test data of length l using the given session."""
    inpt, target = data.get_batch(l, batch_size, False, task, offset)
    _, res, _, steps = model.step(sess, inpt, target, False,
                                  get_steps=get_steps)
    errors, total, seq_err = data.accuracy(inpt, res, target,
                                           batch_size, nprint)
    seq_err = float(seq_err) / batch_size
    if total > 0:
        errors = float(errors) / total
    if print_out:
        data.print_out("  %s len %d errors %.2f sequence-errors %.2f"
                       % (task, l, 100 * errors, 100 * seq_err))
    # Ensemble eval: restore each checkpoint, then sum the outputs.
    if ensemble:
        results = []
        for m in ensemble:
            model.saver.restore(sess, m)
            _, result, _, _ = model.step(sess, inpt, target, False)
            m_errors, m_total, m_seq_err = data.accuracy(
                inpt, result, target, batch_size, nprint)
            m_seq_err = float(m_seq_err) / batch_size
            if m_total > 0:  # was `total`; the guard must match the divisor
                m_errors = float(m_errors) / m_total
            data.print_out("  %s len %d m-errors %.2f m-sequence-errors %.2f"
                           % (task, l, 100 * m_errors, 100 * m_seq_err))
            results.append(result)
        ens = [sum(o) for o in zip(*results)]
        errors, total, seq_err = data.accuracy(inpt, ens, target,
                                               batch_size, nprint)
        seq_err = float(seq_err) / batch_size
        if total > 0:
            errors = float(errors) / total
        if print_out:
            data.print_out("  %s len %d ens-errors %.2f ens-sequence-errors %.2f"
                           % (task, l, 100 * errors, 100 * seq_err))
    return errors, seq_err, (steps, inpt, [np.argmax(o, axis=1) for o in res])
def run_epoch(sess, model, iter_obj, val=False, verbose=True):
    epoch_pred = []
    epoch_label = []
    epoch_loss = []
    step = 0
    for X, seq_len, y in iter_obj:
        feed = model.build_feeddict(X, seq_len, y, val=val)
        if val:
            # Validation: run the graph without the train op.
            class_pred, batch_loss = sess.run([model.pred, model.loss],
                                              feed_dict=feed)
        else:
            class_pred, batch_loss, _ = sess.run(
                [model.pred, model.loss, model.train_op], feed_dict=feed)
        epoch_pred.append(class_pred)
        epoch_label.append(y)
        epoch_loss.append(batch_loss)
        step += 1
        if verbose and not val:
            sys.stdout.write('\r{} / {} : loss = {}'.format(
                step * model.config.batch_size, len(model.X_train),
                np.mean(epoch_loss)))
            sys.stdout.flush()
    predictions = np.concatenate(epoch_pred, axis=0)
    labels = np.concatenate(epoch_label, axis=0)
    acc = accuracy(labels, predictions)
    return epoch_loss, acc
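# Hedged sketch, not part of the snippet above: a minimal `accuracy(labels,
# predictions)` helper consistent with how run_epoch() calls it, assuming
# `predictions` holds per-class scores and `labels` holds integer class ids
# (both assumptions; the real helper lives elsewhere in that repo).
import numpy as np

def accuracy(labels, predictions):
    """Fraction of examples whose argmax prediction matches the label."""
    pred_ids = np.argmax(predictions, axis=1)
    return float(np.mean(pred_ids == np.asarray(labels)))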
def test():
    model.eval()
    output = model(features, adj)
    loss_test = criterion(output[idx_test], labels[idx_test])
    acc_test = accuracy(output[idx_test], labels[idx_test])
    print("Test set results:",
          "loss= {:.4f}".format(loss_test.item()),
          "accuracy= {:.4f}".format(acc_test.item()))
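# Hedged sketch of the `accuracy(output, labels)` helper the GCN snippets
# above and below appear to call (pygcn-style); assumes `output` is per-node
# log-probabilities and `labels` integer class ids. The `.item()` calls above
# work because this returns a 0-dim tensor.
def accuracy(output, labels):
    """Mean accuracy of argmax predictions over the given nodes."""
    preds = output.max(1)[1].type_as(labels)
    correct = preds.eq(labels).double().sum()
    return correct / len(labels)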
def compute_test():
    model.eval()
    losses_batch = []
    acc_batch = []
    # Draw from a single iterator; calling iter(test_loader) inside the loop
    # restarts the loader on every draw (and, when not shuffled, yields the
    # same first batch each time).
    test_iter = iter(test_loader)
    for _ in range(len(test_loader)):
        try:
            (X, A, A2), label = next(test_iter)
            if args.cuda:
                X = X.cuda()
                A = A.cuda()
                A2 = A2.cuda()
                label = label.cuda()
            output = model(X=X.squeeze(), A=A.squeeze(), A2=A2.squeeze())
            loss_test = criterion(output, label.view(-1))
            acc_test = accuracy(output, label.view(-1))
            losses_batch.append(loss_test)
            acc_batch.append(acc_test)
        except Exception as e:  # skip corrupt samples, but report them
            print(e)
    avg_loss = torch.mean(torch.Tensor(losses_batch))
    avg_acc = torch.mean(torch.Tensor(acc_batch))
    print("Test set results:",
          "loss= {:.4f}".format(avg_loss.data),
          "accuracy= {:.4f}".format(avg_acc.data))
def compute_test():
    model.eval()
    losses_batch = []
    acc_batch = []
    # Draw from a single iterator; calling iter(test_loader) inside the loop
    # restarts the loader on every draw (and, when not shuffled, yields the
    # same first batch each time).
    test_iter = iter(test_loader)
    for _ in range(args.batch_size):
        try:
            (X, A, D), label = next(test_iter)
            if args.cuda:
                X = X.cuda()
                A = A.cuda()
                D = D.cuda()
                label = label.cuda()
            output = model(X=X.squeeze(), A=A.squeeze(), D=D.squeeze())
            loss_test = F.nll_loss(output.unsqueeze(0), label.long())
            acc_test = accuracy(output, label)
            losses_batch.append(loss_test)
            acc_batch.append(acc_test)
        except Exception as e:  # skip corrupt samples, but report them
            print(e)
    print(losses_batch)
    print(acc_batch)
    avg_loss = torch.mean(torch.Tensor(losses_batch))
    avg_acc = torch.mean(torch.Tensor(acc_batch))
    print("Test set results:",
          "loss= {:.4f}".format(avg_loss.data),
          "accuracy= {:.4f}".format(avg_acc.data))
def evaluate(args, model, tokenizer, eval_dataset):
    results = {}
    if not os.path.exists(args.output_dir) and args.local_rank in [-1, 0]:
        os.makedirs(args.output_dir)
    args.eval_batch_size = args.per_gpu_eval_batch_size * max(1, args.n_gpu)
    # Note that DistributedSampler samples randomly.
    eval_sampler = (SequentialSampler(eval_dataset) if args.local_rank == -1
                    else DistributedSampler(eval_dataset))
    eval_dataloader = DataLoader(eval_dataset, sampler=eval_sampler,
                                 batch_size=args.eval_batch_size,
                                 collate_fn=mCollateFn)

    # Eval!
    logger.info("***** Running evaluation *****")
    logger.info("  Num examples = %d", len(eval_dataset))
    logger.info("  Batch size = %d", args.eval_batch_size)
    CE = torch.nn.CrossEntropyLoss(reduction='none',
                                   ignore_index=tokenizer.mask_token_id)
    preds = []
    out_label_ids = []
    for batch in tqdm(eval_dataloader, desc="Evaluating"):
        model.eval()
        with torch.no_grad():
            b_size, num_cand, seq_len = batch[0].shape
            input_ids = batch[0].view(-1, seq_len).cuda()
            attention_mask = batch[1].view(-1, seq_len).cuda()
            input_labels = batch[2].view(-1, seq_len).cuda()
            shift_labels = input_labels[..., 1:].contiguous().view(-1)
            inputs = {'input_ids': input_ids, 'attention_mask': attention_mask}
            outputs = model(**inputs)
            # Shift logits left by one so position t predicts token t+1.
            shift_logits = outputs[0][..., :-1, :].contiguous().view(
                -1, outputs[0].size(-1))
            ce_loss = CE(shift_logits, shift_labels)
            ce_loss = ce_loss.view(outputs[0].size(0), -1).sum(1)
            # Length-normalize, then negate so higher means more likely.
            valid_tokens = (input_ids != tokenizer.mask_token_id).long().sum(1)
            ce_loss /= valid_tokens
            ce_loss = -ce_loss.view(b_size, num_cand)
            preds.append(ce_loss)
            out_label_ids.append(batch[3].numpy())
    preds = torch.cat(preds, dim=0).cpu().numpy()
    save_logits(preds.tolist(), os.path.join(args.output_dir, args.logits_file))
    preds = np.argmax(preds, axis=1)
    result = accuracy(preds, np.concatenate(out_label_ids))
    results.update(result)
    output_eval_file = os.path.join(args.output_dir, args.results_file)
    with open(output_eval_file, "w") as writer:
        logger.info("***** Eval results *****")
        for key in sorted(result.keys()):
            logger.info("  %s = %s", key, str(result[key]))
            writer.write("%s = %s\n" % (key, str(result[key])))
    return results
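# Hedged sketch of the `accuracy(preds, labels)` helper both evaluate()
# variants assume; the real repo may report more metrics. It returns a dict
# so that `results.update(result)` and the key loop above work as written.
def accuracy(preds, labels):
    return {"acc": float((preds == labels).mean())}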
def single_test(l, model, sess, task, nprint, batch_size, print_out=True,
                offset=None):
    """Test model on test data of length l using the given session."""
    inpt, target = data.get_batch(l, batch_size, False, task, offset)
    _, res, _, steps = model.step(sess, inpt, target, False)
    errors, total, seq_err = data.accuracy(inpt, res, target,
                                           batch_size, nprint)
    seq_err = float(seq_err) / batch_size
    if total > 0:
        errors = float(errors) / total
    if print_out:
        data.print_out("  %s len %d errors %.2f sequence-errors %.2f"
                       % (task, l, 100 * errors, 100 * seq_err))
    return errors, seq_err, (steps, inpt, [np.argmax(o, axis=1) for o in res])
def train(epoch):
    t = time.time()
    model.train()
    optimizer.zero_grad()
    output = model(features, adj)
    loss_train = criterion(output[idx_train], labels[idx_train])  # only labeled nodes
    acc_train = accuracy(output[idx_train], labels[idx_train])
    loss_train.backward()
    optimizer.step()

    model.eval()
    output = model(features, adj)
    loss_val = criterion(output[idx_val], labels[idx_val])
    acc_val = accuracy(output[idx_val], labels[idx_val])
    print('Epoch: {:04d}'.format(epoch + 1),
          'loss_train: {:.4f}'.format(loss_train.item()),
          'acc_train: {:.4f}'.format(acc_train.item()),
          'loss_val: {:.4f}'.format(loss_val.item()),
          'acc_val: {:.4f}'.format(acc_val.item()),
          'time: {:.4f}s'.format(time.time() - t))
    return loss_val
def train(epoch):
    t = time.time()
    model.train()
    losses_batch = []
    acc_batch = []
    # Draw from a single iterator (iter(train_loader) inside the loop would
    # restart the loader), and zero the gradients before each step rather
    # than once per epoch, so steps don't accumulate stale gradients.
    train_iter = iter(train_loader)
    for _ in range(args.batch_size):  # not really "the batch"
        try:
            (X, A, A2), label = next(train_iter)
            if args.cuda:
                X = X.cuda()
                A = A.cuda()
                A2 = A2.cuda()
                label = label.cuda()
            optimizer.zero_grad()
            output = model(X=X.squeeze(), A=A.squeeze(), A2=A2.squeeze())
            loss_train = criterion(output, label.view(-1))
            acc_train = accuracy(output, label.view(-1))
            losses_batch.append(loss_train)
            acc_batch.append(acc_train)
            loss_train.backward()
            optimizer.step()
        except Exception as e:
            print(e)
    if len(losses_batch) > 0:  # tmp solution to deal with the corrupt data
        avg_loss = torch.mean(torch.Tensor(losses_batch))
        avg_acc = torch.mean(torch.Tensor(acc_batch))
        writer.add_scalar('Training Loss', avg_loss.data.item(), epoch)
        writer.add_scalar('Training Accuracy', avg_acc.data.item(), epoch)
        print('Epoch: {:04d}'.format(epoch + 1),
              'loss_train: {:.4f}'.format(avg_loss.data.item()),
              'acc_train: {:.4f}'.format(avg_acc.data.item()),
              'time: {:.4f}s'.format(time.time() - t))
        return avg_loss.data.item()
    return None
def train(epoch):
    t = time.time()
    model.train()
    losses_batch = []
    acc_batch = []
    # Draw from a single iterator (iter(train_loader) inside the loop would
    # restart the loader), and zero the gradients before each step rather
    # than once per epoch.
    train_iter = iter(train_loader)
    for _ in range(args.batch_size):  # not really "the batch"
        try:
            (X, A, D), label = next(train_iter)
            if args.cuda:
                X = X.cuda()
                A = A.cuda()
                D = D.cuda()
                label = label.cuda()
            optimizer.zero_grad()
            output = model(X=X.squeeze(), A=A.squeeze(), D=D.squeeze())
            loss_train = F.nll_loss(output.unsqueeze(0), label.long())
            acc_train = accuracy(output, label)
            losses_batch.append(loss_train)
            acc_batch.append(acc_train)
            loss_train.backward()
            optimizer.step()
        except Exception as e:
            print(e)
    avg_loss = torch.mean(torch.Tensor(losses_batch))
    avg_acc = torch.mean(torch.Tensor(acc_batch))
    print('Epoch: {:04d}'.format(epoch + 1),
          'loss_train: {:.4f}'.format(avg_loss.data.item()),
          'acc_train: {:.4f}'.format(avg_acc.data.item()),
          'time: {:.4f}s'.format(time.time() - t))
    return avg_loss.data.item()
def evaluate(epoch):
    model.eval()
    losses_batch = []
    acc_batch = []
    # Single iterator: iter(val_loader) inside the loop would restart the
    # loader on every draw.
    val_iter = iter(val_loader)
    for _ in range(args.batch_size):
        try:
            (X, A, A2), label = next(val_iter)
            if args.cuda:
                X = X.cuda()
                A = A.cuda()
                A2 = A2.cuda()
                label = label.cuda()
            output = model(X=X.squeeze(), A=A.squeeze(), A2=A2.squeeze())
            loss_val = criterion(output, label.view(-1))
            acc_val = accuracy(output, label.view(-1))
            losses_batch.append(loss_val)
            acc_batch.append(acc_val)
        except Exception as e:
            print(e)
    if len(losses_batch) > 0:
        avg_loss = torch.mean(torch.Tensor(losses_batch))
        avg_acc = torch.mean(torch.Tensor(acc_batch))
        writer.add_scalar('Validation Loss', avg_loss.data.item(), epoch)
        writer.add_scalar('Validation Accuracy', avg_acc.data.item(), epoch)
        print("Validation set results:",
              "loss= {:.4f}".format(avg_loss.data),
              "accuracy= {:.4f}".format(avg_acc.data))
        return avg_loss.data.item()
    return None
outputs, state = tf.nn.dynamic_rnn(cell=multi_rnn_cell,
                                   inputs=embeddings_tensor,
                                   dtype=tf.float32)

# Logit layer: classify from the last time step's output.
logits = tf.layers.dense(outputs[:, -1, :], 2, name="logits")

# Label placeholder.
label_placeholder = tf.placeholder(tf.uint8, shape=[None, 2])

# Loss function.
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(
    labels=label_placeholder, logits=logits))

# Backpropagation algorithm.
train = tf.train.AdamOptimizer().minimize(loss)

accuracy = data_utils.accuracy(logits, label_placeholder)

# Summaries.
tf.summary.scalar('accuracy', accuracy)
tf.summary.scalar('loss', loss)
tf.summary.histogram("logits", logits)
tf.summary.histogram("labels", label_placeholder)
summary_tensor = tf.summary.merge_all()

saver = tf.train.Saver()

# Make TensorFlow session.
with tf.Session() as sess:
    training_summary_writer = tf.summary.FileWriter(
        TENSORBOARD_LOGDIR + "/training", sess.graph)
    test_summary_writer = tf.summary.FileWriter(
        TENSORBOARD_LOGDIR + "/test", sess.graph)
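    # Hedged continuation sketch, not in the original snippet: one way the
    # training loop might proceed inside the session above. N_STEPS,
    # get_batch(), and input_placeholder are hypothetical stand-ins for
    # names defined elsewhere in that script.
    sess.run(tf.global_variables_initializer())
    for step in range(N_STEPS):
        batch_x, batch_y = get_batch()
        _, summary = sess.run([train, summary_tensor],
                              feed_dict={input_placeholder: batch_x,
                                         label_placeholder: batch_y})
        training_summary_writer.add_summary(summary, step)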
def train():
    """Train the model."""
    batch_size = FLAGS.batch_size
    tasks = FLAGS.task.split("-")
    with tf.Session() as sess:
        (model, min_length, max_length, checkpoint_dir,
         curriculum, _) = initialize(sess)
        quant_op = neural_gpu.quantize_weights_op(512, 8)
        max_cur_length = min(min_length + 3, max_length)
        prev_acc_perp = [1000000 for _ in xrange(3)]
        prev_seq_err = 1.0

        # Main training loop.
        while True:
            global_step, pull, max_cur_length, learning_rate = sess.run(
                [model.global_step, model.pull, model.cur_length, model.lr])
            acc_loss, acc_total, acc_errors, acc_seq_err = 0.0, 0, 0, 0
            acc_grad_norm, step_count, step_time = 0.0, 0, 0.0
            for _ in xrange(FLAGS.steps_per_checkpoint):
                global_step += 1
                task = random.choice(tasks)

                # Select the length for curriculum learning.
                l = np.random.randint(max_cur_length - min_length + 1) + min_length
                # Prefer longer stuff 60% of time.
                if np.random.randint(100) < 60:
                    l1 = np.random.randint(max_cur_length - min_length + 1) + min_length
                    l = max(l, l1)
                # Mixed curriculum learning: in 25% of cases go to any larger length.
                if np.random.randint(100) < 25:
                    l1 = np.random.randint(max_length - min_length + 1) + min_length
                    l = max(l, l1)

                # Run a step and time it.
                start_time = time.time()
                inp, target = data.get_batch(l, batch_size, True, task)
                noise_param = math.sqrt(math.pow(global_step, -0.55) *
                                        prev_seq_err) * FLAGS.grad_noise_scale
                loss, res, gnorm, _ = model.step(sess, inp, target, True,
                                                 noise_param)
                step_time += time.time() - start_time
                acc_grad_norm += float(gnorm)

                # Accumulate statistics only if we did not exceed curriculum length.
                if l < max_cur_length + 1:
                    step_count += 1
                    acc_loss += loss
                    errors, total, seq_err = data.accuracy(
                        inp, res, target, batch_size, 0)
                    acc_total += total
                    acc_errors += errors
                    acc_seq_err += seq_err

            # Normalize and print out accumulated statistics.
            acc_loss /= step_count
            step_time /= FLAGS.steps_per_checkpoint
            acc_seq_err = float(acc_seq_err) / (step_count * batch_size)
            prev_seq_err = max(0.0, acc_seq_err - 0.02)  # No noise at error < 2%.
            acc_errors = float(acc_errors) / acc_total if acc_total > 0 else 1.0
            msg1 = "step %d step-time %.2f" % (global_step, step_time)
            msg2 = "lr %.8f pull %.3f" % (learning_rate, pull)
            msg3 = ("%s %s grad-norm %.8f" %
                    (msg1, msg2, acc_grad_norm / FLAGS.steps_per_checkpoint))
            data.print_out("%s len %d ppx %.8f errors %.2f sequence-errors %.2f"
                           % (msg3, max_cur_length, data.safe_exp(acc_loss),
                              100 * acc_errors, 100 * acc_seq_err))

            # If errors are below the curriculum threshold, move curriculum forward.
            if curriculum > acc_seq_err:
                if FLAGS.quantize:
                    # Quantize weights.
                    data.print_out("  Quantizing parameters.")
                    sess.run([quant_op])
                # Increase current length (until the next length with training data).
                do_incr = True
                while do_incr and max_cur_length < max_length:
                    sess.run(model.cur_length_incr_op)
                    for t in tasks:
                        if data.train_set[t]:
                            do_incr = False
                # Forget last perplexities if we're not yet at the end.
                if max_cur_length < max_length:
                    prev_acc_perp.append(1000000)
                # Either increase pull or, if it's large, average parameters.
                if pull < 0.1:
                    sess.run(model.pull_incr_op)
                else:
                    data.print_out("  Averaging parameters.")
                    sess.run(model.avg_op)
                    if acc_seq_err < (curriculum / 3.0):
                        sess.run(model.lr_decay_op)

            # Lower learning rate if we're worse than the last 3 checkpoints.
            acc_perp = data.safe_exp(acc_loss)
            if acc_perp > max(prev_acc_perp[-3:]):
                sess.run(model.lr_decay_op)
            prev_acc_perp.append(acc_perp)

            # Save checkpoint.
            checkpoint_path = os.path.join(checkpoint_dir, "neural_gpu.ckpt")
            model.saver.save(sess, checkpoint_path,
                             global_step=model.global_step)

            # Run evaluation.
            bound = data.bins[-1] + 1
            for t in tasks:
                l = min_length
                while l < max_length + EXTRA_EVAL and l < bound:
                    _, seq_err, _ = single_test(l, model, sess, t,
                                                FLAGS.nprint, batch_size)
                    l += 1
                    while l < bound + 1 and not data.test_set[t][l]:
                        l += 1
                if seq_err < 0.05:  # Run larger test if we're good enough.
                    _, seq_err = multi_test(data.forward_max, model, sess, t,
                                            FLAGS.nprint, batch_size * 4)
            if seq_err < 0.01:  # Super-large test on 1-task large-forward models.
                if data.forward_max > 4000 and len(tasks) == 1:
                    multi_test(data.forward_max, model, sess, tasks[0],
                               FLAGS.nprint, batch_size * 16, 0)
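# Hedged sketch: `multi_test` is called above but defined elsewhere in the
# repo. A minimal version consistent with those calls, splitting one large
# batch into smaller single_test() runs and averaging the error rates;
# FLAGS.low_batch_size is an assumed flag.
def multi_test(l, model, sess, task, nprint, batch_size, offset=None):
    """Run multiple tests at a lower batch size to save memory."""
    low_batch = min(FLAGS.low_batch_size, batch_size)
    steps = int(batch_size / low_batch)
    errors, seq_err = 0.0, 0.0
    for mstep in xrange(steps):
        cur_offset = None if offset is None else offset + mstep * low_batch
        err, sq_err, _ = single_test(l, model, sess, task, nprint,
                                     low_batch, False, cur_offset)
        errors += err
        seq_err += sq_err
    errors /= steps
    seq_err /= steps
    data.print_out("  %s len %d errors %.2f sequence-errors %.2f"
                   % (task, l, 100 * errors, 100 * seq_err))
    return errors, seq_err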
def train(cat_dim, noise_dim, batch_size, n_batch_per_epoch, nb_epoch,
          dset="mnist"):
    """Train the model.

    Load the whole train data in memory for faster operations.

    args:
        **kwargs (dict): keyword arguments that specify the model
            hyperparameters
    """
    general_utils.setup_logging("IG")

    # Load and rescale data.
    if dset == "mnist":
        print("loading mnist data")
        X_real_train, Y_real_train, X_real_test, Y_real_test = \
            data_utils.load_mnist()
        # pick 1000 samples for testing
        # X_real_test = X_real_test[-1000:]
        # Y_real_test = Y_real_test[-1000:]
    img_dim = X_real_train.shape[-3:]
    epoch_size = n_batch_per_epoch * batch_size

    try:
        # Create optimizers.
        opt_dcgan = Adam(lr=1E-3, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
        opt_discriminator = Adam(lr=2E-4, beta_1=0.9, beta_2=0.999,
                                 epsilon=1e-08)
        # opt_discriminator = SGD(lr=1E-4, momentum=0.9, nesterov=True)

        # Load generator model.
        generator_model = models.load("generator_deconv", cat_dim, noise_dim,
                                      img_dim, batch_size, dset=dset)
        # Load discriminator model.
        discriminator_model = models.load("DCGAN_discriminator", cat_dim,
                                          noise_dim, img_dim, batch_size,
                                          dset=dset)

        generator_model.compile(loss='mse', optimizer=opt_discriminator)
        # Freeze the discriminator while the generator is being trained.
        discriminator_model.trainable = False

        DCGAN_model = models.DCGAN(generator_model, discriminator_model,
                                   cat_dim, noise_dim)

        list_losses = ['binary_crossentropy', 'categorical_crossentropy']
        list_weights = [1, 1]
        DCGAN_model.compile(loss=list_losses, loss_weights=list_weights,
                            optimizer=opt_dcgan)

        # Multiple discriminator losses; allow the discriminator to learn again.
        discriminator_model.trainable = True
        discriminator_model.compile(loss=list_losses,
                                    loss_weights=list_weights,
                                    optimizer=opt_discriminator)

        # Start training.
        print("Start training")
        for e in range(nb_epoch + 1):
            # Initialize progbar and batch counter.
            # progbar = generic_utils.Progbar(epoch_size)
            batch_counter = 1
            start = time.time()
            print("Epoch: {}".format(e))
            for X_real_batch, Y_real_batch in zip(
                    data_utils.gen_batch(X_real_train, batch_size),
                    data_utils.gen_batch(Y_real_train, batch_size)):

                # Create a batch to feed the discriminator model.
                X_disc_fake, y_disc_fake, noise_sample = data_utils.get_disc_batch(
                    X_real_batch, Y_real_batch, generator_model, batch_size,
                    cat_dim, noise_dim, type="fake")
                X_disc_real, y_disc_real = data_utils.get_disc_batch(
                    X_real_batch, Y_real_batch, generator_model, batch_size,
                    cat_dim, noise_dim, type="real")

                # Update the discriminator.
                disc_loss_fake = discriminator_model.train_on_batch(
                    X_disc_fake, [y_disc_fake, Y_real_batch])
                disc_loss_real = discriminator_model.train_on_batch(
                    X_disc_real, [y_disc_real, Y_real_batch])
                disc_loss = disc_loss_fake + disc_loss_real

                # Create a batch to feed the generator model.
                # X_noise, y_gen = data_utils.get_gen_batch(batch_size, cat_dim, noise_dim)

                # Freeze the discriminator while updating the generator.
                discriminator_model.trainable = False
                gen_loss = DCGAN_model.train_on_batch(
                    [Y_real_batch, noise_sample],
                    [y_disc_real, Y_real_batch])
                # Unfreeze the discriminator.
                discriminator_model.trainable = True

                # Training validation.
                p_real_batch, p_Y_batch = discriminator_model.predict(
                    X_real_batch, batch_size=batch_size)
                acc_train = data_utils.accuracy(p_Y_batch, Y_real_batch)

                batch_counter += 1
                # progbar.add(batch_size, values=[("D tot", disc_loss[0]),
                #                                 ("D cat", disc_loss[2]),
                #                                 ("G tot", gen_loss[0]),
                #                                 ("G cat", gen_loss[2]),
                #                                 ("P Real:", p_real_batch),
                #                                 ("Q acc", acc_train)])

                # Save images for visualization.
                if batch_counter % (n_batch_per_epoch / 2) == 0 and e % 10 == 0:
                    data_utils.plot_generated_batch(X_real_batch,
                                                    generator_model,
                                                    batch_size, cat_dim,
                                                    noise_dim, e)
                if batch_counter >= n_batch_per_epoch:
                    break

            print("")
            print('Epoch %s/%s, Time: %s' % (e + 1, nb_epoch,
                                             time.time() - start))
            _, p_Y_test = discriminator_model.predict(
                X_real_test, batch_size=X_real_test.shape[0])
            acc_test = data_utils.accuracy(p_Y_test, Y_real_test)
            print("Epoch: {} Accuracy: {}".format(e + 1, acc_test))

            if e % 1000 == 0:
                gen_weights_path = os.path.join(
                    '../../models/IG/gen_weights.h5')
                generator_model.save_weights(gen_weights_path, overwrite=True)
                disc_weights_path = os.path.join(
                    '../../models/IG/disc_weights.h5')
                discriminator_model.save_weights(disc_weights_path,
                                                 overwrite=True)
                DCGAN_weights_path = os.path.join(
                    '../../models/IG/DCGAN_weights.h5')
                DCGAN_model.save_weights(DCGAN_weights_path, overwrite=True)

    except KeyboardInterrupt:
        pass
def evaluate(args, model, tokenizer, eval_dataset):
    results = {}
    if not os.path.exists(args.output_dir) and args.local_rank in [-1, 0]:
        os.makedirs(args.output_dir)
    args.eval_batch_size = args.per_gpu_eval_batch_size * max(1, args.n_gpu)
    # Note that DistributedSampler samples randomly.
    eval_sampler = (SequentialSampler(eval_dataset) if args.local_rank == -1
                    else DistributedSampler(eval_dataset))
    eval_dataloader = DataLoader(eval_dataset, sampler=eval_sampler,
                                 batch_size=args.eval_batch_size,
                                 collate_fn=mCollateFn)

    # Eval!
    logger.info("***** Running evaluation *****")
    logger.info("  Num examples = %d", len(eval_dataset))
    logger.info("  Batch size = %d", args.eval_batch_size)
    CE = torch.nn.CrossEntropyLoss(reduction='none')
    preds = []
    out_label_ids = []
    for batch in tqdm(eval_dataloader, desc="Evaluating"):
        model.eval()
        with torch.no_grad():
            num_cand = len(batch[0][0])
            choice_loss = []
            # Offsets of each candidate within the concatenated batch.
            choice_seq_lens = np.array(
                [0] + [len(c) for sample in batch[0] for c in sample])
            choice_seq_lens = np.cumsum(choice_seq_lens)
            input_ids = torch.cat([c for sample in batch[0] for c in sample],
                                  dim=0).to(args.device)
            att_mask = torch.cat([c for sample in batch[1] for c in sample],
                                 dim=0).to(args.device)
            input_labels = torch.cat([c for sample in batch[2] for c in sample],
                                     dim=0).to(args.device)
            if len(input_ids) < args.max_sequence_per_time:
                inputs = {'input_ids': input_ids, 'attention_mask': att_mask}
                outputs = model(**inputs)
                ce_loss = CE(outputs[0].view(-1, outputs[0].size(-1)),
                             input_labels.view(-1))
                ce_loss = ce_loss.view(outputs[0].size(0), -1).sum(1)
            else:
                # Too long for one forward pass: process in chunks.
                ce_loss = []
                for chunk in range(0, len(input_ids),
                                   args.max_sequence_per_time):
                    inputs = {
                        'input_ids':
                            input_ids[chunk:chunk + args.max_sequence_per_time],
                        'attention_mask':
                            att_mask[chunk:chunk + args.max_sequence_per_time]
                    }
                    outputs = model(**inputs)
                    tmp_ce_loss = CE(
                        outputs[0].view(-1, outputs[0].size(-1)),
                        input_labels[chunk:chunk +
                                     args.max_sequence_per_time].view(-1))
                    tmp_ce_loss = tmp_ce_loss.view(outputs[0].size(0), -1).sum(1)
                    ce_loss.append(tmp_ce_loss)
                ce_loss = torch.cat(ce_loss, dim=0)
            # Score each candidate by its negative length-normalized loss.
            for c_i in range(len(choice_seq_lens) - 1):
                start = choice_seq_lens[c_i]
                end = choice_seq_lens[c_i + 1]
                choice_loss.append(-ce_loss[start:end].sum() / (end - start))
            choice_loss = torch.stack(choice_loss)
            choice_loss = choice_loss.view(-1, num_cand)
            preds.append(choice_loss)
            out_label_ids.append(batch[3].numpy())
    preds = torch.cat(preds, dim=0).cpu().numpy()
    save_logits(preds.tolist(), os.path.join(args.output_dir, args.logits_file))
    preds = np.argmax(preds, axis=1)
    result = accuracy(preds, np.concatenate(out_label_ids, axis=0))
    results.update(result)
    output_eval_file = os.path.join(args.output_dir, args.results_file)
    with open(output_eval_file, "w") as writer:
        logger.info("***** Eval results *****")
        for key in sorted(result.keys()):
            logger.info("  %s = %s", key, str(result[key]))
            writer.write("%s = %s\n" % (key, str(result[key])))
    return results
while len(data_gen.test_set[cnf.task][tmp_length]) == 0 and tmp_length > 1:
    tmp_length -= 1
data_gen.init_data(cnf.task, tmp_length, cnf.test_data_size, cnf.n_input)
data_gen.reset_counters()

# Use larger batches for shorter sequences.
batchSize = 1
if test_length < 2000:
    batchSize = 16
if test_length < 800:
    batchSize = 128

with tf.Graph().as_default():
    tester = RSE(cnf.n_hidden, [test_length], cnf.n_input, [batchSize],
                 cnf.n_output, cnf.dropout_keep_prob)
    tester.create_test_graph(test_length)
    saver = tf.compat.v1.train.Saver(tf.compat.v1.global_variables())
    with tf.compat.v1.Session(config=cnf.tf_config) as sess:
        sess.run(tf.compat.v1.global_variables_initializer())
        saver.restore(sess, cnf.model_file)
        errors, seq_errors, total = 0, 0, 0
        for _ in range(cnf.test_data_size // batchSize):
            batch_xs, batch_ys = data_supplier.supply_test_data(
                test_length, batchSize)
            acc1, test_result, _ = tester.get_accuracy(sess, batch_xs, batch_ys)
            er, tot, seq_er = data_gen.accuracy(batch_xs[0], test_result,
                                                batch_ys[0], batchSize, 0)
            errors += er
            seq_errors += seq_er
            total += tot
        acc_real = 1.0 - float(errors) / total
        print("Testing length:", test_length, "accuracy=", acc_real,
              "errors =", errors, "incorrect sequences=", seq_errors)

test_length = test_length * 2
n_label = 48
x_patch = np.zeros((batch, n_ch, n_x, n_y, n_z))
y_patch = np.zeros((batch, n_label))
x2_patch = np.zeros((batch, n_label))
x_patch[:, 0, 2:-1, 2:-1, 2:-1] = np.array(imgs)
y_patch[:, :] = np.array(labels)
x2_patch[:, :] = np.array(p_encoding)
vis_enlarge_ratio = 5

ypred = model.predict(x_patch, batch_size=10)
# Binarize the predicted probabilities at 0.5.
ypred[ypred > 0.5] = 1
ypred[ypred < 0.5] = 0

scores = []
for i in range(batch):
    score1 = jaccard_similarity_score(labels[i], ypred[i, :])
    score2 = utils.accuracy(labels[i], ypred[i, :])
    scores.append((score1, score2))
print(scores)

# for x, y in data_generator_undirected(train_dir, traindatalist):
#     pass
train_generator = data_generator_undirected(train_dir, traindatalist,
                                            n_label=48, batch_size=10,
                                            num_nodes_per_img=50)
val_generator = data_generator_undirected(val_dir, valdatalist,
                                          n_label=48, batch_size=10,
                                          num_nodes_per_img=50)
def train():
    """Train the model."""
    batch_size = FLAGS.batch_size * FLAGS.num_gpus
    (model, beam_model, min_length, max_length, checkpoint_dir,
     (train_set, dev_set, en_vocab_path, fr_vocab_path), sv, sess) = initialize()
    with sess.as_default():
        quant_op = model.quantize_op
        max_cur_length = min(min_length + 3, max_length)
        prev_acc_perp = [1000000 for _ in xrange(5)]
        prev_seq_err = 1.0
        is_chief = FLAGS.task < 1
        do_report = False

        # Main training loop.
        while not sv.ShouldStop():
            global_step, max_cur_length, learning_rate = sess.run(
                [model.global_step, model.cur_length, model.lr])
            acc_loss, acc_l1, acc_total, acc_errors, acc_seq_err = 0.0, 0.0, 0, 0, 0
            acc_grad_norm, step_count, step_c1, step_time = 0.0, 0, 0, 0.0

            # For words in the word vector file, set their embedding at start.
            bound1 = FLAGS.steps_per_checkpoint - 1
            if FLAGS.word_vector_file_en and global_step < bound1 and is_chief:
                assign_vectors(FLAGS.word_vector_file_en, "embedding:0",
                               en_vocab_path, sess)
                if FLAGS.max_target_vocab < 1:
                    assign_vectors(FLAGS.word_vector_file_en,
                                   "target_embedding:0", en_vocab_path, sess)
            if FLAGS.word_vector_file_fr and global_step < bound1 and is_chief:
                assign_vectors(FLAGS.word_vector_file_fr, "embedding:0",
                               fr_vocab_path, sess)
                if FLAGS.max_target_vocab < 1:
                    assign_vectors(FLAGS.word_vector_file_fr,
                                   "target_embedding:0", fr_vocab_path, sess)

            for _ in xrange(FLAGS.steps_per_checkpoint):
                step_count += 1
                step_c1 += 1
                global_step = int(model.global_step.eval())
                train_beam_anneal = global_step / float(FLAGS.train_beam_anneal)
                train_beam_freq = FLAGS.train_beam_freq * min(1.0, train_beam_anneal)
                p = random.choice(FLAGS.problem.split("-"))
                train_set = global_train_set[p][-1]
                bucket_id = get_bucket_id(train_buckets_scale[p][-1],
                                          max_cur_length, train_set)
                # Prefer longer stuff 60% of time if not wmt.
                if np.random.randint(100) < 60 and FLAGS.problem != "wmt":
                    bucket1 = get_bucket_id(train_buckets_scale[p][-1],
                                            max_cur_length, train_set)
                    bucket_id = max(bucket1, bucket_id)

                # Run a step and time it.
                start_time = time.time()
                inp, target = data.get_batch(bucket_id, batch_size, train_set,
                                             FLAGS.height)
                noise_param = math.sqrt(math.pow(global_step + 1, -0.55) *
                                        prev_seq_err) * FLAGS.grad_noise_scale
                # In multi-step mode, we use best from beam for middle steps.
                state, new_target, scores, history = None, None, None, []
                while (FLAGS.beam_size > 1 and
                       train_beam_freq > np.random.random_sample()):
                    # Get the best beam (no training, just forward model).
                    new_target, new_first, new_inp, scores = get_best_beam(
                        beam_model, sess, inp, target,
                        batch_size, FLAGS.beam_size, bucket_id, history, p)
                    history.append(new_first)
                    # Training step with the previous input and the best beam as target.
                    _, _, _, state = model.step(sess, inp, new_target,
                                                FLAGS.do_train, noise_param,
                                                update_mem=True, state=state)
                    # Change input to the new one for the next step.
                    inp = new_inp
                    # If all results are great, stop (todo: not to wait for all?).
                    if FLAGS.nprint > 1:
                        print(scores)
                    if sum(scores) / float(len(scores)) >= 10.0:
                        break
                # The final step with the true target.
                loss, res, gnorm, _ = model.step(
                    sess, inp, target, FLAGS.do_train, noise_param,
                    update_mem=True, state=state)
                step_time += time.time() - start_time
                acc_grad_norm += 0.0 if gnorm is None else float(gnorm)

                # Accumulate statistics.
                acc_loss += loss
                acc_l1 += loss
                errors, total, seq_err = data.accuracy(
                    inp, res, target, batch_size, 0, new_target, scores)
                if FLAGS.nprint > 1:
                    print("seq_err: ", seq_err)
                acc_total += total
                acc_errors += errors
                acc_seq_err += seq_err

                # Report summary every 10 steps.
                if step_count + 3 > FLAGS.steps_per_checkpoint:
                    do_report = True  # Don't pollute plot too early.
                if is_chief and step_count % 10 == 1 and do_report:
                    cur_loss = acc_l1 / float(step_c1)
                    acc_l1, step_c1 = 0.0, 0
                    cur_perp = data.safe_exp(cur_loss)
                    summary = tf.Summary()
                    summary.value.extend(
                        [tf.Summary.Value(tag="log_perplexity",
                                          simple_value=cur_loss),
                         tf.Summary.Value(tag="perplexity",
                                          simple_value=cur_perp)])
                    sv.SummaryComputed(sess, summary, global_step)

            # Normalize and print out accumulated statistics.
            acc_loss /= step_count
            step_time /= FLAGS.steps_per_checkpoint
            acc_seq_err = float(acc_seq_err) / (step_count * batch_size)
            prev_seq_err = max(0.0, acc_seq_err - 0.02)  # No noise at error < 2%.
            acc_errors = float(acc_errors) / acc_total if acc_total > 0 else 1.0
            t_size = float(sum([len(x) for x in train_set])) / float(1000000)
            msg = ("step %d step-time %.2f train-size %.3f lr %.6f grad-norm %.4f"
                   % (global_step + 1, step_time, t_size, learning_rate,
                      acc_grad_norm / FLAGS.steps_per_checkpoint))
            data.print_out("%s len %d ppl %.6f errors %.2f sequence-errors %.2f"
                           % (msg, max_cur_length, data.safe_exp(acc_loss),
                              100 * acc_errors, 100 * acc_seq_err))

            # If errors are below the curriculum threshold, move curriculum forward.
            is_good = FLAGS.curriculum_ppx > data.safe_exp(acc_loss)
            is_good = is_good and FLAGS.curriculum_seq > acc_seq_err
            if is_good and is_chief:
                if FLAGS.quantize:
                    # Quantize weights.
                    data.print_out("  Quantizing parameters.")
                    sess.run([quant_op])
                # Increase current length (until the next length with training data).
                sess.run(model.cur_length_incr_op)
                # Forget last perplexities if we're not yet at the end.
                if max_cur_length < max_length:
                    prev_acc_perp.append(1000000)

            # Lower learning rate if we're worse than the last 5 checkpoints.
            acc_perp = data.safe_exp(acc_loss)
            if acc_perp > max(prev_acc_perp[-5:]) and is_chief:
                sess.run(model.lr_decay_op)
            prev_acc_perp.append(acc_perp)

            # Save checkpoint.
            if is_chief:
                checkpoint_path = os.path.join(checkpoint_dir, "neural_gpu.ckpt")
                model.saver.save(sess, checkpoint_path,
                                 global_step=model.global_step)

                # Run evaluation.
                bin_bound = 4
                for p in FLAGS.problem.split("-"):
                    total_loss, total_err, tl_counter = 0.0, 0.0, 0
                    for bin_id in xrange(len(data.bins)):
                        if bin_id < bin_bound or bin_id % FLAGS.eval_bin_print == 1:
                            err, _, loss = single_test(bin_id, model, sess,
                                                       FLAGS.nprint,
                                                       batch_size * 4, dev_set,
                                                       p, beam_model=beam_model)
                            if loss > 0.0:
                                total_loss += loss
                                total_err += err
                                tl_counter += 1
                    test_loss = total_loss / max(1, tl_counter)
                    test_err = total_err / max(1, tl_counter)
                    test_perp = data.safe_exp(test_loss)
                    summary = tf.Summary()
                    summary.value.extend(
                        [tf.Summary.Value(tag="test/%s/loss" % p,
                                          simple_value=test_loss),
                         tf.Summary.Value(tag="test/%s/error" % p,
                                          simple_value=test_err),
                         tf.Summary.Value(tag="test/%s/perplexity" % p,
                                          simple_value=test_perp)])
                    sv.SummaryComputed(sess, summary, global_step)