def evaluate_on_dataset(self, split, sess, args):
    logging.info('Computing loss and metrics on ' + split + ' data')
    initime = time.time()
    sess.run(self.reader.get_init_op(split))
    nbatches = self.reader.get_nbatches_per_epoch(split)
    step = 0
    all_predictions = []
    all_labels = []
    all_names = []
    loss_acum = 0
    while True:
        try:
            predictions, labels, loss, names = sess.run(
                [self.predictions, self.labels, self.loss, self.filenames],
                {self.is_training: False})
            all_predictions.extend(predictions)
            all_labels.extend(labels)
            all_names.extend(names)
            loss_acum += loss
            step += 1
        except tf.errors.OutOfRangeError:
            break
        if step % args.nsteps_display == 0:
            print('Step %i / %i, loss: %.2e' % (step, nbatches, loss))
    metrics = accuracy.compute_accuracy(all_predictions, all_labels)
    loss_per_image = loss_acum / nbatches
    fintime = time.time()
    logging.debug('Done in %.2f s' % (fintime - initime))
    return loss_per_image, metrics, all_predictions, all_labels, all_names
def evaluate(self, args, split):
    logging.info("Start evaluation")
    with tf.Session(config=tools.get_config_proto(args.gpu_memory_fraction)) as sess:
        assert type(self.saver) == tf.train.Saver, 'Saver is not correctly initialized'
        # Initialization:
        self.initialize(sess, args)
        # Process all data:
        logging.info('Computing metrics on ' + split + ' data')
        initime = time.time()
        sess.run(self.reader.get_init_op(split))
        nbatches = self.reader.get_nbatches_per_epoch(split)
        step = 0
        all_predictions = []
        all_labels = []
        all_names = []
        while True:
            try:
                predictions, labels, names, images = sess.run(
                    [self.predictions, self.labels, self.filenames, self.inputs],
                    {self.is_training: False})
                all_predictions.extend(predictions)
                all_labels.extend(labels)
                all_names.extend(names)
                if args.save_input_images:
                    tools.save_input_images(names, images, args, 1, step, self.reader.img_extension)
                step += 1
            except tf.errors.OutOfRangeError:
                break
            if step % args.nsteps_display == 0:
                print('Step %i / %i' % (step, nbatches))
        metrics = accuracy.compute_accuracy(all_predictions, all_labels)
        fintime = time.time()
        logging.debug('Done in %.2f s' % (fintime - initime))
        logging.info(split + ' accuracy: %.2f' % metrics)
        # Write results:
        tools.write_results(all_predictions, all_labels, all_names, self.classnames, args)
        return metrics
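# The PyTorch loop below assumes that `device`, `model`, `loss_fun` and
# `train_dataloader` already exist, and it picks up an SGD optimizer mid-constructor.
# A minimal, hypothetical setup sketch (the concrete network and loss are assumptions,
# not part of the original excerpt):
import time
import torch
import torch.nn as nn
import torchvision

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = torchvision.models.resnet18(num_classes=10).to(device)  # any classifier works here
loss_fun = nn.CrossEntropyLoss()
# train_dataloader: a torch.utils.data.DataLoader yielding (inputs, labels) batches.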
# NOTE: the optimizer constructor and learning rate below are assumed completions;
# only the momentum and weight_decay arguments appear in the original excerpt.
optimizer = torch.optim.SGD(model.parameters(), lr=0.01,
                            momentum=0.9, weight_decay=1e-4)
nepochs = 100
for epoch in range(nepochs):
    epoch_start = time.time()
    print('')
    print('Epoch ' + str(epoch))
    for i, data in enumerate(train_dataloader, 0):
        inputs, labels = data
        inputs = inputs.to(device)
        labels = labels.to(device)
        # Zero the parameter gradients:
        optimizer.zero_grad()
        # Forward + backward + optimize:
        outputs = model(inputs)
        loss = loss_fun(outputs, labels)
        loss.backward()
        optimizer.step()
        if i % 100 == 0:
            acc = compute_accuracy(outputs, labels)
            print('step %i, loss: %.2e, accuracy: %.2f' % (i, loss, acc))
    epoch_time = time.time() - epoch_start
    print('Epoch computed in %i s' % int(round(epoch_time)))
print('Finished Training')
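# `compute_accuracy` is a helper that is not shown in this excerpt. A plausible
# implementation (an assumption), for `outputs` holding raw logits of shape
# (batch, num_classes) and `labels` holding integer class indices:
def compute_accuracy(outputs, labels):
    predicted = outputs.argmax(dim=1)  # class with the highest score per sample
    return (predicted == labels).float().mean().item()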
def train(self, args):
    print('')
    logging.info("Start training")
    nbatches_train = self.reader.get_nbatches_per_epoch('train')
    lr_scheduler = LRScheduler(args.lr_scheduler_opts, args.outdir)
    with tf.Session(config=tools.get_config_proto(args.gpu_memory_fraction)) as sess:
        assert type(self.saver) == tf.train.Saver, 'Saver is not correctly initialized'
        # Initialization:
        self.initialize(sess, args)
        # Lists for the training history:
        train_metrics = []
        train_losses = []
        val_metrics = []
        val_losses = []
        best_val_metric = 0
        checkpoints = []  # This is a list of Checkpoint objects.
        # Tensorboard:
        merged, summary_writer, tensorboard_url = prepare_tensorboard(sess, args.outdir)
        # Loop on epochs:
        current_lr = args.learning_rate
        global_step = 0
        for epoch in range(1, args.num_epochs + 1):
            print('')
            logging.info('Starting epoch %d / %d' % (epoch, args.num_epochs))
            sess.run(self.reader.get_init_op('train'))
            current_lr = lr_scheduler.GetLearningRateAtEpoch(epoch, current_lr)
            _ = sess.run([self.update_lr_op], feed_dict={self.lr_to_update: current_lr})
            learning_rate = sess.run([self.learning_rate])[0]
            logging.info('Learning rate: ' + str(learning_rate))
            step = 0
            all_predictions = []
            all_labels = []
            loss_acum = 0
            iniepoch = time.time()
            while True:
                try:
                    if self.nbatches_accum > 0:
                        # Gradient accumulation: add gradients every step and apply
                        # them once every self.nbatches_accum steps.
                        if step % self.nbatches_accum == 0:
                            sess.run(self.zero_ops)
                        _, loss, predictions, labels, summaryOut = sess.run(
                            [self.accum_ops, self.loss, self.predictions, self.labels, merged],
                            {self.is_training: True})
                        if (step + 1) % self.nbatches_accum == 0:
                            _ = sess.run([self.train_step])
                    else:
                        ini = time.time()
                        _, loss, predictions, labels, summaryOut = sess.run(
                            [self.train_op, self.loss, self.predictions, self.labels, merged],
                            {self.is_training: True})
                        fin = time.time()
                        print('Step ' + str(step) + ' done in ' + str(fin - ini) + ' s.')
                    if math.isnan(loss):
                        raise Exception("Loss is Not A Number")
                    if epoch % args.nepochs_checktrain == 0 and not args.recompute_train:
                        all_predictions.extend(predictions)
                        all_labels.extend(labels)
                        loss_acum += loss
                    step += 1
                    global_step += 1
                    # Tensorboard:
                    summary_writer.add_summary(summaryOut, global_step)
                    if global_step == 1:
                        webbrowser.open_new_tab(tensorboard_url)
                except tf.errors.OutOfRangeError:
                    break
                if step % args.nsteps_display == 0:
                    logging.info('Step %i / %i, loss: %.2e' % (step, nbatches_train, loss))
            finepoch = time.time()
            logging.debug('Epoch computed in %.2f s' % (finepoch - iniepoch))
            # Compute loss and metrics on training data:
            if epoch % args.nepochs_checktrain == 0:
                if args.recompute_train:
                    train_loss, metrics, _, _, _ = self.evaluate_on_dataset('train', sess, args)
                    train_losses.append(train_loss)
                    train_metrics.append(metrics)
                    logging.info('Train loss: %.2e' % train_loss)
                    logging.info('Train accuracy: %.2f' % metrics)
                else:
                    train_loss = loss_acum / nbatches_train
                    train_losses.append(train_loss)
                    logging.info('Mean train loss during epoch: %.2e' % train_loss)
                    metrics = accuracy.compute_accuracy(all_predictions, all_labels)
                    train_metrics.append(metrics)
                    logging.info('Mean train accuracy during epoch: %.2f' % metrics)
            else:
                train_loss = None
            # Compute loss and metrics on validation data:
            if epoch % args.nepochs_checkval == 0:
                val_loss, metrics, _, _, _ = self.evaluate_on_dataset('val', sess, args)
                val_losses.append(val_loss)
                val_metrics.append(metrics)
                logging.info('Val loss: %.2e' % val_loss)
                logging.info('Val accuracy: %.2f' % metrics)
            else:
                val_loss = None
            # Plot training progress:
            if epoch % args.nepochs_checktrain == 0 or epoch % args.nepochs_checkval == 0:
                tools.plot_training_history(train_metrics, train_losses, val_metrics,
                                            val_losses, args, epoch)
            # Save the model:
            if epoch % args.nepochs_save == 0:
                # save_path = self.saver.save(sess, os.path.join(args.outdir, 'model'), global_step=epoch)
                # logging.info('Model saved to ' + save_path)
                self.save_checkpoint_classif(sess, val_loss, val_metrics[0], epoch, checkpoints, args.outdir)
        # Save the model (if we haven't done it yet):
        if args.num_epochs % args.nepochs_save != 0:
            # save_path = self.saver.save(sess, os.path.join(args.outdir, 'model'), global_step=args.num_epochs)
            # logging.info('Model saved to ' + save_path)
            self.save_checkpoint_classif(sess, val_loss, val_metrics[0], epoch, checkpoints, args.outdir)
        best_val_metric = np.max(np.array(val_metrics, dtype=np.float32))
        print('Best validation metric: ' + str(best_val_metric))
        return best_val_metric
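# The training loop above relies on `self.zero_ops`, `self.accum_ops` and
# `self.train_step` when `self.nbatches_accum > 0`. Their construction is not part
# of this excerpt; the sketch below shows a common TF 1.x gradient-accumulation
# pattern and is only an assumption about how such ops could be built (the helper
# name `build_accumulation_ops` is hypothetical).
def build_accumulation_ops(optimizer, loss, nbatches_accum):
    tvars = tf.trainable_variables()
    # One non-trainable accumulator variable per trainable variable:
    accum_vars = [tf.Variable(tf.zeros_like(v.initialized_value()), trainable=False)
                  for v in tvars]
    # Ops to reset the accumulators at the start of each accumulation window:
    zero_ops = [av.assign(tf.zeros_like(av)) for av in accum_vars]
    grads_and_vars = optimizer.compute_gradients(loss, tvars)
    # Ops to add the current batch gradients to the accumulators:
    accum_ops = [accum_vars[i].assign_add(g)
                 for i, (g, _) in enumerate(grads_and_vars) if g is not None]
    # Op to apply the averaged, accumulated gradients:
    train_step = optimizer.apply_gradients(
        [(accum_vars[i] / nbatches_accum, v) for i, (_, v) in enumerate(grads_and_vars)])
    return zero_ops, accum_ops, train_step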
def scoreDay(self, X_day, Y_day, returnLineup=False):
    y_predicted = self.predict(X_day)
    return compute_accuracy(y_predicted, Y_day, returnLineup)