def train_one_batch(self, sess, x, y_, accuracy, train_step, train_feed_dict):
    """Repeatedly fit one training batch and print the accuracy after each pass.

    Only the first batch yielded by ``dataset.get_batches(train=True)`` is
    used.  Both figures printed per epoch ("training" and "validation") are
    computed from ``train_feed_dict``, i.e. on that same batch.

    Args:
        sess: Session through which every graph element is executed.
        x: Feed-dict key that receives the processed images.
        y_: Feed-dict key that receives the processed labels.
        accuracy: Tensor registered as the 'accuracy' scalar summary and
            fetched after every training step.
        train_step: Graph element run once per epoch to update the model.
        train_feed_dict: Feed dict for all runs.  Mutated in place: its
            ``x`` and ``y_`` entries are overwritten.
    """
    tf.summary.scalar('accuracy', accuracy)
    merged = tf.summary.merge_all()
    sess.run(tf.global_variables_initializer())
    dataset = Dataset(input_file_path=self.data_path,
                      max_sample_records=self.max_sample_records)

    # Request full tracing for the evaluation runs; the session stores the
    # collected trace data in run_opts_metadata.
    run_opts = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
    run_opts_metadata = tf.RunMetadata()

    # Take just the first batch and reuse it for every epoch.
    train_batches = dataset.get_batches(train=True)
    batch = next(train_batches)
    images, labels = process_data(batch)
    train_feed_dict[x] = images
    train_feed_dict[y_] = labels

    message = "epoch: {0}, training accuracy: {1}, validation accuracy: {2}"
    for epoch in range(self.n_epochs):
        # Run through the explicit session rather than train_step.run(),
        # which only works when a default session has been installed.
        sess.run(train_step, feed_dict=train_feed_dict)

        # The merged summary is fetched with the accuracy but not used.
        _, train_accuracy = sess.run(
            [merged, accuracy],
            feed_dict=train_feed_dict,
            options=run_opts,
            run_metadata=run_opts_metadata)
        # Second evaluation deliberately reuses the training feed dict.
        _, test_accuracy = sess.run(
            [merged, accuracy],
            feed_dict=train_feed_dict,
            options=run_opts,
            run_metadata=run_opts_metadata)
        print(message.format(epoch, train_accuracy, test_accuracy))
def train(self, sess, x, y_, accuracy, train_step, train_feed_dict, test_feed_dict):
    """Train the model, logging accuracy and checkpointing after every epoch.

    Flow:
      1. Optionally archive ``self.model_file`` into ``self.model_dir``.
      2. Initialise variables unless ``self.restored_model`` is set.
      3. Evaluate once before training and print the result as epoch
         ``self.start_epoch``; for a non-restored model also append it to
         ``self.results_file``, save a checkpoint and sync to AWS.
      4. For each subsequent epoch, run ``train_step`` on every training
         batch, then evaluate, print, record, checkpoint and sync.
      5. Touch ``<model_dir>/SUCCESS`` when finished.

    Args:
        sess: Session through which every graph element is executed.
        x: Feed-dict key that receives the processed images.
        y_: Feed-dict key that receives the processed labels.
        accuracy: Tensor registered as the 'accuracy' scalar summary and
            fetched at every evaluation.
        train_step: Graph element run once per training batch.
        train_feed_dict: Feed dict for training and training-sample
            evaluation.  Mutated in place (``x`` and ``y_`` entries).
        test_feed_dict: Feed dict for validation-sample evaluation.
            Mutated in place (``x`` and ``y_`` entries).
    """
    # To view graph: tensorboard --logdir=/Users/ryanzotti/Documents/repos/Self_Driving_RC_Car/tf_visual_data/runs
    tf.summary.scalar('accuracy', accuracy)
    merged = tf.summary.merge_all()

    # Archive the model script in case of good results that need to be replicated
    # If model is being restored, then assume model file has already been saved somewhere
    # and that self.model_file is None
    if self.model_file is not None:
        cmd = 'cp {model_file} {archive_path}'
        shell_command(
            cmd.format(model_file=self.model_file,
                       archive_path=self.model_dir + '/'))

    if not self.restored_model:  # Don't want to erase restored model weights
        sess.run(tf.global_variables_initializer())

    dataset = Dataset(input_file_path=self.data_path,
                      max_sample_records=self.max_sample_records)
    message = "epoch: {0}, training accuracy: {1}, validation accuracy: {2}"

    def evaluate():
        """Score one train sample and one validation sample.

        Returns (train_accuracy, test_accuracy, run_metadata).
        """
        # FULL_TRACE makes the session collect trace data for each run into
        # run_metadata; create_tf_timeline consumes it further below.
        run_opts = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        run_metadata = tf.RunMetadata()
        accuracies = []
        for feed_dict, is_train in ((train_feed_dict, True),
                                    (test_feed_dict, False)):
            images, labels = process_data(dataset.get_sample(train=is_train))
            feed_dict[x] = images
            feed_dict[y_] = labels
            # The merged summary is fetched with the accuracy but not used.
            _, value = sess.run(
                [merged, accuracy],
                feed_dict=feed_dict,
                options=run_opts,
                run_metadata=run_metadata)
            accuracies.append(value)
        train_accuracy, test_accuracy = accuracies
        return train_accuracy, test_accuracy, run_metadata

    def record(epoch_id, train_accuracy, test_accuracy):
        """Append the epoch result to the results file, checkpoint, and sync."""
        with open(self.results_file, 'a') as f:
            f.write(message.format(epoch_id, train_accuracy, test_accuracy) + '\n')
        # Save a model checkpoint after every epoch
        self.save_model(sess, epoch=epoch_id)
        sync_to_aws(s3_path=self.s3_data_dir, local_path=self.data_path)  # Save to AWS

    train_accuracy, test_accuracy, run_opts_metadata = evaluate()

    # Always worth printing accuracy, even for a restored model, since it provides an early sanity check
    print(message.format(self.start_epoch, train_accuracy, test_accuracy))

    if self.tf_timeline:  # Used for debugging slow Tensorflow code
        create_tf_timeline(self.model_dir, run_opts_metadata)

    # Don't double-count. A restored model already has its last checkpoint and results.txt entry available
    if not self.restored_model:
        record(self.start_epoch, train_accuracy, test_accuracy)

    # NOTE(review): this range yields n_epochs - 1 training passes, because the
    # pre-training evaluation above is reported as epoch start_epoch.  Confirm
    # that this is the intended count.
    for epoch in range(self.start_epoch + 1, self.start_epoch + self.n_epochs):
        prev_time = datetime.now()
        train_batches = dataset.get_batches(train=True)
        for batch_id, batch in enumerate(train_batches):
            images, labels = process_data(batch)
            train_feed_dict[x] = images
            train_feed_dict[y_] = labels
            sess.run(train_step, feed_dict=train_feed_dict)

            # Track speed to better compare GPUs and CPUs
            now = datetime.now()
            diff_seconds = (now - prev_time).total_seconds()
            if self.show_speed:
                speed_results = 'batch {batch_id} of {total_batches}, {seconds} seconds'
                speed_results = speed_results.format(
                    batch_id=batch_id,
                    seconds=diff_seconds,
                    total_batches=dataset.batches_per_epoch)
                with open(self.speed_file, 'a') as f:
                    f.write(speed_results + '\n')
                print(speed_results)
            prev_time = datetime.now()

        train_accuracy, test_accuracy, _ = evaluate()
        print(message.format(epoch, train_accuracy, test_accuracy))
        record(epoch, train_accuracy, test_accuracy)

    # Marks unambiguous successful completion to prevent deletion by cleanup script
    shell_command('touch ' + self.model_dir + '/SUCCESS')
def train(self, sess, x, y_, accuracy, train_step, train_feed_dict, test_feed_dict):
    """Train the model for ``self.epochs`` epochs and save a final checkpoint.

    A new TensorBoard run directory is created, this script is copied into
    it, and variables are initialised.  Accuracy on one train sample and one
    validation sample is printed before training (as epoch -1) and after
    every epoch.  After the last epoch the model is saved to
    ``<run_dir>/model.ckpt`` and a ``SUCCESS`` marker file is touched.

    Args:
        sess: Session through which every graph element is executed.
        x: Feed-dict key that receives the processed images.
        y_: Feed-dict key that receives the processed labels.
        accuracy: Tensor registered as the 'accuracy' scalar summary and
            fetched at every evaluation.
        train_step: Graph element run once per training batch.
        train_feed_dict: Feed dict for training and training-sample
            evaluation.  Mutated in place (``x`` and ``y_`` entries).
        test_feed_dict: Feed dict for validation-sample evaluation.
            Mutated in place (``x`` and ``y_`` entries).
    """
    # To view graph: tensorboard --logdir=/Users/ryanzotti/Documents/repos/Self_Driving_RC_Car/tf_visual_data/runs
    # Uses the tf.summary.* / global_variables_initializer names, matching the
    # other training methods; the older aliases were removed in TensorFlow 1.0.
    tf.summary.scalar('accuracy', accuracy)
    merged = tf.summary.merge_all()
    tfboard_basedir = '/Users/ryanzotti/Documents/repos/Self_Driving_RC_Car/tf_visual_data/runs/'
    tfboard_run_dir = mkdir_tfboard_run_dir(tfboard_basedir)

    # Archive this script to document model design in event of good results that need to be replicated
    model_file_path = os.path.dirname(
        os.path.realpath(__file__)) + '/' + os.path.basename(__file__)
    cmd = 'cp {model_file} {archive_path}'
    shell_command(
        cmd.format(model_file=model_file_path,
                   archive_path=tfboard_run_dir + '/'))

    sess.run(tf.global_variables_initializer())

    input_file_path = os.path.join(self.data_path, 'data')
    dataset = Dataset(input_file_path=input_file_path,
                      max_sample_records=self.max_sample_records)
    message = "epoch: {0}, training accuracy: {1}, validation accuracy: {2}"

    def evaluate():
        """Score one train sample and one validation sample.

        Returns (train_accuracy, test_accuracy).
        """
        # FULL_TRACE makes the session collect trace data for each run into
        # run_metadata; nothing in this method reads it back.
        run_opts = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        run_metadata = tf.RunMetadata()
        accuracies = []
        for feed_dict, is_train in ((train_feed_dict, True),
                                    (test_feed_dict, False)):
            images, labels = process_data(dataset.get_sample(train=is_train))
            feed_dict[x] = images
            feed_dict[y_] = labels
            # The merged summary is fetched with the accuracy but not used.
            _, value = sess.run(
                [merged, accuracy],
                feed_dict=feed_dict,
                options=run_opts,
                run_metadata=run_metadata)
            accuracies.append(value)
        train_accuracy, test_accuracy = accuracies
        return train_accuracy, test_accuracy

    # Baseline before any training; reported as epoch -1.
    train_accuracy, test_accuracy = evaluate()
    print(message.format(-1, train_accuracy, test_accuracy))

    for epoch in range(self.epochs):
        train_batches = dataset.get_batches(train=True)
        for batch in train_batches:
            images, labels = process_data(batch)
            train_feed_dict[x] = images
            train_feed_dict[y_] = labels
            # Run through the explicit session rather than train_step.run(),
            # which only works when a default session has been installed.
            sess.run(train_step, feed_dict=train_feed_dict)

        train_accuracy, test_accuracy = evaluate()
        print(message.format(epoch, train_accuracy, test_accuracy))

    # Save the trained model to a file
    saver = tf.train.Saver()
    saver.save(sess, tfboard_run_dir + "/model.ckpt")

    # Marks unambiguous successful completion to prevent deletion by cleanup script
    shell_command('touch ' + tfboard_run_dir + '/SUCCESS')
def train(self, sess, x, y_, accuracy, train_step, train_feed_dict, test_feed_dict):
    """Train the model, logging accuracy and checkpointing after every epoch.

    Flow:
      1. Optionally archive ``self.model_file`` into ``self.tfboard_run_dir``.
      2. Initialise all variables.
      3. Evaluate once before training; print the result as epoch
         ``self.start_epoch``, append it to ``self.results_file`` and save
         a checkpoint.
      4. For each subsequent epoch, run ``train_step`` on every training
         batch, then evaluate, print, record and checkpoint.
      5. Touch ``<tfboard_run_dir>/SUCCESS`` when finished.

    Args:
        sess: Session through which every graph element is executed.
        x: Feed-dict key that receives the processed images.
        y_: Feed-dict key that receives the processed labels.
        accuracy: Tensor registered as the 'accuracy' scalar summary and
            fetched at every evaluation.
        train_step: Graph element run once per training batch.
        train_feed_dict: Feed dict for training and training-sample
            evaluation.  Mutated in place (``x`` and ``y_`` entries).
        test_feed_dict: Feed dict for validation-sample evaluation.
            Mutated in place (``x`` and ``y_`` entries).
    """
    # To view graph: tensorboard --logdir=/Users/ryanzotti/Documents/repos/Self_Driving_RC_Car/tf_visual_data/runs
    tf.summary.scalar('accuracy', accuracy)
    merged = tf.summary.merge_all()

    # Archive the model script in case of good results that need to be replicated
    # If model is being restored, then assume model file has already been saved somewhere
    # and that self.model_file is None
    if self.model_file is not None:
        cmd = 'cp {model_file} {archive_path}'
        shell_command(
            cmd.format(model_file=self.model_file,
                       archive_path=self.tfboard_run_dir + '/'))

    sess.run(tf.global_variables_initializer())

    dataset = Dataset(input_file_path=self.data_path,
                      max_sample_records=self.max_sample_records)
    message = "epoch: {0}, training accuracy: {1}, validation accuracy: {2}"

    def evaluate():
        """Score one train sample and one validation sample.

        Returns (train_accuracy, test_accuracy).
        """
        # FULL_TRACE makes the session collect trace data for each run into
        # run_metadata; nothing in this method reads it back.
        run_opts = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        run_metadata = tf.RunMetadata()
        accuracies = []
        for feed_dict, is_train in ((train_feed_dict, True),
                                    (test_feed_dict, False)):
            images, labels = process_data(dataset.get_sample(train=is_train))
            feed_dict[x] = images
            feed_dict[y_] = labels
            # The merged summary is fetched with the accuracy but not used.
            _, value = sess.run(
                [merged, accuracy],
                feed_dict=feed_dict,
                options=run_opts,
                run_metadata=run_metadata)
            accuracies.append(value)
        train_accuracy, test_accuracy = accuracies
        return train_accuracy, test_accuracy

    def report(epoch_id, train_accuracy, test_accuracy):
        """Print the epoch result, append it to the results file, and checkpoint."""
        print(message.format(epoch_id, train_accuracy, test_accuracy))
        with open(self.results_file, 'a') as f:
            f.write(message.format(epoch_id, train_accuracy, test_accuracy) + '\n')
        # Save a model checkpoint after every epoch
        self.save_model(sess, epoch=epoch_id)

    # Baseline before any training; reported under start_epoch.
    report(self.start_epoch, *evaluate())

    # NOTE(review): this range yields n_epochs - 1 training passes, because the
    # pre-training evaluation above is reported as epoch start_epoch.  Confirm
    # that this is the intended count.
    for epoch in range(self.start_epoch + 1, self.start_epoch + self.n_epochs):
        train_batches = dataset.get_batches(train=True)
        for batch in train_batches:
            images, labels = process_data(batch)
            train_feed_dict[x] = images
            train_feed_dict[y_] = labels
            sess.run(train_step, feed_dict=train_feed_dict)

        report(epoch, *evaluate())

    # Marks unambiguous successful completion to prevent deletion by cleanup script
    shell_command('touch ' + self.tfboard_run_dir + '/SUCCESS')