Beispiel #1
0
    def train_one_batch(self, sess, x, y_, accuracy, train_step,
                        train_feed_dict):

        tf.summary.scalar('accuracy', accuracy)
        merged = tf.summary.merge_all()
        sess.run(tf.global_variables_initializer())
        dataset = Dataset(input_file_path=self.data_path,
                          max_sample_records=self.max_sample_records)

        # Not sure what these two lines do
        run_opts = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        run_opts_metadata = tf.RunMetadata()

        train_batches = dataset.get_batches(train=True)
        batch = next(train_batches)
        images, labels = process_data(batch)
        train_feed_dict[x] = images
        train_feed_dict[y_] = labels
        for epoch in range(self.n_epochs):
            train_step.run(feed_dict=train_feed_dict)
            train_summary, train_accuracy = sess.run(
                [merged, accuracy],
                feed_dict=train_feed_dict,
                options=run_opts,
                run_metadata=run_opts_metadata)
            test_summary, test_accuracy = sess.run(
                [merged, accuracy],
                feed_dict=train_feed_dict,
                options=run_opts,
                run_metadata=run_opts_metadata)
            message = "epoch: {0}, training accuracy: {1}, validation accuracy: {2}"
            print(message.format(epoch, train_accuracy, test_accuracy))
Beispiel #2
0
    def train(self, sess, x, y_, accuracy, train_step, train_feed_dict,
              test_feed_dict):

        # To view graph: tensorboard --logdir=/Users/ryanzotti/Documents/repos/Self_Driving_RC_Car/tf_visual_data/runs
        tf.summary.scalar('accuracy', accuracy)
        merged = tf.summary.merge_all()

        # Archive the model script in case of good results that need to be replicated
        # If model is being restored, then assume model file has already been saved somewhere
        # and that self.model_file is None
        if self.model_file is not None:
            cmd = 'cp {model_file} {archive_path}'
            shell_command(
                cmd.format(model_file=self.model_file,
                           archive_path=self.model_dir + '/'))

        if not self.restored_model:  # Don't want to erase restored model weights
            sess.run(tf.global_variables_initializer())

        dataset = Dataset(input_file_path=self.data_path,
                          max_sample_records=self.max_sample_records)

        # TODO: Document and understand what RunOptions does
        run_opts = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        run_opts_metadata = tf.RunMetadata()

        train_images, train_labels = process_data(
            dataset.get_sample(train=True))
        train_feed_dict[x] = train_images
        train_feed_dict[y_] = train_labels
        train_summary, train_accuracy = sess.run(
            [merged, accuracy],
            feed_dict=train_feed_dict,
            options=run_opts,
            run_metadata=run_opts_metadata)
        test_images, test_labels = process_data(
            dataset.get_sample(train=False))
        test_feed_dict[x] = test_images
        test_feed_dict[y_] = test_labels
        test_summary, test_accuracy = sess.run([merged, accuracy],
                                               feed_dict=test_feed_dict,
                                               options=run_opts,
                                               run_metadata=run_opts_metadata)

        # Always worth printing accuracy, even for a restored model, since it provides an early sanity check
        message = "epoch: {0}, training accuracy: {1}, validation accuracy: {2}"
        print(message.format(self.start_epoch, train_accuracy, test_accuracy))

        if self.tf_timeline:  # Used for debugging slow Tensorflow code
            create_tf_timeline(self.model_dir, run_opts_metadata)

        # Don't double-count. A restored model already has its last checkpoint and results.txt entry available
        if not self.restored_model:
            with open(self.results_file, 'a') as f:
                f.write(
                    message.format(self.start_epoch, train_accuracy,
                                   test_accuracy) + '\n')
            self.save_model(sess, epoch=self.start_epoch)
            sync_to_aws(s3_path=self.s3_data_dir,
                        local_path=self.data_path)  # Save to AWS

        for epoch in range(self.start_epoch + 1,
                           self.start_epoch + self.n_epochs):
            prev_time = datetime.now()
            train_batches = dataset.get_batches(train=True)
            for batch_id, batch in enumerate(train_batches):
                images, labels = process_data(batch)
                train_feed_dict[x] = images
                train_feed_dict[y_] = labels
                sess.run(train_step, feed_dict=train_feed_dict)

                # Track speed to better compare GPUs and CPUs
                now = datetime.now()
                diff_seconds = (now - prev_time).total_seconds()
                if self.show_speed:
                    speed_results = 'batch {batch_id} of {total_batches}, {seconds} seconds'
                    speed_results = speed_results.format(
                        batch_id=batch_id,
                        seconds=diff_seconds,
                        total_batches=dataset.batches_per_epoch)
                    with open(self.speed_file, 'a') as f:
                        f.write(speed_results + '\n')
                    print(speed_results)
                prev_time = datetime.now()

            # TODO: Document and understand what RunOptions does
            run_opts = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
            run_opts_metadata = tf.RunMetadata()

            train_images, train_labels = process_data(
                dataset.get_sample(train=True))
            train_feed_dict[x] = train_images
            train_feed_dict[y_] = train_labels
            train_summary, train_accuracy = sess.run(
                [merged, accuracy],
                feed_dict=train_feed_dict,
                options=run_opts,
                run_metadata=run_opts_metadata)
            test_images, test_labels = process_data(
                dataset.get_sample(train=False))
            test_feed_dict[x] = test_images
            test_feed_dict[y_] = test_labels
            test_summary, test_accuracy = sess.run(
                [merged, accuracy],
                feed_dict=test_feed_dict,
                options=run_opts,
                run_metadata=run_opts_metadata)
            print(message.format(epoch, train_accuracy, test_accuracy))
            with open(self.results_file, 'a') as f:
                f.write(
                    message.format(epoch, train_accuracy, test_accuracy) +
                    '\n')

            # Save a model checkpoint after every epoch
            self.save_model(sess, epoch=epoch)
            sync_to_aws(s3_path=self.s3_data_dir,
                        local_path=self.data_path)  # Save to AWS

        # Marks unambiguous successful completion to prevent deletion by cleanup script
        shell_command('touch ' + self.model_dir + '/SUCCESS')
Beispiel #3
0
    def train(self, sess, x, y_, accuracy, train_step, train_feed_dict,
              test_feed_dict):

        # To view graph: tensorboard --logdir=/Users/ryanzotti/Documents/repos/Self_Driving_RC_Car/tf_visual_data/runs
        tf.scalar_summary('accuracy', accuracy)
        merged = tf.merge_all_summaries()

        tfboard_basedir = '/Users/ryanzotti/Documents/repos/Self_Driving_RC_Car/tf_visual_data/runs/'
        tfboard_run_dir = mkdir_tfboard_run_dir(tfboard_basedir)

        # Archive this script to document model design in event of good results that need to be replicated
        model_file_path = os.path.dirname(
            os.path.realpath(__file__)) + '/' + os.path.basename(__file__)
        cmd = 'cp {model_file} {archive_path}'
        shell_command(
            cmd.format(model_file=model_file_path,
                       archive_path=tfboard_run_dir + '/'))

        sess.run(tf.initialize_all_variables())

        input_file_path = os.path.join(self.data_path, 'data')
        dataset = Dataset(input_file_path=input_file_path,
                          max_sample_records=self.max_sample_records)

        # Not sure what these two lines do
        run_opts = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        run_opts_metadata = tf.RunMetadata()

        train_images, train_labels = process_data(
            dataset.get_sample(train=True))
        train_feed_dict[x] = train_images
        train_feed_dict[y_] = train_labels
        train_summary, train_accuracy = sess.run(
            [merged, accuracy],
            feed_dict=train_feed_dict,
            options=run_opts,
            run_metadata=run_opts_metadata)
        test_images, test_labels = process_data(
            dataset.get_sample(train=False))
        test_feed_dict[x] = test_images
        test_feed_dict[y_] = test_labels
        test_summary, test_accuracy = sess.run([merged, accuracy],
                                               feed_dict=test_feed_dict,
                                               options=run_opts,
                                               run_metadata=run_opts_metadata)
        message = "epoch: {0}, training accuracy: {1}, validation accuracy: {2}"
        print(message.format(-1, train_accuracy, test_accuracy))

        for epoch in range(self.epochs):
            train_batches = dataset.get_batches(train=True)
            for batch in train_batches:
                images, labels = process_data(batch)
                train_feed_dict[x] = images
                train_feed_dict[y_] = labels
                train_step.run(feed_dict=train_feed_dict)

            # TODO: remove all this hideous boilerplate
            run_opts = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
            run_opts_metadata = tf.RunMetadata()
            train_images, train_labels = process_data(
                dataset.get_sample(train=True))
            train_feed_dict[x] = train_images
            train_feed_dict[y_] = train_labels
            train_summary, train_accuracy = sess.run(
                [merged, accuracy],
                feed_dict=train_feed_dict,
                options=run_opts,
                run_metadata=run_opts_metadata)
            test_images, test_labels = process_data(
                dataset.get_sample(train=False))
            test_feed_dict[x] = test_images
            test_feed_dict[y_] = test_labels
            test_summary, test_accuracy = sess.run(
                [merged, accuracy],
                feed_dict=test_feed_dict,
                options=run_opts,
                run_metadata=run_opts_metadata)
            print(message.format(epoch, train_accuracy, test_accuracy))

        # Save the trained model to a file
        saver = tf.train.Saver()
        save_path = saver.save(sess, tfboard_run_dir + "/model.ckpt")

        # Marks unambiguous successful completion to prevent deletion by cleanup script
        shell_command('touch ' + tfboard_run_dir + '/SUCCESS')
Beispiel #4
0
    def train(self, sess, x, y_, accuracy, train_step, train_feed_dict,
              test_feed_dict):

        # To view graph: tensorboard --logdir=/Users/ryanzotti/Documents/repos/Self_Driving_RC_Car/tf_visual_data/runs
        tf.summary.scalar('accuracy', accuracy)
        merged = tf.summary.merge_all()

        # Archive the model script in case of good results that need to be replicated
        # If model is being restored, then assume model file has already been saved somewhere
        # and that self.model_file is None
        if self.model_file is not None:
            cmd = 'cp {model_file} {archive_path}'
            shell_command(
                cmd.format(model_file=self.model_file,
                           archive_path=self.tfboard_run_dir + '/'))

        sess.run(tf.global_variables_initializer())

        dataset = Dataset(input_file_path=self.data_path,
                          max_sample_records=self.max_sample_records)

        # TODO: Document and understand what RunOptions does
        run_opts = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        run_opts_metadata = tf.RunMetadata()

        train_images, train_labels = process_data(
            dataset.get_sample(train=True))
        train_feed_dict[x] = train_images
        train_feed_dict[y_] = train_labels
        train_summary, train_accuracy = sess.run(
            [merged, accuracy],
            feed_dict=train_feed_dict,
            options=run_opts,
            run_metadata=run_opts_metadata)
        test_images, test_labels = process_data(
            dataset.get_sample(train=False))
        test_feed_dict[x] = test_images
        test_feed_dict[y_] = test_labels
        test_summary, test_accuracy = sess.run([merged, accuracy],
                                               feed_dict=test_feed_dict,
                                               options=run_opts,
                                               run_metadata=run_opts_metadata)
        message = "epoch: {0}, training accuracy: {1}, validation accuracy: {2}"
        print(message.format(self.start_epoch, train_accuracy, test_accuracy))

        with open(self.results_file, 'a') as f:
            f.write(
                message.format(self.start_epoch, train_accuracy, test_accuracy)
                + '\n')

        # Save a model checkpoint after every epoch
        self.save_model(sess, epoch=self.start_epoch)

        for epoch in range(self.start_epoch + 1,
                           self.start_epoch + self.n_epochs):
            train_batches = dataset.get_batches(train=True)
            for batch in train_batches:
                images, labels = process_data(batch)
                train_feed_dict[x] = images
                train_feed_dict[y_] = labels
                sess.run(train_step, feed_dict=train_feed_dict)

            # TODO: Document and understand what RunOptions does
            run_opts = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
            run_opts_metadata = tf.RunMetadata()

            train_images, train_labels = process_data(
                dataset.get_sample(train=True))
            train_feed_dict[x] = train_images
            train_feed_dict[y_] = train_labels
            train_summary, train_accuracy = sess.run(
                [merged, accuracy],
                feed_dict=train_feed_dict,
                options=run_opts,
                run_metadata=run_opts_metadata)
            test_images, test_labels = process_data(
                dataset.get_sample(train=False))
            test_feed_dict[x] = test_images
            test_feed_dict[y_] = test_labels
            test_summary, test_accuracy = sess.run(
                [merged, accuracy],
                feed_dict=test_feed_dict,
                options=run_opts,
                run_metadata=run_opts_metadata)
            print(message.format(epoch, train_accuracy, test_accuracy))
            with open(self.results_file, 'a') as f:
                f.write(
                    message.format(epoch, train_accuracy, test_accuracy) +
                    '\n')

            # Save a model checkpoint after every epoch
            self.save_model(sess, epoch=epoch)

        # Marks unambiguous successful completion to prevent deletion by cleanup script
        shell_command('touch ' + self.tfboard_run_dir + '/SUCCESS')