Example #1
def export_model(last_checkpoint):
    # Create a session with a new graph.
    with tf.Session(graph=tf.Graph()) as sess:
        x = tf.placeholder(tf.float32, [None, 784])
        p = mnist_model.get_model(x, training=False)

        # Define key elements
        input_key = tf.placeholder(tf.int64, [None])
        output_key = tf.identity(input_key)

        # Define API inputs/outputs object
        inputs = {'key': input_key.name, 'image': x.name}
        outputs = {'key': output_key.name, 'scores': p.name}
        tf.add_to_collection('inputs', json.dumps(inputs))
        tf.add_to_collection('outputs', json.dumps(outputs))

        init_op = tf.global_variables_initializer()
        sess.run(init_op)

        # Restore the latest checkpoint and save the model
        saver = tf.train.Saver()
        saver.restore(sess, last_checkpoint)
        saver.export_meta_graph(os.path.join(MODEL_DIR, 'export.meta'))
        saver.save(sess,
                   os.path.join(MODEL_DIR, 'export'),
                   write_meta_graph=False)
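
For context, a minimal sketch of how a graph exported this way could be loaded back (assuming the same MODEL_DIR and TF 1.x APIs; the 'inputs'/'outputs' collection keys come from the example above):

import json
import os
import tensorflow as tf

with tf.Session(graph=tf.Graph()) as sess:
    # Rebuild the graph structure from the exported meta graph.
    saver = tf.train.import_meta_graph(os.path.join(MODEL_DIR, 'export.meta'))
    # Restore the variable values saved under the 'export' prefix.
    saver.restore(sess, os.path.join(MODEL_DIR, 'export'))

    # Recover the tensor names stored in the collections at export time.
    inputs = json.loads(tf.get_collection('inputs')[0])
    outputs = json.loads(tf.get_collection('outputs')[0])
    x = sess.graph.get_tensor_by_name(inputs['image'])
    p = sess.graph.get_tensor_by_name(outputs['scores'])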
Example #2
# This variant assumes the TF 1.x SavedModel modules are in scope, e.g.:
#   from tensorflow.core.protobuf import meta_graph_pb2
#   from tensorflow.python.saved_model import (builder, signature_constants,
#                                              signature_def_utils, tag_constants)
def export_model(last_checkpoint):
    # Create a session with a new graph.
    with tf.Session(graph=tf.Graph()) as sess:
        x = tf.placeholder(tf.float32, [None, 784])
        p = mnist_model.get_model(x, training=False)

        # Define key elements
        input_key = tf.placeholder(tf.int64, [None])
        output_key = tf.identity(input_key)

        # Define API inputs/outputs object
        inputs = {'key': input_key, 'image': x}
        input_signatures = {}
        for key, val in inputs.items():
            predict_input_tensor = meta_graph_pb2.TensorInfo()
            predict_input_tensor.name = val.name
            predict_input_tensor.dtype = val.dtype.as_datatype_enum
            input_signatures[key] = predict_input_tensor

        outputs = {'key': output_key, 'scores': p}
        output_signatures = {}
        for key, val in outputs.items():
            predict_output_tensor = meta_graph_pb2.TensorInfo()
            predict_output_tensor.name = val.name
            predict_output_tensor.dtype = val.dtype.as_datatype_enum
            output_signatures[key] = predict_output_tensor

        inputs_name, outputs_name = {}, {}
        for key, val in inputs.items():
            inputs_name[key] = val.name
        for key, val in outputs.items():
            outputs_name[key] = val.name
        tf.add_to_collection('inputs', json.dumps(inputs_name))
        tf.add_to_collection('outputs', json.dumps(outputs_name))

        init_op = tf.global_variables_initializer()
        sess.run(init_op)

        # Restore the latest checkpoint and save the model
        saver = tf.train.Saver()
        saver.restore(sess, last_checkpoint)

        predict_signature_def = signature_def_utils.build_signature_def(
            input_signatures, output_signatures,
            signature_constants.PREDICT_METHOD_NAME)
        build = builder.SavedModelBuilder(MODEL_DIR)
        build.add_meta_graph_and_variables(
            sess, [tag_constants.SERVING],
            signature_def_map={
                signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
                predict_signature_def
            },
            assets_collection=tf.get_collection(tf.GraphKeys.ASSET_FILEPATHS))
        build.save()
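
A SavedModel written by this variant can be read back with the TF 1.x loader; a minimal sketch, again assuming MODEL_DIR:

import tensorflow as tf
from tensorflow.python.saved_model import loader, tag_constants

with tf.Session(graph=tf.Graph()) as sess:
    # Load the graph and variables tagged for serving.
    meta_graph_def = loader.load(sess, [tag_constants.SERVING], MODEL_DIR)
    # The default predict signature records the input/output tensor names.
    signature = meta_graph_def.signature_def['serving_default']
    image_name = signature.inputs['image'].name
    scores_name = signature.outputs['scores'].name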
Example #3
parser.add_argument('--model', required=True)
parser.add_argument('--angle', type=int, default=60)
parser.add_argument('--span_range', type=float, default=0.9)
parser.add_argument('--grid_size', type=int, default=4)
args = parser.parse_args()
args.cuda = not args.no_cuda and torch.cuda.is_available()

args.span_range_height = args.span_range_width = args.span_range
args.grid_height = args.grid_width = args.grid_size
args.image_height = args.image_width = 28

torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)

model = mnist_model.get_model(args)
if args.cuda:
    model.cuda()

optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum)
train_loader = data_loader.get_train_loader(args)
test_loader = data_loader.get_test_loader(args)


def train(epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        if args.cuda:
            data, target = data.cuda(), target.cuda()
        # print(data.shape)
        data, target = Variable(data), Variable(target)
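
The snippet breaks off inside the training loop. A typical completion in the same pre-0.4 PyTorch style would be the usual zero-grad/forward/backward/step sequence; the loss function and logging interval below are assumptions, not taken from the source:

        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)  # assumed loss; requires torch.nn.functional as F
        loss.backward()
        optimizer.step()
        if batch_idx % 100 == 0:
            print('Epoch {} [{}/{}]  loss: {:.4f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                loss.data[0]))  # .data[0] matches the old Variable API used above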
Example #4
def run_training():

    with tf.Graph().as_default() as graph:

        # Prepare training data
        mnist_data = mnist.read_data_sets(DATA_DIR,
                                          one_hot=True,
                                          local_only=LOCAL_DATA)

        # Create placeholders
        x = tf.placeholder(tf.float32, [None, 784])
        t = tf.placeholder(tf.float32, [None, 10])
        keep_prob = tf.placeholder(tf.float32, [])
        global_step = tf.Variable(
            0, trainable=False
        )  # Unused in this code, but kept so the trainer API isn't broken

        # Add test loss and test accuracy to summary
        test_loss = tf.placeholder(tf.float32, [])
        test_accuracy = tf.placeholder(tf.float32, [])
        tf.summary.scalar('Test_loss', test_loss)
        tf.summary.scalar('Test_accuracy', test_accuracy)

        # Define a model
        p = mnist_model.get_model(x, keep_prob, training=True)
        train_step, loss, accuracy = mnist_model.get_trainer(p, t, global_step)

        init_op = tf.global_variables_initializer()
        saver = tf.train.Saver()
        summary = tf.summary.merge_all()

        # Create a supervisor
        sv = tf.train.Supervisor(is_chief=True,
                                 logdir=LOG_DIR,
                                 init_op=init_op,
                                 saver=saver,
                                 summary_op=None,
                                 global_step=global_step,
                                 save_model_secs=0)

        # Create a session and start a training loop
        with sv.managed_session() as sess:

            reports, step = 0, 0
            start_time = time.time()

            while not sv.should_stop() and step < MAX_STEPS:

                images, labels = mnist_data.train.next_batch(BATCH_SIZE)
                feed_dict = {x: images, t: labels, keep_prob: 0.5}
                _, loss_val, step = sess.run([train_step, loss, global_step],
                                             feed_dict=feed_dict)

                if step > CHECKPOINT * reports:
                    reports += 1
                    logging.info('Step: %d, Train loss: %f', step, loss_val)

                    # Evaluate the test loss and test accuracy
                    loss_vals, acc_vals = [], []
                    for _ in range(len(mnist_data.test.labels) // BATCH_SIZE):
                        images, labels = mnist_data.test.next_batch(BATCH_SIZE)
                        feed_dict = {x: images, t: labels, keep_prob: 1.0}
                        loss_val, acc_val = sess.run([loss, accuracy],
                                                     feed_dict=feed_dict)
                        loss_vals.append(loss_val)
                        acc_vals.append(acc_val)

                    loss_val, acc_val = np.sum(loss_vals), np.mean(acc_vals)

                    # Save summary
                    feed_dict = {test_loss: loss_val, test_accuracy: acc_val}
                    sv.summary_computed(sess,
                                        sess.run(summary, feed_dict=feed_dict),
                                        step)
                    sv.summary_writer.flush()

                    logging.info('Time elapsed: %d',
                                 (time.time() - start_time))
                    logging.info('Step: %d, Test loss: %f, Test accuracy: %f',
                                 step, loss_val, acc_val)

        sv.stop()
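
The example relies on module-level constants defined elsewhere in the file; a purely illustrative set of values, so the snippet can be read in isolation:

DATA_DIR = '/tmp/data'   # cache directory for mnist.read_data_sets
LOG_DIR = '/tmp/logs'    # Supervisor checkpoint/summary directory
LOCAL_DATA = True        # passed through as local_only
BATCH_SIZE = 100
MAX_STEPS = 10000
CHECKPOINT = 1000        # report and evaluate every CHECKPOINT steps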
Example #5
def run_training():
    # Get cluster and node information
    env = json.loads(os.environ.get('TF_CONFIG', '{}'))
    cluster_info = env.get('cluster')
    cluster_spec = tf.train.ClusterSpec(cluster_info)
    task_info = env.get('task')
    job_name, task_index = task_info['type'], task_info['index']

    device_fn = tf.train.replica_device_setter(
        cluster=cluster_spec,
        worker_device='/job:%s/task:%d' % (job_name, task_index))

    logging.info('Start job:%s, index:%d', job_name, task_index)

    # Create a server object
    server = tf.train.Server(cluster_spec,
                             job_name=job_name,
                             task_index=task_index)

    # Start a parameter server node
    if job_name == 'ps':
        server.join()

    # Start a master/worker node
    if job_name == 'master' or job_name == 'worker':
        is_chief = (job_name == 'master')

        with tf.Graph().as_default() as graph:
            with tf.device(device_fn):

                # Prepare training data
                mnist_data = mnist.read_data_sets(DATA_DIR,
                                                  one_hot=True,
                                                  local_only=LOCAL_DATA)

                # Create placeholders
                x = tf.placeholder(tf.float32, [None, 784])
                t = tf.placeholder(tf.float32, [None, 10])
                keep_prob = tf.placeholder(tf.float32, [])
                global_step = tf.Variable(0, trainable=False)

                # Add test loss and test accuracy to summary
                test_loss = tf.placeholder(tf.float32, [])
                test_accuracy = tf.placeholder(tf.float32, [])
                tf.summary.scalar('Test_loss', test_loss)
                tf.summary.scalar('Test_accuracy', test_accuracy)

                # Define a model
                p = mnist_model.get_model(x, keep_prob, training=True)
                train_step, loss, accuracy = mnist_model.get_trainer(
                    p, t, global_step)

                init_op = tf.global_variables_initializer()
                saver = tf.train.Saver()
                summary = tf.summary.merge_all()

                # Create a supervisor
                sv = tf.train.Supervisor(is_chief=is_chief,
                                         logdir=LOG_DIR,
                                         init_op=init_op,
                                         saver=saver,
                                         summary_op=None,
                                         global_step=global_step,
                                         save_model_secs=0)

                # Create a session and start a training loop
                with sv.managed_session(server.target) as sess:
                    reports, step = 0, 0
                    start_time = time.time()
                    while not sv.should_stop() and step < MAX_STEPS:
                        images, labels = mnist_data.train.next_batch(
                            BATCH_SIZE)
                        feed_dict = {x: images, t: labels, keep_prob: 0.5}
                        _, loss_val, step = sess.run(
                            [train_step, loss, global_step],
                            feed_dict=feed_dict)
                        if step > CHECKPOINT * reports:
                            reports += 1
                            logging.info('Step: %d, Train loss: %f', step,
                                         loss_val)
                            if is_chief:
                                # Save checkpoint
                                sv.saver.save(sess,
                                              sv.save_path,
                                              global_step=step)

                                # Evaluate the test loss and test accuracy
                                loss_vals, acc_vals = [], []
                                for _ in range(
                                        len(mnist_data.test.labels) //
                                        BATCH_SIZE):
                                    images, labels = mnist_data.test.next_batch(
                                        BATCH_SIZE)
                                    feed_dict = {
                                        x: images,
                                        t: labels,
                                        keep_prob: 1.0
                                    }
                                    loss_val, acc_val = sess.run(
                                        [loss, accuracy], feed_dict=feed_dict)
                                    loss_vals.append(loss_val)
                                    acc_vals.append(acc_val)
                                loss_val, acc_val = np.sum(loss_vals), np.mean(
                                    acc_vals)

                                # Save summary
                                feed_dict = {
                                    test_loss: loss_val,
                                    test_accuracy: acc_val
                                }
                                sv.summary_computed(
                                    sess, sess.run(summary,
                                                   feed_dict=feed_dict), step)
                                sv.summary_writer.flush()

                                logging.info('Time elapsed: %d',
                                             (time.time() - start_time))
                                logging.info(
                                    'Step: %d, Test loss: %f, Test accuracy: %f',
                                    step, loss_val, acc_val)

                    # Export the final model
                    if is_chief:
                        sv.saver.save(sess,
                                      sv.save_path,
                                      global_step=sess.run(global_step))
                        export_model(tf.train.latest_checkpoint(LOG_DIR))

                sv.stop()
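
This distributed variant reads the cluster layout and its own task identity from the TF_CONFIG environment variable. A minimal illustration of the JSON shape it parses (hostnames are placeholders):

import json
import os

os.environ['TF_CONFIG'] = json.dumps({
    'cluster': {
        'ps': ['ps-0.example.com:2222'],
        'master': ['master-0.example.com:2222'],
        'worker': ['worker-0.example.com:2222', 'worker-1.example.com:2222'],
    },
    'task': {'type': 'worker', 'index': 0},
})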