Example #1
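# NOTE: imports are omitted in this snippet. It presumably relies on
# TensorFlow 1.x (`import tensorflow as tf`), the MNIST loader
# (`from tensorflow.examples.tutorials.mnist import input_data`), `itertools`,
# and the fixed-point quantization library imported as `nf` (exact package
# name not shown here), with `FLAGS` defined by the surrounding script.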
def main(_):
    # Import data
    mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=True)

    training_placeholder = None
    x = tf.placeholder(tf.float32, shape=[None, 784])
    # Define loss and optimizer
    y_ = tf.placeholder(tf.float32, [None, 10])

    cfgs = nf.parse_cfg_from_str("")
    if FLAGS.cfg is not None:
        cfgs = nf.parse_cfg_from_file(FLAGS.cfg)

    with nf.fixed_scope("fixed_mlp_mnist",
                        cfgs) as (s, training, fixed_mapping):
        training_placeholder = training
        # Using chaining writing style:
        res = nf.wrap(x).Dense(units=100,
                               name="dense1").ReLU(name="relu1").Dense(
                                   units=10, name="dense2").tensor
        # Alternatively, you can use the normal writing style:
        # res = nf.Dense(nf.ReLU(nf.Dense(x, units=100, name="dense1"), name="relu1"), units=10, name="dense2")

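    # The fixed saver presumably bundles the model weights together with the
    # quantization-scale variables recorded in `fixed_mapping` (behavior
    # inferred from the API name; not shown in this snippet).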
    saver = nf.utils.fixed_model_saver(fixed_mapping)

    cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=res))
    optimizer = tf.train.GradientDescentOptimizer(0.5)
    grads_and_vars = optimizer.compute_gradients(cross_entropy)
    train_step = optimizer.apply_gradients(grads_and_vars)

    sess = tf.InteractiveSession()
    tf.global_variables_initializer().run()

    # Restore the trained weights from snapshot
    saver.restore(sess, FLAGS.train_dir)

    # Test trained model
    correct_prediction = tf.equal(tf.argmax(res, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    weight_data_scales = tf.get_collection(
        nf.FixedKeys.FIXED_WEIGHT_DATA_SCALE)
    act_data_scales = tf.get_collection(
        nf.FixedKeys.FIXED_ACTIVATION_DATA_SCALE)
    acc, weight_scales, act_scales = sess.run(
        [accuracy, weight_data_scales, act_data_scales],
        feed_dict={
            x: mnist.test.images,
            y_: mnist.test.labels,
            training_placeholder: False
        })

    print("accuracy: ", acc)
    print("Data fix scales: ", "\n".join(["{} {}".format(*item) for item in \
                                          zip([tensor.op.name for tensor in \
                                               itertools.chain(weight_data_scales, act_data_scales)],
                                              itertools.chain(weight_scales, act_scales))]))
Example #2
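# NOTE: imports are omitted in this snippet. As in Example #1, it presumably
# relies on TensorFlow 1.x (`tf`), the MNIST `input_data` loader, and the
# fixed-point library imported as `nf`, with `FLAGS` defined elsewhere.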
def main(_):
    # Import data
    mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=True)

    training_placeholder = None
    x = tf.placeholder(tf.float32, shape=[None, 784])
    # Define loss and optimizer
    y_ = tf.placeholder(tf.float32, [None, 10])

    cfgs = nf.parse_cfg_from_str("")
    s_cfgs = nf.parse_strategy_cfg_from_str("")

    if FLAGS.cfg is not None:
        cfgs = nf.parse_cfg_from_file(FLAGS.cfg)
    if FLAGS.scfg is not None:
        s_cfgs = nf.parse_strategy_cfg_from_file(FLAGS.scfg)

    with nf.fixed_scope("fixed_mlp_mnist", cfgs, s_cfgs) as (s, training, _):
        training_placeholder = training
        # Using chaining writing style:
        res = nf.wrap(x).Dense(units=100).ReLU().Dense(units=10).tensor
        # Alternatively, you can use the normal writing style:
        # res = nf.Dense(nf.ReLU(nf.Dense(x, units=100)), units=10)

    cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=res))
    optimizer = tf.train.GradientDescentOptimizer(0.5)
    grads_and_vars = optimizer.compute_gradients(cross_entropy)
    train_step = optimizer.apply_gradients(grads_and_vars)

    sess = tf.InteractiveSession()
    tf.global_variables_initializer().run()

    # Train
    for _ in range(1000):
        batch_xs, batch_ys = mnist.train.next_batch(100)
        sess.run(train_step,
                 feed_dict={
                     x: batch_xs,
                     y_: batch_ys,
                     training_placeholder: True
                 })

    # Test trained model
    correct_prediction = tf.equal(tf.argmax(res, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    print(
        sess.run(accuracy,
                 feed_dict={
                     x: mnist.test.images,
                     y_: mnist.test.labels,
                     training_placeholder: False
                 }))
Example #3
def main(_):
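    # NOTE: imports are omitted in this snippet. It presumably relies on
    # TensorFlow 1.x (`tf`), `keras` with the `cifar10` dataset and
    # `ImageDataGenerator`, `numpy as np`, `yaml`, `os`, `subprocess`, `time`,
    # `datetime`, the fixed-point library imported as `nf`, and project helpers
    # such as `log`, `MultiProcessGen`, and `RandomCrop` (exact import paths
    # are not shown here).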
    batch_size = FLAGS.batch_size # default to 128
    num_classes = 10

    # The data, shuffled and split between train and test sets:
    (x_train, y_train), (x_test, y_test) = cifar10.load_data()
    log(x_train.shape[0], " train samples")
    log(x_test.shape[0], " test samples")
    
    # Convert class vectors to binary class matrices.
    y_train = keras.utils.to_categorical(y_train, num_classes)
    y_test = keras.utils.to_categorical(y_test, num_classes)
    
    x_train = x_train.astype("float32")
    x_test = x_test.astype("float32")
    x_train /= 255
    x_test /= 255
    
    # Construct the model
    cfgs = nf.parse_cfg_from_str("")
    if FLAGS.cfg is not None:
        cfgs = nf.parse_cfg_from_file(FLAGS.cfg)
    
    s_cfgs = nf.parse_strategy_cfg_from_str("")
    if FLAGS.scfg is not None:
        s_cfgs = nf.parse_strategy_cfg_from_file(FLAGS.scfg)

    x = tf.placeholder(tf.float32, shape=[None] + list(x_train.shape[1:]))
    labels = tf.placeholder(tf.float32, [None, num_classes])
    weight_decay = FLAGS.weight_decay
    
    with nf.fixed_scope("fixed_mlp_mnist", cfgs, s_cfgs) as (s, training, fixed_mapping):
        training_placeholder = training
        # Look up the model-constructing function by name and build the network
        logits = globals()[FLAGS.model](x, num_classes, training, weight_decay).tensor

    # Construct the fixed saver
    saver = nf.utils.fixed_model_saver(fixed_mapping)
    
    # Loss and metrics
    cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=logits))
    reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
    loss = cross_entropy + tf.add_n(reg_losses) if reg_losses else cross_entropy
    index_label = tf.argmax(labels, 1)
    correct = tf.equal(tf.argmax(logits, 1), index_label)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
    top5_correct = tf.nn.in_top_k(logits, index_label, 5)
    top5_accuracy = tf.reduce_mean(tf.cast(top5_correct, tf.float32))
    
    # Initialize the optimizer
    global_step = tf.Variable(0, name="global_step", trainable=False)
    # The learning rate is multiplied by 0.5 every 30 epochs
    learning_rate = tf.train.exponential_decay(0.05, global_step=global_step,
                                               decay_steps=int(x_train.shape[0] / batch_size * 30),
                                               decay_rate=0.5, staircase=True)
    # FIXME: momentum may also have to be dropped...
    optimizer = tf.train.MomentumOptimizer(learning_rate, momentum=0.9)
    train_step = optimizer.minimize(loss, global_step=global_step)
    
    # Scales and summary op
    weight_tensor_names, weight_data_scales, weight_grad_scales, \
        weight_data_cfgs, weight_grad_cfgs = zip(*sorted(
            [(k.op.name, v["q_data_scale"], v["q_grad_scale"],
              v["data_cfg"], v["grad_cfg"])
             for k, v in fixed_mapping[nf.DataTypes.WEIGHT].items()],
            key=lambda x: x[0]))
    weight_data_names = [t.op.name for t in weight_data_scales]
    weight_grad_names = [t.op.name for t in weight_grad_scales]
    wd_bit_widths = [c.bit_width for c in weight_data_cfgs]
    wg_bit_widths = [c.bit_width for c in weight_grad_cfgs]
    wg_scales_values_min = None
    wg_scales_values_max = None
    # Summary weight data/grad scales
    for t in weight_data_scales:
        ind = t.op.name.index("/data")
        summary_name = t.op.name[:ind].replace("/", "_") + t.op.name[ind:]
        tf.summary.scalar(summary_name, t)
    for t in weight_grad_scales:
        ind = t.op.name.index("/grad")
        summary_name = t.op.name[:ind].replace("/", "_") + t.op.name[ind:]
        tf.summary.scalar(summary_name, t)
    summary_op = tf.summary.merge_all()
    summary_writer = tf.summary.FileWriter(FLAGS.summary_dir)

    log("Using real-time data augmentation.")
    # This will do preprocessing and realtime data augmentation:
    datagen_train = ImageDataGenerator(
        featurewise_center=True,  # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=True,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,  # divide each input by its std
        zca_whitening=False,  # apply ZCA whitening
        rotation_range=0,  # randomly rotate images in the range (degrees, 0 to 180)
        width_shift_range=0,  # randomly shift images horizontally (fraction of total width)
        height_shift_range=0,  # randomly shift images vertically (fraction of total height)
        horizontal_flip=True,  # randomly flip images
        vertical_flip=False)  # randomly flip images

    datagen_test = ImageDataGenerator(
        featurewise_center=True,  # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=True,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,  # divide each input by its std
        zca_whitening=False,  # apply ZCA whitening
        rotation_range=0,  # randomly rotate images in the range (degrees, 0 to 180)
        width_shift_range=0,  # randomly shift images horizontally (fraction of total width)
        height_shift_range=0,  # randomly shift images vertically (fraction of total height)
        horizontal_flip=False,  # randomly flip images
        vertical_flip=False)  # randomly flip images

    # Compute quantities required for feature-wise normalization
    # (std, mean, and principal components if ZCA whitening is applied).
    datagen_train.fit(x_train)
    datagen_test.fit(x_test)
    random_crop = RandomCrop(32, 4) # padding 4 and crop 32x32
    steps_per_epoch = x_train.shape[0] // batch_size
    total_iters = FLAGS.epochs * steps_per_epoch

    config = tf.ConfigProto()
    config.gpu_options.allow_growth=True
    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())

        if FLAGS.load_from is not None:
            log("Loading checkpoint from {}".format(FLAGS.load_from))
            saver.restore(sess, FLAGS.load_from)

        log("Start training...")
        # Training
        gen = MultiProcessGen(datagen_train.flow(x_train, y_train, batch_size=batch_size))
        iter_ = 0
        try:
            for epoch in range(1, FLAGS.epochs+1):
                start_time = time.time()
                loss_v_epoch = 0
                acc_1_epoch = 0
                acc_5_epoch = 0

                # Train batches
                for step in range(1, steps_per_epoch+1):
                    # TODO: use another thread to execute the data augmentation and enqueue
                    x_v, y_v = next(gen)
                    x_crop_v = random_crop(x_v)
                    _, loss_v, acc_1, acc_5, wd_scales_v, wg_scales_v, summary = sess.run(
                        [train_step, loss, accuracy, top5_accuracy,
                         weight_data_scales, weight_grad_scales, summary_op],
                        feed_dict={x: x_crop_v, labels: y_v})
                    print("\rEpoch {}: steps {}/{}".format(epoch, step, steps_per_epoch), end="")
                    loss_v_epoch += loss_v
                    acc_1_epoch += acc_1
                    acc_5_epoch += acc_5
                    iter_ += 1
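                    # Track the min/max weight-gradient scales seen during the
                    # last FLAGS.weight_grad_iters iterations; they are used to
                    # amend the gradient scales after training finishes.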
                    if iter_ > total_iters - FLAGS.weight_grad_iters:
                        if wg_scales_values_min is None:
                            wg_scales_values_min = np.array(wg_scales_v, dtype=np.int)
                            wg_scales_values_max = np.array(wg_scales_v, dtype=np.int)
                        else:
                            wg_scales_values_min = np.minimum(wg_scales_values_min, wg_scales_v)
                            wg_scales_values_max = np.maximum(wg_scales_values_max, wg_scales_v)
                    summary_writer.add_summary(summary, iter_)

                loss_v_epoch /= steps_per_epoch
                acc_1_epoch /= steps_per_epoch
                acc_5_epoch /= steps_per_epoch

                duration = time.time() - start_time
                sec_per_batch = duration / steps_per_epoch
                log("\r{}: Epoch {}; (average) loss: {:.3f}; (average) top1 accuracy: {:.2f} %; (average) top5 accuracy: {:.2f} %. {:.3f} sec/batch"\
                      .format(datetime.now(), epoch, loss_v_epoch, acc_1_epoch * 100, acc_5_epoch * 100, sec_per_batch), flush=True)
                # End training batches

                # Test on the validation set
                if epoch % FLAGS.test_frequency == 0:
                    test_gen = MultiProcessGen(datagen_test.flow(x_test, y_test, batch_size=batch_size))
                    steps_per_epoch_test = x_test.shape[0] // batch_size
                    loss_test = 0
                    acc_1_test = 0
                    acc_5_test = 0
                    try:
                        for step in range(1, steps_per_epoch_test+1):
                            x_v, y_v = next(test_gen)
                            loss_v, acc_1, acc_5 = sess.run([loss, accuracy, top5_accuracy],
                                                                         feed_dict={
                                                                             x: x_v,
                                                                             labels: y_v
                                                                         })
                            print("\r\ttest steps: {}/{}".format(step, steps_per_epoch_test), end="")
                            loss_test += loss_v
                            acc_1_test += acc_1
                            acc_5_test += acc_5
                        loss_test /= steps_per_epoch_test
                        acc_1_test /= steps_per_epoch_test
                        acc_5_test /= steps_per_epoch_test
                        log("\r\tTest: loss: {}; top1 accuracy: {:.2f} %; top5 accuracy: {:2f} %.".format(loss_test, acc_1_test * 100, acc_5_test * 100), flush=True)
                    finally:
                        test_gen.stop()
                # End test on the validation set
            # End training
        finally:
            gen.stop()
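        # After training, presumably reconcile the observed gradient-scale range
        # with the weight/gradient bit widths to obtain the final gradient and
        # gradient-buffer scales (semantics inferred from the call below).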
        final_wg_scales_values, final_wg_buffer_scales_values = nf.amend_weight_grad_scales(learning_rate, wg_scales_values_min, wg_scales_values_max,
                                                                                            weight_data_scales, weight_grad_scales, wd_bit_widths, wg_bit_widths,
                                                                                            grad_buffer_width=FLAGS.grad_buffer_width)
        for wtn, gs, wgs_v, wgbs_v in zip(weight_tensor_names, weight_grad_scales, final_wg_scales_values, final_wg_buffer_scales_values):
            print("{:40}: final gradient fixed scale: {:>4d}; gradient buffer scale: {:>4d}".format(wtn, int(wgs_v), int(wgbs_v)))
            sess.run(tf.assign(gs, wgs_v))
        # TODO: dump weight saving buffer strategy by name
        if FLAGS.save_strategy:
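            # Strategy entries are keyed by the second-to-last path component of
            # each weight tensor name, which presumably corresponds to the layer name.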
            strategy_config_dct = {
                "by_name": {
                    n.split("/")[-2]: {
                        "name": "weightgradsaver_strategy",
                        "scale": int(v),
                        "bit_width": FLAGS.grad_buffer_width
                    } for n, v in zip(weight_tensor_names, final_wg_buffer_scales_values)
                }
            }
            with open(FLAGS.save_strategy, "w") as f:
                yaml.dump(strategy_config_dct, f)
            log("Dump hardware straetgy file to {}".format(FLAGS.save_strategy))

        if FLAGS.train_dir:
            if not os.path.exists(FLAGS.train_dir):
                subprocess.check_call("mkdir -p {}".format(FLAGS.train_dir),
                                      shell=True)
            log("Saved model to: ", saver.save(sess, FLAGS.train_dir))
Example #4
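# NOTE: imports are omitted in this snippet. As in Example #3, it presumably
# relies on TensorFlow 1.x (`tf`), `keras` with `cifar10` and
# `ImageDataGenerator`, `os`, `subprocess`, `time`, `datetime`, the fixed-point
# library imported as `nf`, and project helpers such as `log`,
# `MultiProcessGen`, and `RandomCrop`.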
def main(_):
    batch_size = FLAGS.batch_size  # default to 128
    num_classes = 10

    # The data, shuffled and split between train and test sets:
    (x_train, y_train), (x_test, y_test) = cifar10.load_data()
    log(x_train.shape[0], " train samples")
    log(x_test.shape[0], " test samples")

    # Convert class vectors to binary class matrices.
    y_train = keras.utils.to_categorical(y_train, num_classes)
    y_test = keras.utils.to_categorical(y_test, num_classes)

    x_train = x_train.astype("float32")
    x_test = x_test.astype("float32")
    x_train /= 255
    x_test /= 255

    # Construct the model
    cfgs = nf.parse_cfg_from_str("")
    if FLAGS.cfg is not None:
        cfgs = nf.parse_cfg_from_file(FLAGS.cfg)

    x = tf.placeholder(tf.float32, shape=[None] + list(x_train.shape[1:]))
    labels = tf.placeholder(tf.float32, [None, num_classes])
    weight_decay = FLAGS.weight_decay

    with nf.fixed_scope("fixed_mlp_mnist",
                        cfgs) as (s, training, fixed_mapping):
        training_placeholder = training
        # Look up the model-constructing function by name and build the network
        logits = globals()[FLAGS.model](x, num_classes, training,
                                        weight_decay).tensor

    # Construct the fixed saver
    saver = nf.utils.fixed_model_saver(fixed_mapping)

    # Loss and metrics
    cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=logits))
    reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
    loss = cross_entropy + tf.add_n(reg_losses) if reg_losses else cross_entropy
    index_label = tf.argmax(labels, 1)
    correct = tf.equal(tf.argmax(logits, 1), index_label)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
    top5_correct = tf.nn.in_top_k(logits, index_label, 5)
    top5_accuracy = tf.reduce_mean(tf.cast(top5_correct, tf.float32))

    # Initialize the optimizer
    global_step = tf.Variable(0, name="global_step", trainable=False)
    # The learning rate is multiplied by 0.5 every 30 epochs
    learning_rate = tf.train.exponential_decay(
        0.05,
        global_step=global_step,
        decay_steps=int(x_train.shape[0] / batch_size * 30),
        decay_rate=0.5,
        staircase=True)
    optimizer = tf.train.MomentumOptimizer(learning_rate, momentum=0.9)
    train_step = optimizer.minimize(loss, global_step=global_step)

    log("Using real-time data augmentation.")
    # This will do preprocessing and realtime data augmentation:
    datagen_train = ImageDataGenerator(
        featurewise_center=True,  # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=True,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,  # divide each input by its std
        zca_whitening=False,  # apply ZCA whitening
        rotation_range=0,  # randomly rotate images in the range (degrees, 0 to 180)
        width_shift_range=0,  # randomly shift images horizontally (fraction of total width)
        height_shift_range=0,  # randomly shift images vertically (fraction of total height)
        horizontal_flip=True,  # randomly flip images
        vertical_flip=False)  # randomly flip images

    datagen_test = ImageDataGenerator(
        featurewise_center=True,  # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=True,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,  # divide each input by its std
        zca_whitening=False,  # apply ZCA whitening
        rotation_range=0,  # randomly rotate images in the range (degrees, 0 to 180)
        width_shift_range=0,  # randomly shift images horizontally (fraction of total width)
        height_shift_range=0,  # randomly shift images vertically (fraction of total height)
        horizontal_flip=False,  # randomly flip images
        vertical_flip=False)  # randomly flip images

    # Compute quantities required for feature-wise normalization
    # (std, mean, and principal components if ZCA whitening is applied).
    datagen_train.fit(x_train)
    datagen_test.fit(x_test)
    random_crop = RandomCrop(32, 4)  # padding 4 and crop 32x32

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
        log("Start training...")
        # Training
        gen = MultiProcessGen(
            datagen_train.flow(x_train, y_train, batch_size=batch_size))
        try:
            for epoch in range(1, FLAGS.epochs + 1):
                start_time = time.time()
                steps_per_epoch = x_train.shape[0] // batch_size
                loss_v_epoch = 0
                acc_1_epoch = 0
                acc_5_epoch = 0

                # Train batches
                for step in range(1, steps_per_epoch + 1):
                    # TODO: use another thread to execute the data augmentation and enqueue
                    x_v, y_v = next(gen)
                    x_crop_v = random_crop(x_v)
                    _, loss_v, acc_1, acc_5 = sess.run(
                        [train_step, loss, accuracy, top5_accuracy],
                        feed_dict={
                            x: x_crop_v,
                            labels: y_v
                        })
                    print("\rEpoch {}: steps {}/{}".format(
                        epoch, step, steps_per_epoch),
                          end="")
                    loss_v_epoch += loss_v
                    acc_1_epoch += acc_1
                    acc_5_epoch += acc_5

                loss_v_epoch /= steps_per_epoch
                acc_1_epoch /= steps_per_epoch
                acc_5_epoch /= steps_per_epoch

                duration = time.time() - start_time
                sec_per_batch = duration / steps_per_epoch
                log("\r{}: Epoch {}; (average) loss: {:.3f}; (average) top1 accuracy: {:.2f} %; (average) top5 accuracy: {:.2f} %. {:.3f} sec/batch"\
                      .format(datetime.now(), epoch, loss_v_epoch, acc_1_epoch * 100, acc_5_epoch * 100, sec_per_batch), flush=True)
                # End training batches

                # Test on the validation set
                if epoch % FLAGS.test_frequency == 0:
                    test_gen = MultiProcessGen(
                        datagen_test.flow(x_test,
                                          y_test,
                                          batch_size=batch_size))
                    steps_per_epoch = x_test.shape[0] // batch_size
                    loss_test = 0
                    acc_1_test = 0
                    acc_5_test = 0
                    try:
                        for step in range(1, steps_per_epoch + 1):
                            x_v, y_v = next(test_gen)
                            loss_v, acc_1, acc_5 = sess.run(
                                [loss, accuracy, top5_accuracy],
                                feed_dict={
                                    x: x_v,
                                    labels: y_v
                                })
                            print("\r\ttest steps: {}/{}".format(
                                step, steps_per_epoch),
                                  end="")
                            loss_test += loss_v
                            acc_1_test += acc_1
                            acc_5_test += acc_5
                        loss_test /= steps_per_epoch
                        acc_1_test /= steps_per_epoch
                        acc_5_test /= steps_per_epoch
                        log("\r\tTest: loss: {}; top1 accuracy: {:.2f} %; top5 accuracy: {:2f} %."
                            .format(loss_test, acc_1_test * 100,
                                    acc_5_test * 100),
                            flush=True)
                    finally:
                        test_gen.stop()
                # End test on the validation set
            # End training
        finally:
            gen.stop()

        if FLAGS.train_dir:
            if not os.path.exists(FLAGS.train_dir):
                subprocess.check_call("mkdir -p {}".format(FLAGS.train_dir),
                                      shell=True)
            log("Saved model to: ", saver.save(sess, FLAGS.train_dir))