def main(_):
    # Import data
    mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=True)

    training_placeholder = None
    x = tf.placeholder(tf.float32, shape=[None, 784])
    # Define loss and optimizer
    y_ = tf.placeholder(tf.float32, [None, 10])

    cfgs = nf.parse_cfg_from_str("")
    if FLAGS.cfg is not None:
        cfgs = nf.parse_cfg_from_file(FLAGS.cfg)

    with nf.fixed_scope("fixed_mlp_mnist", cfgs) as (s, training, fixed_mapping):
        training_placeholder = training
        # Using chaining writing style:
        res = nf.wrap(x).Dense(units=100, name="dense1").ReLU(name="relu1").Dense(
            units=10, name="dense2").tensor
        # Alternatively, you can use the normal writing style:
        # res = nf.Dense(nf.ReLU(nf.Dense(x, units=100, name="dense1"), name="relu1"),
        #                units=10, name="dense2")

    saver = nf.utils.fixed_model_saver(fixed_mapping)

    cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=res))
    optimizer = tf.train.GradientDescentOptimizer(0.5)
    grads_and_vars = optimizer.compute_gradients(cross_entropy)
    train_step = optimizer.apply_gradients(grads_and_vars)

    sess = tf.InteractiveSession()
    tf.global_variables_initializer().run()

    # Restore the trained weights from snapshot
    saver.restore(sess, FLAGS.train_dir)

    # Test trained model
    correct_prediction = tf.equal(tf.argmax(res, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    weight_data_scales = tf.get_collection(nf.FixedKeys.FIXED_WEIGHT_DATA_SCALE)
    act_data_scales = tf.get_collection(nf.FixedKeys.FIXED_ACTIVATION_DATA_SCALE)
    acc, weight_scales, act_scales = sess.run(
        [accuracy, weight_data_scales, act_data_scales],
        feed_dict={
            x: mnist.test.images,
            y_: mnist.test.labels,
            training_placeholder: False
        })
    print("accuracy: ", acc)
    print("Data fix scales: ",
          "\n".join(["{} {}".format(*item) for item in
                     zip([tensor.op.name for tensor in
                          itertools.chain(weight_data_scales, act_data_scales)],
                         itertools.chain(weight_scales, act_scales))]))
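# The "Data fix scales" printed above are the per-tensor scales chosen by the
# fixed-point simulation. As a rough illustration only (the exact rounding and
# clipping convention used by nf may differ, so treat this as an assumption,
# not the library's actual code), a scale `s` together with a bit width `B`
# can be read as symmetric fixed-point quantization with a power-of-two step:
def _illustrative_quantize(value, scale, bit_width):
    # Hypothetical helper, not part of nf: round `value` to a grid whose step
    # is 2 ** (scale - (bit_width - 1)) and clip to the signed integer range.
    step = 2.0 ** (scale - (bit_width - 1))
    q = round(value / step)
    q = max(min(q, 2 ** (bit_width - 1) - 1), -2 ** (bit_width - 1))
    return q * step
# e.g. _illustrative_quantize(0.374, scale=0, bit_width=8) -> 0.375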
def main(_):
    # Import data
    mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=True)

    training_placeholder = None
    x = tf.placeholder(tf.float32, shape=[None, 784])
    # Define loss and optimizer
    y_ = tf.placeholder(tf.float32, [None, 10])

    cfgs = nf.parse_cfg_from_str("")
    s_cfgs = nf.parse_strategy_cfg_from_str("")
    if FLAGS.cfg is not None:
        cfgs = nf.parse_cfg_from_file(FLAGS.cfg)
    if FLAGS.scfg is not None:
        s_cfgs = nf.parse_strategy_cfg_from_file(FLAGS.scfg)

    with nf.fixed_scope("fixed_mlp_mnist", cfgs, s_cfgs) as (s, training, _):
        training_placeholder = training
        # Using chaining writing style:
        res = nf.wrap(x).Dense(units=100).ReLU().Dense(units=10).tensor
        # Alternatively, you can use the normal writing style:
        # res = nf.Dense(nf.ReLU(nf.Dense(x, units=100)), units=10)

    cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=res))
    optimizer = tf.train.GradientDescentOptimizer(0.5)
    grads_and_vars = optimizer.compute_gradients(cross_entropy)
    train_step = optimizer.apply_gradients(grads_and_vars)

    sess = tf.InteractiveSession()
    tf.global_variables_initializer().run()

    # Train
    for _ in range(1000):
        batch_xs, batch_ys = mnist.train.next_batch(100)
        sess.run(train_step,
                 feed_dict={
                     x: batch_xs,
                     y_: batch_ys,
                     training_placeholder: True
                 })

    # Test trained model
    correct_prediction = tf.equal(tf.argmax(res, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    print(
        sess.run(accuracy,
                 feed_dict={
                     x: mnist.test.images,
                     y_: mnist.test.labels,
                     training_placeholder: False
                 }))
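# The chaining style above is only a fluent wrapper; the same two-layer MLP can
# also be written step by step with explicit intermediates. This is a sketch
# using only the nf.Dense/nf.ReLU calls shown in the commented alternative
# above, assuming they accept and return tensors in the same way:
#
#     hidden = nf.Dense(x, units=100)
#     hidden = nf.ReLU(hidden)
#     res = nf.Dense(hidden, units=10)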
def main(_):
    batch_size = FLAGS.batch_size  # default to 128
    num_classes = 10

    # The data, shuffled and split between train and test sets:
    (x_train, y_train), (x_test, y_test) = cifar10.load_data()
    log(x_train.shape[0], " train samples")
    log(x_test.shape[0], " test samples")

    # Convert class vectors to binary class matrices.
    y_train = keras.utils.to_categorical(y_train, num_classes)
    y_test = keras.utils.to_categorical(y_test, num_classes)

    x_train = x_train.astype("float32")
    x_test = x_test.astype("float32")
    x_train /= 255
    x_test /= 255

    # Construct the model
    cfgs = nf.parse_cfg_from_str("")
    if FLAGS.cfg is not None:
        cfgs = nf.parse_cfg_from_file(FLAGS.cfg)
    s_cfgs = nf.parse_strategy_cfg_from_str("")
    if FLAGS.scfg is not None:
        s_cfgs = nf.parse_strategy_cfg_from_file(FLAGS.scfg)

    x = tf.placeholder(tf.float32, shape=[None] + list(x_train.shape[1:]))
    labels = tf.placeholder(tf.float32, [None, num_classes])
    weight_decay = FLAGS.weight_decay
    with nf.fixed_scope("fixed_mlp_mnist", cfgs, s_cfgs) as (s, training, fixed_mapping):
        training_placeholder = training
        # Using chaining writing style:
        logits = globals()[FLAGS.model](x, num_classes, training, weight_decay).tensor

    # Construct the fixed saver
    saver = nf.utils.fixed_model_saver(fixed_mapping)

    # Loss and metrics
    cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=logits))
    reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
    loss = (cross_entropy + tf.add_n(reg_losses)) if reg_losses else cross_entropy
    index_label = tf.argmax(labels, 1)
    correct = tf.equal(tf.argmax(logits, 1), index_label)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
    top5_correct = tf.nn.in_top_k(logits, index_label, 5)
    top5_accuracy = tf.reduce_mean(tf.cast(top5_correct, tf.float32))

    # Initialize the optimizer
    global_step = tf.Variable(0, name="global_step", trainable=False)
    # Learning rate is multiplied by 0.5 after training for every 30 epochs
    learning_rate = tf.train.exponential_decay(
        0.05,
        global_step=global_step,
        decay_steps=int(x_train.shape[0] / batch_size * 30),
        decay_rate=0.5,
        staircase=True)
    # FIXME: momentum cannot be used here either...
    optimizer = tf.train.MomentumOptimizer(learning_rate, momentum=0.9)
    train_step = optimizer.minimize(loss, global_step=global_step)

    # Scales and summary op
    weight_tensor_names, weight_data_scales, weight_grad_scales, \
        weight_data_cfgs, weight_grad_cfgs = zip(*sorted(
            [(k.op.name, v["q_data_scale"], v["q_grad_scale"], v["data_cfg"], v["grad_cfg"])
             for k, v in fixed_mapping[nf.DataTypes.WEIGHT].items()],
            key=lambda x: x[0]))
    weight_data_names = [t.op.name for t in weight_data_scales]
    weight_grad_names = [t.op.name for t in weight_grad_scales]
    wd_bit_widths = [c.bit_width for c in weight_data_cfgs]
    wg_bit_widths = [c.bit_width for c in weight_grad_cfgs]
    wg_scales_values_min = None
    wg_scales_values_max = None

    # Summary weight data/grad scales
    for t in weight_data_scales:
        ind = t.op.name.index("/data")
        summary_name = t.op.name[:ind].replace("/", "_") + t.op.name[ind:]
        tf.summary.scalar(summary_name, t)
    for t in weight_grad_scales:
        ind = t.op.name.index("/grad")
        summary_name = t.op.name[:ind].replace("/", "_") + t.op.name[ind:]
        tf.summary.scalar(summary_name, t)
    summary_op = tf.summary.merge_all()
    summary_writer = tf.summary.FileWriter(FLAGS.summary_dir)

    log("Using real-time data augmentation.")
    # This will do preprocessing and realtime data augmentation:
    datagen_train = ImageDataGenerator(
        featurewise_center=True,  # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=True,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,  # divide each input by its std
        zca_whitening=False,  # apply ZCA whitening
        rotation_range=0,  # randomly rotate images in the range (degrees, 0 to 180)
        width_shift_range=0,  # randomly shift images horizontally (fraction of total width)
        height_shift_range=0,  # randomly shift images vertically (fraction of total height)
        horizontal_flip=True,  # randomly flip images
        vertical_flip=False)  # randomly flip images
    datagen_test = ImageDataGenerator(
        featurewise_center=True,  # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=True,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,  # divide each input by its std
        zca_whitening=False,  # apply ZCA whitening
        rotation_range=0,  # randomly rotate images in the range (degrees, 0 to 180)
        width_shift_range=0,  # randomly shift images horizontally (fraction of total width)
        height_shift_range=0,  # randomly shift images vertically (fraction of total height)
        horizontal_flip=False,  # randomly flip images
        vertical_flip=False)  # randomly flip images

    # Compute quantities required for feature-wise normalization
    # (std, mean, and principal components if ZCA whitening is applied).
    datagen_train.fit(x_train)
    datagen_test.fit(x_test)

    random_crop = RandomCrop(32, 4)  # padding 4 and crop 32x32
    steps_per_epoch = x_train.shape[0] // batch_size
    total_iters = FLAGS.epochs * steps_per_epoch

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
        if FLAGS.load_from is not None:
            log("Loading checkpoint from {}".format(FLAGS.load_from))
            saver.restore(sess, FLAGS.load_from)
        log("Start training...")
        # Training
        gen = MultiProcessGen(datagen_train.flow(x_train, y_train, batch_size=batch_size))
        iter_ = 0
        try:
            for epoch in range(1, FLAGS.epochs + 1):
                start_time = time.time()
                loss_v_epoch = 0
                acc_1_epoch = 0
                acc_5_epoch = 0
                # Train batches
                for step in range(1, steps_per_epoch + 1):
                    # TODO: use another thread to execute the data augmentation and enqueue
                    x_v, y_v = next(gen)
                    x_crop_v = random_crop(x_v)
                    _, loss_v, acc_1, acc_5, wd_scales_v, wg_scales_v, summary = sess.run(
                        [train_step, loss, accuracy, top5_accuracy,
                         weight_data_scales, weight_grad_scales, summary_op],
                        feed_dict={
                            x: x_crop_v,
                            labels: y_v
                        })
                    print("\rEpoch {}: steps {}/{}".format(epoch, step, steps_per_epoch),
                          end="")
                    loss_v_epoch += loss_v
                    acc_1_epoch += acc_1
                    acc_5_epoch += acc_5
                    iter_ += 1
                    if iter_ > total_iters - FLAGS.weight_grad_iters:
                        if wg_scales_values_min is None:
                            wg_scales_values_min = np.array(wg_scales_v, dtype=np.int)
                            wg_scales_values_max = np.array(wg_scales_v, dtype=np.int)
                        else:
                            wg_scales_values_min = np.minimum(wg_scales_values_min, wg_scales_v)
                            wg_scales_values_max = np.maximum(wg_scales_values_max, wg_scales_v)
                    summary_writer.add_summary(summary, iter_)
                loss_v_epoch /= steps_per_epoch
                acc_1_epoch /= steps_per_epoch
                acc_5_epoch /= steps_per_epoch
                duration = time.time() - start_time
                sec_per_batch = duration / (steps_per_epoch * batch_size)
                log("\r{}: Epoch {}; (average) loss: {:.3f}; (average) top1 accuracy: {:.2f} %; (average) top5 accuracy: {:.2f} %. {:.3f} sec/batch"\
                    .format(datetime.now(), epoch, loss_v_epoch, acc_1_epoch * 100,
                            acc_5_epoch * 100, sec_per_batch),
                    flush=True)
                # End training batches

                # Test on the validation set
                if epoch % FLAGS.test_frequency == 0:
                    test_gen = MultiProcessGen(
                        datagen_test.flow(x_test, y_test, batch_size=batch_size))
                    steps_per_epoch_test = x_test.shape[0] // batch_size
                    loss_test = 0
                    acc_1_test = 0
                    acc_5_test = 0
                    try:
                        for step in range(1, steps_per_epoch_test + 1):
                            x_v, y_v = next(test_gen)
                            loss_v, acc_1, acc_5 = sess.run(
                                [loss, accuracy, top5_accuracy],
                                feed_dict={
                                    x: x_v,
                                    labels: y_v
                                })
                            print("\r\ttest steps: {}/{}".format(step, steps_per_epoch_test),
                                  end="")
                            loss_test += loss_v
                            acc_1_test += acc_1
                            acc_5_test += acc_5
                        loss_test /= steps_per_epoch_test
                        acc_1_test /= steps_per_epoch_test
                        acc_5_test /= steps_per_epoch_test
                        log("\r\tTest: loss: {}; top1 accuracy: {:.2f} %; top5 accuracy: {:.2f} %."
                            .format(loss_test, acc_1_test * 100, acc_5_test * 100),
                            flush=True)
                    finally:
                        test_gen.stop()
                # End test on the validation set
            # End training
        finally:
            gen.stop()

        final_wg_scales_values, final_wg_buffer_scales_values = nf.amend_weight_grad_scales(
            learning_rate, wg_scales_values_min, wg_scales_values_max,
            weight_data_scales, weight_grad_scales, wd_bit_widths, wg_bit_widths,
            grad_buffer_width=FLAGS.grad_buffer_width)
        for wtn, gs, wgs_v, wgbs_v in zip(weight_tensor_names, weight_grad_scales,
                                          final_wg_scales_values,
                                          final_wg_buffer_scales_values):
            print("{:40}: final gradient fixed scale: {:>4d}; gradient buffer scale: {:>4d}"
                  .format(wtn, int(wgs_v), int(wgbs_v)))
            sess.run(tf.assign(gs, wgs_v))

        # TODO: dump weight saving buffer strategy by name
        if FLAGS.save_strategy:
            strategy_config_dct = {
                "by_name": {
                    n.split("/")[-2]: {
                        "name": "weightgradsaver_strategy",
                        "scale": int(v),
                        "bit_width": FLAGS.grad_buffer_width
                    }
                    for n, v in zip(weight_tensor_names, final_wg_buffer_scales_values)
                }
            }
            with open(FLAGS.save_strategy, "w") as f:
                yaml.dump(strategy_config_dct, f)
            log("Dump hardware strategy file to {}".format(FLAGS.save_strategy))

        if FLAGS.train_dir:
            if not os.path.exists(FLAGS.train_dir):
                subprocess.check_call("mkdir -p {}".format(FLAGS.train_dir), shell=True)
            log("Saved model to: ", saver.save(sess, FLAGS.train_dir))
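# For reference, the strategy file dumped above has roughly this shape after
# yaml.dump of strategy_config_dct. The layer names and scale values below are
# hypothetical examples (not output from a real run), and the bit_width simply
# mirrors FLAGS.grad_buffer_width:
#
#     by_name:
#       dense1: {bit_width: 24, name: weightgradsaver_strategy, scale: -6}
#       dense2: {bit_width: 24, name: weightgradsaver_strategy, scale: -4}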
def main(_):
    batch_size = FLAGS.batch_size  # default to 128
    num_classes = 10

    # The data, shuffled and split between train and test sets:
    (x_train, y_train), (x_test, y_test) = cifar10.load_data()
    log(x_train.shape[0], " train samples")
    log(x_test.shape[0], " test samples")

    # Convert class vectors to binary class matrices.
    y_train = keras.utils.to_categorical(y_train, num_classes)
    y_test = keras.utils.to_categorical(y_test, num_classes)

    x_train = x_train.astype("float32")
    x_test = x_test.astype("float32")
    x_train /= 255
    x_test /= 255

    # Construct the model
    cfgs = nf.parse_cfg_from_str("")
    if FLAGS.cfg is not None:
        cfgs = nf.parse_cfg_from_file(FLAGS.cfg)

    x = tf.placeholder(tf.float32, shape=[None] + list(x_train.shape[1:]))
    labels = tf.placeholder(tf.float32, [None, num_classes])
    weight_decay = FLAGS.weight_decay
    with nf.fixed_scope("fixed_mlp_mnist", cfgs) as (s, training, fixed_mapping):
        training_placeholder = training
        # Using chaining writing style:
        logits = globals()[FLAGS.model](x, num_classes, training, weight_decay).tensor

    # Construct the fixed saver
    saver = nf.utils.fixed_model_saver(fixed_mapping)

    # Loss and metrics
    cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=logits))
    reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
    loss = cross_entropy + tf.add_n(reg_losses)
    index_label = tf.argmax(labels, 1)
    correct = tf.equal(tf.argmax(logits, 1), index_label)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
    top5_correct = tf.nn.in_top_k(logits, index_label, 5)
    top5_accuracy = tf.reduce_mean(tf.cast(top5_correct, tf.float32))

    # Initialize the optimizer
    global_step = tf.Variable(0, name="global_step", trainable=False)
    # Learning rate is multiplied by 0.5 after training for every 30 epochs
    learning_rate = tf.train.exponential_decay(
        0.05,
        global_step=global_step,
        decay_steps=int(x_train.shape[0] / batch_size * 30),
        decay_rate=0.5,
        staircase=True)
    optimizer = tf.train.MomentumOptimizer(learning_rate, momentum=0.9)
    train_step = optimizer.minimize(loss, global_step=global_step)

    log("Using real-time data augmentation.")
    # This will do preprocessing and realtime data augmentation:
    datagen_train = ImageDataGenerator(
        featurewise_center=True,  # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=True,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,  # divide each input by its std
        zca_whitening=False,  # apply ZCA whitening
        rotation_range=0,  # randomly rotate images in the range (degrees, 0 to 180)
        width_shift_range=0,  # randomly shift images horizontally (fraction of total width)
        height_shift_range=0,  # randomly shift images vertically (fraction of total height)
        horizontal_flip=True,  # randomly flip images
        vertical_flip=False)  # randomly flip images
    datagen_test = ImageDataGenerator(
        featurewise_center=True,  # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=True,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,  # divide each input by its std
        zca_whitening=False,  # apply ZCA whitening
        rotation_range=0,  # randomly rotate images in the range (degrees, 0 to 180)
        width_shift_range=0,  # randomly shift images horizontally (fraction of total width)
        height_shift_range=0,  # randomly shift images vertically (fraction of total height)
        horizontal_flip=False,  # randomly flip images
        vertical_flip=False)  # randomly flip images

    # Compute quantities required for feature-wise normalization
    # (std, mean, and principal components if ZCA whitening is applied).
    datagen_train.fit(x_train)
    datagen_test.fit(x_test)

    random_crop = RandomCrop(32, 4)  # padding 4 and crop 32x32

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
        log("Start training...")
        # Training
        gen = MultiProcessGen(datagen_train.flow(x_train, y_train, batch_size=batch_size))
        try:
            for epoch in range(1, FLAGS.epochs + 1):
                start_time = time.time()
                steps_per_epoch = x_train.shape[0] // batch_size
                loss_v_epoch = 0
                acc_1_epoch = 0
                acc_5_epoch = 0
                # Train batches
                for step in range(1, steps_per_epoch + 1):
                    # TODO: use another thread to execute the data augmentation and enqueue
                    x_v, y_v = next(gen)
                    x_crop_v = random_crop(x_v)
                    _, loss_v, acc_1, acc_5 = sess.run(
                        [train_step, loss, accuracy, top5_accuracy],
                        feed_dict={
                            x: x_crop_v,
                            labels: y_v
                        })
                    print("\rEpoch {}: steps {}/{}".format(epoch, step, steps_per_epoch),
                          end="")
                    loss_v_epoch += loss_v
                    acc_1_epoch += acc_1
                    acc_5_epoch += acc_5
                loss_v_epoch /= steps_per_epoch
                acc_1_epoch /= steps_per_epoch
                acc_5_epoch /= steps_per_epoch
                duration = time.time() - start_time
                sec_per_batch = duration / (steps_per_epoch * batch_size)
                log("\r{}: Epoch {}; (average) loss: {:.3f}; (average) top1 accuracy: {:.2f} %; (average) top5 accuracy: {:.2f} %. {:.3f} sec/batch"\
                    .format(datetime.now(), epoch, loss_v_epoch, acc_1_epoch * 100,
                            acc_5_epoch * 100, sec_per_batch),
                    flush=True)
                # End training batches

                # Test on the validation set
                if epoch % FLAGS.test_frequency == 0:
                    test_gen = MultiProcessGen(
                        datagen_test.flow(x_test, y_test, batch_size=batch_size))
                    steps_per_epoch = x_test.shape[0] // batch_size
                    loss_test = 0
                    acc_1_test = 0
                    acc_5_test = 0
                    try:
                        for step in range(1, steps_per_epoch + 1):
                            x_v, y_v = next(test_gen)
                            loss_v, acc_1, acc_5 = sess.run(
                                [loss, accuracy, top5_accuracy],
                                feed_dict={
                                    x: x_v,
                                    labels: y_v
                                })
                            print("\r\ttest steps: {}/{}".format(step, steps_per_epoch),
                                  end="")
                            loss_test += loss_v
                            acc_1_test += acc_1
                            acc_5_test += acc_5
                        loss_test /= steps_per_epoch
                        acc_1_test /= steps_per_epoch
                        acc_5_test /= steps_per_epoch
                        log("\r\tTest: loss: {}; top1 accuracy: {:.2f} %; top5 accuracy: {:.2f} %."
                            .format(loss_test, acc_1_test * 100, acc_5_test * 100),
                            flush=True)
                    finally:
                        test_gen.stop()
                # End test on the validation set
            # End training
        finally:
            gen.stop()

        if FLAGS.train_dir:
            if not os.path.exists(FLAGS.train_dir):
                subprocess.check_call("mkdir -p {}".format(FLAGS.train_dir), shell=True)
            log("Saved model to: ", saver.save(sess, FLAGS.train_dir))
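# Note on the checkpoint directory handling above: shelling out to `mkdir -p`
# keeps parity with the other example; a pure-Python alternative (a sketch,
# assuming no other reason to shell out) would be:
#
#     if FLAGS.train_dir and not os.path.exists(FLAGS.train_dir):
#         os.makedirs(FLAGS.train_dir)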