Example #1
def tower_loss(scope, images, labels):
    """Calculate the total loss on a single tower running the AlexNet model.

    Args:
      scope: unique prefix string identifying the AlexNet tower, e.g. 'tower_0'
      images: Images. 4D tensor of shape [batch_size, height, width, 3].
      labels: Labels. 1D tensor of shape [batch_size].

    Returns:
       Tensor of shape [] containing the total loss for a batch of data
    """

    # Build inference Graph.
    logits = alexnet.inference(images)

    # Build the portion of the Graph calculating the losses. Note that we will
    # assemble the total_loss using a custom function below.
    _ = alexnet.loss(logits, labels)

    # Assemble all of the losses for the current tower only.
    losses = tf.get_collection('losses', scope)

    # Calculate the total loss for the current tower.
    total_loss = tf.add_n(losses, name='total_loss')

    # Attach a scalar summary to all individual losses and the total loss; do the
    # same for the averaged version of the losses.
    for l in losses + [total_loss]:
        # Remove 'tower_[0-9]/' from the name in case this is a multi-GPU training
        # session. This helps the clarity of presentation on tensorboard.
        loss_name = re.sub('%s_[0-9]*/' % alexnet.TOWER_NAME, '', l.op.name)
        tf.summary.scalar(loss_name, l)

    return total_loss
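This helper is normally called once per GPU inside a variable-sharing loop. Below is a minimal sketch of such a loop, assuming a hypothetical FLAGS.num_gpus flag, per-tower images/labels batches, and a plain gradient-descent optimizer; none of these appear in the original snippet.

# Sketch only: the flag, batches and optimizer are assumptions, not original code.
tower_grads = []
optimizer = tf.train.GradientDescentOptimizer(0.01)
with tf.variable_scope(tf.get_variable_scope()):
    for i in range(FLAGS.num_gpus):
        with tf.device('/gpu:%d' % i):
            with tf.name_scope('%s_%d' % (alexnet.TOWER_NAME, i)) as scope:
                # Build the loss for this tower and collect its gradients.
                loss = tower_loss(scope, images, labels)
                tf.get_variable_scope().reuse_variables()
                tower_grads.append(optimizer.compute_gradients(loss))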
Example #2
def train():
    with tf.Graph().as_default():
        global_step = tf.train.get_or_create_global_step()

        with tf.device('/cpu:0'):
            dataset = input_fn(FLAGS.batch_size)
            iterator = dataset.make_one_shot_iterator()
            next_examples, next_labels = iterator.get_next()

        # Build a Graph computing logits prediction from the
        # inference model
        logits = alexnet.inference(next_examples['image_data'])

        # Calculate loss
        loss = alexnet.loss(logits, next_labels)

        # Build a Graph training the model with one batch of examples and
        # updating the model parameters
        train_op = alexnet.train(loss, global_step)

        class _LoggerHook(tf.train.SessionRunHook):
            """Logs loss and runtime"""
            def begin(self):
                self._step = -1
                self._start_time = time.time()

            def before_run(self, run_context):
                self._step += 1
                return tf.train.SessionRunArgs(loss)  # Asks for loss value

            def after_run(self, run_context, run_values):
                if self._step % FLAGS.log_frequency == 0:
                    current_time = time.time()
                    duration = current_time - self._start_time
                    self._start_time = current_time

                    loss_value = run_values.results
                    example_per_sec = FLAGS.log_frequency * FLAGS.batch_size / duration
                    sec_per_batch = float(duration / FLAGS.log_frequency)

                    print(
                        f'{datetime.now()}: step {self._step}, loss = {loss_value:.2f} '
                        f'({example_per_sec:.1f} examples/sec; {sec_per_batch:.3f} sec/batch)'
                    )

        # MonitoredTrainingSession automatically initializes and/or restores
        # variables before returning. MonitoredSession.run() automatically
        # recovers from PS failures and can run additional code in hooks.
        with tf.train.MonitoredTrainingSession(
                checkpoint_dir=FLAGS.train_dir,
                hooks=[
                    tf.train.StopAtStepHook(last_step=FLAGS.max_step),
                    tf.train.NanTensorHook(loss),
                    _LoggerHook()
                ],
                config=tf.ConfigProto(
                    log_device_placement=False,
                    gpu_options=tf.GPUOptions(allow_growth=True))) as mon_sess:
            while not mon_sess.should_stop():
                mon_sess.run(train_op)
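The script above relies on an input_fn that is not shown. A minimal sketch of what it could look like, assuming the data lives in TFRecord files and a hypothetical parse_example function that returns ({'image_data': image}, label):

# Sketch only: the file pattern and parse_example are assumptions.
def input_fn(batch_size):
    files = tf.gfile.Glob('../data/train-*.tfrecords')
    dataset = tf.data.TFRecordDataset(files)
    dataset = dataset.map(parse_example)   # hypothetical parser
    dataset = dataset.shuffle(buffer_size=10000).repeat().batch(batch_size)
    return dataset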
Example #3
def evaluate(images, checkpoint_dir):
	weights = weight_variable()
	biases = bias_variable()
	x = tf.placeholder(tf.float32, [None, 227, 227, 3])
	y = tf.placeholder(tf.float32, [None, num_classes])
	keep_prob = tf.placeholder(tf.float32)

	pred = inference(x, weights, biases, keep_prob)

	saver = tf.train.Saver()
	with tf.Session() as sess:
		ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
		if ckpt and ckpt.model_checkpoint_path:
			print('load model %s' % ckpt.model_checkpoint_path)
			saver.restore(sess, ckpt.model_checkpoint_path)
		else:
			print('No checkpoint file found at %s' % checkpoint_dir)
		output = sess.run(pred, feed_dict={x: images, keep_prob: 1.})
	return output
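A hedged usage sketch for the function above; the image-loading helper is hypothetical, and the images are expected as a float32 array of shape [N, 227, 227, 3]:

images = load_images('data/test')          # hypothetical helper
scores = evaluate(images, 'checkpoints/')  # forward pass with dropout disabled
predictions = scores.argmax(axis=1)        # predicted class index per image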
Example #4
def evaluate_by_class(data_dir, checkpoint_dir=None, model_checkpoint_path=None):
	weights = weight_variable()
	biases = bias_variable()
	x = tf.placeholder(tf.float32, [None, 227, 227, 3])
	y = tf.placeholder(tf.float32, [None, num_classes])
	keep_prob = tf.placeholder(tf.float32)

	pred = inference(x, weights, biases, keep_prob)

	saver = tf.train.Saver()
	with tf.Session() as sess:
		if checkpoint_dir:
			ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
			if ckpt and ckpt.model_checkpoint_path:
				print('load latest model %s' % ckpt.model_checkpoint_path)
				saver.restore(sess, ckpt.model_checkpoint_path)
			else:
				print('No checkpoint file found at %s' % checkpoint_dir)
				return
		elif model_checkpoint_path:
			print('load model %s' % model_checkpoint_path)
			saver.restore(sess, model_checkpoint_path)
		else:
			return

		total_true = 0
		total_examples = 0
		for name in class_names:
			dataset = LazyDataSet(data_dir=data_dir, label=name, batch_size=50)
			true_count = 0
			for i in range(dataset.num_epochs):
				images, labels = dataset.next_batch()
				output = sess.run(pred, feed_dict={x: images, keep_prob: 1.})

				batch_true_count = eval_once(output, labels)
				true_count += batch_true_count
				print('step:%d, %d/%d, accuracy: %g' %(i, batch_true_count, dataset.batch_size, batch_true_count * 1.0 / dataset.batch_size))
			num_examples = dataset.num_epochs * dataset.batch_size
			print('class:%s, %d/%d, accuracy: %g' %(name, true_count, num_examples, true_count * 1.0 / num_examples))
			total_true += true_count
			total_examples += num_examples
		print('%d/%d, total accuracy: %g' %(total_true, total_examples, total_true * 1.0 / total_examples))
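The eval_once helper used above is not shown. One plausible implementation, assuming the labels are one-hot encoded to match the [None, num_classes] placeholder:

import numpy as np

def eval_once(output, labels):
    # Count how many predictions match the one-hot ground-truth labels.
    predictions = np.argmax(output, axis=1)
    targets = np.argmax(labels, axis=1)
    return int(np.sum(predictions == targets))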
Example #5
def main(argv=None):

    # test data input
    test_file = tf.train.match_filenames_once(
        "../cifar-10-data/eval.tfrecords")
    test_dataset = tf.data.TFRecordDataset(test_file)
    test_dataset = test_dataset.map(alexnet.test_parser)
    test_dataset = test_dataset.batch(1000)
    test_dataset = test_dataset.repeat(None)
    test_iterator = test_dataset.make_initializable_iterator()
    test_image, test_label = test_iterator.get_next()

    # test
    test_logits = alexnet.inference(test_image, False)
    test_cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=test_logits, labels=test_label)
    test_loss = tf.reduce_mean(test_cross_entropy)
    correct_prediction = tf.equal(
        tf.argmax(test_logits, -1, output_type=tf.int32), test_label)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    saver = tf.train.Saver()

    with tf.Session() as sess:
        sess.run([
            tf.global_variables_initializer(),
            tf.local_variables_initializer()
        ])
        sess.run(test_iterator.initializer)
        ckpt = tf.train.get_checkpoint_state(alexnet_train.MODEL_SAVE_PATH)
        if ckpt and ckpt.model_checkpoint_path:
            # for i in range(10):
            saver.restore(sess, ckpt.model_checkpoint_path)
            global_step = ckpt.model_checkpoint_path.split('/')[-1].split(
                '-')[-1]
            test_loss_value, accuracy_score = sess.run([test_loss, accuracy])
            print("steps: %s, test_loss: %g, accuracy: %g" %
                  (global_step, test_loss_value, accuracy_score))
        else:
            print('No checkpoint file found')
            return
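The eval dataset is mapped through alexnet.test_parser, which is not shown. A sketch of what such a parser might do for CIFAR-10 TFRecords; the feature keys and record layout are assumptions, and the real parser may also resize or preprocess the image:

# Sketch only: feature names and shapes are assumed.
def test_parser(serialized_example):
    features = tf.parse_single_example(
        serialized_example,
        features={
            'image_raw': tf.FixedLenFeature([], tf.string),
            'label': tf.FixedLenFeature([], tf.int64),
        })
    image = tf.decode_raw(features['image_raw'], tf.uint8)
    image = tf.reshape(image, [32, 32, 3])
    image = tf.cast(image, tf.float32) / 255.0
    label = tf.cast(features['label'], tf.int32)
    return image, label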
Example #6
def main(argv=None):
	# train data input
	train_file = tf.train.match_filenames_once("../cifar-10-data/train.tfrecords")
	train_dataset = tf.data.TFRecordDataset(train_file)
	train_dataset = train_dataset.map(alexnet.train_parser)
	train_dataset = train_dataset.shuffle(SHUFFLE_BUFFER).batch(BATCH_SIZE)
	train_dataset = train_dataset.repeat(None)
	train_iterator = train_dataset.make_initializable_iterator()
	train_image, train_label = train_iterator.get_next()

	# train
	y = alexnet.inference(train_image, True)
	global_step = tf.Variable(0, trainable=False)
	cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y, labels=train_label)
	cross_entropy_mean = tf.reduce_mean(cross_entropy)
	tf.add_to_collection('losses', cross_entropy_mean)
	loss = tf.add_n(tf.get_collection('losses'))
	correct_prediction = tf.equal(tf.argmax(y, -1, output_type=tf.int32), train_label)
	accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
	learning_rate = tf.train.exponential_decay(
		LEARNING_RATE_BASE,
		global_step,
		DECAY_STEPS, 
		LEARNING_RATE_DECAY,
		staircase=True)
	train_step = tf.train.MomentumOptimizer(learning_rate, MOMENTUM).minimize(loss, global_step=global_step)
	
	saver = tf.train.Saver()

	with tf.Session() as sess:
		sess.run([tf.global_variables_initializer(), tf.local_variables_initializer()])
		sess.run(train_iterator.initializer)
		f = open('result/result2.txt', 'a')
		for i in range(TRAINING_STEPS):
			_, loss_value, step, accuracy_score = sess.run([train_step, loss, global_step, accuracy])

			if i % 100 == 0:
				print("steps: %d, loss: %g, accuracy: %g" % (step, loss_value, accuracy_score))
				f.write("%d\t%f\t%f\n" % (step, loss_value, accuracy_score))
				saver.save(sess, os.path.join(MODEL_SAVE_PATH, MODEL_NAME), global_step=global_step)
		f.close()
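For reference, with staircase=True the learning-rate schedule above reduces to a simple expression; the constants below are illustrative, not the values used in the original:

# lr = LEARNING_RATE_BASE * LEARNING_RATE_DECAY ** (global_step // DECAY_STEPS)
base, decay, decay_steps = 0.1, 0.5, 1000   # illustrative constants
step = 2500
lr = base * decay ** (step // decay_steps)  # 0.1 * 0.5**2 = 0.025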
Example #7
data = load_datasets()

learning_rate = 0.05
dropout = 0.8
training_iters = 20000
batch_size = 64
display_step = 10

x = tf.placeholder(tf.float32, [None, 227, 227, 3])
y = tf.placeholder(tf.float32, [None, num_classes])
keep_prob = tf.placeholder(tf.float32)

weights, biases = parameters()

pred = inference(x, weights, biases, keep_prob)

# cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(pred, y))
# optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

softmax_linear = tf.nn.softmax(pred)
cost = tf.reduce_mean(
    -tf.reduce_sum(y * tf.log(softmax_linear + 1e-10), reduction_indices=[1]))

# softmax_linear = tf.nn.log_softmax(pred)
# cost = -tf.reduce_sum(y * softmax_linear)

optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(cost)

correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
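A hedged sketch of the training loop that would typically follow a graph like this; it assumes data exposes a next_batch(batch_size) method, which is not shown in the original snippet.

# Sketch only: data.next_batch is a hypothetical API.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    step = 1
    while step * batch_size < training_iters:
        batch_x, batch_y = data.next_batch(batch_size)
        sess.run(optimizer, feed_dict={x: batch_x, y: batch_y, keep_prob: dropout})
        if step % display_step == 0:
            loss_val, acc = sess.run([cost, accuracy],
                                     feed_dict={x: batch_x, y: batch_y, keep_prob: 1.0})
            print('step %d, loss %.4f, accuracy %.4f' % (step, loss_val, acc))
        step += 1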
Example #8
num_classes = 2
# Layers that will be retrained
train_layers = ['fc8', 'fc7', 'fc6']

# Read local images to build the training set; returns image_batch and label_batch
train, train_label = input_data.get_files(train_dir)
x, y = input_data.get_batch(train, train_label, image_size, image_size, batch_size, 2000)

# TF placeholders for the graph inputs and outputs; each step reads a small batch of data to use as the current training data for backpropagation
# x =tf.placeholder(tf.float32,[batch_size,227,227,3],name='x-input')
# y =tf.placeholder(tf.float32,[batch_size,num_classes])
keep_prob = tf.placeholder(tf.float32)

# Define the network structure and initialize the model
# model = AlexNet(x, keep_prob, num_classes, train_layers)
score = inference(x)
# Get the output of the network's forward pass
# score = model.fc8

# Get the list of trainable variables for the layers to be retrained
var_list = [v for v in tf.trainable_variables() if v.name.split('/')[0] in train_layers]

# Define the loss function and compute the loss
with tf.name_scope("cross_ent"):
    loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=score, labels=y))

# Define the backpropagation (optimization) algorithm
with tf.name_scope("train"):
    # Get the gradients of the loss w.r.t. the variables to be trained
    gradients = tf.gradients(loss, var_list)
    gradients = list(zip(gradients, var_list))
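The snippet stops after pairing gradients with variables. A plausible continuation inside the same name scope; the optimizer choice and learning rate below are assumptions, not part of the original:

    # Apply the (gradient, variable) pairs; optimizer and learning rate are assumed.
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)
    train_op = optimizer.apply_gradients(grads_and_vars=gradients)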
Example #9
'''
MNIST digit recognition task.
1. Read the MNIST data
2. Design the AlexNet
3. Run training
4. Collect training info
    - Validation accuracy every 5 steps (batch normalization)
    - Final accuracy
    - Names and groups for TensorBoard
'''
import tensorflow as tf
import alexnet
from tensorflow.examples.tutorials.mnist import input_data



class MNISTRecognition:
    def __init__(self):
        return

    def inference(self, input):
        return



if __name__ == '__main__':
    mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
    alexnet.inference()
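The file above is only a skeleton, and the final alexnet.inference() call has no arguments. A minimal sketch of how the training part described in the docstring could be filled in; the inference signature, the loss, and the optimizer below are all assumptions:

# Sketch only: alexnet.inference(images) is an assumed signature.
x = tf.placeholder(tf.float32, [None, 784])
y = tf.placeholder(tf.float32, [None, 10])
images = tf.reshape(x, [-1, 28, 28, 1])
logits = alexnet.inference(images)
loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(labels=y, logits=logits))
train_op = tf.train.AdamOptimizer(1e-4).minimize(loss)
accuracy = tf.reduce_mean(
    tf.cast(tf.equal(tf.argmax(logits, 1), tf.argmax(y, 1)), tf.float32))

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for step in range(100):
        batch_x, batch_y = mnist.train.next_batch(64)
        sess.run(train_op, feed_dict={x: batch_x, y: batch_y})
        if step % 5 == 0:   # validation accuracy every 5 steps
            val_acc = sess.run(accuracy, feed_dict={x: mnist.validation.images,
                                                    y: mnist.validation.labels})
            print('step %d, validation accuracy %.4f' % (step, val_acc))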