Example #1
def train(is_ft=False):
    with tf.Graph().as_default():
        with tf.variable_scope("model") as scope:
            root_path = "tfData/part"
            # shards part1.tfrecords .. part9.tfrecords
            train_queue = [root_path + str(part_index) + '.tfrecords'
                           for part_index in range(1, 10)]
            images, label = decode_from_tfrecords(train_queue, batch_size,
                                                  image_height, image_width)
            images = tf.py_func(cv_resize, [images, image_height, image_width],
                                tf.float32)
            images = tf.reshape(images,
                                [batch_size, image_height, image_width, 1])
            logits = inference(images) + images  # residual: net predicts a correction
            logits = tf.clip_by_value(logits, 0, 255)  # keep output in valid pixel range
            loss = tf.losses.mean_squared_error(labels=label, predictions=logits)
            reg_loss = tf.add_n(tf.losses.get_regularization_losses())
            total_loss = loss  # note: reg_loss is not added into the objective here

            opt = tf.train.AdamOptimizer(1e-4)
            global_step = tf.Variable(0, name='global_step', trainable=False)
            train_op = slim.learning.create_train_op(total_loss,
                                                     opt,
                                                     global_step=global_step)
            # slim's create_train_op already wires in tf.GraphKeys.UPDATE_OPS
            # by default, so this block is redundant but harmless
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            if update_ops:
                updates = tf.group(*update_ops)
                total_loss = control_flow_ops.with_dependencies([updates],
                                                                total_loss)

            saver = tf.train.Saver(tf.global_variables())
            init = tf.global_variables_initializer()

            sess = tf.Session(config=tf.ConfigProto(
                log_device_placement=False))
            sess.run(init)

            tf.train.start_queue_runners(sess=sess)

            if is_ft:  # fine-tune: restore the latest checkpoint
                model_file = tf.train.latest_checkpoint('./model')
                saver.restore(sess, model_file)
            tf.logging.set_verbosity(tf.logging.INFO)
            loss_cnt = 0.0

            for step in range(max_iters):
                _, loss_value, l = sess.run([train_op, loss, logits])
                assert not np.isnan(
                    loss_value), 'Model diverged with loss = NaN'
                if step % 10 == 0:
                    print(l[0])

                loss_cnt += loss_value
                if step % 100 == 0:
                    format_str = ('%s: step %d, loss = %.2f')
                    # average over the 100 steps since the last report
                    print(format_str % (datetime.now(), step, loss_cnt / 100.0))
                    loss_cnt = 0.0
                if step % 500 == 0 or (step + 1) == max_iters:
                    checkpoint_path = os.path.join('../model', 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step=step)
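All four examples call a decode_from_tfrecords helper that is not shown. A minimal sketch of what it might look like for this image-to-image setup, assuming each record stores a raw input patch and a raw target patch; the feature keys 'image_raw' and 'label_raw' are hypothetical, not taken from the original code:

import tensorflow as tf

def decode_from_tfrecords(file_list, batch_size, image_height, image_width):
    # queue-based TFRecord input pipeline (TF 1.x style, as in the examples)
    filename_queue = tf.train.string_input_producer(file_list)
    reader = tf.TFRecordReader()
    _, serialized = reader.read(filename_queue)
    features = tf.parse_single_example(serialized, features={
        'image_raw': tf.FixedLenFeature([], tf.string),  # hypothetical key
        'label_raw': tf.FixedLenFeature([], tf.string),  # hypothetical key
    })
    image = tf.reshape(tf.decode_raw(features['image_raw'], tf.uint8),
                       [image_height, image_width, 1])
    label = tf.reshape(tf.decode_raw(features['label_raw'], tf.uint8),
                       [image_height, image_width, 1])
    image = tf.cast(image, tf.float32)
    label = tf.cast(label, tf.float32)
    # shuffle_batch feeds the queue runners started by the training code
    return tf.train.shuffle_batch([image, label], batch_size=batch_size,
                                  capacity=1000 + 3 * batch_size,
                                  min_after_dequeue=1000)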
Example #2
def train(is_ft=False):
    with tf.Graph().as_default():
        with tf.variable_scope("model") as scope:
            train_queue = ["train_data.tfrecords"]
            images, labels = decode_from_tfrecords(train_queue, 128)
            logits = tiny_darknet(images)
            # global average pooling over the spatial dimensions;
            # sparse_softmax_cross_entropy_with_logits applies the softmax
            # itself, so the logits must not be softmaxed beforehand
            logits = tf.reduce_mean(logits, [1, 2])
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels,
                                                                  logits=logits)
            reg_loss = tf.add_n(tf.losses.get_regularization_losses())
            total_loss = tf.reduce_mean(loss) + reg_loss
            opt = tf.train.MomentumOptimizer(0.01, 0.9)
            global_step = tf.Variable(0, name='global_step', trainable=False)

            train_op = slim.learning.create_train_op(total_loss, opt, global_step=global_step)


            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            if update_ops:
                updates = tf.group(*update_ops)
                total_loss = control_flow_ops.with_dependencies([updates], total_loss)

            saver = tf.train.Saver(tf.global_variables())
            init = tf.global_variables_initializer()
            sess = tf.Session()
            sess.run(init)

            tf.train.start_queue_runners(sess=sess)

            if is_ft:  # fine-tune: restore the latest checkpoint
                model_file = tf.train.latest_checkpoint('/root/JZ_test/darknet0_model')
                saver.restore(sess, model_file)
            tf.logging.set_verbosity(tf.logging.INFO)
            loss_cnt = 0.0
            for step in range(max_iters):
                _, loss_value = sess.run([train_op, total_loss])
                assert not np.isnan(loss_value), 'Model diverged with loss = NaN'
                loss_cnt += loss_value
                if step % 10 == 0:
                    format_str = ('%s: step %d, loss = %.2f')
                    if step == 0:
                        avg_loss_cnt = loss_cnt
                    else:
                        avg_loss_cnt = loss_cnt / 10.0
                    print(format_str % (datetime.now(), step, avg_loss_cnt))
                    loss_cnt = 0.0
                if step % 200 == 0 or (step + 1) == max_iters:
                    checkpoint_path = os.path.join('/root/JZ_test/darknet0_model',
                                                   'model.ckpt')  # save model path
                    saver.save(sess, checkpoint_path, global_step=step)
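Examples #2-#4 assume a tiny_darknet network whose output keeps its spatial dimensions, since the training code reduces over axes [1, 2] as global average pooling. A minimal slim-based sketch of such a network; the layer widths, the weight-decay constant, and num_classes=2 are assumptions, not the original architecture:

import tensorflow as tf
import tensorflow.contrib.slim as slim

def tiny_darknet(images, num_classes=2):
    # the regularizer feeds tf.losses.get_regularization_losses(),
    # which the training loop adds into total_loss
    with slim.arg_scope([slim.conv2d],
                        weights_regularizer=slim.l2_regularizer(4e-5)):
        net = slim.conv2d(images, 16, [3, 3])
        net = slim.max_pool2d(net, [2, 2])
        net = slim.conv2d(net, 32, [3, 3])
        net = slim.max_pool2d(net, [2, 2])
        net = slim.conv2d(net, 64, [3, 3])
        # 1x1 conv to per-location class scores; the caller's
        # tf.reduce_mean(logits, [1, 2]) pools them globally
        net = slim.conv2d(net, num_classes, [1, 1],
                          activation_fn=None, normalizer_fn=None)
    return net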
Example #3
def train(is_ft=False):
    with tf.Graph().as_default():
        with tf.variable_scope("model") as scope:
            train_queue = ["train_lj.tfrecords"]
            images, labels = decode_from_tfrecords(train_queue, 128)
            logits = tiny_darknet(images)
            # global average pooling over the spatial dimensions
            logits = tf.reduce_mean(logits, [1, 2])
            # weighted sigmoid cross-entropy; pos_weight > 1 up-weights
            # the positive class
            loss = tf.nn.weighted_cross_entropy_with_logits(targets=labels,
                                                            logits=logits,
                                                            pos_weight=1.5)
            reg_loss = tf.add_n(tf.losses.get_regularization_losses())
            total_loss = tf.reduce_mean(loss) + reg_loss

            # accuracy is computed on the host and fed back in for the summary
            thre = 0.9
            accuracy_ = tf.placeholder(tf.float32)
            logist_acc = tf.nn.sigmoid(logits)
            tf.summary.scalar('total_loss', total_loss)
            tf.summary.scalar('accuracy', accuracy_)

            opt = tf.train.MomentumOptimizer(0.5, 0.9)
            global_step = tf.Variable(0, name='global_step', trainable=False)

            train_op = slim.learning.create_train_op(total_loss,
                                                     opt,
                                                     global_step=global_step)

            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            if update_ops:
                updates = tf.group(*update_ops)
                total_loss = control_flow_ops.with_dependencies([updates],
                                                                total_loss)

            saver = tf.train.Saver(tf.global_variables(), max_to_keep=400)
            init = tf.global_variables_initializer()
            sess = tf.Session(config=tf.ConfigProto(
                log_device_placement=False))
            merged = tf.summary.merge_all()
            train_writer = tf.summary.FileWriter(
                '/root/linjian/darknet_0/models/try-linjian/lj_data/A-0/loss_wd4e5_lj-0.5',
                sess.graph)
            sess.run(init)

            tf.train.start_queue_runners(sess=sess)

            if is_ft:  # fine-tune: restore the most recent checkpoint
                model_file = tf.train.get_checkpoint_state(
                    './models/try-linjian/JZ_data/0_lj-wd8e5-0.01')
                saver.restore(sess, model_file.all_model_checkpoint_paths[-1])

            tf.logging.set_verbosity(tf.logging.INFO)
            loss_cnt = 0.0
            acc_batch = 0.0
            for step in range(max_iters):
                _, loss_value, acc, gt = sess.run(
                    [train_op, total_loss, logist_acc, labels])
                # count correct predictions: score >= thre means class 0
                for i in range(128):
                    if acc[i][0] >= thre:
                        predict = 0
                    else:
                        predict = 1
                    if predict == gt[i]:
                        acc_batch += 1
                assert not np.isnan(
                    loss_value), 'Model diverged with loss = NaN'
                loss_cnt += loss_value
                if step % 10 == 0:
                    format_str = ('%s: step %d, loss = %.4f, acc = %.4f')
                    if step == 0:
                        avg_loss_cnt = loss_cnt
                        accuracy = acc_batch / 128.0  # only one batch so far
                    else:
                        avg_loss_cnt = loss_cnt / 10.0
                        accuracy = acc_batch / 1280.0  # 10 batches of 128
                    summary_str = sess.run(merged,
                                           feed_dict={accuracy_: accuracy})
                    train_writer.add_summary(summary_str, step)
                    print(format_str %
                          (datetime.now(), step, avg_loss_cnt, accuracy))
                    acc_batch = 0.0
                    loss_cnt = 0.0
                if step % 50 == 0 or (step + 1) == max_iters:
                    checkpoint_path = os.path.join(
                        '/root/linjian/darknet_0/models/try-linjian/lj_data/A-0/loss_wd4e5_lj-0.5',
                        'model.ckpt')  # save model path
                    saver.save(sess, checkpoint_path, global_step=step)
            train_writer.close()
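The per-sample accuracy loop in Example #3 can be vectorized on the host. A minimal NumPy sketch, assuming acc has shape [128, 1] (sigmoid scores) and gt has shape [128]:

import numpy as np

def correct_in_batch(acc, gt, thre=0.9):
    # score >= thre is treated as class 0, otherwise class 1,
    # matching the loop in the training code
    predict = (acc[:, 0] < thre).astype(gt.dtype)
    return int(np.sum(predict == gt))

Inside the loop, acc_batch += correct_in_batch(acc, gt, thre) would replace the for loop over the batch.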
Example #4
def train(is_ft=True):
    with tf.Graph().as_default():
        with tf.variable_scope("model") as scope:
            train_queue = ["train_quarter.tfrecords"]
            images, labels = decode_from_tfrecords(train_queue, 128)
            logits = tiny_darknet(images)
            tf.summary.image('input', images)
            # global average pooling over the spatial dimensions
            logits = tf.reduce_mean(logits, [1, 2])
            print(logits.get_shape().as_list())
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=labels, logits=logits)
            reg_loss = tf.add_n(tf.losses.get_regularization_losses())
            total_loss = tf.reduce_mean(loss) + reg_loss
            tf.summary.scalar('total_loss', total_loss)

            opt = tf.train.MomentumOptimizer(0.01, 0.9)
            global_step = tf.Variable(0, name='global_step', trainable=False)

            train_op = slim.learning.create_train_op(total_loss,
                                                     opt,
                                                     global_step=global_step)

            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            if update_ops:
                updates = tf.group(*update_ops)
                total_loss = control_flow_ops.with_dependencies([updates],
                                                                total_loss)

            saver = tf.train.Saver(tf.global_variables(), max_to_keep=50)
            init = tf.global_variables_initializer()
            sess = tf.Session(config=tf.ConfigProto(
                log_device_placement=False))
            merged = tf.summary.merge_all()
            train_writer = tf.summary.FileWriter(
                '/root/linjian/darknet_0/models/lr0.01_iter30w_qnew/lr0.01',
                sess.graph)
            sess.run(init)

            tf.train.start_queue_runners(sess=sess)

            if is_ft:  # fine-tune: restore the latest checkpoint
                model_file = tf.train.latest_checkpoint(
                    './models/lr0.01_iter30w_qnew')
                saver.restore(sess, model_file)

            tf.logging.set_verbosity(tf.logging.INFO)
            loss_cnt = 0.0
            for step in range(max_iters):
                _, loss_value = sess.run([train_op, total_loss])
                assert not np.isnan(
                    loss_value), 'Model diverged with loss = NaN'
                loss_cnt += loss_value
                if step % 10 == 0:
                    format_str = ('%s: step %d, loss = %.4f')
                    if step == 0:
                        avg_loss_cnt = loss_cnt
                    else:
                        avg_loss_cnt = loss_cnt / 10.0
                    summary_str = sess.run(merged)
                    train_writer.add_summary(summary_str, step)
                    print(format_str % (datetime.now(), step, avg_loss_cnt))
                    loss_cnt = 0.0
                if step % 4000 == 0 or (step + 1) == max_iters:
                    checkpoint_path = os.path.join(
                        '/root/linjian/darknet_0/models/lr0.01_iter30w_qnew/lr0.01',
                        'model.ckpt')  # save model path
                    saver.save(sess, checkpoint_path, global_step=step)
            train_writer.close()
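Each of the classification examples pins the Momentum learning rate to a constant. A decayed schedule has to be built in-graph: a Python if on a tensor does not run per step. A minimal sketch with an exponential decay plus a floor; the constants (0.1, 10200, 0.35, 1e-5) are assumptions, not tuned values:

global_step = tf.Variable(0, name='global_step', trainable=False)
learning_rate = tf.train.exponential_decay(0.1, global_step,
                                           10200, 0.35, staircase=True)
learning_rate = tf.maximum(learning_rate, 1e-5)  # in-graph lower bound
opt = tf.train.MomentumOptimizer(learning_rate, 0.9)

Because create_train_op is given the same global_step, the decay advances automatically with each training step.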