Example #1
def train():
    setup_logging()

    cfg = load_config()
    dataset = create_dataset(cfg)

    batch_spec = get_batch_spec(cfg)
    batch, enqueue_op, placeholders = setup_preloading(batch_spec)

    losses = pose_net(cfg).train(batch)
    total_loss = losses['total_loss']
    print("total_loss:", total_loss)
    for k, t in losses.items():
        tf.summary.scalar(k, t)
    merged_summaries = tf.summary.merge_all()
    print("merged_summaries:", merged_summaries)
    variables_to_restore = slim.get_variables_to_restore(include=["resnet_v1"])
    restorer = tf.train.Saver(variables_to_restore)
    saver = tf.train.Saver(max_to_keep=5)

    sess = tf.Session()

    coord, thread = start_preloading(sess, enqueue_op, dataset, placeholders)

    train_writer = tf.summary.FileWriter(cfg.log_dir, sess.graph)

    learning_rate, train_op = get_optimizer(total_loss, cfg)
    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())

    # Restore variables from disk.
    restorer.restore(sess, cfg.init_weights)

    max_iter = int(cfg.multi_step[-1][1])

    display_iters = cfg.display_iters
    cum_loss = 0.0
    lr_gen = LearningRate(cfg)
    for it in range(max_iter + 1):
        current_lr = lr_gen.get_lr(it)
        [_, loss_val,
         summary] = sess.run([train_op, total_loss, merged_summaries],
                             feed_dict={learning_rate: current_lr})
        cum_loss += loss_val
        train_writer.add_summary(summary, it)
        if it % display_iters == 0:
            average_loss = cum_loss / display_iters
            cum_loss = 0.0
            logging.info("iteration: {} loss: {} lr: {}".format(
                it, "{0:.4f}".format(average_loss), current_lr))

        # Save snapshot
        if (it % cfg.save_iters == 0 and it != 0) or it == max_iter:
            model_name = cfg.snapshot_prefix
            saver.save(sess, model_name, global_step=it)

    sess.close()
    coord.request_stop()
    coord.join([thread])
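
The loop above feeds lr_gen.get_lr(it) into the learning_rate placeholder each step, and max_iter comes from the last entry of cfg.multi_step, which implies a piecewise-constant schedule of (learning_rate, until_iteration) pairs. A minimal sketch of such a LearningRate class, assuming exactly that pair layout (the project's actual implementation may differ):

class LearningRate:
    """Piecewise-constant schedule over (lr, until_iteration) pairs.

    Minimal sketch; assumes cfg.multi_step looks like
    [[0.005, 10000], [0.02, 430000]], matching
    max_iter = int(cfg.multi_step[-1][1]) in the training loop.
    """

    def __init__(self, cfg):
        self.steps = cfg.multi_step
        self.current_step = 0

    def get_lr(self, iteration):
        lr = self.steps[self.current_step][0]
        # Move on to the next (lr, until) pair once its boundary is hit.
        if iteration == self.steps[self.current_step][1]:
            self.current_step += 1
        return lr

With cfg.multi_step = [[0.005, 10000], [0.02, 430000]], this returns 0.005 up to and including iteration 10000 and 0.02 afterwards.
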
Example #2
def train():
    # Set up logging
    setup_logging()

    # Load the training config file pose_cfg.yaml
    cfg = load_config()
    # Create an instance of the dataset class
    dataset = create_dataset(cfg)

    # Get the batch_spec, which describes
    # the input image size,
    # the joint heatmap size,
    # the joint weight size,
    # the refinement heatmap size,
    # and the refinement mask size
    batch_spec = get_batch_spec(cfg)
    # Build the enqueue op, the placeholders, and the batch tensors from batch_spec
    batch, enqueue_op, placeholders = setup_preloading(batch_spec)

    # Build the network and create the loss ops;
    # losses contains several kinds of loss
    losses = pose_net(cfg).train(batch)
    total_loss = losses['total_loss']

    # Add a scalar summary for each loss
    for k, t in losses.items():
        # Returns a scalar Tensor of type string which contains a Summary protobuf.
        tf.summary.scalar(k, t)
    # Returns a scalar Tensor of type string containing the serialized
    # Summary protocol buffer resulting from the merging
    merged_summaries = tf.summary.merge_all()

    # Get all variables under the resnet_v1 scope
    variables_to_restore = slim.get_variables_to_restore(include=["resnet_v1"])
    # Create the saver which will be used to restore the variables.

    # Create a Saver that restores the resnet_v1 weights
    restorer = tf.train.Saver(variables_to_restore)
    # Create a Saver that snapshots the training state
    saver = tf.train.Saver(max_to_keep=5)

    sess = tf.Session()

    # Start a thread that reads data and feeds it into the queue
    coord, thread = start_preloading(sess, enqueue_op, dataset, placeholders)

    # Open a summary writer for the training log
    train_writer = tf.summary.FileWriter(cfg.log_dir, sess.graph)

    # Get the train op and the learning-rate op
    learning_rate, train_op = get_optimizer(total_loss, cfg)

    # Initialize global and local variables
    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())

    # Restore variables from disk, reading the weights from file into memory
    restorer.restore(sess, cfg.init_weights)

    # Read the maximum number of iterations from the config
    max_iter = int(cfg.multi_step[-1][1])

    display_iters = cfg.display_iters
    cum_loss = 0.0

    # Create a learning-rate generator instance
    lr_gen = LearningRate(cfg)

    for it in range(max_iter + 1):
        # Produce a learning rate for the current iteration
        current_lr = lr_gen.get_lr(it)
        # Run one training step
        [_, loss_val,
         summary] = sess.run([train_op, total_loss, merged_summaries],
                             feed_dict={learning_rate: current_lr})
        # Accumulate the loss
        cum_loss += loss_val
        # Write the summary for this iteration
        train_writer.add_summary(summary, it)

        if it % display_iters == 0:  # log the loss every display_iters iterations
            average_loss = cum_loss / display_iters  # average loss
            cum_loss = 0.0
            logging.info("iteration: {} loss: {} lr: {}".format(
                it, "{0:.4f}".format(average_loss), current_lr))

        # Save snapshot
        # Save a snapshot every cfg.save_iters iterations
        if (it % cfg.save_iters == 0 and it != 0) or it == max_iter:
            # Get the model name
            model_name = cfg.snapshot_prefix
            # Save the model
            saver.save(sess, model_name, global_step=it)

    sess.close()
    # Ask the data-loading thread to stop
    coord.request_stop()
    # Wait for the data-loading thread to finish
    coord.join([thread])
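
The comments above describe setup_preloading as building placeholders plus an enqueue op, and start_preloading as spawning a thread that fills the queue. A minimal sketch of that pattern with a tf.FIFOQueue; the queue size, the helper names, and the dataset.next_batch() call are assumptions, not the project's actual API:

import threading

import tensorflow as tf


def setup_preloading_sketch(batch_spec, queue_size=20):
    # One placeholder per entry of batch_spec (name -> shape).
    placeholders = {name: tf.placeholder(tf.float32, shape=shape)
                    for name, shape in batch_spec.items()}
    names = list(placeholders.keys())
    q = tf.FIFOQueue(queue_size, [tf.float32] * len(names))
    enqueue_op = q.enqueue([placeholders[name] for name in names])
    dequeued = q.dequeue()
    # Re-attach names and static shapes to the dequeued tensors.
    batch = {}
    for i, name in enumerate(names):
        batch[name] = dequeued[i]
        batch[name].set_shape(batch_spec[name])
    return batch, enqueue_op, placeholders


def start_preloading_sketch(sess, enqueue_op, dataset, placeholders):
    coord = tf.train.Coordinator()

    def load_and_enqueue():
        while not coord.should_stop():
            data = dataset.next_batch()  # assumed dataset API
            feed = {placeholders[name]: data[name] for name in placeholders}
            sess.run(enqueue_op, feed_dict=feed)

    thread = threading.Thread(target=load_and_enqueue, daemon=True)
    thread.start()
    return coord, thread
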
Example #3
def train():
    setup_logging()

    cfg = load_config()
    dataset = create_dataset(cfg)

    batch_spec = get_batch_spec(cfg)
    batch, enqueue_op, placeholders = setup_preloading(batch_spec)

    losses = pose_net(cfg).train(batch)
    total_loss = losses['total_loss']

    for k, t in losses.items():
        tf.summary.scalar(k, t)
    merged_summaries = tf.summary.merge_all()

    variables_to_restore = slim.get_variables_to_restore(include=["resnet_v1"])
    restorer = tf.train.Saver(variables_to_restore)
    saver = tf.train.Saver(max_to_keep=5)

    sess = tf.Session()

    coord, thread = start_preloading(sess, enqueue_op, dataset, placeholders)

    train_writer = tf.summary.FileWriter(cfg.log_dir, sess.graph)

    learning_rate, train_op = get_optimizer(total_loss, cfg)

    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())

    # Restore variables from disk.
    restorer.restore(sess, cfg.init_weights)

    max_iter = int(cfg.multi_step[-1][1])

    display_iters = cfg.display_iters
    cum_loss = 0.0
    lr_gen = LearningRate(cfg)

    for it in range(max_iter+1):
        current_lr = lr_gen.get_lr(it)
        [_, loss_val, summary] = sess.run([train_op, total_loss, merged_summaries],
                                          feed_dict={learning_rate: current_lr})
        cum_loss += loss_val
        train_writer.add_summary(summary, it)

        if it % display_iters == 0:
            average_loss = cum_loss / display_iters
            cum_loss = 0.0
            logging.info("iteration: {} loss: {} lr: {}"
                         .format(it, "{0:.4f}".format(average_loss), current_lr))

        # Save snapshot
        if (it % cfg.save_iters == 0 and it != 0) or it == max_iter:
            model_name = cfg.snapshot_prefix
            saver.save(sess, model_name, global_step=it)

    sess.close()
    coord.request_stop()
    coord.join([thread])
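
In each of these examples, get_optimizer returns both the train op and a learning_rate tensor that the loop feeds through feed_dict, so the learning rate is evidently a placeholder rather than a graph constant. A minimal sketch under that assumption; the SGD-with-momentum default and the cfg.optimizer switch are guesses:

import tensorflow as tf
import tensorflow.contrib.slim as slim


def get_optimizer_sketch(loss_op, cfg):
    # Fed from the training loop every step, so the multi-step
    # schedule works without rebuilding the graph.
    learning_rate = tf.placeholder(tf.float32, shape=[])
    if getattr(cfg, 'optimizer', 'sgd') == 'adam':
        optimizer = tf.train.AdamOptimizer(learning_rate)
    else:
        optimizer = tf.train.MomentumOptimizer(learning_rate, momentum=0.9)
    train_op = slim.learning.create_train_op(loss_op, optimizer)
    return learning_rate, train_op
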
Example #4
def train(cfg):
#    setup_logging()

    cfg = edict(cfg.__dict__)
    cfg = config.convert_to_deepcut(cfg)

    dirname = os.path.dirname(__file__)
    init_weights = os.path.join(dirname, 'models/resnet_v1_50.ckpt')

    if not os.path.exists(init_weights):
        # Download and save the pretrained resnet weights.
        logging.info('Downloading pretrained resnet 50 weights ...')
        urllib.urlretrieve(
            'http://download.tensorflow.org/models/resnet_v1_50_2016_08_28.tar.gz',
            os.path.join(dirname, 'models', 'resnet_v1_50_2016_08_28.tar.gz'))
        tar = tarfile.open(os.path.join(dirname, 'models', 'resnet_v1_50_2016_08_28.tar.gz'))
        tar.extractall(path=os.path.join(dirname, 'models'))
        tar.close()
        logging.info('Done downloading pretrained weights')

    db_file_name = os.path.join(cfg.cachedir, 'train_data.p')
    dataset = PoseDataset(cfg, db_file_name)
    train_info = {'train_dist': [], 'train_loss': [],
                  'val_dist': [], 'val_loss': [], 'step': []}

    batch_spec = get_batch_spec(cfg)
    batch, enqueue_op, placeholders = setup_preloading(batch_spec)

    net = pose_net(cfg)
    losses = net.train(batch)
    total_loss = losses['total_loss']
    outputs = [net.heads['part_pred'], net.heads['locref']]

    for k, t in losses.items():
        tf.summary.scalar(k, t)

    variables_to_restore = slim.get_variables_to_restore(include=["resnet_v1"])
    restorer = tf.train.Saver(variables_to_restore)
    saver = tf.train.Saver(max_to_keep=50)

    sess = tf.Session()

    coord, thread = start_preloading(sess, enqueue_op, dataset, placeholders)

    learning_rate, train_op = get_optimizer(total_loss, cfg)

    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())

    # Restore variables from disk.
    restorer.restore(sess, init_weights)

    #max_iter = int(cfg.multi_step[-1][1])
    max_iter = int(cfg.dl_steps)
    display_iters = cfg.display_step
    cum_loss = 0.0
    lr_gen = LearningRate(cfg)

    # NOTE: 'name' is assumed to be defined in the enclosing module scope.
    model_name = os.path.join(cfg.cachedir, cfg.expname + '_' + name)
    ckpt_file = os.path.join(cfg.cachedir, cfg.expname + '_' + name + '_ckpt')

    for it in range(max_iter+1):
        current_lr = lr_gen.get_lr(it)
        [_, loss_val] = sess.run([train_op, total_loss],  # merged_summaries],
                                 feed_dict={learning_rate: current_lr})
        cum_loss += loss_val
        # train_writer.add_summary(summary, it)

        if it % display_iters == 0:

            cur_out, batch_out = sess.run([outputs, batch], feed_dict={learning_rate: current_lr})
            scmap, locref = predict.extract_cnn_output(cur_out, cfg)

            # Extract maximum scoring location from the heatmap, assume 1 person
            loc_pred = predict.argmax_pose_predict(scmap, locref, cfg.stride)
            loc_in = batch_out[Batch.locs]
            dd = np.sqrt(np.sum(np.square(loc_pred[:, :, :2] - loc_in), axis=-1))
            dd = dd * cfg.dlc_rescale
            average_loss = cum_loss / display_iters
            cum_loss = 0.0
            print("iteration: {} loss: {} dist: {}  lr: {}"
                         .format(it, "{0:.4f}".format(average_loss),
                                 '{0:.2f}'.format(dd.mean()), current_lr))
            train_info['step'].append(it)
            train_info['train_loss'].append(loss_val)
            train_info['val_loss'].append(loss_val)
            train_info['val_dist'].append(dd.mean())
            train_info['train_dist'].append(dd.mean())

        if it % cfg.save_td_step == 0:
            save_td(cfg, train_info)
        # Save snapshot
        if (it % cfg.save_step == 0) or it == max_iter:
            saver.save(sess, model_name, global_step=it,
                       latest_filename=os.path.basename(ckpt_file))

    coord.request_stop()
    coord.join([thread])
    sess.close()
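
Example #4 evaluates during training by decoding scmap and locref with predict.argmax_pose_predict and measuring the distance to the ground-truth loc_in. A minimal sketch of that argmax decoding, assuming scmap has shape (H, W, num_joints) and locref has shape (H, W, num_joints, 2); both shapes are assumptions:

import numpy as np


def argmax_pose_predict_sketch(scmap, locref, stride):
    # Pick the highest-scoring cell per joint, then refine it with locref.
    num_joints = scmap.shape[2]
    pose = np.zeros((num_joints, 3))
    for j in range(num_joints):
        # (row, col) of the maximum score for joint j.
        maxloc = np.unravel_index(np.argmax(scmap[:, :, j]),
                                  scmap[:, :, j].shape)
        offset = locref[maxloc[0], maxloc[1], j]
        # Map the grid cell back to image coordinates (x, y) and add the offset.
        pos = np.array(maxloc[::-1], dtype=np.float64) * stride + 0.5 * stride + offset
        pose[j] = [pos[0], pos[1], scmap[maxloc[0], maxloc[1], j]]
    return pose

In the loop above, dd is then the per-joint Euclidean distance between such a prediction and loc_in, scaled by cfg.dlc_rescale.
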
Example #5
def train():
    setup_logging()

    cfg = load_config()

    # load newest snapshot
    snapshots = [fn.split('.')[0] for fn in os.listdir(os.getcwd()) if "index" in fn]
    if len(snapshots) > 0:
        iters = np.array([int(fn.split('-')[1]) for fn in snapshots])
        cfg['init_weights'] = snapshots[iters.argmax()]
        start = iters.max()
    else:
        start = 0

    dataset = create_dataset(cfg)

    batch_spec = get_batch_spec(cfg)
    batch, enqueue_op, placeholders = setup_preloading(batch_spec)

    losses = pose_net(cfg).train(batch)
    total_loss = losses['total_loss']

    for k, t in losses.items():
        tf.summary.scalar(k, t)
    merged_summaries = tf.summary.merge_all()

    if start == 0:
        variables_to_restore = slim.get_variables_to_restore(include=["resnet_v1"])
        restorer = tf.train.Saver(variables_to_restore)
    else:
        restorer = tf.train.Saver()
    saver = tf.train.Saver(max_to_keep=5)

    sess = tf.Session()

    coord, thread = start_preloading(sess, enqueue_op, dataset, placeholders)

    train_writer = tf.summary.FileWriter(cfg.log_dir, sess.graph)

    learning_rate, train_op = get_optimizer(total_loss, cfg)

    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())

    # Restore variables from disk.
    restorer.restore(sess, cfg.init_weights)

    max_iter = int(cfg.multi_step[-1][1])

    display_iters = cfg.display_iters
    cum_loss = 0.0
    lr_gen = LearningRate(cfg, start)

    startTime = time.time()

    for it in range(start, max_iter+1):
        current_lr = lr_gen.get_lr(it)
        [_, loss_val, summary] = sess.run([train_op, total_loss, merged_summaries],
                                          feed_dict={learning_rate: current_lr})
        cum_loss += loss_val
        train_writer.add_summary(summary, it)

        if it % display_iters == 0:
            average_loss = cum_loss / display_iters
            cum_loss = 0.0
            elapsed = timedelta(seconds=(time.time()-startTime))
            logging.info("iteration: {} loss: {} lr: {} time: {}"
                         .format(it, "{0:.4f}".format(average_loss), current_lr, elapsed))

        # Save snapshot
        if (it % cfg.save_iters == 0 and it != start) or it == max_iter:
            model_name = cfg.snapshot_prefix
            saver.save(sess, model_name, global_step=it)

    sess.close()
    coord.request_stop()
    coord.join([thread])
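
Example #5 resumes training by parsing the .index files that saver.save(sess, model_name, global_step=it) leaves behind. A small illustration of that parsing with hypothetical filenames:

import numpy as np

# Hypothetical directory contents after snapshots at iterations 5000 and 10000.
files = ["checkpoint", "snapshot-5000.index", "snapshot-5000.meta",
         "snapshot-10000.index", "snapshot-10000.meta"]

snapshots = [fn.split('.')[0] for fn in files if "index" in fn]
iters = np.array([int(fn.split('-')[1]) for fn in snapshots])
print(snapshots[iters.argmax()], iters.max())  # snapshot-10000 10000

Because such a snapshot contains every variable, the resume branch restores with a plain tf.train.Saver() instead of the resnet_v1-only restorer used on a fresh start.
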