Example #1
def train():
    setup_logging()

    cfg = load_config()
    dataset = create_dataset(cfg)

    batch_spec = get_batch_spec(cfg)
    batch, enqueue_op, placeholders = setup_preloading(batch_spec)

    losses = pose_net(cfg).train(batch)
    total_loss = losses['total_loss']
    print("total_loss:", total_loss)
    for k, t in losses.items():
        tf.summary.scalar(k, t)
    merged_summaries = tf.summary.merge_all()
    print("merged_summaries:", merged_summaries)
    variables_to_restore = slim.get_variables_to_restore(include=["resnet_v1"])
    restorer = tf.train.Saver(variables_to_restore)
    saver = tf.train.Saver(max_to_keep=5)

    sess = tf.Session()

    coord, thread = start_preloading(sess, enqueue_op, dataset, placeholders)

    train_writer = tf.summary.FileWriter(cfg.log_dir, sess.graph)

    learning_rate, train_op = get_optimizer(total_loss, cfg)
    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())

    # Restore variables from disk.
    restorer.restore(sess, cfg.init_weights)

    max_iter = int(cfg.multi_step[-1][1])

    display_iters = cfg.display_iters
    cum_loss = 0.0
    lr_gen = LearningRate(cfg)
    for it in range(max_iter + 1):
        current_lr = lr_gen.get_lr(it)
        [_, loss_val,
         summary] = sess.run([train_op, total_loss, merged_summaries],
                             feed_dict={learning_rate: current_lr})
        cum_loss += loss_val
        train_writer.add_summary(summary, it)
        if it % display_iters == 0:
            average_loss = cum_loss / display_iters
            cum_loss = 0.0
            logging.info("iteration: {} loss: {} lr: {}".format(
                it, "{0:.4f}".format(average_loss), current_lr))

        # Save snapshot
        if (it % cfg.save_iters == 0 and it != 0) or it == max_iter:
            model_name = cfg.snapshot_prefix
            saver.save(sess, model_name, global_step=it)

    sess.close()
    coord.request_stop()
    coord.join([thread])
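Every example on this page calls setup_logging() before building the graph, but none shows its body. A minimal sketch of what such a helper might look like, assuming it merely configures the root logger so the later logging.info calls produce output (the file name and format are illustrative, not taken from the examples):

import logging

def setup_logging():
    # Hypothetical implementation: route INFO and above to a file and to the console.
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s %(levelname)s %(message)s",
        handlers=[
            logging.FileHandler("train.log"),  # illustrative file name
            logging.StreamHandler(),
        ],
    )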
Example #2
def train():
    # Set up logging
    setup_logging()

    # Load the training config file pose_cfg.yaml
    cfg = load_config()
    # Create an instance of the dataset class
    dataset = create_dataset(cfg)

    # Get batch_spec, which describes:
    #   the input image size,
    #   the joint heatmap size,
    #   the joint weight size,
    #   the refinement heatmap size,
    #   the refinement mask size
    batch_spec = get_batch_spec(cfg)
    # Build the enqueue op, placeholders, and batch tensors from batch_spec
    batch, enqueue_op, placeholders = setup_preloading(batch_spec)

    # Build the network and create the loss ops;
    # losses contains several types of loss
    losses = pose_net(cfg).train(batch)
    total_loss = losses['total_loss']

    # Record a scalar summary for each loss (merged below)
    for k, t in losses.items():
        # returns a scalar Tensor of type string which contains a Summary protobuf.
        tf.summary.scalar(k, t)
    # returns a scalar Tensor of type string containing the serialized
    # Summary protocol buffer resulting from the merging
    merged_summaries = tf.summary.merge_all()

    # Get all of the variables under resnet_v1
    variables_to_restore = slim.get_variables_to_restore(include=["resnet_v1"])
    # Create the saver which will be used to restore the resnet_v1 weights.
    restorer = tf.train.Saver(variables_to_restore)
    # Create a saver that checkpoints the training state
    saver = tf.train.Saver(max_to_keep=5)

    sess = tf.Session()

    # Start a thread that reads data and loads it into the queue
    coord, thread = start_preloading(sess, enqueue_op, dataset, placeholders)

    # Open a summary writer for the training run
    train_writer = tf.summary.FileWriter(cfg.log_dir, sess.graph)

    # Get the train op and the learning-rate placeholder
    learning_rate, train_op = get_optimizer(total_loss, cfg)

    # Initialize global and local variables
    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())

    # Restore variables from disk (read the pretrained weights into memory)
    restorer.restore(sess, cfg.init_weights)

    # Read the maximum number of iterations from the config
    max_iter = int(cfg.multi_step[-1][1])

    display_iters = cfg.display_iters
    cum_loss = 0.0

    # Create a learning-rate scheduler instance
    lr_gen = LearningRate(cfg)

    for it in range(max_iter + 1):
        # Compute the learning rate for the current iteration
        current_lr = lr_gen.get_lr(it)
        # Run one training step
        [_, loss_val,
         summary] = sess.run([train_op, total_loss, merged_summaries],
                             feed_dict={learning_rate: current_lr})
        # Accumulate the loss
        cum_loss += loss_val
        # Write the summaries for this iteration
        train_writer.add_summary(summary, it)

        if it % display_iters == 0:  # log the loss every display_iters iterations
            average_loss = cum_loss / display_iters  # average loss over the window
            cum_loss = 0.0
            logging.info("iteration: {} loss: {} lr: {}".format(
                it, "{0:.4f}".format(average_loss), current_lr))

        # Save snapshot
        # A snapshot is saved every cfg.save_iters iterations
        if (it % cfg.save_iters == 0 and it != 0) or it == max_iter:
            # Get the model name
            model_name = cfg.snapshot_prefix
            # Save the model
            saver.save(sess, model_name, global_step=it)

    sess.close()
    # Ask the data-loading thread to stop
    coord.request_stop()
    # Wait for the data-loading thread to finish
    coord.join([thread])
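The LearningRate helper above is only exercised through get_lr(it), and max_iter = int(cfg.multi_step[-1][1]) implies that cfg.multi_step is a list of [learning_rate, until_iteration] pairs. A minimal sketch consistent with that usage (an assumption, not the verbatim class from the project):

class LearningRate:
    def __init__(self, cfg):
        # cfg.multi_step is assumed to be [[learning_rate, until_iteration], ...]
        self.steps = cfg.multi_step

    def get_lr(self, iteration):
        # Return the rate of the first schedule segment that has not ended yet;
        # past the last boundary, keep returning the final rate.
        lr = self.steps[-1][0]
        for rate, until_iteration in self.steps:
            if iteration < until_iteration:
                lr = rate
                break
        return lr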
Example #3
import fnmatch
import logging
import os
import papermill as pm
import shutil
from datetime import datetime, timedelta
from dateutil.relativedelta import relativedelta
from email import encoders
from email.mime.base import MIMEBase
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from util.logging import setup_logging
from flask import Flask, g, current_app
from config import Config

setup_logging(
    os.path.join(os.path.abspath(os.path.dirname(__file__)),
                 'logging.conf'))  # important to do this first

# Notebook Scheduler
# ---------------------------------------
# This script helps with the automated processing of Jupyter Notebooks via
# papermill (https://github.com/nteract/papermill/)

snapshotDir = 'snapshots'


def create_app(config=Config):
    app = Flask(__name__)
    app.config.from_object(config)
    # db.init_app(app)
    app.app_context().push()
    return app
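Per the header comment, this script drives Jupyter notebooks through papermill. The core call such a scheduler revolves around is pm.execute_notebook, which runs an input notebook with injected parameters and writes the executed copy to an output path; the paths and parameter names below are placeholders:

pm.execute_notebook(
    'notebooks/report.ipynb',           # input notebook (placeholder path)
    'snapshots/report-executed.ipynb',  # executed output (placeholder path)
    parameters={'start_date': '2020-01-01', 'end_date': '2020-01-31'},
)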
Example #4
def train():
    setup_logging()

    cfg = load_config()
    dataset = create_dataset(cfg)

    batch_spec = get_batch_spec(cfg)
    batch, enqueue_op, placeholders = setup_preloading(batch_spec)

    losses = pose_net(cfg).train(batch)
    total_loss = losses['total_loss']

    for k, t in losses.items():
        tf.summary.scalar(k, t)
    merged_summaries = tf.summary.merge_all()

    variables_to_restore = slim.get_variables_to_restore(include=["resnet_v1"])
    restorer = tf.train.Saver(variables_to_restore)
    saver = tf.train.Saver(max_to_keep=5)

    sess = tf.Session()

    coord, thread = start_preloading(sess, enqueue_op, dataset, placeholders)

    train_writer = tf.summary.FileWriter(cfg.log_dir, sess.graph)

    learning_rate, train_op = get_optimizer(total_loss, cfg)

    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())

    # Restore variables from disk.
    restorer.restore(sess, cfg.init_weights)

    max_iter = int(cfg.multi_step[-1][1])

    display_iters = cfg.display_iters
    cum_loss = 0.0
    lr_gen = LearningRate(cfg)

    for it in range(max_iter+1):
        current_lr = lr_gen.get_lr(it)
        [_, loss_val, summary] = sess.run([train_op, total_loss, merged_summaries],
                                          feed_dict={learning_rate: current_lr})
        cum_loss += loss_val
        train_writer.add_summary(summary, it)

        if it % display_iters == 0:
            average_loss = cum_loss / display_iters
            cum_loss = 0.0
            logging.info("iteration: {} loss: {} lr: {}"
                         .format(it, "{0:.4f}".format(average_loss), current_lr))

        # Save snapshot
        if (it % cfg.save_iters == 0 and it != 0) or it == max_iter:
            model_name = cfg.snapshot_prefix
            saver.save(sess, model_name, global_step=it)

    sess.close()
    coord.request_stop()
    coord.join([thread])
Example #5
        logging.info('The application terminated successfully')
        return

    try:
        api_table_dict = get_table_dict_for_apis_in_list(
            api_fetch_list, avalanche_incident_list)
    except Exception as e:
        logging.exception(
            'Error fetching API data')
        raise e

    # Set new database connection
    db_manager.engine = create_db_connection()

    try:
        insert_data_for_table_dict(
            api_table_dict, db_manager, if_table_exists_in_database)
    except Exception as e:
        logging.exception(
            'Cannot add API data to database table')
        raise e

    logging.info('The application terminated successfully')


if __name__ == '__main__':
    # Setup application logging
    logging = setup_logging()

    main()
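Unlike the other examples, this one rebinds the name logging to the return value of setup_logging(), so here setup_logging must return an object with info/exception methods, i.e. a configured logger. A minimal sketch of that variant (the logger name and format are assumptions):

import logging as _logging  # aliased because the caller rebinds the name 'logging'

def setup_logging():
    logger = _logging.getLogger('app')  # assumed logger name
    handler = _logging.StreamHandler()
    handler.setFormatter(_logging.Formatter('%(asctime)s %(levelname)s %(message)s'))
    logger.addHandler(handler)
    logger.setLevel(_logging.INFO)
    return logger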
Example #6
def train():
    setup_logging()

    cfg = load_config()

    # load newest snapshot
    snapshots = [fn.split('.')[0] for fn in os.listdir(os.getcwd()) if "index" in fn]
    if len(snapshots) > 0:
        iters = np.array([int(fn.split('-')[1]) for fn in snapshots])
        cfg['init_weights'] = snapshots[iters.argmax()]
        start = iters.max()
    else:
        start = 0

    dataset = create_dataset(cfg)

    batch_spec = get_batch_spec(cfg)
    batch, enqueue_op, placeholders = setup_preloading(batch_spec)

    losses = pose_net(cfg).train(batch)
    total_loss = losses['total_loss']

    for k, t in losses.items():
        tf.summary.scalar(k, t)
    merged_summaries = tf.summary.merge_all()

    if start == 0:
        variables_to_restore = slim.get_variables_to_restore(include=["resnet_v1"])
        restorer = tf.train.Saver(variables_to_restore)
    else:
        restorer = tf.train.Saver()
    saver = tf.train.Saver(max_to_keep=5)

    sess = tf.Session()

    coord, thread = start_preloading(sess, enqueue_op, dataset, placeholders)

    train_writer = tf.summary.FileWriter(cfg.log_dir, sess.graph)

    learning_rate, train_op = get_optimizer(total_loss, cfg)

    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())

    # Restore variables from disk.
    restorer.restore(sess, cfg.init_weights)

    max_iter = int(cfg.multi_step[-1][1])

    display_iters = cfg.display_iters
    cum_loss = 0.0
    lr_gen = LearningRate(cfg, start)

    startTime = time.time()

    for it in range(start, max_iter+1):
        current_lr = lr_gen.get_lr(it)
        [_, loss_val, summary] = sess.run([train_op, total_loss, merged_summaries],
                                          feed_dict={learning_rate: current_lr})
        cum_loss += loss_val
        train_writer.add_summary(summary, it)

        if it % display_iters == 0:
            average_loss = cum_loss / display_iters
            cum_loss = 0.0
            elapsed = timedelta(seconds=(time.time()-startTime))
            logging.info("iteration: {} loss: {} lr: {} time: {}"
                         .format(it, "{0:.4f}".format(average_loss), current_lr, elapsed))

        # Save snapshot
        if (it % cfg.save_iters == 0 and it != start) or it == max_iter:
            model_name = cfg.snapshot_prefix
            saver.save(sess, model_name, global_step=it)

    sess.close()
    coord.request_stop()
    coord.join([thread])
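Example #6 finds the newest snapshot by scanning the working directory for .index files and parsing the iteration numbers out of the names. Assuming the snapshots were written by tf.train.Saver into that same directory (so its checkpoint bookkeeping file exists), TF1's tf.train.latest_checkpoint could replace the manual scan; a sketch of the swap:

ckpt = tf.train.latest_checkpoint(os.getcwd())  # e.g. './snapshot-50000', or None
if ckpt is not None:
    cfg['init_weights'] = ckpt
    # relies on the '<prefix>-<iteration>' naming used by saver.save above
    start = int(ckpt.split('-')[-1])
else:
    start = 0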