def __init__(self,
                 config,
                 dataset):
        """Build the evaluation graph, session, and checkpoint state.

        Args:
            config: experiment configuration; must provide ``train_dir``,
                ``batch_size``, ``data_id``, ``model`` and ``checkpoint_path``.
            dataset: dataset object consumed by ``create_input_ops``.
        """
        self.config = config
        self.train_dir = config.train_dir
        log.info("self.train_dir = %s", self.train_dir)

        # --- input ops ---
        self.batch_size = config.batch_size

        self.dataset = dataset

        check_data_id(dataset, config.data_id)
        # Evaluation pipeline: fixed order (no shuffle) for reproducibility.
        _, self.batch = create_input_ops(dataset, self.batch_size,
                                         data_id=config.data_id,
                                         is_training=False,
                                         shuffle=False)

        # --- create model ---
        # The model class is resolved dynamically from the config string.
        Model = self.get_model_class(config.model)
        log.infov("Using Model class : %s", Model)
        self.model = Model(config)

        self.global_step = tf.contrib.framework.get_or_create_global_step(graph=None)
        # No training here; stepping is a no-op placeholder.
        self.step_op = tf.no_op(name='step_no_op')

        tf.set_random_seed(1234)

        session_config = tf.ConfigProto(
            allow_soft_placement=True,
            gpu_options=tf.GPUOptions(allow_growth=True),
            device_count={'GPU': 1},
        )
        self.session = tf.Session(config=session_config)

        # --- checkpoint and monitoring ---
        self.saver = tf.train.Saver(max_to_keep=100)

        # Fall back to the newest checkpoint in train_dir when no explicit
        # path is given.
        self.checkpoint_path = config.checkpoint_path
        if self.checkpoint_path is None and self.train_dir:
            self.checkpoint_path = tf.train.latest_checkpoint(self.train_dir)

        if self.checkpoint_path is None:
            log.warn("No checkpoint is given. Just random initialization :-)")
            self.session.run(tf.global_variables_initializer())
        else:
            # NOTE(review): only the path is logged here; the actual restore
            # presumably happens later (e.g. in the eval loop) — confirm.
            log.info("Checkpoint path : %s", self.checkpoint_path)

        # Dataset normalization statistics. The path is hard-coded relative
        # to the working directory — TODO confirm it exists at runtime.
        mean_std = np.load('../DatasetCreation/VG/mean_std.npz')
        self.img_mean = mean_std['img_mean']
        self.img_std = mean_std['img_std']
        self.coords_mean = mean_std['coords_mean']
        self.coords_std = mean_std['coords_std']
    def __init__(self, config, dataset, dataset_train):
        """Set up the evaluation graph, session, and checkpoint lookup."""
        self.config = config
        self.train_dir = config.train_dir
        log.info("self.train_dir = %s", self.train_dir)

        # Input pipeline: evaluation uses a deterministic order (no shuffle).
        self.batch_size = config.batch_size
        self.dataset = dataset
        self.dataset_train = dataset_train

        check_data_id(dataset, config.data_id)
        _, self.batch = create_input_ops(
            dataset, self.batch_size,
            data_id=config.data_id,
            is_training=False,
            shuffle=False)

        # All MNIST labels, train split followed by test split.
        (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
        self.total_y = np.concatenate((y_train, y_test))

        # Model plus no-op step bookkeeping (nothing is trained here).
        self.model = Model(config)
        self.global_step = tf.contrib.framework.get_or_create_global_step(graph=None)
        self.step_op = tf.no_op(name='step_no_op')

        tf.set_random_seed(123)

        sess_cfg = tf.ConfigProto(allow_soft_placement=True,
                                  gpu_options=tf.GPUOptions(allow_growth=True),
                                  device_count={'GPU': 1})
        self.session = tf.Session(config=sess_cfg)

        # Checkpoint resolution: explicit path wins, otherwise the newest
        # checkpoint in train_dir, otherwise random initialization.
        self.saver = tf.train.Saver(max_to_keep=100)
        self.checkpoint_path = config.checkpoint_path
        if self.checkpoint_path is None and self.train_dir:
            self.checkpoint_path = tf.train.latest_checkpoint(self.train_dir)
        if self.checkpoint_path is None:
            log.warn("No checkpoint is given. Just random initialization :-)")
            self.session.run(tf.global_variables_initializer())
        else:
            log.info("Checkpoint path : %s", self.checkpoint_path)
# Example #3
    def __init__(self, config, dataset):
        """Build the evaluation graph, session, and checkpoint state.

        Bug fix: the original `if/elif/else` fell through without running
        `tf.global_variables_initializer()` when `train_dir` was set but
        contained no checkpoint (`latest_checkpoint` returns None), leaving
        all variables uninitialized. Restructured into the two-step pattern
        used by the sibling evaluators.

        Args:
            config: experiment configuration (train_dir, batch_size, checkpoint).
            dataset: dataset object consumed by ``create_input_ops``.
        """
        self.config = config
        self.train_dir = config.train_dir
        log.info("self.train_dir = %s", self.train_dir)

        # --- input ops ---
        self.batch_size = config.batch_size

        self.dataset = dataset

        # Deterministic evaluation pipeline (no shuffling).
        _, self.batch = create_input_ops(dataset,
                                         self.batch_size,
                                         is_training=False,
                                         shuffle=False)

        # --- create model ---
        self.model = Model(config, is_train=False)

        self.global_step = tf.contrib.framework.get_or_create_global_step(
            graph=None)
        # No training here; stepping is a no-op placeholder.
        self.step_op = tf.no_op(name='step_no_op')

        # --- vars ---
        all_vars = tf.trainable_variables()
        log.warn("********* var ********** ")
        slim.model_analyzer.analyze_vars(all_vars, print_info=True)

        tf.set_random_seed(123)

        session_config = tf.ConfigProto(
            allow_soft_placement=True,
            gpu_options=tf.GPUOptions(allow_growth=True),
            device_count={'GPU': 1},
        )
        self.session = tf.Session(config=session_config)

        # --- checkpoint and monitoring ---
        self.saver = tf.train.Saver(max_to_keep=100)

        # First resolve the checkpoint path, then decide initialization:
        # latest_checkpoint may legitimately return None for an empty dir.
        self.checkpoint = config.checkpoint
        if self.checkpoint is None and self.train_dir:
            self.checkpoint = tf.train.latest_checkpoint(self.train_dir)
        if self.checkpoint is None:
            log.warn("No checkpoint is given. Just random initialization :-)")
            self.session.run(tf.global_variables_initializer())
        else:
            log.info("Checkpoint path : %s", self.checkpoint)
# Example #4
    def __init__(self, config, model, dataset):
        """Prepare the evaluation session for one cross-validation fold."""
        self.config = config
        self.model = model
        self.train_dir = config.train_dir
        log.info("self.train_dir = %s", self.train_dir)

        # Input pipeline over fold 0 of the dataset list; evaluation keeps
        # a deterministic order (no shuffling).
        self.batch_size = config.batch_size
        self.dataset = dataset[0]

        check_data_id(dataset[0], config.data_id)
        _, self.batch = create_input_ops(dataset[0],
                                         self.batch_size,
                                         data_id=config.data_id,
                                         is_training=False,
                                         shuffle=False)

        # Global step exists only for checkpoint compatibility; no training
        # step is ever executed here.
        self.global_step = tf.contrib.framework.get_or_create_global_step(
            graph=None)
        self.step_op = tf.no_op(name='step_no_op')

        tf.set_random_seed(1234)

        sess_cfg = tf.ConfigProto(allow_soft_placement=True,
                                  gpu_options=tf.GPUOptions(allow_growth=True),
                                  device_count={'GPU': 1})
        self.session = tf.Session(config=sess_cfg)

        # Resolve the checkpoint: explicit config value, else the newest
        # checkpoint under train_dir, else random initialization.
        self.saver = tf.train.Saver(max_to_keep=100)
        self.checkpoint = config.checkpoint
        if self.checkpoint is None and self.train_dir:
            self.checkpoint = tf.train.latest_checkpoint(self.train_dir)
        if self.checkpoint is None:
            log.warn("No checkpoint is given. Just random initialization :-)")
            self.session.run(tf.global_variables_initializer())
        else:
            log.info("Checkpoint path : %s", self.checkpoint)
# Example #5
    def __init__(self, config, dataset):
        """Build the evaluation graph, session, and checkpoint state.

        Bug fix: the original only logged the resolved checkpoint path and
        never ran `tf.global_variables_initializer()` when no checkpoint was
        found, so model variables stayed uninitialized. Now falls back to
        random initialization, matching the other evaluators in this file.

        Args:
            config: experiment configuration (train_dir, batch_size,
                data_id, checkpoint_path).
            dataset: dataset object consumed by ``create_input_ops``.
        """
        self.config = config
        self.train_dir = config.train_dir
        log.info("self.train_dir = %s", self.train_dir)

        # --- input ops ---
        self.batch_size = config.batch_size

        self.dataset = dataset

        check_data_id(dataset, config.data_id)
        # Single-threaded, unshuffled pipeline: deterministic evaluation order.
        _, self.batch = create_input_ops(dataset,
                                         self.batch_size,
                                         data_id=config.data_id,
                                         num_threads=1,
                                         is_training=False,
                                         shuffle=False)

        # --- create model ---
        self.model = Model(config)

        self.global_step = tf.contrib.framework.get_or_create_global_step(
            graph=None)
        # No training here; stepping is a no-op placeholder.
        self.step_op = tf.no_op(name='step_no_op')

        tf.set_random_seed(1234)

        session_config = tf.ConfigProto(
            allow_soft_placement=True,
            gpu_options=tf.GPUOptions(allow_growth=True),
            device_count={'GPU': 1},
        )
        self.session = tf.Session(config=session_config)

        # --- checkpoint and monitoring ---
        self.saver = tf.train.Saver(max_to_keep=1000)

        self.checkpoint_path = config.checkpoint_path
        if self.checkpoint_path is None and self.train_dir:
            self.checkpoint_path = tf.train.latest_checkpoint(self.train_dir)
        if self.checkpoint_path is None:
            log.warn("No checkpoint is given. Just random initialization :-)")
            self.session.run(tf.global_variables_initializer())
        else:
            log.info("Checkpoint path : %s", self.checkpoint_path)
    def __init__(self, config, model, dataset, dataset_test):
        """Build the GAN training graph: inputs, optimizers, supervisor, session.

        Args:
            config: experiment configuration (dataset name, learning rates,
                update_rate, prefix, batch_size, buffer_size).
            model: pre-built model exposing d_loss, d_output_q, fake_image,
                g_output and g_weights.
            dataset: training dataset for ``create_input_ops``.
            dataset_test: held-out dataset for ``create_input_ops``.
        """
        self.config = config
        self.model = model
        hyper_parameter_str = '{}_lr_g_{}_d_{}_update_G{}D{}'.format(
            config.dataset, config.learning_rate_g, config.learning_rate_d,
            config.update_rate, 1
        )
        # Unique run directory: prefix + hyperparameters + timestamp.
        self.train_dir = './train_dir/%s-%s-%s' % (
            config.prefix,
            hyper_parameter_str,
            time.strftime("%Y%m%d-%H%M%S")
        )

        os.makedirs(self.train_dir)
        log.infov("Train Dir: %s", self.train_dir)

        # --- input ops ---
        self.batch_size = config.batch_size
        _, self.batch_train = create_input_ops(
            dataset, self.batch_size, is_training=True)
        _, self.batch_test = create_input_ops(
            dataset_test, self.batch_size, is_training=False)

        # --- optimizer ---
        self.global_step = tf.contrib.framework.get_or_create_global_step(graph=None)

        # --- checkpoint and monitoring ---
        all_var = tf.trainable_variables()

        # Variables are partitioned by name-scope prefix.
        d_var = [v for v in all_var if v.name.startswith('Discriminator')]
        slim.model_analyzer.analyze_vars(d_var, print_info=False)

        g_var = [v for v in all_var if v.name.startswith(('Generator'))]
        slim.model_analyzer.analyze_vars(g_var, print_info=False)

        # NOTE(review): rem_var is computed but never checked here (sibling
        # trainers assert it is empty) — confirm leftovers are acceptable.
        rem_var = (set(all_var) - set(d_var) - set(g_var))

        self.d_optimizer = tf.contrib.layers.optimize_loss(
            loss=self.model.d_loss,
            global_step=self.global_step,
            learning_rate=self.config.learning_rate_d,
            optimizer=tf.train.AdamOptimizer(beta1=0.5),
            clip_gradients=20.0,
            name='d_optimize_loss',
            variables=d_var
        )

        # for policy gradient
        # Generator update chains d(D_q)/d(fake_image) through
        # d(G_output)/d(g_weights) and applies the result with Adam.
        self.action_grads = tf.gradients(self.model.d_output_q, self.model.fake_image)
        self.params_grad = tf.gradients(self.model.g_output, self.model.g_weights, self.action_grads)
        grads = zip(self.params_grad, self.model.g_weights)
        self.g_optimizer = tf.train.AdamOptimizer(self.config.learning_rate_g).apply_gradients(grads)

        self.summary_op = tf.summary.merge_all()

        self.saver = tf.train.Saver(max_to_keep=1000)

        self.summary_writer = tf.summary.FileWriter(self.train_dir)

        # Supervisor handles periodic summary/model saving; saver=None and
        # summary_op=None disable its built-in defaults.
        self.supervisor = tf.train.Supervisor(
            logdir=self.train_dir,
            is_chief=True,
            saver=None,
            summary_op=None,
            summary_writer=self.summary_writer,
            save_summaries_secs=300,
            save_model_secs=600,
            global_step=self.global_step,
        )

        session_config = tf.ConfigProto(
            allow_soft_placement=True,
            gpu_options=tf.GPUOptions(allow_growth=True),
            device_count={'GPU': 1},
        )
        self.session = self.supervisor.prepare_or_wait_for_session(config=session_config)

        # Replay buffer for generated samples (policy-gradient training).
        self.buffer = ReplayBuffer(config.buffer_size)
# Example #7
    def __init__(self, config, dataset, dataset_test):
        """Build the GAN training graph, supervisor, and session.

        Bug fix: the original restored from `self.pretrain_saver`, which is
        never created in this method, raising AttributeError whenever a
        checkpoint was supplied. Restores via `self.saver` instead (as the
        sibling trainer in this file does).

        Args:
            config: experiment configuration (dataset, learning_rate,
                update_rate, prefix, batch_size, lr_weight_decay, checkpoint).
            dataset: training dataset for ``create_input_ops``.
            dataset_test: held-out dataset for ``create_input_ops``.
        """
        self.config = config
        hyper_parameter_str = config.dataset + '_lr_' + str(
            config.learning_rate) + '_update_G' + str(
                config.update_rate) + '_D' + str(1)
        # Unique run directory: prefix + hyperparameters + timestamp.
        self.train_dir = './train_dir/%s-%s-%s' % (
            config.prefix, hyper_parameter_str, time.strftime("%Y%m%d-%H%M%S"))

        if not os.path.exists(self.train_dir):
            os.makedirs(self.train_dir)
        log.infov("Train Dir: %s", self.train_dir)

        # --- input ops ---
        self.batch_size = config.batch_size

        _, self.batch_train = create_input_ops(dataset,
                                               self.batch_size,
                                               is_training=True)
        _, self.batch_test = create_input_ops(dataset_test,
                                              self.batch_size,
                                              is_training=False)

        # --- create model ---
        self.model = Model(config)

        # --- optimizer ---
        self.global_step = tf.contrib.framework.get_or_create_global_step(
            graph=None)
        self.learning_rate = config.learning_rate
        if config.lr_weight_decay:
            # Halve the learning rate every 10k steps when decay is enabled.
            self.learning_rate = tf.train.exponential_decay(
                self.learning_rate,
                global_step=self.global_step,
                decay_steps=10000,
                decay_rate=0.5,
                staircase=True,
                name='decaying_learning_rate')

        self.check_op = tf.no_op()

        # --- checkpoint and monitoring ---
        all_vars = tf.trainable_variables()

        # Partition trainable variables by name-scope prefix.
        d_var = [v for v in all_vars if v.name.startswith('Discriminator')]
        log.warn("********* d_var ********** ")
        slim.model_analyzer.analyze_vars(d_var, print_info=True)

        g_var = [v for v in all_vars if v.name.startswith(('Generator'))]
        log.warn("********* g_var ********** ")
        slim.model_analyzer.analyze_vars(g_var, print_info=True)

        # Every trainable variable must belong to G or D.
        rem_var = (set(all_vars) - set(d_var) - set(g_var))
        print([v.name for v in rem_var])
        assert not rem_var

        # Discriminator trains at half the generator's learning rate.
        self.d_optimizer = tf.contrib.layers.optimize_loss(
            loss=self.model.d_loss,
            global_step=self.global_step,
            learning_rate=self.learning_rate * 0.5,
            optimizer=tf.train.AdamOptimizer(beta1=0.5),
            clip_gradients=20.0,
            name='d_optimize_loss',
            variables=d_var)

        self.g_optimizer = tf.contrib.layers.optimize_loss(
            loss=self.model.g_loss,
            global_step=self.global_step,
            learning_rate=self.learning_rate,
            optimizer=tf.train.AdamOptimizer(beta1=0.5),
            clip_gradients=20.0,
            name='g_optimize_loss',
            variables=g_var)

        self.summary_op = tf.summary.merge_all()

        self.saver = tf.train.Saver(max_to_keep=100)
        self.summary_writer = tf.summary.FileWriter(self.train_dir)

        self.checkpoint_secs = 600  # 10 min

        # Supervisor handles periodic summary/model saving; saver=None and
        # summary_op=None disable its built-in defaults.
        self.supervisor = tf.train.Supervisor(
            logdir=self.train_dir,
            is_chief=True,
            saver=None,
            summary_op=None,
            summary_writer=self.summary_writer,
            save_summaries_secs=300,
            save_model_secs=self.checkpoint_secs,
            global_step=self.global_step,
        )

        session_config = tf.ConfigProto(
            allow_soft_placement=True,
            gpu_options=tf.GPUOptions(allow_growth=True),
            device_count={'GPU': 1},
        )
        self.session = self.supervisor.prepare_or_wait_for_session(
            config=session_config)

        # Optionally restore pretrained parameters.
        self.ckpt_path = config.checkpoint
        if self.ckpt_path is not None:
            log.info("Checkpoint path: %s", self.ckpt_path)
            self.saver.restore(self.session, self.ckpt_path)
            log.info(
                "Loaded the pretrain parameters from the provided checkpoint path"
            )
# Example #8
    def __init__(self, config, model, dataset_train, dataset_train_unlabel, dataset_test):
        """Build the semi-supervised GAN training graph, supervisor, and session.

        Cleanup: removed leftover debug markers (`print("1"*50)` etc.) that
        polluted stdout between pipeline-construction steps.

        Args:
            config: experiment configuration (dataset, learning rates,
                update_rate, prefix, batch_size, checkpoint).
            model: pre-built model exposing d_loss and g_loss.
            dataset_train: labeled training dataset.
            dataset_train_unlabel: unlabeled training dataset.
            dataset_test: held-out dataset.
        """
        self.config = config
        self.model = model
        hyper_parameter_str = '{}_lr_g_{}_d_{}_update_G{}D{}'.format(
            config.dataset, config.learning_rate_g, config.learning_rate_d,
            config.update_rate, 1
        )
        # Unique run directory: prefix + hyperparameters + timestamp.
        self.train_dir = './train_dir/%s-%s-%s' % (
            config.prefix,
            hyper_parameter_str,
            time.strftime("%Y%m%d-%H%M%S")
        )

        os.makedirs(self.train_dir)
        log.infov("Train Dir: %s", self.train_dir)

        # --- input ops ---
        self.batch_size = config.batch_size
        _, self.batch_train = create_input_ops(
            dataset_train, self.batch_size, is_training=True)
        _, self.batch_train_unlabel = create_input_ops(
            dataset_train_unlabel, self.batch_size, is_training=True)
        _, self.batch_test = create_input_ops(
            dataset_test, self.batch_size, is_training=False)

        # --- optimizer ---
        self.global_step = tf.contrib.framework.get_or_create_global_step(graph=None)

        # --- checkpoint and monitoring ---
        all_var = tf.trainable_variables()

        # Partition trainable variables by name-scope prefix.
        d_var = [v for v in all_var if v.name.startswith('Discriminator')]
        log.warn("********* d_var ********** ")
        slim.model_analyzer.analyze_vars(d_var, print_info=True)

        g_var = [v for v in all_var if v.name.startswith(('Generator'))]
        log.warn("********* g_var ********** ")
        slim.model_analyzer.analyze_vars(g_var, print_info=True)

        # Every trainable variable must belong to G or D.
        rem_var = (set(all_var) - set(d_var) - set(g_var))
        print([v.name for v in rem_var])
        assert not rem_var

        self.d_optimizer = tf.contrib.layers.optimize_loss(
            loss=self.model.d_loss,
            global_step=self.global_step,
            learning_rate=self.config.learning_rate_d,
            optimizer=tf.train.AdamOptimizer(beta1=0.5),
            clip_gradients=20.0,
            name='d_optimize_loss',
            variables=d_var
        )

        self.g_optimizer = tf.contrib.layers.optimize_loss(
            loss=self.model.g_loss,
            global_step=self.global_step,
            learning_rate=self.config.learning_rate_g,
            optimizer=tf.train.AdamOptimizer(beta1=0.5),
            clip_gradients=20.0,
            name='g_optimize_loss',
            variables=g_var
        )

        self.summary_op = tf.summary.merge_all()

        self.saver = tf.train.Saver(max_to_keep=1000)
        self.summary_writer = tf.summary.FileWriter(self.train_dir)

        # Supervisor handles periodic summary/model saving; saver=None and
        # summary_op=None disable its built-in defaults.
        self.supervisor = tf.train.Supervisor(
            logdir=self.train_dir,
            is_chief=True,
            saver=None,
            summary_op=None,
            summary_writer=self.summary_writer,
            save_summaries_secs=300,
            save_model_secs=600,
            global_step=self.global_step,
        )

        session_config = tf.ConfigProto(
            allow_soft_placement=True,
            gpu_options=tf.GPUOptions(allow_growth=True),
            device_count={'GPU': 1},
        )
        self.session = self.supervisor.prepare_or_wait_for_session(config=session_config)

        # Optionally restore pretrained parameters.
        self.ckpt_path = config.checkpoint
        if self.ckpt_path is not None:
            log.info("Checkpoint path: %s", self.ckpt_path)
            self.saver.restore(self.session, self.ckpt_path)
            log.info("Loaded the pretrain parameters from the provided checkpoint path")
# Example #9
    def __init__(self, cfg, dataset):
        """Build the VQA evaluation graph: placeholders, accuracy op, session."""
        self.config = cfg
        self.train_dir = cfg.train_dir
        log.info("self.train_dir = %s", self.train_dir)

        # Input pipeline (deterministic order for evaluation).
        self.batch_size = cfg.batch_size
        self.dataset = dataset
        self.batch = create_input_ops(dataset, self.batch_size, shuffle=False)

        # Select the model implementation named in the config.
        if cfg.model == 'baseline':
            from models.baseline import Model
        elif cfg.model == 'rn':
            from models.rn import Model
        elif cfg.model == 'film':
            from models.film import Model
        else:
            raise ValueError(cfg.model)
        log.infov("Using Model class : %s", Model)
        self.model = Model(Q_DIM, NUM_ANS, is_train=False)

        # Feed placeholders: image, question embedding, one-hot answer.
        self.img = tf.placeholder(
            name='img',
            dtype=tf.float32,
            shape=[self.batch_size, cfg.image_size, cfg.image_size, 3],
        )
        self.q = tf.placeholder(name='q', dtype=tf.float32,
                                shape=[cfg.batch_size, Q_DIM])
        self.a = tf.placeholder(name='a', dtype=tf.float32,
                                shape=[cfg.batch_size, NUM_ANS])

        logits = self.model.build(self.img, self.q)

        # Accuracy = fraction of argmax matches; keep softmax for analysis.
        hits = tf.equal(tf.argmax(logits, 1), tf.argmax(self.a, 1))
        self.accuracy = tf.reduce_mean(tf.cast(hits, tf.float32))
        self.all_preds = tf.nn.softmax(logits)

        # Global step exists only for checkpoint compatibility.
        self.global_step = tf.contrib.framework.get_or_create_global_step(
            graph=None)
        self.step_op = tf.no_op(name='step_no_op')

        tf.set_random_seed(1234)

        sess_cfg = tf.ConfigProto(
            gpu_options=tf.GPUOptions(allow_growth=True))
        self.session = tf.Session(config=sess_cfg)

        # Checkpoint: explicit path, else newest in train_dir, else random init.
        self.saver = tf.train.Saver()
        self.checkpoint_path = cfg.checkpoint_path
        if self.checkpoint_path is None and self.train_dir:
            self.checkpoint_path = tf.train.latest_checkpoint(self.train_dir)
        if self.checkpoint_path is None:
            log.warn("No checkpoint is given. Just random initialization :-)")
            self.session.run(tf.global_variables_initializer())
        else:
            log.info("Checkpoint path : %s", self.checkpoint_path)
# Example #10
    def __init__(self, config, model, dataset, dataset_test):
        """Build the GAN training graph with per-part checkpoint restoring.

        Args:
            config: experiment configuration (dataset, gan_type, batch_size,
                learning rates, update counts, norm/deconv types, Adam betas,
                prefix, and optional checkpoint / checkpoint_g / checkpoint_d).
            model: pre-built model exposing d_loss and g_loss.
            dataset: training dataset for ``create_input_ops``.
            dataset_test: held-out dataset for ``create_input_ops``.
        """
        self.config = config
        self.model = model
        learning_hyperparameter_str = '{}_{}_bs_{}_lr_g_{}_lr_d_{}_update_G{}D{}'.format(
            config.dataset, config.gan_type, config.batch_size,
            config.learning_rate_g, config.learning_rate_d, config.update_g,
            config.update_d)
        model_hyperparameter_str = 'G_deconv_{}_dis_conv_{}_{}_{}_norm'.format(
            config.deconv_type, config.num_dis_conv, config.g_norm_type,
            config.d_norm_type)

        # Unique run directory: prefix + both hyperparameter strings + timestamp.
        self.train_dir = './train_dir/%s-%s-%s' % (
            config.prefix, learning_hyperparameter_str + '_' +
            model_hyperparameter_str, time.strftime("%Y%m%d-%H%M%S"))

        os.makedirs(self.train_dir)
        log.infov("Train Dir: %s", self.train_dir)

        # --- input ops ---
        self.batch_size = config.batch_size

        _, self.batch_train = create_input_ops(dataset,
                                               self.batch_size,
                                               is_training=True)
        _, self.batch_test = create_input_ops(dataset_test,
                                              self.batch_size,
                                              is_training=False)

        # --- optimizer ---
        self.global_step = tf.contrib.framework.get_or_create_global_step(
            graph=None)

        # --- checkpoint and monitoring ---
        all_var = tf.trainable_variables()

        # Partition trainable variables by name-scope prefix.
        d_var = [v for v in all_var if v.name.startswith('Discriminator')]
        log.warn("********* d_var ********** ")
        slim.model_analyzer.analyze_vars(d_var, print_info=True)

        g_var = [v for v in all_var if v.name.startswith(('Generator'))]
        log.warn("********* g_var ********** ")
        slim.model_analyzer.analyze_vars(g_var, print_info=True)

        # Every trainable variable must belong to G or D.
        rem_var = (set(all_var) - set(d_var) - set(g_var))
        print([v.name for v in rem_var])
        assert not rem_var

        # Only the generator update advances global_step; the discriminator
        # update below omits it — presumably so the step counts G updates
        # only (NOTE(review): confirm this is intentional).
        self.g_optimizer = tf.train.AdamOptimizer(
            self.config.learning_rate_g,
            beta1=self.config.adam_beta1,
            beta2=self.config.adam_beta2).minimize(
                self.model.g_loss,
                var_list=g_var,
                name='g_optimize_loss',
                global_step=self.global_step)

        self.d_optimizer = tf.train.AdamOptimizer(
            self.config.learning_rate_d,
            beta1=self.config.adam_beta1,
            beta2=self.config.adam_beta2).minimize(self.model.d_loss,
                                                   var_list=d_var,
                                                   name='d_optimize_loss')

        self.summary_op = tf.summary.merge_all()

        # Separate savers: one for periodic training checkpoints, plus
        # restore-only savers scoped to all / generator / discriminator vars.
        self.saver = tf.train.Saver(max_to_keep=1000)
        pretrain_saver = tf.train.Saver(var_list=all_var, max_to_keep=1)
        pretrain_saver_g = tf.train.Saver(var_list=g_var, max_to_keep=1)
        pretrain_saver_d = tf.train.Saver(var_list=d_var, max_to_keep=1)
        self.summary_writer = tf.summary.FileWriter(self.train_dir)

        # Supervisor handles periodic summary/model saving; saver=None and
        # summary_op=None disable its built-in defaults.
        self.supervisor = tf.train.Supervisor(
            logdir=self.train_dir,
            is_chief=True,
            saver=None,
            summary_op=None,
            summary_writer=self.summary_writer,
            save_summaries_secs=300,
            save_model_secs=600,
            global_step=self.global_step,
        )

        session_config = tf.ConfigProto(
            allow_soft_placement=True,
            gpu_options=tf.GPUOptions(allow_growth=True),
            device_count={'GPU': 1},
        )
        self.session = self.supervisor.prepare_or_wait_for_session(
            config=session_config)

        def load_checkpoint(ckpt_path, saver, name=None):
            # Restore from ckpt_path with the given saver; silently skipped
            # when no path is configured.
            if ckpt_path is not None:
                log.info("Checkpoint path for {}: {}".format(name, ckpt_path))
                saver.restore(self.session, ckpt_path)
                log.info("Loaded the pretrain parameters " +
                         "from the provided checkpoint path.")

        # Restore G and D individually first, then a full-model checkpoint
        # if provided (the later restore overwrites overlapping vars).
        load_checkpoint(config.checkpoint_g,
                        pretrain_saver_g,
                        name='Generator')
        load_checkpoint(config.checkpoint_d,
                        pretrain_saver_d,
                        name='Discriminator')
        load_checkpoint(config.checkpoint, pretrain_saver, name='All vars')
    def __init__(self, config, dataset_train, dataset_test):
        """Build the classifier training graph, supervisor, and session.

        Bug fixes: the final restore used a bare ``except:`` (which also
        swallows KeyboardInterrupt/SystemExit) — narrowed to ``Exception`` —
        and the error message contained a typo ("cgheckpoint").

        Args:
            config: experiment configuration (dataset, learning_rate,
                lr_weight_decay, prefix, batch_size, checkpoint).
            dataset_train: training dataset for ``create_input_ops``.
            dataset_test: held-out dataset for ``create_input_ops``.
        """
        self.config = config
        hyper_parameter_str = config.dataset + '_lr_' + str(
            config.learning_rate)
        # Unique run directory: prefix + hyperparameters + timestamp.
        self.train_dir = './train_dir/classifier-%s-%s-%s' % (
            config.prefix, hyper_parameter_str, time.strftime("%Y%m%d-%H%M%S"))

        if not os.path.exists(self.train_dir):
            os.makedirs(self.train_dir)
        log.infov("Train Dir: %s", self.train_dir)

        # --- input ops ---
        self.batch_size = config.batch_size

        _, self.batch_train = create_input_ops(dataset_train,
                                               self.batch_size,
                                               is_training=True)
        _, self.batch_test = create_input_ops(dataset_test,
                                              self.batch_size,
                                              is_training=False)

        # --- create model ---
        self.model = Model(config)

        # --- optimizer ---
        self.global_step = tf.contrib.framework.get_or_create_global_step(
            graph=None)
        self.learning_rate = config.learning_rate
        if config.lr_weight_decay:
            # Halve the learning rate every 10k steps when decay is enabled.
            self.learning_rate = tf.train.exponential_decay(
                config.learning_rate,
                global_step=self.global_step,
                decay_steps=10000,
                decay_rate=0.5,
                staircase=True,
                name='decaying_learning_rate')

        self.check_op = tf.no_op()

        # --- checkpoint and monitoring ---
        all_vars = tf.global_variables()

        # Split variables: the frozen pretrained 'Counter' scope vs. the
        # classifier variables that this trainer actually optimizes.
        counter_var = [v for v in all_vars if v.name.startswith('Counter')]
        log.warn("********* Counter var ********** ")
        slim.model_analyzer.analyze_vars(counter_var, print_info=True)

        classifier_var = [
            v for v in all_vars if not v.name.startswith('Counter')
        ]
        log.warn("********* Classifier var ********** ")
        slim.model_analyzer.analyze_vars(classifier_var, print_info=True)

        self.optimizer = tf.contrib.layers.optimize_loss(
            loss=self.model.loss,
            global_step=self.global_step,
            learning_rate=self.learning_rate,
            optimizer=tf.train.AdamOptimizer,
            clip_gradients=20.0,
            variables=classifier_var,
            name='optimizer_loss',
        )

        self.summary_op = tf.summary.merge_all()

        # pretrain_saver restores only the Counter scope from the provided
        # checkpoint; saver persists the full model during training.
        self.pretrain_saver = tf.train.Saver(var_list=counter_var,
                                             max_to_keep=1)
        self.saver = tf.train.Saver(max_to_keep=1000)
        self.summary_writer = tf.summary.FileWriter(self.train_dir)

        self.checkpoint_secs = 600  # 10 min

        # Supervisor handles periodic summary/model saving; saver=None and
        # summary_op=None disable its built-in defaults.
        self.supervisor = tf.train.Supervisor(
            logdir=self.train_dir,
            is_chief=True,
            saver=None,
            summary_op=None,
            summary_writer=self.summary_writer,
            save_summaries_secs=300,
            save_model_secs=self.checkpoint_secs,
            global_step=self.global_step,
        )

        session_config = tf.ConfigProto(
            allow_soft_placement=True,
            gpu_options=tf.GPUOptions(allow_growth=True),
            device_count={'GPU': 1},
        )
        self.session = self.supervisor.prepare_or_wait_for_session(
            config=session_config)

        # Best-effort restore of the pretrained Counter weights: failure is
        # logged but does not abort training.
        self.ckpt_path = config.checkpoint
        try:
            log.info("Checkpoint path: %s", self.ckpt_path)
            self.pretrain_saver.restore(self.session, self.ckpt_path)
            log.info(
                "Loaded the pretrain parameters from the provided checkpoint path"
            )
        except Exception:
            log.error(
                'Failed to load the pre-trained checkpoint from {}'.format(
                    self.ckpt_path))
# Example #12
    def __init__(self, config, model, dataset):
        """Build the training graph, supervisor, and session for this model.

        Cleanup: removed a no-op triple-quoted string literal that carried
        commented-out alternative optimizers (Adadelta / SGD) — dead code
        retained as a bare expression statement.

        Args:
            config: experiment configuration (dataset_path, loss_type,
                batch_size, learning_rate, Adam betas, network hyperparameters,
                prefix, load_pretrained, checkpoint).
            model: pre-built model exposing ``loss``.
            dataset: training dataset for ``create_input_ops``.
        """
        self.config = config
        self.model = model
        learning_hyperparameter_str = '{}_{}_bs_{}_lr_{}'.format(
            os.path.basename(config.dataset_path), config.loss_type,
            config.batch_size, config.learning_rate)
        model_hyperparameter_str = '{}_conv_{}_fc_{}_norm_act_{}'.format(
            config.num_d_conv, config.num_d_fc, config.d_norm_type,
            config.output_act_fn)

        # Unique run directory: prefix / hyperparameters + timestamp.
        self.train_dir = './train_dir/%s/%s-%s' % (
            config.prefix, learning_hyperparameter_str + '_' +
            model_hyperparameter_str, time.strftime("%Y%m%d-%H%M%S"))

        os.makedirs(self.train_dir)
        log.infov("Train Dir: %s", self.train_dir)

        # --- input ops ---
        self.batch_size = config.batch_size

        _, self.batch_train = create_input_ops(dataset,
                                               self.batch_size,
                                               is_training=True,
                                               shuffle=False)

        # --- optimizer ---
        self.global_step = tf.contrib.framework.get_or_create_global_step(
            graph=None)

        # --- checkpoint and monitoring ---
        all_var = tf.trainable_variables()

        # When a pretrained (VGG) backbone is loaded, freeze it by excluding
        # its 'block*' variables from the optimized variable list.
        if self.config.load_pretrained:
            new_all_var = [v for v in all_var if 'block' not in v.name]
        else:
            new_all_var = all_var

        self.optimizer = tf.train.AdamOptimizer(
            self.config.learning_rate,
            beta1=self.config.adam_beta1,
            beta2=self.config.adam_beta2).minimize(
                self.model.loss,
                var_list=new_all_var,
                name='optimize_loss',
                global_step=self.global_step)

        self.summary_op = tf.summary.merge_all()

        # saver persists training checkpoints; pretrain_saver restores the
        # full variable set from config.checkpoint below.
        self.saver = tf.train.Saver(max_to_keep=1000)
        pretrain_saver = tf.train.Saver(var_list=all_var, max_to_keep=1)
        self.summary_writer = tf.summary.FileWriter(self.train_dir)

        # Supervisor handles periodic summary/model saving; saver=None and
        # summary_op=None disable its built-in defaults.
        self.supervisor = tf.train.Supervisor(
            logdir=self.train_dir,
            is_chief=True,
            saver=None,
            summary_op=None,
            summary_writer=self.summary_writer,
            save_summaries_secs=300,
            save_model_secs=600,
            global_step=self.global_step,
        )

        session_config = tf.ConfigProto(
            allow_soft_placement=True,
            gpu_options=tf.GPUOptions(allow_growth=True),
            device_count={'GPU': 1},
        )
        self.session = self.supervisor.prepare_or_wait_for_session(
            config=session_config)

        def load_checkpoint(ckpt_path, saver, name=None):
            # Restore from ckpt_path with the given saver; skipped when no
            # path is configured.
            if ckpt_path is not None:
                log.info("Checkpoint path for {}: {}".format(name, ckpt_path))
                saver.restore(self.session, ckpt_path)
                log.info("Loaded the pretrain parameters " +
                         "from the provided checkpoint path.")

        load_checkpoint(config.checkpoint, pretrain_saver, name='All vars')
Example #13
0
    def __init__(self, cfg, train_dataset, val_dataset):
        """Build the full VQA training graph, session and summary writers.

        Wires input ops, placeholders (image, question, answer), the model's
        logits + cross-entropy loss, accuracy, an Adam optimizer with
        gradient clipping, and restores a checkpoint if one is configured.

        Args:
            cfg: experiment configuration; reads dataset_path, learning_rate,
                model, prefix, batch_size, image_size, lr_weight_decay and
                checkpoint.
            train_dataset: training split consumed by `create_input_ops`.
            val_dataset: validation split consumed by `create_input_ops`.
        """
        self.config = cfg
        # Fold dataset name + learning rate into the run directory name so
        # concurrent experiments land in distinct, self-describing folders.
        dataset_base = os.path.basename(os.path.normpath(cfg.dataset_path))
        hyper_parameter_str = dataset_base + '_lr_' + str(cfg.learning_rate)
        self.train_dir = './train_dir/%s-%s/%s/%s' % (
            cfg.model, cfg.prefix, hyper_parameter_str,
            time.strftime("%Y%m%d-%H%M%S"))

        if not os.path.exists(self.train_dir):
            os.makedirs(self.train_dir)
        log.infov("Train Dir: %s", self.train_dir)

        # --- input ops ---
        # NOTE(review): unlike the sibling trainers, the return value is used
        # directly (not unpacked as `_, batch`) — presumably this
        # create_input_ops variant returns the batch op itself; confirm
        # against its definition.
        self.batch_size = cfg.batch_size
        self.train_batch = create_input_ops(train_dataset, self.batch_size)
        self.val_batch = create_input_ops(val_dataset, self.batch_size)

        # --- create model ---
        # Model class is selected by name; unknown names fail fast.
        if cfg.model == 'baseline':
            from models.baseline import Model
        elif cfg.model == 'rn':
            from models.rn import Model
        elif cfg.model == 'film':
            from models.film import Model
        else:
            raise ValueError(cfg.model)
        log.infov("Using Model class : %s", Model)
        self.model = Model(Q_DIM, NUM_ANS)

        # define placeholders: (image, question, answer)
        self.img = tf.placeholder(
            name='img',
            dtype=tf.float32,
            shape=[self.batch_size, cfg.image_size, cfg.image_size, 3],
        )
        self.q = tf.placeholder(name='q',
                                dtype=tf.float32,
                                shape=[cfg.batch_size, Q_DIM])
        self.a = tf.placeholder(name='a',
                                dtype=tf.float32,
                                shape=[cfg.batch_size, NUM_ANS])

        # compute logits and cross-entropy loss
        logits = self.model.build(self.img, self.q)
        loss = tf.nn.softmax_cross_entropy_with_logits(logits=logits,
                                                       labels=self.a)
        loss = tf.reduce_mean(loss)

        # Accuracy: fraction of batch where argmax prediction matches label.
        correct_prediction = tf.equal(tf.argmax(logits, 1),
                                      tf.argmax(self.a, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

        self.all_preds = tf.nn.softmax(logits)
        self.loss, self.accuracy = loss, accuracy

        tf.summary.scalar("accuracy", self.accuracy)
        tf.summary.scalar("loss", self.loss)

        # --- optimizer ---
        self.global_step = tf.contrib.framework.get_or_create_global_step(
            graph=None)
        self.learning_rate = cfg.learning_rate
        if cfg.lr_weight_decay:
            # learning rate scheduling (optional)
            self.learning_rate = tf.train.exponential_decay(
                self.learning_rate,
                global_step=self.global_step,
                decay_steps=10000,
                decay_rate=0.95,
                name='decay_lr')
        self.check_op = tf.no_op()
        # Adam optimizer
        self.optimizer = tf.contrib.layers.optimize_loss(
            loss=self.loss,
            global_step=self.global_step,
            learning_rate=self.learning_rate,
            optimizer=tf.train.AdamOptimizer,
            clip_gradients=20.0,
            name='optimizer_loss')

        self.summary_op = tf.summary.merge_all()
        self.saver = tf.train.Saver()
        # Separate writers so train and val curves show as distinct runs.
        self.train_writer = tf.summary.FileWriter(self.train_dir + '/train')
        self.val_writer = tf.summary.FileWriter(self.train_dir + '/val')

        # Supervisor owns queue runners / init; saving is done manually
        # (saver=None) while summaries flush every 100s.
        self.supervisor = tf.train.Supervisor(
            logdir=self.train_dir,
            is_chief=True,
            saver=None,
            summary_op=None,
            summary_writer=self.train_writer,
            save_summaries_secs=100,
            global_step=self.global_step,
        )
        session_config = tf.ConfigProto(gpu_options=tf.GPUOptions(
            allow_growth=True))
        self.session = self.supervisor.prepare_or_wait_for_session(
            config=session_config)

        # Optionally warm-start from a user-supplied checkpoint.
        self.ckpt_path = cfg.checkpoint
        if self.ckpt_path is not None:
            log.info("Checkpoint path: %s", self.ckpt_path)
            self.saver.restore(self.session, self.ckpt_path)
            log.info(
                "Loaded the pretrain parameters from the provided checkpoint path"
            )
    def __init__(self, config, model, dataset, dataset_test):
        """Build the trainer: input queues, Adam optimizer, session, restore.

        Args:
            config: experiment configuration; reads batch_size, learning_rate,
                prefix, dataset, max_steps, ckpt_save_step, log_step,
                test_sample_step, write_summary_step and checkpoint.
            model: model object exposing a `loss` tensor.
            dataset: training split consumed by `create_input_ops`.
            dataset_test: test split consumed by `create_input_ops`.
        """
        self.config = config
        self.model = model
        hyper_parameter_str = 'bs_{}_lr_{}'.format(
            config.batch_size,
            config.learning_rate,
        )

        # Timestamped run directory keyed by prefix/dataset/hyperparameters.
        self.train_dir = './train_dir/%s-%s-%s-%s' % (
            config.prefix, config.dataset, hyper_parameter_str,
            time.strftime("%Y%m%d-%H%M%S"))

        if not os.path.exists(self.train_dir): os.makedirs(self.train_dir)
        log.infov("Train Dir: %s", self.train_dir)

        # --- input ops ---
        self.batch_size = config.batch_size

        _, self.batch_train = create_input_ops(dataset,
                                               self.batch_size,
                                               is_training=True)
        _, self.batch_test = create_input_ops(dataset_test,
                                              self.batch_size,
                                              is_training=False)

        # --- optimizer ---
        self.global_step = tf.contrib.framework.get_or_create_global_step(
            graph=None)
        self.learning_rate = config.learning_rate

        self.check_op = tf.no_op()

        # --- checkpoint and monitoring ---
        all_var = tf.trainable_variables()
        # Print a variable/parameter-count report for sanity checking.
        tf.contrib.slim.model_analyzer.analyze_vars(all_var, print_info=True)

        self.optimizer = tf.train.AdamOptimizer(self.learning_rate).minimize(
            self.model.loss,
            global_step=self.global_step,
            var_list=all_var,
            name='optimizer_loss')

        # Train and test summaries live in separate collections so each can
        # be fetched independently during the respective phase.
        self.train_summary_op = tf.summary.merge_all(key='train')
        self.test_summary_op = tf.summary.merge_all(key='test')

        self.saver = tf.train.Saver(max_to_keep=100)
        # Dedicated saver for warm-starting from `config.checkpoint` below.
        self.pretrain_saver = tf.train.Saver(var_list=all_var, max_to_keep=1)
        self.summary_writer = tf.summary.FileWriter(self.train_dir)
        self.max_steps = self.config.max_steps
        self.ckpt_save_step = self.config.ckpt_save_step
        self.log_step = self.config.log_step
        self.test_sample_step = self.config.test_sample_step
        self.write_summary_step = self.config.write_summary_step

        # Supervisor owns queue runners / init; checkpointing is manual
        # (saver=None) apart from its periodic save_model_secs snapshot.
        self.supervisor = tf.train.Supervisor(
            logdir=self.train_dir,
            is_chief=True,
            saver=None,
            summary_op=None,
            summary_writer=self.summary_writer,
            save_summaries_secs=300,
            save_model_secs=600,
            global_step=self.global_step,
        )

        session_config = tf.ConfigProto(
            allow_soft_placement=True,
            gpu_options=tf.GPUOptions(allow_growth=True),
            device_count={'GPU': 1},
        )
        self.session = self.supervisor.prepare_or_wait_for_session(
            config=session_config)

        # Optionally warm-start all trainable variables from a checkpoint.
        self.ckpt_path = config.checkpoint
        if self.ckpt_path is not None:
            log.info("Checkpoint path: %s", self.ckpt_path)
            self.pretrain_saver.restore(
                self.session,
                self.ckpt_path,
            )
            log.info(
                "Loaded the pretrain parameters from the provided checkpoint path"
            )
Example #15
0
    def __init__(self, config, dataset, dataset_test):
        """Build the trainer: inputs, model, clipped-Adam optimizer, session.

        Restores a checkpoint when ``config.checkpoint`` is provided.

        Args:
            config: experiment configuration; reads dataset_path,
                learning_rate, model, prefix, batch_size, lr_weight_decay
                and checkpoint.
            dataset: training split consumed by ``create_input_ops``.
            dataset_test: test split consumed by ``create_input_ops``.
        """
        self.config = config
        hyper_parameter_str = config.dataset_path + '_lr_' + str(
            config.learning_rate)
        self.train_dir = './train_dir/%s-%s-%s-%s' % (
            config.model, config.prefix, hyper_parameter_str,
            time.strftime("%Y%m%d-%H%M%S"))

        if not os.path.exists(self.train_dir):
            os.makedirs(self.train_dir)
        log.infov("Train Dir: %s", self.train_dir)

        # --- input ops ---
        self.batch_size = config.batch_size

        _, self.batch_train = create_input_ops(dataset,
                                               self.batch_size,
                                               is_training=True)
        _, self.batch_test = create_input_ops(dataset_test,
                                              self.batch_size,
                                              is_training=False)

        # --- create model ---
        Model = self.get_model_class(config.model)
        log.infov("Using Model class : %s", Model)
        self.model = Model(config)

        # --- optimizer ---
        self.global_step = tf.contrib.framework.get_or_create_global_step(
            graph=None)
        self.learning_rate = config.learning_rate
        if config.lr_weight_decay:
            # Optional staircase decay: halve the LR every 10k steps.
            self.learning_rate = tf.train.exponential_decay(
                self.learning_rate,
                global_step=self.global_step,
                decay_steps=10000,
                decay_rate=0.5,
                staircase=True,
                name='decaying_learning_rate')

        self.check_op = tf.no_op()

        self.optimizer = tf.contrib.layers.optimize_loss(
            loss=self.model.loss,
            global_step=self.global_step,
            learning_rate=self.learning_rate,
            optimizer=tf.train.AdamOptimizer,
            clip_gradients=20.0,
            name='optimizer_loss')

        self.summary_op = tf.summary.merge_all()
        try:
            # tfplot is an optional dependency; plot summaries only exist
            # when it is installed.
            import tfplot
            self.plot_summary_op = tf.summary.merge_all(key='plot_summaries')
        except ImportError:
            # Narrowed from a bare `except:` so unrelated errors (e.g. a
            # broken summary collection) are no longer silently swallowed.
            pass

        self.saver = tf.train.Saver(max_to_keep=1)
        self.summary_writer = tf.summary.FileWriter(self.train_dir)

        self.checkpoint_secs = 600  # 10 min

        # Supervisor owns queue runners / init; checkpointing every
        # checkpoint_secs, summaries every 300s.
        self.supervisor = tf.train.Supervisor(
            logdir=self.train_dir,
            is_chief=True,
            saver=None,
            summary_op=None,
            summary_writer=self.summary_writer,
            save_summaries_secs=300,
            save_model_secs=self.checkpoint_secs,
            global_step=self.global_step,
        )

        session_config = tf.ConfigProto(
            allow_soft_placement=True,
            # intra_op_parallelism_threads=1,
            # inter_op_parallelism_threads=1,
            gpu_options=tf.GPUOptions(allow_growth=True),
            device_count={'GPU': 1},
        )
        self.session = self.supervisor.prepare_or_wait_for_session(
            config=session_config)

        # Optionally warm-start from a user-supplied checkpoint.
        self.ckpt_path = config.checkpoint
        if self.ckpt_path is not None:
            log.info("Checkpoint path: %s", self.ckpt_path)
            self.saver.restore(self.session, self.ckpt_path)
            log.info(
                "Loaded the pretrain parameters from the provided checkpoint path"
            )
Example #16
0
    def __init__(self, config, model, dataset, dataset_test):
        """Build the flow/pixel/GAN trainer.

        Creates input queues, per-module optimizers (flow, pixel, pixel-GAN,
        discriminator), per-module savers, a supervised session, and then
        restores each module from its own optional checkpoint.

        Args:
            config: experiment configuration; reads batch_size,
                learning_rate_f/p/d, dataset, prefix, num_input, max_steps,
                ckpt_save_step, log_step, test_sample_step,
                write_summary_step, gan_start_step and the checkpoint[,_f,_p,
                _g,_d] paths.
            model: model exposing flow_loss, pixel_loss, pixel_loss_gan and
                d_loss tensors.
            dataset: training split consumed by `create_input_ops`.
            dataset_test: test split consumed by `create_input_ops`.
        """
        self.config = config
        self.model = model
        hyper_parameter_str = 'bs_{}_lr_flow_{}_pixel_{}_d_{}'.format(
            config.batch_size,
            config.learning_rate_f,
            config.learning_rate_p,
            config.learning_rate_d,
        )

        self.train_dir = './train_dir/%s-%s-%s-num_input-%s-%s' % (
            config.dataset, config.prefix, hyper_parameter_str,
            str(config.num_input), time.strftime("%Y%m%d-%H%M%S"))

        if not os.path.exists(self.train_dir): os.makedirs(self.train_dir)
        log.infov("Train Dir: %s", self.train_dir)

        # --- input ops ---
        self.batch_size = config.batch_size

        _, self.batch_train = create_input_ops(dataset,
                                               self.batch_size,
                                               is_training=True)
        _, self.batch_test = create_input_ops(dataset_test,
                                              self.batch_size,
                                              is_training=False)

        # --- optimizer ---
        self.global_step = tf.contrib.framework.get_or_create_global_step(
            graph=None)
        self.learning_rate_p = config.learning_rate_p
        self.learning_rate_f = config.learning_rate_f
        self.learning_rate_d = config.learning_rate_d

        self.check_op = tf.no_op()

        # --- checkpoint and monitoring ---
        # Partition trainable variables by module, matched on op-name
        # substrings, and print an analysis report for each group.
        all_vars = tf.trainable_variables()

        f_var = [
            v for v in all_vars if 'Flow' in v.op.name or 'flow' in v.op.name
        ]
        log.warn("********* f_var ********** ")
        slim.model_analyzer.analyze_vars(f_var, print_info=True)

        p_var = [
            v for v in all_vars if 'Pixel' in v.op.name or 'pixel' in v.op.name
        ]
        log.warn("********* p_var ********** ")
        slim.model_analyzer.analyze_vars(p_var, print_info=True)

        d_var = [v for v in all_vars if v.op.name.startswith('Discriminator')]
        log.warn("********* d_var ********** ")
        slim.model_analyzer.analyze_vars(d_var, print_info=True)

        # the whole model without the discriminator
        g_var = p_var + f_var

        # NOTE: only the pixel/GAN/discriminator optimizers advance
        # global_step; the flow optimizer deliberately does not.
        self.f_optimizer = tf.train.AdamOptimizer(
            self.learning_rate_f, ).minimize(self.model.flow_loss,
                                             var_list=f_var,
                                             name='optimizer_flow_loss')

        self.p_optimizer = tf.train.AdamOptimizer(
            self.learning_rate_p, ).minimize(self.model.pixel_loss,
                                             global_step=self.global_step,
                                             var_list=p_var,
                                             name='optimizer_pixel_loss')

        self.p_optimizer_gan = tf.train.AdamOptimizer(
            self.learning_rate_p,
            beta1=0.5).minimize(self.model.pixel_loss_gan,
                                global_step=self.global_step,
                                var_list=p_var,
                                name='optimizer_pixel_loss_gan')

        self.d_optimizer = tf.train.AdamOptimizer(
            self.learning_rate_d,
            beta1=0.5).minimize(self.model.d_loss,
                                global_step=self.global_step,
                                var_list=d_var,
                                name='optimizer_discriminator_loss')

        self.train_summary_op = tf.summary.merge_all(key='train')
        self.test_summary_op = tf.summary.merge_all(key='test')

        # One saver per module so each can be warm-started independently.
        self.saver = tf.train.Saver(max_to_keep=100)
        self.pretrain_saver = tf.train.Saver(var_list=all_vars, max_to_keep=1)
        self.pretrain_saver_p = tf.train.Saver(var_list=p_var, max_to_keep=1)
        self.pretrain_saver_f = tf.train.Saver(var_list=f_var, max_to_keep=1)
        self.pretrain_saver_g = tf.train.Saver(var_list=g_var, max_to_keep=1)
        self.pretrain_saver_d = tf.train.Saver(var_list=d_var, max_to_keep=1)
        self.summary_writer = tf.summary.FileWriter(self.train_dir)
        self.max_steps = self.config.max_steps
        self.ckpt_save_step = self.config.ckpt_save_step
        self.log_step = self.config.log_step
        self.test_sample_step = self.config.test_sample_step
        self.write_summary_step = self.config.write_summary_step
        self.gan_start_step = self.config.gan_start_step

        self.checkpoint_secs = 600  # 10 min

        self.supervisor = tf.train.Supervisor(
            logdir=self.train_dir,
            is_chief=True,
            saver=None,
            summary_op=None,
            summary_writer=self.summary_writer,
            save_summaries_secs=300,
            save_model_secs=self.checkpoint_secs,
            global_step=self.global_step,
        )

        session_config = tf.ConfigProto(
            allow_soft_placement=True,
            gpu_options=tf.GPUOptions(allow_growth=True),
            device_count={'GPU': 1},
        )
        self.session = self.supervisor.prepare_or_wait_for_session(
            config=session_config)

        def _maybe_restore(ckpt_path, saver, what):
            """Restore `saver` from `ckpt_path` if a path was provided.

            Factors out the five identical restore stanzas; log messages are
            byte-identical to the originals.
            """
            if ckpt_path is not None:
                log.info("Checkpoint path: %s", ckpt_path)
                saver.restore(self.session, ckpt_path)
                log.info(
                    "Loaded the pretrain {} from the provided checkpoint path"
                    .format(what))

        self.ckpt_path = config.checkpoint
        _maybe_restore(self.ckpt_path, self.pretrain_saver, 'parameters')

        self.ckpt_path_f = config.checkpoint_f
        _maybe_restore(self.ckpt_path_f, self.pretrain_saver_f, 'Flow module')

        self.ckpt_path_p = config.checkpoint_p
        _maybe_restore(self.ckpt_path_p, self.pretrain_saver_p, 'Pixel module')

        self.ckpt_path_g = config.checkpoint_g
        _maybe_restore(self.ckpt_path_g, self.pretrain_saver_g,
                       'Generator (Pixel&Flow) module')

        self.ckpt_path_d = config.checkpoint_d
        _maybe_restore(self.ckpt_path_d, self.pretrain_saver_d,
                       'Discriminator module')
Example #17
0
    def __init__(self,
                 config,
                 dataset,
                 dataset_test):
        """Build the trainer: inputs, model, dual optimizers, session, restore.

        Two optimize_loss ops are built over the same loss: `optimizer`
        advances global_step while `optimizer_dummy` does not
        (increment_global_step=False), allowing extra update passes without
        moving the step counter.

        Args:
            config: experiment configuration; reads dataset, learning_rate,
                batch_size, norm_type, prefix, no_adjust_learning_rate,
                log_step, test_sample_step, write_summary_step, checkpoint.
            dataset: training split consumed by `create_input_ops`.
            dataset_test: test split consumed by `create_input_ops`.
        """
        self.config = config
        hyper_parameter_str = '{}_lr_{}_bs_{}_norm_type_{}'.format(
            config.dataset, config.learning_rate,
            config.batch_size, config.norm_type
        )
        self.train_dir = './train_dir/{}/{}-{}'.format(
            config.prefix,
            hyper_parameter_str,
            time.strftime("%Y%m%d-%H%M%S")
        )

        if not os.path.exists(self.train_dir): os.makedirs(self.train_dir)
        log.infov("Train Dir: %s", self.train_dir)

        # --- input ops ---
        self.batch_size = config.batch_size

        _, self.batch_train = create_input_ops(dataset, self.batch_size,
                                               is_training=True)
        _, self.batch_test = create_input_ops(dataset_test, self.batch_size,
                                              is_training=False)

        # --- create model ---
        self.model = Model(config)

        # --- optimizer ---
        self.global_step = tf.contrib.framework.get_or_create_global_step(graph=None)
        # NOTE(review): self.learning_rate is copied here, but the scaling
        # below mutates config.learning_rate AFTER this copy — the adjusted
        # value never reaches the optimizers, which read self.learning_rate.
        # Looks unintentional; confirm before relying on the scaling.
        self.learning_rate = config.learning_rate

        self.check_op = tf.no_op()

        all_vars = tf.trainable_variables()
        slim.model_analyzer.analyze_vars(all_vars, print_info=True)

        if not config.no_adjust_learning_rate:
            config.learning_rate = config.learning_rate * config.batch_size

        if not config.dataset == 'ImageNet':
            self.optimizer = tf.contrib.layers.optimize_loss(
                loss=self.model.loss,
                global_step=self.global_step,
                learning_rate=self.learning_rate,
                optimizer=tf.train.AdamOptimizer,
                clip_gradients=20.0,
                name='optimizer_loss'
            )

            # Same update, but does not advance global_step.
            self.optimizer_dummy = tf.contrib.layers.optimize_loss(
                loss=self.model.loss,
                global_step=self.global_step,
                learning_rate=self.learning_rate,
                optimizer=tf.train.AdamOptimizer,
                clip_gradients=20.0,
                increment_global_step=False,
                name='optimizer_loss_dummy'
            )
        else:
            # ImageNet uses SGD+momentum instead of Adam.
            # NOTE(review): this 1e2 scaling also only touches
            # config.learning_rate, not the self.learning_rate the optimizer
            # receives — see note above.
            config.learning_rate = config.learning_rate * 1e2
            self.optimizer = tf.contrib.layers.optimize_loss(
                loss=self.model.loss,
                global_step=self.global_step,
                learning_rate=self.learning_rate,
                optimizer=tf.train.MomentumOptimizer(self.learning_rate, momentum=0.9),
                clip_gradients=20.0,
                name='optimizer_loss'
            )

            self.optimizer_dummy = tf.contrib.layers.optimize_loss(
                loss=self.model.loss,
                global_step=self.global_step,
                learning_rate=self.learning_rate,
                optimizer=tf.train.MomentumOptimizer(self.learning_rate, momentum=0.9),
                clip_gradients=20.0,
                increment_global_step=False,
                name='optimizer_loss_dummy'
            )

        self.train_summary_op = tf.summary.merge_all(key='train')
        self.test_summary_op = tf.summary.merge_all(key='test')

        self.saver = tf.train.Saver(max_to_keep=100)
        # Saver over all trainable variables, used for warm-starting below.
        self.pretrain_saver = tf.train.Saver(var_list=tf.trainable_variables(),
                                             max_to_keep=100)
        self.summary_writer = tf.summary.FileWriter(self.train_dir)
        self.log_step = self.config.log_step
        self.test_sample_step = self.config.test_sample_step
        self.write_summary_step = self.config.write_summary_step

        self.checkpoint_secs = 600  # 10 min

        self.supervisor = tf.train.Supervisor(
            logdir=self.train_dir,
            is_chief=True,
            saver=None,
            summary_op=None,
            summary_writer=self.summary_writer,
            save_summaries_secs=300,
            save_model_secs=self.checkpoint_secs,
            global_step=self.global_step,
        )

        session_config = tf.ConfigProto(
            allow_soft_placement=True,
            gpu_options=tf.GPUOptions(allow_growth=True),
            device_count={'GPU': 1},
            log_device_placement=True
        )
        self.session = self.supervisor.prepare_or_wait_for_session(config=session_config)

        # Optional warm start; a directory is resolved to its latest
        # checkpoint, and a failed restore is logged but non-fatal.
        self.ckpt_path = config.checkpoint
        if self.ckpt_path is not None:
            if os.path.isdir(self.ckpt_path):
                self.ckpt_path = tf.train.latest_checkpoint(self.ckpt_path) #os.path.join(self.ckpt_path, "checkpoint")
            log.info("Checkpoint path: %s", self.ckpt_path)
            try:
                self.pretrain_saver.restore(self.session, self.ckpt_path)
                log.info("Loaded the pretrain parameters from the provided checkpoint path")
            except Exception as e:
                log.info(e)
                log.info("Couldn't load pretained model!!!")
Example #18
0
    def __init__(self, config, dataset_train, dataset_test):
        """Build the trainer: inputs, MNIST labels, model, optimizer, session.

        Args:
            config: experiment configuration; reads dataset, learning_rate,
                prefix, batch_size, lr_weight_decay and checkpoint.
            dataset_train: training split consumed by ``create_input_ops``.
            dataset_test: test split consumed by ``create_input_ops``.
        """
        self.config = config
        hyper_parameter_str = config.dataset + '_lr_' + str(
            config.learning_rate)
        self.train_dir = './train_dir/%s-%s-%s' % (
            config.prefix, hyper_parameter_str, time.strftime("%Y%m%d-%H%M%S"))

        if not os.path.exists(self.train_dir):
            os.makedirs(self.train_dir)
        log.infov("Train Dir: %s", self.train_dir)

        # --- input ops ---
        self.batch_size = config.batch_size

        _, self.batch_train = create_input_ops(dataset_train,
                                               self.batch_size,
                                               is_training=True)
        _, self.batch_test = create_input_ops(dataset_test,
                                              self.batch_size,
                                              is_training=False)

        # Only the MNIST labels are needed; the image arrays were previously
        # unpacked into unused locals (x_train/x_test) and are discarded here.
        (_, y_train), (_, y_test) = tf.keras.datasets.mnist.load_data()
        self.total_y = np.concatenate((y_train, y_test))

        # --- create model ---
        self.model = Model(config)

        # --- optimizer ---
        self.global_step = tf.contrib.framework.get_or_create_global_step(
            graph=None)
        self.learning_rate = config.learning_rate
        if config.lr_weight_decay:
            # Optional staircase decay: halve the LR every 10k steps.
            self.learning_rate = tf.train.exponential_decay(
                config.learning_rate,
                global_step=self.global_step,
                decay_steps=10000,
                decay_rate=0.5,
                staircase=True,
                name='decaying_learning_rate')

        self.check_op = tf.no_op()

        # --- checkpoint and monitoring ---
        log.warn("********* var ********** ")
        slim.model_analyzer.analyze_vars(tf.trainable_variables(),
                                         print_info=True)

        self.g_optimizer = tf.contrib.layers.optimize_loss(
            loss=self.model.loss,
            global_step=self.global_step,
            learning_rate=self.learning_rate,
            optimizer=tf.train.AdamOptimizer,
            clip_gradients=20.0,
            name='g_optimizer_loss',
        )

        self.summary_op = tf.summary.merge_all()

        self.saver = tf.train.Saver(max_to_keep=1000)
        self.summary_writer = tf.summary.FileWriter(self.train_dir)

        self.checkpoint_secs = 600  # 10 min

        # Supervisor owns queue runners / init; checkpointing every
        # checkpoint_secs, summaries every 300s.
        self.supervisor = tf.train.Supervisor(
            logdir=self.train_dir,
            is_chief=True,
            saver=None,
            summary_op=None,
            summary_writer=self.summary_writer,
            save_summaries_secs=300,
            save_model_secs=self.checkpoint_secs,
            global_step=self.global_step,
        )

        session_config = tf.ConfigProto(
            allow_soft_placement=True,
            gpu_options=tf.GPUOptions(allow_growth=True),
            device_count={'GPU': 1},
            log_device_placement=True)
        self.session = self.supervisor.prepare_or_wait_for_session(
            config=session_config)

        # Optionally warm-start from a user-supplied checkpoint.
        self.ckpt_path = config.checkpoint
        if self.ckpt_path is not None:
            log.info("Checkpoint path: %s", self.ckpt_path)
            self.saver.restore(self.session, self.ckpt_path)
            log.info(
                "Loaded the pretrain parameters from the provided checkpoint path"
            )