def test_build_losses(self):
    """Check that ``build_losses`` returns ``None`` once the graph has been built."""
    # Setup
    column_details = [
        {'type': 'value', 'n': 5},
        {'type': 'category', 'n': 2},
    ]
    builder = GraphBuilder({'details': column_details, 'num_columns': 2})

    real_logits = np.zeros((10, 10), dtype=np.float32)
    fake_logits = np.zeros((10, 10), dtype=np.float32)

    graph_inputs = [
        np.full((200, 1), 0.0, dtype=np.float32),
        np.full((200, 5), 1.0, dtype=np.float32),
        np.full((200, 1), 0),
    ]
    # The graph has to exist before the losses can be attached to it.
    with TowerContext('', is_training=False):
        builder.build_graph(*graph_inputs)

    # Run (positional arguments: logits_real, logits_fake, extra_g, l2_norm)
    outcome = builder.build_losses(real_logits, fake_logits, 1.0, 0.001)

    # Check
    assert outcome is None
def _build_gan_trainer(self, input, model):
    """Build the model graph and define one chained generator/discriminator step.

    ``tower_func`` is set because this is a TowerTrainer, and only a
    TowerTrainer supports automatic graph creation for inference during
    training. If inference during training is not needed, calling
    ``model.build_graph`` directly would be enough.
    """
    # Build the graph
    self.tower_func = TowerFuncWrapper(model.build_graph, model.get_input_signature())
    with TowerContext('', is_training=True):
        tensors = input.get_input_tensors()
        self.tower_func(*tensors)
    optimizer = model.get_optimizer()

    # Define the training iteration: by default the discriminator update runs
    # after the generator update, enforced through a control dependency.
    with tf.name_scope('optimize'):
        generator_step = optimizer.minimize(
            model.g_loss, var_list=model.g_vars, name='g_op')
        with tf.control_dependencies([generator_step]):
            discriminator_step = optimizer.minimize(
                model.d_loss, var_list=model.d_vars, name='d_op')
    self.train_op = discriminator_step
def test_discriminator(self):
    """Check the output tensor and selected graph nodes built by ``discriminator``."""
    # Setup
    builder = GraphBuilder({}, num_dis_layers=1)
    vectors = [np.zeros((7, 10)), np.ones((7, 10))]

    # Run
    with TowerContext('', is_training=False):
        output = builder.discriminator(vectors)

    # Check - output properties
    assert output.name == 'dis_fc_top/output:0'
    assert output.shape.as_list() == [7, 1]
    assert output.dtype == tf.float64

    # Check - nodes, as (name, op type, shape, last argument of the checker)
    graph = output.graph
    expected_nodes = [
        ('concat', 'ConcatV2', [7, 20], ['dis_fc0/fc/Reshape']),
        ('dis_fc0/fc/output', 'Identity', [7, 100],
         ['dis_fc0/fc_diversity/Reshape', 'dis_fc0/concat']),
        ('dis_fc0/fc_diversity/output', 'Identity', [7, 100], ['dis_fc0/Reshape']),
        ('dis_fc0/concat', 'ConcatV2', [7, 110], ['dis_fc0/bn/batchnorm/mul']),
    ]
    for node_name, op_type, shape, linked in expected_nodes:
        self.check_operation_nodes(graph, node_name, op_type, tf.float64, shape, linked)
def __init__(self, input, model):
    """Set up the input, build the model graph and chain the G and D updates.

    Args:
        input (InputSource):
        model (GANModelDesc):
    """
    super(GANTrainer, self).__init__()
    assert isinstance(model, GANModelDesc), model
    signature = model.get_inputs_desc()

    # Setup input
    self.register_callback(input.setup(signature))

    # tower_func is set because this is a TowerTrainer, and only a TowerTrainer
    # supports automatic graph creation for inference during training. If
    # inference during training is not needed, calling model.build_graph
    # directly would be enough.
    self.tower_func = TowerFuncWrapper(model.build_graph, signature)
    with TowerContext('', is_training=True):
        self.tower_func(*input.get_input_tensors())
    optimizer = model.get_optimizer()

    # One training iteration: by default, run one d_min after one g_min.
    with tf.name_scope('optimize'):
        generator_step = optimizer.minimize(
            model.g_loss, var_list=model.g_vars, name='g_op')
        with tf.control_dependencies([generator_step]):
            discriminator_step = optimizer.minimize(
                model.d_loss, var_list=model.d_vars, name='d_op')
    self.train_op = discriminator_step
def __init__(self, input, model, d_period=1, g_period=1):
    """Build separate discriminator and generator update ops.

    Args:
        d_period(int): period of each d_opt run
        g_period(int): period of each g_opt run
    """
    super(SeparateGANTrainer, self).__init__()
    self._d_period = int(d_period)
    self._g_period = int(g_period)
    assert min(d_period, g_period) == 1

    # Setup input
    callbacks = input.setup(model.get_inputs_desc())
    self.register_callback(callbacks)

    # Build the graph
    self.tower_func = TowerFuncWrapper(model.build_graph, model.get_inputs_desc())
    # BatchNorm updates are kept internal: hooking them to both train ops
    # would hurt training speed.
    with TowerContext('', is_training=True), \
            argscope(BatchNorm, internal_update=True):
        self.tower_func(*input.get_input_tensors())

    pending_updates = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    n_pending = len(pending_updates)
    if n_pending:
        logger.warn("Found {} ops in UPDATE_OPS collection!".format(n_pending))
        logger.warn("Using SeparateGANTrainer with UPDATE_OPS may hurt your training speed a lot!")

    optimizer = model.get_optimizer()
    with tf.name_scope('optimize'):
        self.d_min = optimizer.minimize(
            model.d_loss, var_list=model.d_vars, name='d_min')
        self.g_min = optimizer.minimize(
            model.g_loss, var_list=model.g_vars, name='g_min')
def __init__(self, input, model):
    """Build the GAN graph, chain the G and D updates, then register the tower function."""
    super(GANTrainer, self).__init__()
    assert isinstance(model, GANModelDesc), model
    signature = model.get_inputs_desc()
    input_callbacks = input.setup(signature)

    # A tower function is needed because this is a TowerTrainer, and only a
    # TowerTrainer supports automatic graph creation for inference during
    # training.
    wrapped_build = TowerFuncWrapper(model.build_graph, signature)
    with TowerContext('', is_training=True):
        wrapped_build(*input.get_input_tensors())
    optimizer = model.get_optimizer()

    # By default, run one d_min after one g_min.
    with tf.name_scope('optimize'):
        generator_step = optimizer.minimize(
            model.g_loss, var_list=model.g_vars, name='g_op')
        with tf.control_dependencies([generator_step]):
            discriminator_step = optimizer.minimize(
                model.d_loss, var_list=model.d_vars, name='d_op')
    self.train_op = discriminator_step

    self.set_tower_func(wrapped_build)
    for callback in input_callbacks:
        self.register_callback(callback)
def __init__(self, input, model, d_period=1, g_period=1):
    """Build separate discriminator and generator update ops.

    Args:
        input: input source; set up with the model's input description.
        model: model providing ``build_graph``, the losses and variable lists.
        d_period: period of the discriminator update (stored as ``int``).
        g_period: period of the generator update (stored as ``int``).

    Raises:
        ValueError: if the smaller of ``d_period`` and ``g_period`` is not 1.
    """
    super(SeparateGANTrainer, self).__init__()
    self._d_period = int(d_period)
    self._g_period = int(g_period)
    if min(d_period, g_period) != 1:
        raise ValueError(
            'The minimum between d_period and g_period must be 1.')

    # Setup input
    callbacks = input.setup(model.get_inputs_desc())
    self.register_callback(callbacks)

    # Build the graph
    self.tower_func = TowerFuncWrapper(model.build_graph, model.get_inputs_desc())
    with TowerContext('', is_training=True):
        self.tower_func(*input.get_input_tensors())

    optimizer = model.get_optimizer()
    with tf.name_scope('optimize'):
        self.d_min = optimizer.minimize(
            model.d_loss, var_list=model.d_vars, name='d_min')
        self.g_min = optimizer.minimize(
            model.g_loss, var_list=model.g_vars, name='g_min')
def __init__(self, model, input_queue):
    """Build the model graph and a chained, gradient-clipped G-then-D update.

    Args:
        model: model providing ``get_inputs_desc``, ``build_graph``,
            ``get_optimizer``, ``g_loss``/``g_vars`` and ``d_loss``/``d_vars``.
        input_queue: input source; set up with the model's input description.
    """
    super().__init__()
    inputs_desc = model.get_inputs_desc()

    # Setup input
    cbs = input_queue.setup(inputs_desc)
    self.register_callback(cbs)

    # Build the graph
    self.tower_func = TowerFuncWrapper(model.build_graph, inputs_desc)
    with TowerContext('', is_training=True):
        self.tower_func(*input_queue.get_input_tensors())

    opt = model.get_optimizer()

    def clipped(grads_and_vars):
        """Clip every gradient to [-5, 5], skipping variables without one."""
        # compute_gradients yields (None, var) for variables the loss does not
        # depend on; tf.clip_by_value would raise on None, so drop those pairs.
        return [(tf.clip_by_value(grad, -5.0, 5.0), var)
                for grad, var in grads_and_vars if grad is not None]

    # Define the training iteration: by default, run one d_min after one g_min.
    with tf.name_scope('optimize'):
        g_min_grad = opt.compute_gradients(model.g_loss, var_list=model.g_vars)
        g_min_train_op = opt.apply_gradients(clipped(g_min_grad), name='g_op')

        with tf.control_dependencies([g_min_train_op]):
            d_min_grad = opt.compute_gradients(model.d_loss, var_list=model.d_vars)
            d_min_train_op = opt.apply_gradients(clipped(d_min_grad), name='d_op')

    self.train_op = d_min_train_op
def __init__(self, input, model, d_period=1, g_period=1):
    """Build separate discriminator and generator update ops.

    Args:
        d_period(int): period of each d_opt run
        g_period(int): period of each g_opt run
    """
    super(SeparateGANTrainer, self).__init__()
    self._d_period = int(d_period)
    self._g_period = int(g_period)
    assert min(d_period, g_period) == 1

    # Setup input
    callbacks = input.setup(model.get_inputs_desc())
    self.register_callback(callbacks)

    # Build the graph
    self.tower_func = TowerFuncWrapper(model.build_graph, model.get_inputs_desc())
    with TowerContext('', is_training=True):
        self.tower_func(*input.get_input_tensors())

    optimizer = model.get_optimizer()
    with tf.name_scope('optimize'):
        self.d_min = optimizer.minimize(
            model.d_loss, var_list=model.d_vars, name='d_min')
        self.g_min = optimizer.minimize(
            model.g_loss, var_list=model.g_vars, name='g_min')
def __init__(self, config):
    """Build the inference graph, a saver and an initialized session.

    Args:
        config (PredictConfig): the config to use.
    """
    self._input_names = config.input_names
    self.graph = config._maybe_create_graph()
    with self.graph.as_default():
        placeholders = PlaceholderInput()
        placeholders.setup(config.input_signature)
        with TowerContext('', is_training=False):
            config.tower_func(*placeholders.get_input_tensors())

        in_tensors = get_tensors_by_names(config.input_names)
        out_tensors = get_tensors_by_names(config.output_names)

        config.session_init._setup_graph()
        self.saver = tf.train.Saver()
        initializers = [
            tf.global_variables_initializer(),
            tf.local_variables_initializer(),
        ]
        self.sess = config.session_creator.create_session()
        # Run the generic initializers first, then the configured session init.
        self.sess.run(initializers)
        config.session_init._run_init(self.sess)
        super(OfflinePredictorWithSaver, self).__init__(
            in_tensors, out_tensors, config.return_input, self.sess)
def forward(self, inputs, is_training=False):
    """Preprocess ``inputs`` and return the logits from ``get_logits``.

    The preprocessed image is transposed with ``[0, 3, 1, 2]`` when
    ``self.data_format`` is ``'NCHW'``.
    """
    preprocessed = self.image_preprocess(inputs)
    assert self.data_format in ['NCHW', 'NHWC']
    needs_transpose = self.data_format == 'NCHW'
    if needs_transpose:
        preprocessed = tf.transpose(preprocessed, [0, 3, 1, 2])
    with TowerContext('', is_training):
        return self.get_logits(preprocessed)
def graph(self, x, y, i, x_max, x_min):
    """Run one update step on ``x`` and return the updated loop state.

    The step direction is the sign of a normalized noise tensor, produced by
    passing either the loss gradient w.r.t. ``x`` or, when ``FLAGS.universal``
    is set, a zero tensor through ``conv_with_rn``. The result is clipped to
    ``[x_min, x_max]`` and ``i`` is incremented.
    """
    with TowerContext("model_tower", is_training=False):
        logits, _, endpoints = network.model(x, FLAGS.attack_networks[0])
    xent = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=y)

    if FLAGS.universal:
        perturbation = tf.zeros_like(x)
    else:
        perturbation = tf.gradients(xent, x)[0]

    with TowerContext('RHP_tower', is_training=False):
        with tf.variable_scope('RHP'):
            perturbation = conv_with_rn(perturbation)

    # Normalize by the per-example mean absolute value; 1e-12 avoids dividing by zero.
    mean_abs = tf.reduce_mean(tf.abs(perturbation), [1, 2, 3], keepdims=True)
    perturbation = perturbation / (mean_abs + 1e-12)

    x = x + self.step_size * tf.sign(perturbation)
    x = tf.clip_by_value(x, x_min, x_max)
    i = tf.add(i, 1)
    return x, y, i, x_max, x_min
def forward(self, inputs, is_training=False):
    """Parse the inputs and run the backbone, RPN head and ROI head in turn.

    Results are stored on ``self`` (``image``, ``features``, the RPN logits and
    proposals, and the ROI logits); nothing is returned.
    """
    self._parseInputs(inputs)
    self.image = self.preprocess(self.X)  # 1CHW
    with TowerContext('', is_training):
        self.features = self._backbone()
        rpn_outputs = self._rpn_head(is_training)
        self.rpn_box_logit, self.rpn_label_logit, self.proposals_boxes = rpn_outputs
        roi_outputs = self._roi_head(is_training)
        self.roi_box_logit, self.roi_label_logit = roi_outputs
def build_imagenet_model(image, label, reuse=False, conf=1):
    """Build a depth-101 ``ResNeXtDenoiseAllModel`` in inference mode and return its logits.

    ``reuse`` is forwarded to the root variable scope; ``conf`` is not used here.
    """
    model_args = container()
    model_args.depth = 101
    with TowerContext(tower_name='', is_training=False):
        with tf.variable_scope("", auxiliary_name_scope=False, reuse=reuse):
            net = ResNeXtDenoiseAllModel(model_args)
            net.build_graph(image, label)
    return net.logits
def build_graph(self):
    """Create the input placeholders and evaluate every network in ``FLAGS.test_networks``.

    The accuracy and prediction tensors of each network are collected in
    ``self.acc_list`` and ``self.predictions``.
    """
    batch_shape = [None, 299, 299, 3]
    self.x_input = tf.placeholder(tf.float32, shape=batch_shape)
    self.y_input = tf.placeholder(tf.int64, shape=batch_shape[0])

    self.acc_list = []
    self.predictions = []
    with TowerContext("model_tower", is_training=False):
        for name in FLAGS.test_networks:
            outputs = network.model(self.x_input, name, label=self.y_input)
            accuracy, predicted = outputs
            self.acc_list.append(accuracy)
            self.predictions.append(predicted)
def _build_trainer(self, input, model):
    """Build the model graph and a single op minimizing ``model.loss``."""
    # Build the graph
    self.tower_func = TowerFuncWrapper(model.build_graph, model.inputs())
    with TowerContext('', is_training=True):
        tensors = input.get_input_tensors()
        self.tower_func(*tensors)

    # Define the training iteration
    with tf.name_scope("optimize"):
        optimizer = model.get_optimizer()
        self.train_op = optimizer.minimize(
            model.loss,
            var_list=model.vars,
            colocate_gradients_with_ops=True,
            name="op_min")
def __init__(self, gan_input, a3c_input, gan_model, a3c_model):
    """Build a joint GAN + A3C graph, their update ops and a goal-aware environment.

    Args:
        gan_input (InputSource): input source for the GAN model.
        a3c_input (InputSource): input source for the A3C model.
        gan_model (GANModelDesc): GAN model; provides ``d_loss``/``d_vars``
            and ``g_loss``/``g_vars``.
        a3c_model: model providing ``get_inputs_desc``, ``build_graph``,
            ``get_optimizer``, ``loss`` and ``vars``.
    """
    super(AGTrainer, self).__init__()
    assert isinstance(gan_model, GANModelDesc), gan_model
    gan_inputs_desc = gan_model.get_inputs_desc()
    a3c_inputs_desc = a3c_model.get_inputs_desc()

    self.register_callback(
        gan_input.setup(gan_inputs_desc) + a3c_input.setup(a3c_inputs_desc))

    # tower_func is set because this is a TowerTrainer, and only a TowerTrainer
    # supports automatic graph creation for inference during training. If
    # inference during training is not needed, calling build_graph directly
    # would be enough.
    n_gan_inputs = len(gan_inputs_desc)
    # The combined tower function takes the GAN inputs followed by the A3C
    # inputs and dispatches each slice to its own model.
    self.tower_func = TowerFuncWrapper(
        lambda *x: [
            gan_model.build_graph(*x[:n_gan_inputs]),
            a3c_model.build_graph(*x[n_gan_inputs:])
        ], gan_inputs_desc + a3c_inputs_desc)
    with TowerContext('', is_training=True):
        self.tower_func(*(gan_input.get_input_tensors() +
                          a3c_input.get_input_tensors()))

    gan_opt = gan_model.get_optimizer()
    with tf.name_scope('gan_optimize'):
        self.d_min = gan_opt.minimize(gan_model.d_loss,
                                      var_list=gan_model.d_vars,
                                      name='d_min')
        self.g_min = gan_opt.minimize(gan_model.g_loss,
                                      var_list=gan_model.g_vars,
                                      name='g_min')

    a3c_opt = a3c_model.get_optimizer()
    with tf.name_scope('a3c_optimize'):
        self.a3c_min = a3c_opt.minimize(a3c_model.loss,
                                        var_list=a3c_model.vars,
                                        name='a3c_min')

    self.generator = self.get_predictor(['z'], ['gen/gen'])
    self.memory = GoalMemory(MEMORY_SIZE)
    self.env = WrappedEnv()

    # Replace env.reset with a goal-aware version. A function stored on an
    # instance is not bound, so the previous ``reset(s, goal)`` never received
    # the environment as ``s`` (and ``s.super()`` is not a valid call). Capture
    # the original bound method and close over the environment instead.
    env = self.env
    plain_reset = env.reset

    def reset(goal):
        """Reset the environment, then record ``goal`` on it."""
        result = plain_reset()
        env.goal = goal
        return result

    env.reset = reset
def build_imagenet_model_old(image, label, reuse=False, conf=1):
    """Build a depth-101 ``ResNeXtDenoiseAllModel`` and return derived tensors.

    Args:
        image: image tensor passed to ``model.build_graph``.
        label: integer class labels; one-hot encoded here with depth 1000.
        reuse: forwarded to the root ``tf.variable_scope``.
        conf: margin added inside the ``target_loss`` / ``target_loss5`` terms.

    Returns:
        A fresh ``container`` instance with attributes ``logits``, ``label``,
        ``acc_y``, ``acc_y_5``, ``accuracy``, ``rev_xent``, ``poss_loss``,
        ``target_loss5``, ``target_loss``, ``xent_filter`` and ``xent``.
    """
    args = container()
    args.depth = 101
    with TowerContext(tower_name='', is_training=False):
        with tf.variable_scope("", auxiliary_name_scope=False, reuse=reuse):
            model = ResNeXtDenoiseAllModel(args)
            model.build_graph(image, label)

    # Fix: instantiate the container. The previous ``cont = container`` wrote
    # every attribute onto the class itself, so all calls (and every other
    # ``container()`` instance) shared and overwrote the same tensors.
    cont = container()
    cont.logits = model.logits
    cont.label = tf.argmax(cont.logits, axis=-1)
    cont.acc_y = 1 - model.wrong_1
    cont.acc_y_5 = 1 - model.wrong_5
    cont.accuracy = tf.reduce_mean(1 - model.wrong_1)  # wrong_5
    cont.rev_xent = tf.reduce_mean(
        tf.log(1 - tf.reduce_sum(tf.nn.softmax(model.logits) *
                                 tf.one_hot(label, depth=1000), axis=-1)))
    cont.poss_loss = 1 - tf.reduce_mean(
        tf.reduce_sum(
            tf.nn.softmax(model.logits) * tf.one_hot(label, depth=1000),
            axis=-1))

    label_one_hot = tf.one_hot(label, depth=1000)
    # Subtracting 1e7 at the true class removes it from the max / top-k below.
    wrong_logit = tf.reduce_max(model.logits * (1 - label_one_hot) -
                                label_one_hot * 1e7, axis=-1)
    true_logit = tf.reduce_sum(model.logits * label_one_hot, axis=-1)
    wrong_logit5, _idx = tf.nn.top_k(model.logits * (1 - label_one_hot) -
                                     label_one_hot * 1e7, k=5, sorted=False)
    true_logit5 = tf.reduce_sum(model.logits * label_one_hot,
                                axis=-1, keep_dims=True)
    cont.target_loss5 = -tf.reduce_sum(
        tf.nn.relu(true_logit5 - wrong_logit5 + conf), axis=1)
    cont.target_loss = -tf.nn.relu(true_logit - wrong_logit + conf)
    cont.xent_filter = tf.reduce_mean(
        (1.0 - model.wrong_1) *
        tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=label, logits=model.logits), axis=-1)
    cont.xent = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=label, logits=model.logits), axis=-1)
    return cont
def test_compute_kl(self):
    """Check the value ``compute_kl`` evaluates to for a fixed pair of inputs."""
    # Setup
    real_values = np.array([1.0, 1.0])
    predicted_values = np.array([0.0, 1.0])
    expected = np.array([0.0, 0.0])

    # Run
    with self.test_session():
        with TowerContext('', is_training=False):
            kl_tensor = GraphBuilder.compute_kl(real_values, predicted_values)
            actual = kl_tensor.eval()

    # Check
    assert_equal(actual, expected)
def test_build_graph(self):
    """Check that ``build_graph`` returns ``None`` for a single value column."""
    # Setup
    builder = GraphBuilder({'details': [{'type': 'value', 'n': 5}]})
    graph_inputs = [
        np.full((50, 5), 0.0, dtype=np.float32),
        np.full((50, 1), 1.0, dtype=np.float32),
    ]

    # Run
    with TowerContext('', is_training=False):
        outcome = builder.build_graph(*graph_inputs)

    # Check
    assert outcome is None
def test_batch_diversity(self):
    """Check the graph built by ``batch_diversity`` and the value it evaluates to."""
    # Setup
    layer = tf.Variable(np.zeros(15))
    n_kernel = 20
    kernel_dim = 30
    expected_result = np.full((15, 20), 15.0)

    # Run
    result = GraphBuilder.batch_diversity(layer, n_kernel, kernel_dim)

    # Check - Output properties
    assert result.name == 'Sum_1:0'
    assert result.dtype == tf.float64
    assert result.shape.as_list() == [15, 20]

    graph = result.graph

    # Check - Nodes, as (name, op type, shape, last argument of the checker)
    expected_nodes = [
        ('fc_diversity/output', 'Identity', [15, 600], ['Reshape']),
        ('Reshape', 'Reshape', [15, 20, 30], ['Reshape_1', 'Reshape_2']),
        ('Reshape_1', 'Reshape', [15, 1, 20, 30], ['sub']),
        ('Reshape_2', 'Reshape', [1, 15, 20, 30], ['sub']),
        ('sub', 'Sub', [15, 15, 20, 30], ['Abs']),
        ('Abs', 'Abs', [15, 15, 20, 30], ['Sum']),
        ('Sum', 'Sum', [15, 15, 20], ['Neg']),
        ('Neg', 'Neg', [15, 15, 20], ['Exp']),
        ('Exp', 'Exp', [15, 15, 20], ['Sum_1']),
        ('Sum_1', 'Sum', [15, 20], []),
    ]
    for node_name, op_type, shape, linked in expected_nodes:
        self.check_operation_nodes(graph, node_name, op_type, tf.float64, shape, linked)

    # Check - Evaluated value
    with self.test_session():
        with TowerContext('', is_training=False):
            # tf.initialize_all_variables is deprecated in favour of
            # tf.global_variables_initializer.
            tf.global_variables_initializer().run()
            result = result.eval()

    assert_equal(result, expected_result)
def _build_vde_trainer(self, input, model):
    """Build the model graph and one op minimizing ``model.total_loss``.

    Args:
        input (InputSource):
        model (VDEModelDesc):
    """
    # Build the graph
    self.tower_func = TowerFuncWrapper(
        model.build_graph, model.get_input_signature())
    with TowerContext('', is_training=True):
        tensors = input.get_input_tensors()
        self.tower_func(*tensors)

    optimizer = model.get_optimizer()
    with tf.name_scope('optimize'):
        # The encoder, predictor and decoder variables are all trained together.
        trainable = [model.encode_vars, model.predict_vars, model.decode_vars]
        self.train_op = optimizer.minimize(
            model.total_loss, var_list=trainable, name='train_op')
def __init__(self, input, model):
    """Build the model graph and the discriminator and generator/classifier train ops.

    Exposes ``train_op_d``, ``train_op_c_g``, ``d_uncertainty`` and
    ``threshold`` on the trainer.
    """
    super(S2BTrainer, self).__init__()
    signature = model.get_inputs_desc()

    # Setup input
    for callback in input.setup(signature):
        self.register_callback(callback)

    # tower_func is set because this is a TowerTrainer, and only a TowerTrainer
    # supports automatic graph creation for inference during training. If
    # inference during training is not needed, calling model.build_graph
    # directly would be enough.
    self.tower_func = TowerFuncWrapper(model.build_graph, signature)
    with TowerContext('', is_training=True):
        self.tower_func(*input.get_input_tensors())
    optimizer = model.get_optimizer()

    # Define the training iteration
    with tf.name_scope('Optimize'):
        self.train_op_d = optimizer.minimize(
            model.l_gan_d, var_list=model.d_vars, name='Train_Op_d')

        # Four separate generator objectives, all over g_vars, grouped into one op.
        gan_g_step = optimizer.minimize(
            model.l_gan_g, var_list=model.g_vars, name='Train_Op_gan_g')
        const_step = optimizer.minimize(
            model.l_const, var_list=model.g_vars, name='Train_Op_const')
        tid_step = optimizer.minimize(
            model.l_tid, var_list=model.g_vars, name='Train_Op_tid')
        tv_step = optimizer.minimize(
            model.l_tv, var_list=model.g_vars, name='Train_Op_tv')
        generator_steps = tf.group(gan_g_step, const_step, tid_step, tv_step)

        # The classifier update runs only after all generator updates.
        with tf.control_dependencies([generator_steps]):
            classifier_step = optimizer.minimize(
                model.l_c, var_list=model.c_vars + model.g_vars,
                name='Train_Op_c_g')

    self.train_op_c_g = classifier_step
    self.d_uncertainty = model.d_uncertainty
    self.threshold = model.d_uncertainty_threshold
def __init__(self, config):
    """Build the inference graph and an initialized session from ``config``.

    Args:
        config (PredictConfig): the config to use.
    """
    self.graph = config._maybe_create_graph()
    with self.graph.as_default():
        placeholders = PlaceholderInput()
        placeholders.setup(config.inputs_desc)
        with TowerContext('', is_training=False):
            config.tower_func(*placeholders.get_input_tensors())

        in_tensors = get_tensors_by_names(config.input_names)
        out_tensors = get_tensors_by_names(config.output_names)

        config.session_init._setup_graph()
        session = config.session_creator.create_session()
        config.session_init._run_init(session)
        super(OfflinePredictor, self).__init__(
            in_tensors, out_tensors, config.return_input, session)
def __init__(self, input, model):
    """Build the model graph and the training op.

    The training op minimizes ``model.inf_loss``; when ``model.map_vars`` is
    non-empty, a ``model.map_loss`` update is chained after it.
    """
    super(BNNTrainer, self).__init__()
    callbacks = input.setup(model.get_inputs_desc())
    self.register_callback(callbacks)

    self.tower_func = TowerFuncWrapper(
        model.build_graph, model.get_inputs_desc())
    with TowerContext('', is_training=True):
        self.tower_func(*input.get_input_tensors())

    optimizer = model.get_optimizer()
    with tf.name_scope('optimize'):
        step = optimizer.minimize(
            model.inf_loss, var_list=model.inf_vars, name='inf_op')
        has_map_vars = len(model.map_vars) > 0
        if has_map_vars:
            # The MAP update must run after the inference update.
            with tf.control_dependencies([step]):
                step = optimizer.minimize(
                    model.map_loss, var_list=model.map_vars, name='map_op')
    self.train_op = step
def __init__(self, input, model):
    """Build the GAN graph and a chained, gradient-clipped G-then-D update.

    Args:
        input (InputSource): input source; set up with the model's input
            description.
        model (GANModelDesc): GAN model; also stored as ``self.model``.
    """
    super(GANTrainer, self).__init__()
    assert isinstance(model, GANModelDesc), model
    inputs_desc = model.get_inputs_desc()

    # Setup input
    cbs = input.setup(inputs_desc)
    self.register_callback(cbs)
    self.model = model

    # tower_func is set because this is a TowerTrainer, and only a TowerTrainer
    # supports automatic graph creation for inference during training. If
    # inference during training is not needed, calling model.build_graph
    # directly would be enough.
    self.tower_func = TowerFuncWrapper(model.build_graph, inputs_desc)
    with TowerContext('', is_training=True):
        self.tower_func(*input.get_input_tensors())

    opt = model.get_optimizer()

    def clipped(grads_and_vars):
        """Clip every gradient to [-5, 5], skipping variables without one."""
        # compute_gradients yields (None, var) for variables the loss does not
        # depend on; tf.clip_by_value would raise on None, so drop those pairs.
        return [(tf.clip_by_value(grad, -5., 5.), var)
                for grad, var in grads_and_vars if grad is not None]

    # Define the training iteration: by default, run one d_min after one g_min.
    with tf.name_scope('optimize'):
        g_min_grad = opt.compute_gradients(model.g_loss, var_list=model.g_vars)
        g_min_train_op = opt.apply_gradients(clipped(g_min_grad), name='g_op')

        with tf.control_dependencies([g_min_train_op]):
            d_min_grad = opt.compute_gradients(model.d_loss, var_list=model.d_vars)
            d_min_train_op = opt.apply_gradients(clipped(d_min_grad), name='d_op')

    self.train_op = d_min_train_op
def train(self):
    """Build the training graph, restore pretrained weights and run the training loop.

    Steps, in order: create a session, build the network and a momentum
    optimizer (optionally doubling bias gradients), initialize all variables,
    restore the subset found in the pretrained checkpoint, then iterate until
    ``cfg.FLAGS.max_iters``, printing losses and writing snapshots at the
    configured intervals.
    """
    # Create session
    tfconfig = tf.ConfigProto(allow_soft_placement=True)
    tfconfig.gpu_options.allow_growth = True
    sess = tf.Session(config=tfconfig)
    # Total loss recorded every `display` iterations.
    losser = []
    with sess.graph.as_default():
        tf.set_random_seed(cfg.FLAGS.rng_seed)
        # NOTE(review): the tower context is opened with is_training=False even
        # though this is the training path - confirm this is intentional.
        with TowerContext('', is_training=False):
            layers = self.net.create_architecture(sess, "TRAIN", self.imdb.num_classes, tag='default')
        loss = layers['total_loss']
        # The learning rate is a non-trainable variable so it can be reassigned
        # during training.
        lr = tf.Variable(cfg.FLAGS.learning_rate, trainable=False)
        momentum = cfg.FLAGS.momentum
        optimizer = tf.train.MomentumOptimizer(lr, momentum)
        # optimizer = tf.train.AdamOptimizer(lr)
        gvs = optimizer.compute_gradients(loss)

        # Double bias
        # Double the gradient of the bias if set
        if cfg.FLAGS.double_bias:
            final_gvs = []
            with tf.variable_scope('Gradient_Mult'):
                for grad, var in gvs:
                    scale = 1.
                    if cfg.FLAGS.double_bias and '/biases:' in var.name:
                        scale *= 2.
                    # Only add a multiply op when the scale actually differs from 1.
                    if not np.allclose(scale, 1.0):
                        grad = tf.multiply(grad, scale)
                    final_gvs.append((grad, var))
            train_op = optimizer.apply_gradients(final_gvs)
        else:
            train_op = optimizer.apply_gradients(gvs)

        # We will handle the snapshots ourselves
        self.saver = tf.train.Saver(max_to_keep=100000)
        # Write the train and validation information to tensorboard
        #writer = tf.summary.FileWriter(self.tbdir, sess.graph)
        #valwriter = tf.summary.FileWriter(self.tbvaldir)

    # Load weights
    # Fresh train directly from ImageNet weights
    #print('Loading initial model weights from {:s}'.format(cfg.FLAGS.pretrained_model))
    variables = tf.global_variables()
    # Initialize all variables first
    # pretrained_model = r'F:\GhostNet\ghostnet\models\ghostnet_checkpoint'
    # NOTE(review): `pretrained_model` is not assigned anywhere in this function
    # (the only assignment above is commented out); presumably it is a
    # module-level name - confirm, otherwise the lines below raise NameError.
    sess.run(tf.variables_initializer(variables, name='init'))
    var_keep_dic = self.get_variables_in_checkpoint_file(pretrained_model)
    #var_keep_dic = get_model_loader(pretrained_model)
    # Get the variables to restore, ignoring the variables to fix
    variables_to_restore = self.net.get_variables_to_restore(
        variables, var_keep_dic)

    restorer = tf.train.Saver(variables_to_restore)
    restorer.restore(sess, pretrained_model)
    print('Loaded.')
    # Need to fix the variables before loading, so that the RGB weights are changed to BGR
    # For VGG16 it also changes the convolutional weights fc6 and fc7 to
    # fully connected weights
    self.net.fix_variables(sess, pretrained_model)
    print('Fixed.')
    sess.run(tf.assign(lr, cfg.FLAGS.learning_rate))
    last_snapshot_iter = 0

    timer = Timer()
    iter = last_snapshot_iter + 1
    last_summary_time = time.time()
    # Figure and axes for the (currently disabled) live loss plot below.
    fig = plt.figure()
    arx = fig.add_subplot(1, 1, 1)
    while iter < cfg.FLAGS.max_iters + 1:
        # Learning rate: scaled by gamma once, at iteration step_size + 1.
        if iter == cfg.FLAGS.step_size + 1:
            # Add snapshot here before reducing the learning rate
            # self.snapshot(sess, iter)
            sess.run(
                tf.assign(lr, cfg.FLAGS.learning_rate * cfg.FLAGS.gamma))

        timer.tic()
        # Get training data, one batch at a time
        blobs = self.data_layer.forward()

        # Compute the graph without summary
        rpn_loss_cls, rpn_loss_box, loss_cls, loss_box, total_loss = self.net.train_step(
            sess, blobs, train_op)
        timer.toc()
        iter += 1

        # Display training information
        if iter % (cfg.FLAGS.display) == 0:
            losser.append(total_loss)
            print('iter: %d / %d, total loss: %.6f\n >>> rpn_loss_cls: %.6f\n '
                  '>>> rpn_loss_box: %.6f\n >>> loss_cls: %.6f\n >>> loss_box: %.6f\n ' % \
                  (iter, cfg.FLAGS.max_iters, total_loss, rpn_loss_cls, rpn_loss_box, loss_cls, loss_box))
            print('speed: {:.3f}s / iter'.format(timer.average_time))
            #arx.cla()
            #arx.plot(losser,'bo-')
            #plt.pause(0.1)

        if iter % cfg.FLAGS.snapshot_iterations == 0:
            self.snapshot(sess, iter)
def _add_forward_graph(self, student=0.5):
    """NN architecture.

    Builds a stack of shufflenet-style units on ``self.image_input`` and stores
    the output of the final convolution in ``self.preds``.

    Args:
        student: multiplier applied to the base channel widths
            (240/480/960 for the three groups, 960 and 768 for the added blocks).
    """

    def shufflenet_unit(l, out_channel, group, stride):
        """Unit with a shortcut: added when stride == 1, avg-pooled and concatenated otherwise."""
        in_shape = l.get_shape().as_list()
        # Channels are on axis 1: the input is transposed to NCHW below.
        in_channel = in_shape[1]
        shortcut = l

        # No grouping on the first pointwise conv when the input has 24 channels
        # (the output width of the stem conv below).
        first_split = group if in_channel != 24 else 1
        l = Conv2D('conv1', l, out_channel // 4, kernel_shape=1, split=first_split, nl=BNReLU)
        l = channel_shuffle(l, group)
        l = DepthConv('dconv', l, out_channel // 4, kernel_shape=3, nl=BN, stride=stride)
        # For stride != 1 the shortcut is concatenated, so this branch only
        # produces the remaining out_channel - in_channel channels.
        l = Conv2D('conv2', l, out_channel if stride == 1 else out_channel - in_channel, kernel_shape=1, split=group, nl=BN)
        if stride == 1:
            output = tf.nn.relu(shortcut + l)
        else:
            shortcut = AvgPooling('avgpool', shortcut, 3, 2, padding='SAME')
            output = tf.concat([shortcut, tf.nn.relu(l)], axis=1)
        return output

    def shufflenet_unit_add(l, out_channel, group, stride):
        """Unit whose output is always relu(shortcut + branch); no pooling or concat path."""
        in_shape = l.get_shape().as_list()
        in_channel = in_shape[1]
        shortcut = l

        first_split = group if in_channel != 24 else 1
        l = Conv2D('conv1', l, out_channel // 4, kernel_shape=1, split=first_split, nl=BNReLU)
        l = channel_shuffle(l, group)
        l = DepthConv('dconv', l, out_channel // 4, kernel_shape=3, nl=BN, stride=stride)
        # Unlike shufflenet_unit, the second pointwise conv uses first_split.
        l = Conv2D('conv2', l, out_channel, kernel_shape=1, split=first_split, nl=BN)
        output = tf.nn.relu(shortcut + l)
        return output

    def shufflenet_unit_no_shortcut(l, out_channel, group, stride):
        """Same branch as shufflenet_unit_add, but without the shortcut connection."""
        in_shape = l.get_shape().as_list()
        in_channel = in_shape[1]

        first_split = group if in_channel != 24 else 1
        l = Conv2D('conv1', l, out_channel // 4, kernel_shape=1, split=first_split, nl=BNReLU)
        l = channel_shuffle(l, group)
        l = DepthConv('dconv', l, out_channel // 4, kernel_shape=3, nl=BN, stride=stride)
        l = Conv2D('conv2', l, out_channel, kernel_shape=1, split=first_split, nl=BN)
        output = tf.nn.relu(l)
        return output

    mc = self.mc
    with argscope([Conv2D, MaxPooling, AvgPooling, GlobalAvgPooling, BatchNorm], data_format='NCHW'), \
            argscope(Conv2D, use_bias=False):
        with TowerContext('', is_training=mc.IS_TRAINING):
            group = 3
            channels = [
                int(240 * student),
                int(480 * student),
                int(960 * student)
            ]

            # The layers above run in NCHW, so move channels to axis 1 first.
            l = tf.transpose(self.image_input, [0, 3, 1, 2])
            l = Conv2D('conv1', l, 24, 3, stride=1, nl=BNReLU)
            l = MaxPooling('pool1', l, 3, 2, padding='SAME')

            # In each group the first block uses stride 2, the rest stride 1.
            with tf.variable_scope('group1'):
                for i in range(4):
                    with tf.variable_scope('block{}'.format(i)):
                        l = shufflenet_unit(l, channels[0], group, 2 if i == 0 else 1)

            with tf.variable_scope('group2'):
                for i in range(6):
                    with tf.variable_scope('block{}'.format(i)):
                        l = shufflenet_unit(l, channels[1], group, 2 if i == 0 else 1)

            with tf.variable_scope('group3'):
                for i in range(4):
                    with tf.variable_scope('block{}'.format(i)):
                        l = shufflenet_unit(l, channels[2], group, 2 if i == 0 else 1)

            with tf.variable_scope('added3'):
                with tf.variable_scope('block{}'.format(0)):
                    l = shufflenet_unit_add(l, int(960 * student), 3, 1)
                with tf.variable_scope('block{}'.format(1)):
                    l = shufflenet_unit_no_shortcut(
                        l, int(768 * student), 3, 1)  #768, 384, 192

            # Undo the earlier transpose: back to channels-last.
            l = tf.transpose(l, [0, 2, 3, 1])

    dropout11 = tf.nn.dropout(l, self.keep_prob, name='drop11')

    # One (CLASSES + 1 + 4)-sized output per anchor.
    num_output = mc.ANCHOR_PER_GRID * (mc.CLASSES + 1 + 4)
    self.preds = self._conv_layer_no_pretrain('conv12', dropout11, filters=num_output, size=3, stride=1, padding='SAME', xavier=False, relu=False, stddev=0.0001)
def _add_forward_graph(self, student=0.5):
    """Build the supervisor and student trunks, the mimic loss and the head.

    One batch is dequeued from ``self.batch_data_queue`` into
    ``self.image_input``, ``self.input_mask``, ``self.box_delta_input``,
    ``self.box_input``, ``self.labels``, ``self.mimic_mask`` and
    ``self.mimic_mask2``. The image is then fed through two trunks:

    * a supervisor trunk with base widths 240/480/960 (+960, +768), built
      under the ``shuffleDet_supervisor`` variable scope inside a
      ``TowerContext`` with ``is_training=False``; its last feature map is
      also exposed as ``self.inspect_last_feature``;
    * a student trunk whose widths are those base widths times ``student``,
      built with the ``c_BN`` / ``c_BNReLU`` wrappers inside a
      ``TowerContext`` that follows ``mc.IS_TRAINING``.

    The student feature map goes through a 768-filter ``adaptation`` conv and
    is compared with the supervisor feature map; the masked squared error,
    divided by ``sum(mask) * 2`` (``student == 0.5``) or ``sum(mask) * 4``
    (any other value), becomes ``self.mimic_loss`` and is added to the
    ``'losses'`` collection (multiplied by 0 first when
    ``self.without_imitation`` is truthy). Finally dropout and ``conv12``
    produce ``self.preds``.

    Args:
        student: Channel multiplier for the student trunk; each scaled width
            is truncated with ``int``.

    Raises:
        ValueError: If the current graph name scope does not end in a
            decimal tower index.
    """
    import re

    self.image_input, self.input_mask, self.box_delta_input, \
        self.box_input, self.labels, self.mimic_mask, self.mimic_mask2 = \
        self.batch_data_queue.dequeue()

    mc = self.mc

    # The enclosing name scope doubles as the tower name, and its trailing
    # digits are the tower index. Parse the whole digit run: looking only at
    # the last character would map e.g. "tower_10" to index 0.
    tower_scope = tf.get_default_graph().get_name_scope()
    index_match = re.search(r'[0-9]+\Z', tower_scope)
    if index_match is None:
        raise ValueError(
            'Cannot infer the tower index from name scope {!r}: expected it '
            'to end in digits.'.format(tower_scope))
    tower_index = int(index_match.group())

    merge_standard, merge_add, merge_none = 'standard', 'add', 'none'

    def build_unit(x, out_channel, group, stride, merge, ungrouped_width,
                   act_norm, norm, conv2_by_group):
        """Build one shuffle unit.

        merge: ``merge_standard`` adds the shortcut when ``stride == 1`` and
            otherwise concatenates the average-pooled shortcut on axis 1;
            ``merge_add`` always adds; ``merge_none`` has no shortcut.
        ungrouped_width: input channel count for which conv1 is not grouped.
        act_norm / norm: ``nl`` callables for conv1 and for dconv/conv2.
        conv2_by_group: split conv2 by ``group`` instead of conv1's split.
        """
        in_channel = x.get_shape().as_list()[1]
        shortcut = x
        # We do not apply group convolution on the first pointwise layer
        # because the number of input channels is relatively small.
        first_split = 1 if in_channel == ungrouped_width else group
        conv2_split = group if conv2_by_group else first_split
        concat_path = merge == merge_standard and stride != 1
        # On the concat path the shortcut contributes `in_channel` channels,
        # so conv2 only produces the remainder.
        conv2_channels = \
            out_channel - in_channel if concat_path else out_channel

        x = Conv2D('conv1', x, out_channel // 4,
                   kernel_shape=1, split=first_split, nl=act_norm)
        x = channel_shuffle(x, group)
        x = DepthConv('dconv', x, out_channel // 4,
                      kernel_shape=3, nl=norm, stride=stride)
        x = Conv2D('conv2', x, conv2_channels,
                   kernel_shape=1, split=conv2_split, nl=norm)

        if merge == merge_none:
            return tf.nn.relu(x)
        if not concat_path:
            # unit (b)
            return tf.nn.relu(shortcut + x)
        # unit (c)
        shortcut = AvgPooling('avgpool', shortcut, 3, 2, padding='SAME')
        return tf.concat([shortcut, tf.nn.relu(x)], axis=1)

    def build_trunk(stem_channels, stage_widths, tail_widths, act_norm, norm,
                    stage_ungrouped_width, stage_conv2_by_group):
        """Stem conv + pool, three stages (4/6/4 blocks), two extra units.

        Consumes ``self.image_input`` permuted with ``[0, 3, 1, 2]`` and
        returns the final feature map permuted back with ``[0, 2, 3, 1]``.
        """
        group = 3
        x = tf.transpose(self.image_input, [0, 3, 1, 2])
        x = Conv2D('conv1', x, stem_channels, 3, stride=1, nl=act_norm)
        x = MaxPooling('pool1', x, 3, 2, padding='SAME')

        stages = zip(('group1', 'group2', 'group3'), (4, 6, 4), stage_widths)
        for scope_name, depth, width in stages:
            with tf.variable_scope(scope_name):
                for i in range(depth):
                    with tf.variable_scope('block{}'.format(i)):
                        # Only the first block of a stage uses stride 2.
                        x = build_unit(
                            x, width, group, 2 if i == 0 else 1,
                            merge_standard, stage_ungrouped_width,
                            act_norm, norm, stage_conv2_by_group)

        # The two extra units always compare the input width against 24 and
        # reuse conv1's split for conv2, for supervisor and student alike.
        with tf.variable_scope('added3'):
            with tf.variable_scope('block{}'.format(0)):
                x = build_unit(x, tail_widths[0], 3, 1, merge_add, 24,
                               act_norm, norm, False)
            with tf.variable_scope('block{}'.format(1)):
                x = build_unit(x, tail_widths[1], 3, 1, merge_none, 24,
                               act_norm, norm, False)
        return tf.transpose(x, [0, 2, 3, 1])

    # NOTE(review): the source this block was recovered from had lost its
    # indentation. The nesting below (adaptation + mimic loss inside the
    # student TowerContext, dropout/conv12 head outside every `with`) is a
    # reconstruction -- confirm against the original layout, since it can
    # affect variable scoping/reuse across towers.
    with argscope([Conv2D, MaxPooling, AvgPooling, GlobalAvgPooling,
                   BatchNorm], data_format='NCHW'), \
            argscope(Conv2D, use_bias=False):
        # Supervisor: full width, 16-channel stem, plain BN wrappers.
        with TowerContext(tower_scope, is_training=False):
            with tf.variable_scope('shuffleDet_supervisor'):
                supervisor_last_feature = build_trunk(
                    16, [240, 480, 960], (960, 768), BNReLU, BN,
                    stage_ungrouped_width=16, stage_conv2_by_group=False)
                self.inspect_last_feature = supervisor_last_feature

        # Student: scaled widths, 24-channel stem, c_BN wrappers.
        with argscope(c_batch_norm,
                      is_main_training_tower=tower_index == 0,
                      data_format='NCHW'):
            with TowerContext(tower_scope, is_training=mc.IS_TRAINING,
                              index=tower_index):
                channels = [
                    int(240 * student),
                    int(480 * student),
                    int(960 * student),
                ]
                l = build_trunk(
                    24, channels,
                    (int(960 * student), int(768 * student)),
                    c_BNReLU, c_BN,
                    stage_ungrouped_width=24, stage_conv2_by_group=True)

                # Project the student features to 768 filters so they can be
                # subtracted from the supervisor feature map.
                with tf.variable_scope('adaptation'):
                    student_adap = self._conv_layer_no_pretrain(
                        'conv', l, filters=768, size=3, stride=1,
                        padding='SAME', xavier=False, relu=True,
                        stddev=0.0001)

                with tf.variable_scope('mimic_loss'):
                    mimic_mask = tf.cast(
                        tf.expand_dims(self.mimic_mask, axis=-1), tf.float32)
                    # this normalization is maybe too harsh
                    factor = 2. if student == 0.5 else 4.
                    normalization = tf.reduce_sum(mimic_mask) * factor
                    self.mimic_loss = tf.div(
                        tf.reduce_sum(
                            tf.square(supervisor_last_feature - student_adap)
                            * mimic_mask),
                        normalization)
                    if self.without_imitation:
                        # Keep the op in the graph but zero its contribution.
                        self.mimic_loss = self.mimic_loss * 0.
                    tf.add_to_collection('losses', self.mimic_loss)

    dropout11 = tf.nn.dropout(l, self.keep_prob, name='drop11')
    num_output = mc.ANCHOR_PER_GRID * (mc.CLASSES + 1 + 4)
    self.preds = self._conv_layer_no_pretrain(
        'conv12', dropout11, filters=num_output, size=3, stride=1,
        padding='SAME', xavier=False, relu=False, stddev=0.0001)
'our server and place them properly?').format(tfmodel + '.meta')) # set config tfconfig = tf.ConfigProto(allow_soft_placement=True) tfconfig.gpu_options.allow_growth = True # init session sess = tf.Session(config=tfconfig) # load network if demonet == 'mobilenetv1': net = GhostNet(batch_size=1) # elif demonet == 'res101': # net = resnetv1(batch_size=1, num_layers=101) else: raise NotImplementedError with TowerContext('', is_training=False): net.create_architecture(sess, "TEST", classes_num, tag='default', anchor_scales=[8, 16, 32]) saver = tf.train.Saver() saver.restore(sess, tfmodel) print('Loaded network {:s}'.format(tfmodel)) #im_names = ['000001.jpg', '000002.jpg', '000003.jpg', '000004.jpg', # '000005.jpg', '000006.jpg'] path1 = input("请输入测试图片的路径:") im_names = os.listdir(path1)