def setup_graph2(self, inputs_desc, input, get_cost_fn, get_policy_fn, get_opt_fn):
    """Wrap the user functions into tower functions and build the training graph.

    Args:
        inputs_desc: description of the model's inputs.
        input (InputSource): the input pipeline.
        get_cost_fn: callable producing the training cost; becomes ``self.tower_func``.
        get_policy_fn: callable producing the policy outputs.
        get_opt_fn: callable returning the optimizer; memoized so it runs at most once.
    """
    wrapped_cost = TowerFuncWrapper(get_cost_fn, inputs_desc)
    wrapped_policy = TowerFuncWrapper(get_policy_fn, inputs_desc)
    opt_fn = memoized(get_opt_fn)
    self.tower_func = wrapped_cost
    # TODO setup may want to register monitor as well??
    callbacks = self._setup_input(inputs_desc, input)
    callbacks = callbacks + self._setup_graph2(input, wrapped_cost, wrapped_policy, opt_fn)
    self.register_callback(callbacks)
def __init__(self, input, model, d_period=1, g_period=1):
    """
    Args:
        d_period(int): period of each d_opt run
        g_period(int): period of each g_opt run
    """
    super(SeparateGANTrainer, self).__init__()
    self._d_period = int(d_period)
    self._g_period = int(g_period)
    assert min(d_period, g_period) == 1

    # Hook the input pipeline up as callbacks.
    self.register_callback(input.setup(model.get_inputs_desc()))

    # Build the graph once, under a training TowerContext with BatchNorm
    # statistics updated in-place.
    self.tower_func = TowerFuncWrapper(model.build_graph, model.get_inputs_desc())
    with TowerContext('', is_training=True), \
            argscope(BatchNorm, internal_update=True):
        # should not hook the updates to both train_op, it will hurt training speed.
        self.tower_func(*input.get_input_tensors())

    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    if update_ops:
        logger.warn("Found {} ops in UPDATE_OPS collection!".format(len(update_ops)))
        logger.warn("Using SeparateGANTrainer with UPDATE_OPS may hurt your training speed a lot!")

    opt = model.get_optimizer()
    with tf.name_scope('optimize'):
        # Two independent minimization ops; the trainer alternates between them.
        self.d_min = opt.minimize(model.d_loss, var_list=model.d_vars, name='d_min')
        self.g_min = opt.minimize(model.g_loss, var_list=model.g_vars, name='g_min')
def _build_multigpu_gan_trainer(self, input, model, num_gpu):
    """Replicate the GAN graph on ``num_gpu`` towers and average the two losses."""
    assert num_gpu > 1
    raw_devices = ['/gpu:{}'.format(k) for k in range(num_gpu)]

    # Tower function returns both losses so they can be averaged across towers.
    def get_cost(*inputs):
        model.build_graph(*inputs)
        return [model.d_loss, model.g_loss]

    self.tower_func = TowerFuncWrapper(get_cost, model.get_input_signature())
    devices = [LeastLoadedDeviceSetter(dev, raw_devices) for dev in raw_devices]
    cost_list = DataParallelBuilder.build_on_towers(
        list(range(num_gpu)),
        lambda: self.tower_func(*input.get_input_tensors()),
        devices)

    # For simplicity, average the cost here. It might be faster to average the gradients
    with tf.name_scope('optimize'):
        scale = 1.0 / num_gpu
        d_loss = tf.add_n([c[0] for c in cost_list]) * scale
        g_loss = tf.add_n([c[1] for c in cost_list]) * scale

        opt = model.get_optimizer()
        # run one d_min after one g_min
        g_min = opt.minimize(g_loss, var_list=model.g_vars,
                             colocate_gradients_with_ops=True, name='g_op')
        with tf.control_dependencies([g_min]):
            d_min = opt.minimize(d_loss, var_list=model.d_vars,
                                 colocate_gradients_with_ops=True, name='d_op')
    # Define the training iteration
    self.train_op = d_min
def _build_multigpu_trainer(self, input, model, num_gpu):
    """Replicate the training graph on ``num_gpu`` towers and average the loss.

    Args:
        input (InputSource): the input pipeline.
        model: model providing ``build_graph``, ``loss``, ``vars`` and an optimizer.
        num_gpu (int): number of GPUs; must be > 1.
    """
    assert num_gpu > 1, num_gpu
    raw_devices = ["/gpu:{}".format(k) for k in range(num_gpu)]

    # build the graph with multi-gpu replications
    def get_cost(*inputs):
        model.build_graph(*inputs)
        return model.loss

    self.tower_func = TowerFuncWrapper(get_cost, model.get_input_signature())
    devices = [
        LeastLoadedDeviceSetter(d, raw_devices) for d in raw_devices
    ]
    cost_list = DataParallelBuilder.build_on_towers(
        list(range(num_gpu)),
        lambda: self.tower_func(*input.get_input_tensors()),
        devices)

    with tf.name_scope("optimize"):
        # average the per-tower losses
        loss = tf.add_n(cost_list) * (1.0 / num_gpu)
        opt = model.get_optimizer()
        # BUG FIX: the keyword is `colocate_gradients_with_ops` (plural);
        # the old spelling `colocate_gradients_with_op` raises a TypeError
        # when minimize() is called. Matches the sibling multi-GPU trainers.
        op_min = opt.minimize(loss, var_list=model.vars,
                              colocate_gradients_with_ops=True, name="op_min")
    self.train_op = op_min
def __init__(self, input, model, d_period=1, g_period=1):
    """
    Args:
        d_period(int): period of each d_opt run
        g_period(int): period of each g_opt run
    """
    super(SeparateGANTrainer, self).__init__()
    self._d_period = int(d_period)
    self._g_period = int(g_period)
    assert min(d_period, g_period) == 1

    # Register the input pipeline callbacks.
    self.register_callback(input.setup(model.get_inputs_desc()))

    # Build the training tower.
    self.tower_func = TowerFuncWrapper(model.build_graph, model.get_inputs_desc())
    with TowerContext('', is_training=True):
        self.tower_func(*input.get_input_tensors())

    opt = model.get_optimizer()
    with tf.name_scope('optimize'):
        # Two separate ops; the trainer alternates them by period.
        self.d_min = opt.minimize(model.d_loss, var_list=model.d_vars, name='d_min')
        self.g_min = opt.minimize(model.g_loss, var_list=model.g_vars, name='g_min')
def _build_gan_trainer(self, input, model):
    """Build the single-tower GAN training graph.

    tower_func must be set because this is a TowerTrainer, and only
    TowerTrainer supports automatic graph creation for inference during
    training. If inference during training is not needed, calling
    model.build_graph directly would be enough.
    """
    self.tower_func = TowerFuncWrapper(model.build_graph, model.get_input_signature())
    with TowerContext('', is_training=True):
        self.tower_func(*input.get_input_tensors())
    opt = model.get_optimizer()
    with tf.name_scope('optimize'):
        # by default, run one d_min after one g_min
        g_step = opt.minimize(model.g_loss, var_list=model.g_vars, name='g_op')
        with tf.control_dependencies([g_step]):
            d_step = opt.minimize(model.d_loss, var_list=model.d_vars, name='d_op')
    # Define the training iteration
    self.train_op = d_step
def __init__(self, model, input_queue):
    """Build a GAN training graph with gradient clipping.

    Gradients of both networks are clipped elementwise to [-5, 5]; one
    discriminator step is forced to run after each generator step.
    """
    super().__init__()
    desc = model.get_inputs_desc()
    # Register the input pipeline callbacks.
    self.register_callback(input_queue.setup(desc))
    # Build the training tower.
    self.tower_func = TowerFuncWrapper(model.build_graph, desc)
    with TowerContext('', is_training=True):
        self.tower_func(*input_queue.get_input_tensors())
    opt = model.get_optimizer()

    def clipped(grads_and_vars):
        # Clip each gradient elementwise into [-5, 5].
        return [(tf.clip_by_value(g, -5.0, 5.0), v) for g, v in grads_and_vars]

    with tf.name_scope('optimize'):
        g_step = opt.apply_gradients(
            clipped(opt.compute_gradients(model.g_loss, var_list=model.g_vars)),
            name='g_op')
        with tf.control_dependencies([g_step]):
            d_step = opt.apply_gradients(
                clipped(opt.compute_gradients(model.d_loss, var_list=model.d_vars)),
                name='d_op')
    self.train_op = d_step
def __init__(self, input, model, d_period=1, g_period=1):
    """Build separate d/g minimization ops, alternated by period.

    Args:
        d_period (int): period of each d_opt run.
        g_period (int): period of each g_opt run.

    Raises:
        ValueError: if neither period is 1.
    """
    super(SeparateGANTrainer, self).__init__()
    self._d_period = int(d_period)
    self._g_period = int(g_period)
    if min(d_period, g_period) != 1:
        raise ValueError(
            'The minimum between d_period and g_period must be 1.')

    # Register the input pipeline callbacks.
    self.register_callback(input.setup(model.get_inputs_desc()))

    # Build the training tower.
    self.tower_func = TowerFuncWrapper(model.build_graph, model.get_inputs_desc())
    with TowerContext('', is_training=True):
        self.tower_func(*input.get_input_tensors())

    opt = model.get_optimizer()
    with tf.name_scope('optimize'):
        self.d_min = opt.minimize(model.d_loss, var_list=model.d_vars, name='d_min')
        self.g_min = opt.minimize(model.g_loss, var_list=model.g_vars, name='g_min')
def __init__(self, input, model):
    """Single-tower GAN trainer: one d step chained after each g step.

    The tower function is required because this is a TowerTrainer, and only
    TowerTrainer supports automatic graph creation for inference during training.
    """
    super(GANTrainer, self).__init__()
    assert isinstance(model, GANModelDesc), model
    desc = model.get_inputs_desc()
    callbacks = input.setup(desc)

    tower = TowerFuncWrapper(model.build_graph, desc)
    with TowerContext('', is_training=True):
        tower(*input.get_input_tensors())

    opt = model.get_optimizer()
    with tf.name_scope('optimize'):
        # by default, run one d_min after one g_min
        g_step = opt.minimize(model.g_loss, var_list=model.g_vars, name='g_op')
        with tf.control_dependencies([g_step]):
            d_step = opt.minimize(model.d_loss, var_list=model.d_vars, name='d_op')
    self.train_op = d_step

    self.set_tower_func(tower)
    for cb in callbacks:
        self.register_callback(cb)
def __init__(self, input, model):
    """
    Args:
        input (InputSource):
        model (GANModelDesc):
    """
    super(GANTrainer, self).__init__()
    assert isinstance(model, GANModelDesc), model
    desc = model.get_inputs_desc()
    # Register the input pipeline callbacks.
    self.register_callback(input.setup(desc))

    # tower_func is needed because only a TowerTrainer supports automatic
    # graph creation for inference during training; if that is not needed,
    # calling model.build_graph directly would be enough.
    self.tower_func = TowerFuncWrapper(model.build_graph, desc)
    with TowerContext('', is_training=True):
        self.tower_func(*input.get_input_tensors())
    opt = model.get_optimizer()

    with tf.name_scope('optimize'):
        # by default, run one d_min after one g_min
        g_step = opt.minimize(model.g_loss, var_list=model.g_vars, name='g_op')
        with tf.control_dependencies([g_step]):
            d_step = opt.minimize(model.d_loss, var_list=model.d_vars, name='d_op')
    # Define the training iteration
    self.train_op = d_step
def __init__(self, nr_gpu, input, model):
    """Multi-GPU GAN trainer: replicate the graph and average the losses.

    Args:
        nr_gpu (int): number of GPUs; must be > 1.
        input (InputSource): the input pipeline; wrapped in StagingInput.
        model (GANModelDesc): provides build_graph, d_loss/g_loss and var lists.
    """
    super(MultiGPUGANTrainer, self).__init__()
    assert nr_gpu > 1
    raw_devices = ['/gpu:{}'.format(k) for k in range(nr_gpu)]
    # setup input
    input = StagingInput(input, list(range(nr_gpu)))
    cbs = input.setup(model.get_inputs_desc())

    def get_cost(*inputs):
        # BUG FIX: unpack the tensors; build_graph takes them as positional
        # arguments (consistent with the other multi-GPU GAN trainers), not
        # as a single tuple.
        model.build_graph(*inputs)
        return [model.d_loss, model.g_loss]

    tower_func = TowerFuncWrapper(get_cost, model.get_inputs_desc())
    devices = [LeastLoadedDeviceSetter(d, raw_devices) for d in raw_devices]
    cost_list = DataParallelBuilder.build_on_towers(
        list(range(nr_gpu)),
        lambda: tower_func(*input.get_input_tensors()),
        devices)
    # simply average the cost. It might get faster to average the gradients
    with tf.name_scope('optimize'):
        d_loss = tf.add_n([x[0] for x in cost_list]) * (1.0 / nr_gpu)
        g_loss = tf.add_n([x[1] for x in cost_list]) * (1.0 / nr_gpu)
        opt = model.get_optimizer()
        # run one d_min after one g_min
        g_min = opt.minimize(g_loss, var_list=model.g_vars,
                             colocate_gradients_with_ops=True, name='g_op')
        with tf.control_dependencies([g_min]):
            d_min = opt.minimize(d_loss, var_list=model.d_vars,
                                 colocate_gradients_with_ops=True, name='d_op')
    self.train_op = d_min
    self.set_tower_func(tower_func)
    for cb in cbs:
        self.register_callback(cb)
def __init__(self, gan_input, a3c_input, gan_model, a3c_model):
    """Jointly build a GAN and an A3C model in one training tower.

    Args:
        gan_input (InputSource): input pipeline for the GAN model.
        a3c_input (InputSource): input pipeline for the A3C model.
        gan_model (GANModelDesc): provides d_loss/g_loss and var lists.
        a3c_model: provides loss and vars; its inputs are appended after the GAN's.
    """
    super(AGTrainer, self).__init__()
    assert isinstance(gan_model, GANModelDesc), gan_model
    gan_inputs_desc = gan_model.get_inputs_desc()
    a3c_inputs_desc = a3c_model.get_inputs_desc()
    # Register the callbacks from both input pipelines at once.
    self.register_callback(
        gan_input.setup(gan_inputs_desc) + a3c_input.setup(a3c_inputs_desc))
    """
    We need to set tower_func because it's a TowerTrainer,
    and only TowerTrainer supports automatic graph creation for inference during training.
    If we don't care about inference during training, using tower_func is not needed.
    Just calling model.build_graph directly is OK.
    """
    # Build the graph
    # The combined tower function splits its positional inputs: the first
    # len(gan_inputs_desc) tensors go to the GAN, the rest to the A3C model.
    self.tower_func = TowerFuncWrapper(
        lambda *x: [
            gan_model.build_graph(*x[:len(gan_inputs_desc)]),
            a3c_model.build_graph(*x[len(gan_inputs_desc):])
        ],
        gan_inputs_desc + a3c_inputs_desc)
    with TowerContext('', is_training=True):
        self.tower_func(*(gan_input.get_input_tensors() +
                          a3c_input.get_input_tensors()))
    # Three independent minimization ops; the run loop decides when each runs.
    gan_opt = gan_model.get_optimizer()
    with tf.name_scope('gan_optimize'):
        self.d_min = gan_opt.minimize(gan_model.d_loss,
                                      var_list=gan_model.d_vars, name='d_min')
        self.g_min = gan_opt.minimize(gan_model.g_loss,
                                      var_list=gan_model.g_vars, name='g_min')
    a3c_opt = a3c_model.get_optimizer()
    with tf.name_scope('a3c_optimize'):
        self.a3c_min = a3c_opt.minimize(a3c_model.loss,
                                        var_list=a3c_model.vars, name='a3c_min')
    # Predictor mapping latent 'z' to generated output 'gen/gen'.
    self.generator = self.get_predictor(['z'], ['gen/gen'])
    self.memory = GoalMemory(MEMORY_SIZE)
    self.env = WrappedEnv()

    def reset(s, goal):
        # NOTE(review): `s.super()` is not valid Python — instances have no
        # `super` method, so this raises AttributeError when called.
        # Presumably `super(type(s), s).reset()` (or the env class's reset)
        # was intended — TODO confirm against WrappedEnv.
        s.super().reset()
        s.goal = goal
    # NOTE(review): assigning a plain function to an instance attribute does
    # NOT bind it; callers must pass the env explicitly as `s` — verify usage.
    self.env.reset = reset
def _build_trainer(self, input, model):
    """Build a single-tower training graph minimizing ``model.loss``."""
    self.tower_func = TowerFuncWrapper(model.build_graph, model.inputs())
    with TowerContext('', is_training=True):
        self.tower_func(*input.get_input_tensors())
    # Define the training iteration
    with tf.name_scope("optimize"):
        opt = model.get_optimizer()
        self.train_op = opt.minimize(
            model.loss, var_list=model.vars,
            colocate_gradients_with_ops=True, name="op_min")
def __init__(self, input_queue, model, gpus):
    """Multi-GPU GAN trainer replicated on an explicit list of GPUs.

    Args:
        input_queue (InputSource): input pipeline; wrapped in StagingInput.
        model: GAN model providing d_loss/g_loss, d_vars/g_vars and an optimizer.
        gpus (list[int]): GPU indices to replicate the graph on.

    Raises:
        ValueError: if ``gpus`` is empty.
    """
    super(MultiGPUGANTrainer, self).__init__()
    if not gpus:
        # BUG FIX: the old message claimed gpus "must be strictly greater
        # than 1" while the check only rejects an empty list; the message
        # now describes what is actually validated.
        raise ValueError('gpus must be a non-empty list of GPU indices.')
    raw_devices = ['/gpu:{}'.format(k) for k in gpus]

    # Setup input
    input_queue = StagingInput(input_queue)
    cbs = input_queue.setup(model.get_inputs_desc())
    self.register_callback(cbs)

    # Build the graph with multi-gpu replication
    def get_cost(*inputs):
        model.build_graph(*inputs)
        return [model.d_loss, model.g_loss]

    self.tower_func = TowerFuncWrapper(get_cost, model.get_inputs_desc())
    devices = [
        LeastLoadedDeviceSetter(d, raw_devices) for d in raw_devices
    ]
    cost_list = DataParallelBuilder.build_on_towers(
        gpus,
        lambda: self.tower_func(*input_queue.get_input_tensors()),
        devices)

    # Simply average the cost here. It might be faster to average the gradients
    with tf.name_scope('optimize'):
        d_loss = tf.add_n([x[0] for x in cost_list]) * (1.0 / len(gpus))
        g_loss = tf.add_n([x[1] for x in cost_list]) * (1.0 / len(gpus))
        opt = model.get_optimizer()
        # run one d_min after one g_min
        g_min = opt.minimize(g_loss, var_list=model.g_vars,
                             colocate_gradients_with_ops=True, name='g_op')
        with tf.control_dependencies([g_min]):
            d_min = opt.minimize(d_loss, var_list=model.d_vars,
                                 colocate_gradients_with_ops=True, name='d_op')
    # Define the training iteration
    self.train_op = d_min
def _build_vde_trainer(self, input, model):
    """Build the VDE training graph minimizing the total loss.

    Args:
        input (InputSource):
        model (VDEModelDesc): provides encode/predict/decode variable lists
            and ``total_loss``.
    """
    # Build the graph
    self.tower_func = TowerFuncWrapper(
        model.build_graph, model.get_input_signature())
    with TowerContext('', is_training=True):
        self.tower_func(*input.get_input_tensors())
    opt = model.get_optimizer()
    with tf.name_scope('optimize'):
        # BUG FIX: `var_list` must be one flat list of variables; the old
        # code passed a list of three lists, which minimize() cannot handle.
        # Concatenate the three variable groups instead.
        vde_min = opt.minimize(
            model.total_loss,
            var_list=model.encode_vars + model.predict_vars + model.decode_vars,
            name='train_op')
    self.train_op = vde_min
def get_inference_func(self, attacker):
    """
    Returns a tower function to be used for inference. It generates adv
    images with the given attacker and runs classification on it.
    """
    def inference_tower(image, label):
        # This tower is inference-only by construction.
        assert not get_current_tower_context().is_training
        preprocessed = self.image_preprocess(image)
        # NHWC -> NCHW
        preprocessed = tf.transpose(preprocessed, [0, 3, 1, 2])
        adv_image, target_label = attacker.attack(preprocessed, label, self.get_logits)
        logits = self.get_logits(adv_image)
        # compute top-1 and top-5
        ImageNetModel.compute_loss_and_error(logits, label)
        AdvImageNetModel.compute_attack_success(logits, target_label)

    return TowerFuncWrapper(inference_tower, self.get_inputs_desc())
def __init__(self, input, model):
    """S2B trainer: separate d op, plus a combined g/const/tid/tv step
    followed by a classifier+generator step.

    tower_func is set because this is a TowerTrainer, and only TowerTrainer
    supports automatic graph creation for inference during training.
    """
    super(S2BTrainer, self).__init__()
    # assert isinstance(model, GANModelDesc), model
    desc = model.get_inputs_desc()

    # Register the input pipeline callbacks.
    for cb in input.setup(desc):
        self.register_callback(cb)

    # Build the training tower.
    self.tower_func = TowerFuncWrapper(model.build_graph, desc)
    with TowerContext('', is_training=True):
        self.tower_func(*input.get_input_tensors())

    opt = model.get_optimizer()
    with tf.name_scope('Optimize'):
        # Discriminator op is kept separate and run on its own schedule.
        self.train_op_d = opt.minimize(model.l_gan_d, var_list=model.d_vars,
                                       name='Train_Op_d')
        # All generator-side losses are minimized over g_vars and grouped.
        g_ops = [
            opt.minimize(loss, var_list=model.g_vars, name=name)
            for loss, name in [
                (model.l_gan_g, 'Train_Op_gan_g'),
                (model.l_const, 'Train_Op_const'),
                (model.l_tid, 'Train_Op_tid'),
                (model.l_tv, 'Train_Op_tv'),
            ]
        ]
        train_op_g = tf.group(*g_ops)
        with tf.control_dependencies([train_op_g]):
            # Classifier loss updates both classifier and generator variables,
            # after the grouped generator step has run.
            self.train_op_c_g = opt.minimize(
                model.l_c, var_list=model.c_vars + model.g_vars,
                name='Train_Op_c_g')

    self.d_uncertainty = model.d_uncertainty
    self.threshold = model.d_uncertainty_threshold
def __init__(self, input, model):
    """BNN trainer: one inference-loss step, then (if MAP variables exist)
    a MAP-loss step chained after it."""
    super(BNNTrainer, self).__init__()
    # Register the input pipeline callbacks.
    self.register_callback(input.setup(model.get_inputs_desc()))

    # Build the training tower.
    self.tower_func = TowerFuncWrapper(
        model.build_graph, model.get_inputs_desc())
    with TowerContext('', is_training=True):
        self.tower_func(*input.get_input_tensors())

    opt = model.get_optimizer()
    with tf.name_scope('optimize'):
        train_op = opt.minimize(
            model.inf_loss, var_list=model.inf_vars, name='inf_op')
        if model.map_vars:
            # MAP step runs after the inference step.
            with tf.control_dependencies([train_op]):
                train_op = opt.minimize(
                    model.map_loss, var_list=model.map_vars, name='map_op')
    self.train_op = train_op
def __init__(self, input, model):
    """GAN trainer with gradient clipping: gradients of both networks are
    clipped elementwise to [-5, 5], and one d step follows each g step.

    tower_func is set because this is a TowerTrainer, and only TowerTrainer
    supports automatic graph creation for inference during training.
    """
    super(GANTrainer, self).__init__()
    assert isinstance(model, GANModelDesc), model
    desc = model.get_inputs_desc()
    # Register the input pipeline callbacks.
    self.register_callback(input.setup(desc))
    self.model = model

    # Build the training tower.
    self.tower_func = TowerFuncWrapper(model.build_graph, desc)
    with TowerContext('', is_training=True):
        self.tower_func(*input.get_input_tensors())
    opt = model.get_optimizer()

    def clipped(grads_and_vars):
        # Clip each gradient elementwise into [-5, 5].
        return [(tf.clip_by_value(g, -5., 5.), v) for g, v in grads_and_vars]

    with tf.name_scope('optimize'):
        # by default, run one d_min after one g_min
        g_step = opt.apply_gradients(
            clipped(opt.compute_gradients(model.g_loss, var_list=model.g_vars)),
            name='g_op')
        with tf.control_dependencies([g_step]):
            d_step = opt.apply_gradients(
                clipped(opt.compute_gradients(model.d_loss, var_list=model.d_vars)),
                name='d_op')
    self.train_op = d_step