def _build_multigpu_gan_trainer(self, input, model, num_gpu):
    assert num_gpu > 1
    raw_devices = ['/gpu:{}'.format(k) for k in range(num_gpu)]

    # Build the graph with multi-GPU replication
    def get_cost(*inputs):
        model.build_graph(*inputs)
        return [model.d_loss, model.g_loss]

    self.tower_func = TowerFuncWrapper(get_cost, model.get_input_signature())
    devices = [LeastLoadedDeviceSetter(d, raw_devices) for d in raw_devices]
    cost_list = DataParallelBuilder.build_on_towers(
        list(range(num_gpu)),
        lambda: self.tower_func(*input.get_input_tensors()),
        devices)

    # For simplicity, average the cost here. It might be faster to average the gradients.
    with tf.name_scope('optimize'):
        d_loss = tf.add_n([x[0] for x in cost_list]) * (1.0 / num_gpu)
        g_loss = tf.add_n([x[1] for x in cost_list]) * (1.0 / num_gpu)

        opt = model.get_optimizer()
        # Run one d_min after one g_min
        g_min = opt.minimize(g_loss, var_list=model.g_vars,
                             colocate_gradients_with_ops=True, name='g_op')
        with tf.control_dependencies([g_min]):
            d_min = opt.minimize(d_loss, var_list=model.d_vars,
                                 colocate_gradients_with_ops=True, name='d_op')

    # Define the training iteration
    self.train_op = d_min
def __init__(self, nr_gpu, input, model):
    super(MultiGPUGANTrainer, self).__init__()
    assert nr_gpu > 1
    raw_devices = ['/gpu:{}'.format(k) for k in range(nr_gpu)]

    # Set up input
    input = StagingInput(input, list(range(nr_gpu)))
    cbs = input.setup(model.get_inputs_desc())

    def get_cost(*inputs):
        model.build_graph(inputs)
        return [model.d_loss, model.g_loss]

    tower_func = TowerFuncWrapper(get_cost, model.get_inputs_desc())
    devices = [LeastLoadedDeviceSetter(d, raw_devices) for d in raw_devices]
    cost_list = DataParallelBuilder.build_on_towers(
        list(range(nr_gpu)),
        lambda: tower_func(*input.get_input_tensors()),
        devices)

    # Simply average the cost. It might be faster to average the gradients instead.
    with tf.name_scope('optimize'):
        d_loss = tf.add_n([x[0] for x in cost_list]) * (1.0 / nr_gpu)
        g_loss = tf.add_n([x[1] for x in cost_list]) * (1.0 / nr_gpu)

        opt = model.get_optimizer()
        # Run one d_min after one g_min
        g_min = opt.minimize(g_loss, var_list=model.g_vars,
                             colocate_gradients_with_ops=True, name='g_op')
        with tf.control_dependencies([g_min]):
            d_min = opt.minimize(d_loss, var_list=model.d_vars,
                                 colocate_gradients_with_ops=True, name='d_op')

    self.train_op = d_min
    self.set_tower_func(tower_func)
    for cb in cbs:
        self.register_callback(cb)
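# Usage sketch (an assumption, not from the source): roughly how the constructor
# above might be driven with the tensorpack API of the same era. `Model` (a
# GANModelDesc subclass), `df` (a DataFlow) and the hyperparameter values are
# hypothetical placeholders, not names from this repository.
from tensorpack import QueueInput, ModelSaver

df = ...                                  # some DataFlow yielding training batches
trainer = MultiGPUGANTrainer(nr_gpu=2, input=QueueInput(df), model=Model())
trainer.train_with_defaults(
    callbacks=[ModelSaver()],
    steps_per_epoch=300,
    max_epoch=200,
)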
def _build_multigpu_trainer(self, input, model, num_gpu):
    assert num_gpu > 1, num_gpu
    raw_devices = ["/gpu:{}".format(k) for k in range(num_gpu)]

    # Build the graph with multi-GPU replication
    def get_cost(*inputs):
        model.build_graph(*inputs)
        return model.loss

    self.tower_func = TowerFuncWrapper(get_cost, model.get_input_signature())
    devices = [LeastLoadedDeviceSetter(d, raw_devices) for d in raw_devices]
    cost_list = DataParallelBuilder.build_on_towers(
        list(range(num_gpu)),
        lambda: self.tower_func(*input.get_input_tensors()),
        devices)

    # Average the cost over towers. It might be faster to average the gradients.
    with tf.name_scope("optimize"):
        loss = tf.add_n(cost_list) * (1.0 / num_gpu)
        opt = model.get_optimizer()
        op_min = opt.minimize(loss, var_list=model.vars,
                              colocate_gradients_with_ops=True, name="op_min")

    self.train_op = op_min
def __init__(self, input_queue, model, gpus):
    """Initialize object."""
    super(MultiGPUGANTrainer, self).__init__()
    if len(gpus) < 2:
        raise ValueError('The number of GPUs must be strictly greater than 1.')
    raw_devices = ['/gpu:{}'.format(k) for k in gpus]

    # Set up input
    input_queue = StagingInput(input_queue)
    cbs = input_queue.setup(model.get_inputs_desc())
    self.register_callback(cbs)

    # Build the graph with multi-GPU replication
    def get_cost(*inputs):
        model.build_graph(*inputs)
        return [model.d_loss, model.g_loss]

    self.tower_func = TowerFuncWrapper(get_cost, model.get_inputs_desc())
    devices = [LeastLoadedDeviceSetter(d, raw_devices) for d in raw_devices]
    cost_list = DataParallelBuilder.build_on_towers(
        gpus,
        lambda: self.tower_func(*input_queue.get_input_tensors()),
        devices)

    # Simply average the cost here. It might be faster to average the gradients.
    with tf.name_scope('optimize'):
        d_loss = tf.add_n([x[0] for x in cost_list]) * (1.0 / len(gpus))
        g_loss = tf.add_n([x[1] for x in cost_list]) * (1.0 / len(gpus))

        opt = model.get_optimizer()
        # Run one d_min after one g_min
        g_min = opt.minimize(g_loss, var_list=model.g_vars,
                             colocate_gradients_with_ops=True, name='g_op')
        with tf.control_dependencies([g_min]):
            d_min = opt.minimize(d_loss, var_list=model.d_vars,
                                 colocate_gradients_with_ops=True, name='d_op')

    # Define the training iteration
    self.train_op = d_min
def __init__(self, config):
    nr_gpu = config.nr_tower
    assert nr_gpu > 1
    raw_devices = ['/gpu:{}'.format(k) for k in config.tower]

    # Set up input
    input = StagingInputWrapper(QueueInput(config.dataflow), config.tower)
    model = config.model
    cbs = input.setup(model.get_inputs_desc())
    config.callbacks.extend(cbs)

    def get_cost():
        model.build_graph(input)
        return [model.d_loss, model.g_loss]

    devices = [LeastLoadedDeviceSetter(d, raw_devices) for d in raw_devices]
    cost_list = DataParallelBuilder.build_on_towers(
        config.tower, get_cost, devices)

    # Simply average the cost. It might be faster to average the gradients instead.
    with tf.name_scope('optimize'):
        d_loss = tf.add_n([x[0] for x in cost_list]) * (1.0 / nr_gpu)
        g_loss = tf.add_n([x[1] for x in cost_list]) * (1.0 / nr_gpu)

        opt = model.get_optimizer()
        # Run one d_min after one g_min
        g_min = opt.minimize(g_loss, var_list=model.g_vars,
                             colocate_gradients_with_ops=True, name='g_op')
        with tf.control_dependencies([g_min]):
            d_min = opt.minimize(d_loss, var_list=model.d_vars,
                                 colocate_gradients_with_ops=True, name='d_op')

    self.train_op = d_min
    super(MultiGPUGANTrainer, self).__init__(config)
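# Usage sketch (an assumption, not from the source): the config-based constructor
# above reads config.nr_tower, config.tower, config.dataflow, config.model and
# config.callbacks, so it would be driven through the old-style TrainConfig API
# roughly as below. `Model` and `df` are hypothetical placeholders.
from tensorpack import TrainConfig, ModelSaver

df = ...                                  # some DataFlow yielding training batches
config = TrainConfig(
    dataflow=df,
    model=Model(),
    callbacks=[ModelSaver()],
    steps_per_epoch=300,
    max_epoch=200,
)
config.nr_tower = 2    # assumes the old TrainConfig, where this also populates config.tower
MultiGPUGANTrainer(config).train()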