Example #1
0
 def __init__(self, config):
     """Set up multi-GPU GAN training.

     Wraps the configured dataflow in a queue plus a per-GPU staging area
     so input batches are prefetched onto each device, then delegates the
     rest of the setup to the parent trainer.

     Args:
         config: trainer config; must provide ``nr_tower``, ``tower``
             and ``dataflow`` (assumed tensorpack ``TrainConfig`` — TODO confirm).

     Raises:
         ValueError: if fewer than two GPU towers are configured.
     """
     self._nr_gpu = config.nr_tower
     # Explicit check instead of `assert`: asserts are stripped under `python -O`,
     # which would silently allow a single-GPU config into a multi-GPU trainer.
     if self._nr_gpu <= 1:
         raise ValueError(
             'This trainer requires more than one GPU, got {}'.format(self._nr_gpu))
     self._raw_devices = ['/gpu:{}'.format(k) for k in config.tower]
     # Prefetch input onto every GPU through staging areas.
     self._input_source = StagingInputWrapper(QueueInput(config.dataflow),
                                              self._raw_devices)
     super(MultiGPUGANTrainer, self).__init__(config)
Example #2
0
    def __init__(self, config):
        """Build a data-parallel GAN training graph over multiple GPUs.

        Replicates the model on every GPU tower, averages the discriminator
        and generator losses across towers, and chains one generator step
        before each discriminator step into a single ``train_op``.

        Args:
            config: trainer config; must provide ``nr_tower``, ``tower``,
                ``dataflow``, ``model`` and ``callbacks`` (assumed tensorpack
                ``TrainConfig`` — TODO confirm).

        Raises:
            ValueError: if fewer than two GPU towers are configured.
        """
        nr_gpu = config.nr_tower
        # Explicit check instead of `assert`, which is stripped under `python -O`.
        if nr_gpu <= 1:
            raise ValueError(
                'This trainer requires more than one GPU, got {}'.format(nr_gpu))
        raw_devices = ['/gpu:{}'.format(k) for k in config.tower]

        # Setup input: queue + per-GPU staging areas for prefetching.
        # Renamed from `input` to avoid shadowing the builtin.
        input_source = StagingInputWrapper(QueueInput(config.dataflow), raw_devices)
        model = config.model
        cbs = input_source.setup(model.get_inputs_desc())
        config.callbacks.extend(cbs)

        def get_cost():
            # Build one tower's graph; returns its two losses.
            model.build_graph(input_source)
            return [model.d_loss, model.g_loss]

        devices = [
            LeastLoadedDeviceSetter(d, raw_devices) for d in raw_devices
        ]
        cost_list = MultiGPUTrainerBase.build_on_multi_tower(
            config.tower, get_cost, devices)
        # Simply average the cost. It might get faster to average the gradients.
        with tf.name_scope('optimize'):
            d_loss = tf.add_n([x[0] for x in cost_list]) * (1.0 / nr_gpu)
            g_loss = tf.add_n([x[1] for x in cost_list]) * (1.0 / nr_gpu)

            opt = model.get_optimizer()
            # Run one d_min after one g_min: the control dependency forces
            # the generator update to complete before the discriminator update.
            g_min = opt.minimize(g_loss,
                                 var_list=model.g_vars,
                                 colocate_gradients_with_ops=True,
                                 name='g_op')
            with tf.control_dependencies([g_min]):
                d_min = opt.minimize(d_loss,
                                     var_list=model.d_vars,
                                     colocate_gradients_with_ops=True,
                                     name='d_op')
        self.train_op = d_min
        super(MultiGPUGANTrainer, self).__init__(config)