Example #1
    def __init__(self, nr_gpu, input, model):
        super(MultiGPUGANTrainer, self).__init__()
        assert nr_gpu > 1
        raw_devices = ['/gpu:{}'.format(k) for k in range(nr_gpu)]

        # setup input
        input = StagingInput(input, list(range(nr_gpu)))
        cbs = input.setup(model.get_inputs_desc())

        def get_cost(*inputs):
            model.build_graph(inputs)
            return [model.d_loss, model.g_loss]
        tower_func = TowerFuncWrapper(get_cost, model.get_inputs_desc())
        devices = [LeastLoadedDeviceSetter(d, raw_devices) for d in raw_devices]
        cost_list = DataParallelBuilder.build_on_towers(
            list(range(nr_gpu)),
            lambda: tower_func(*input.get_input_tensors()),
            devices)
        # simply average the cost. It might be faster to average the gradients
        with tf.name_scope('optimize'):
            d_loss = tf.add_n([x[0] for x in cost_list]) * (1.0 / nr_gpu)
            g_loss = tf.add_n([x[1] for x in cost_list]) * (1.0 / nr_gpu)

            opt = model.get_optimizer()
            # run one d_min after one g_min
            g_min = opt.minimize(g_loss, var_list=model.g_vars,
                                 colocate_gradients_with_ops=True, name='g_op')
            with tf.control_dependencies([g_min]):
                d_min = opt.minimize(d_loss, var_list=model.d_vars,
                                     colocate_gradients_with_ops=True, name='d_op')
        self.train_op = d_min
        self.set_tower_func(tower_func)
        for cb in cbs:
            self.register_callback(cb)
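
A minimal usage sketch (not part of the example above): with this constructor signature, the trainer takes a GPU count, an InputSource, and the GAN model, and is then launched like any tensorpack trainer. `Model` and `get_data` below are hypothetical placeholders, assumed to be defined as in tensorpack's DCGAN sample.

from tensorpack import QueueInput, ModelSaver

num_gpu = 2
model = Model()                    # a GANModelDesc subclass (hypothetical)
data = QueueInput(get_data())      # get_data(): a DataFlow factory (hypothetical)

trainer = MultiGPUGANTrainer(num_gpu, data, model)
trainer.train_with_defaults(
    callbacks=[ModelSaver()],
    steps_per_epoch=300,
    max_epoch=200,
)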
Example #2
    def __init__(self, nr_gpu, input, model):
        super(MultiGPUGANTrainer, self).__init__()
        assert nr_gpu > 1
        raw_devices = ['/gpu:{}'.format(k) for k in range(nr_gpu)]

        # Setup input
        input = StagingInput(input)
        cbs = input.setup(model.get_inputs_desc())
        self.register_callback(cbs)

        # Build the graph with multi-gpu replication
        def get_cost(*inputs):
            model.build_graph(*inputs)
            return [model.d_loss, model.g_loss]
        self.tower_func = TowerFuncWrapper(get_cost, model.get_inputs_desc())
        devices = [LeastLoadedDeviceSetter(d, raw_devices) for d in raw_devices]
        cost_list = DataParallelBuilder.build_on_towers(
            list(range(nr_gpu)),
            lambda: self.tower_func(*input.get_input_tensors()),
            devices)
        # Simply average the cost here. It might be faster to average the gradients
        with tf.name_scope('optimize'):
            d_loss = tf.add_n([x[0] for x in cost_list]) * (1.0 / nr_gpu)
            g_loss = tf.add_n([x[1] for x in cost_list]) * (1.0 / nr_gpu)

            opt = model.get_optimizer()
            # run one d_min after one g_min
            g_min = opt.minimize(g_loss, var_list=model.g_vars,
                                 colocate_gradients_with_ops=True, name='g_op')
            with tf.control_dependencies([g_min]):
                d_min = opt.minimize(d_loss, var_list=model.d_vars,
                                     colocate_gradients_with_ops=True, name='d_op')
        # Define the training iteration
        self.train_op = d_min
Example #3
    def __init__(self, input_queue, model, gpus):
        """Initialize object."""
        super(MultiGPUGANTrainer, self).__init__()
        if not gpus:
            raise ValueError('gpus must be a non-empty list of GPU indices.')

        raw_devices = ['/gpu:{}'.format(k) for k in gpus]

        # Setup input
        input_queue = StagingInput(input_queue)
        cbs = input_queue.setup(model.get_inputs_desc())
        self.register_callback(cbs)

        # Build the graph with multi-gpu replication
        def get_cost(*inputs):
            model.build_graph(*inputs)
            return [model.d_loss, model.g_loss]

        self.tower_func = TowerFuncWrapper(get_cost, model.get_inputs_desc())
        devices = [
            LeastLoadedDeviceSetter(d, raw_devices) for d in raw_devices
        ]

        cost_list = DataParallelBuilder.build_on_towers(
            gpus, lambda: self.tower_func(*input_queue.get_input_tensors()),
            devices)

        # Simply average the cost here. It might be faster to average the gradients
        with tf.name_scope('optimize'):
            d_loss = tf.add_n([x[0] for x in cost_list]) * (1.0 / len(gpus))
            g_loss = tf.add_n([x[1] for x in cost_list]) * (1.0 / len(gpus))

            opt = model.get_optimizer()
            # run one d_min after one g_min
            g_min = opt.minimize(g_loss,
                                 var_list=model.g_vars,
                                 colocate_gradients_with_ops=True,
                                 name='g_op')

            with tf.control_dependencies([g_min]):
                d_min = opt.minimize(d_loss,
                                     var_list=model.d_vars,
                                     colocate_gradients_with_ops=True,
                                     name='d_op')

        # Define the training iteration
        self.train_op = d_min
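
Unlike the first two variants, this constructor takes an explicit list of GPU indices rather than a count, so device placement and cost averaging both follow `gpus` directly. A hedged instantiation sketch, with `Model` and `get_data` again as hypothetical placeholders:

from tensorpack import QueueInput

trainer = MultiGPUGANTrainer(
    input_queue=QueueInput(get_data()),   # hypothetical data pipeline
    model=Model(),                        # hypothetical GANModelDesc subclass
    gpus=[0, 1],                          # explicit GPU ids instead of a count
)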
Example #4
    def __init__(self, input, model, num_gpu=1):
        """
        Args:
            input (InputSource):
            model (VDEModelDesc):
        """
        super(Trainer, self).__init__()
        assert isinstance(model, ModelDesc), model

        if num_gpu > 1:
            input = StagingInput(input)

        # Setup input
        cbs = input.setup(model.get_input_signature())
        self.register_callback(cbs)

        assert num_gpu <= 1, "Should be 1 gpu for small data"

        self._build_vde_trainer(input, model)
Example #5
    def __init__(self, input, model, num_gpu=1):
        """
        Args:
            input (InputSource):
            model (GANModelDesc):
        """
        super(GANTrainer, self).__init__()
        assert isinstance(model, GANModelDesc), model

        if num_gpu > 1:
            input = StagingInput(input)

        # Setup input
        cbs = input.setup(model.get_input_signature())
        self.register_callback(cbs)

        if num_gpu <= 1:
            self._build_gan_trainer(input, model)
        else:
            self._build_multigpu_gan_trainer(input, model, num_gpu)
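
For contrast with the multi-GPU builders above, the single-GPU branch `_build_gan_trainer` can skip tower replication entirely. The following is a hedged sketch of what such a builder might look like, reconstructed from the pattern in the examples above rather than taken from this source; `TowerContext` is tensorpack's context manager for building a single training tower, and the exact body is an assumption.

    def _build_gan_trainer(self, input, model):
        # Build one training tower directly on the default device
        self.tower_func = TowerFuncWrapper(model.build_graph, model.get_input_signature())
        with TowerContext('', is_training=True):
            self.tower_func(*input.get_input_tensors())

        opt = model.get_optimizer()
        with tf.name_scope('optimize'):
            # Same schedule as above: run one d_min after one g_min
            g_min = opt.minimize(model.g_loss, var_list=model.g_vars, name='g_op')
            with tf.control_dependencies([g_min]):
                d_min = opt.minimize(model.d_loss, var_list=model.d_vars, name='d_op')
        self.train_op = d_min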
Example #6
    def __init__(self, input, model, num_gpu=1):
        """
        Parameters
        ----------
        input : InputSource
        model : SynTexModelDesc
        """
        super(SynTexTrainer, self).__init__()
        assert isinstance(model, SynTexModelDesc), model

        if num_gpu > 1:
            input = StagingInput(input)

        # Setup input
        cbs = input.setup(model.get_input_signature())
        self.register_callback(cbs)

        if num_gpu <= 1:
            self._build_trainer(input, model)
        else:
            self._build_multigpu_trainer(input, model, num_gpu)