Example #1
def make_network(env, h=None, w=None):
    with env.create_network() as net:
        if h is None:
            img = O.placeholder('img', shape=(1, None, None, 3))
        else:
            img = O.variable('img', np.zeros([1, h, w, 3]))
        net.add_output(img, name='img')

        _ = img
        _ = _ - get_env('neural_style.image_mean').reshape(1, 1, 1, 3)
        _ = O.pad_rb_multiple_of(_, 32)

        def stacked_conv(prefix, nr_convs, in_, channel, kernel=(3, 3), padding='SAME', nonlin=O.relu):
            for i in range(1, nr_convs + 1):
                in_ = O.conv2d('{}_{}'.format(prefix, i), in_, channel, kernel, padding=padding, nonlin=nonlin)
            return in_

        _ = stacked_conv('conv1', 2, _, 64)
        _ = O.pooling2d('pool1', _, (2, 2))
        _ = stacked_conv('conv2', 2, _, 128)
        _ = O.pooling2d('pool2', _, (2, 2))
        _ = stacked_conv('conv3', 3, _, 256)
        _ = O.pooling2d('pool3', _, (2, 2))
        _ = stacked_conv('conv4', 3, _, 512)
        _ = O.pooling2d('pool4', _, (2, 2))
        _ = stacked_conv('conv5', 3, _, 512)
        _ = O.pooling2d('pool5', _, (2, 2))

        for l in get_env('neural_style.content_layers'):
            net.add_output(net.find_var_by_name(l[0] + '/bias'), name=l[0])
        for l in get_env('neural_style.style_layers'):
            net.add_output(net.find_var_by_name(l[0] + '/bias'), name=l[0])
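
The loops above index each configured layer as l[0], which suggests the content/style settings are sequences of (layer_name, weight) pairs. A hypothetical sketch of those env values (the specific layers, weights, and mean below are assumptions for illustration, not taken from the original project):

import numpy as np

# Assumed values only; the snippet above defines the keys, not their contents.
neural_style_settings = {
    'neural_style.image_mean': np.array([104.0, 117.0, 123.0], dtype='float32'),
    'neural_style.content_layers': [('conv4_2', 1.0)],
    'neural_style.style_layers': [('conv1_1', 0.2), ('conv2_1', 0.2),
                                  ('conv3_1', 0.2), ('conv4_1', 0.2),
                                  ('conv5_1', 0.2)],
}
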
def make_dataflow_train(env):
    rng = random.gen_rng()

    def _outputs2action(outputs):
        epsilon = env.runtime['exp_epsilon']
        return outputs['q_argmax'] if rng.rand() > epsilon else rng.choice(
            get_player_nr_actions())

    collector = rl.train.SynchronizedExperienceCollector(
        env,
        make_player,
        _outputs2action,
        nr_workers=get_env('dqn.collector.nr_workers'),
        nr_predictors=get_env('dqn.collector.nr_workers'),
        predictor_output_names=get_env('dqn.collector.predictor_output_names'),
        mode=get_env('dqn.collector.mode'))

    return rl.train.QLearningDataFlow(collector,
                                      target=get_env('dqn.collector.target'),
                                      maxsize=get_env('dqn.expreplay.maxsize'),
                                      batch_size=get_env('trainer.batch_size'),
                                      epoch_size=get_env('trainer.epoch_size'),
                                      gamma=get_env('dqn.gamma'),
                                      nr_td_steps=get_env('dqn.nr_td_steps'),
                                      reward_cb=lambda r: np.clip(r, -1, 1))
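
_outputs2action above is an epsilon-greedy rule: with probability exp_epsilon pick a uniformly random action, otherwise take the greedy q_argmax. A minimal NumPy-only sketch of the same rule (the function name and arguments are illustrative, not part of the DQN example):

import numpy as np

def epsilon_greedy(q_argmax, nr_actions, epsilon, rng=np.random):
    # Explore uniformly with probability epsilon, otherwise exploit.
    if rng.rand() <= epsilon:
        return rng.choice(nr_actions)
    return q_argmax

# e.g. epsilon_greedy(q_argmax=3, nr_actions=6, epsilon=0.1)
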
Example #3
def make_step(net):
    """iter only one step, providing end"""

    imgvar = net.outputs['img']
    target = net.outputs['end']
    netin = imgvar

    # randomly draw the jitter offsets ox, oy
    jitter = get_env('deep_dream.jitter')
    ox, oy = np.random.randint(-jitter, jitter + 1, 2)

    img = netin.get_value()
    img = np.roll(np.roll(img, ox, 2), oy, 1)  # apply jitter shift

    # compute the gradient
    # note that we actually use an L2 loss on an activation map
    # to compute the gradient for the input
    netin.set_value(img)
    loss = 0.5 * (target**2.).mean()
    grad = O.grad(loss, imgvar)
    grad = grad.eval()

    # apply gradient ascent, with normalized gradient
    img += get_env('deep_dream.learning_rate') / np.abs(grad).mean() * grad
    img = np.clip(img, 0, 255)

    img = np.roll(np.roll(img, -ox, 2), -oy, 1)  # unshift image
    netin.set_value(img)
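
make_step combines three steps: jitter the image, take a gradient-ascent step on 0.5 * mean(activation ** 2) with the gradient normalized by its mean absolute value, then undo the jitter. A framework-free sketch of the same update, assuming a user-supplied grad_fn(img) that returns d(loss)/d(img) for an image of shape (1, H, W, 3):

import numpy as np

def dream_step(img, grad_fn, learning_rate=1.5, jitter=32, rng=np.random):
    # Random roll so the pattern does not lock onto the pixel grid.
    ox, oy = rng.randint(-jitter, jitter + 1, 2)
    img = np.roll(np.roll(img, ox, 2), oy, 1)

    grad = grad_fn(img)  # gradient of 0.5 * (activation ** 2).mean() w.r.t. img
    img = img + learning_rate / np.abs(grad).mean() * grad  # normalized ascent
    img = np.clip(img, 0, 255)

    # Undo the jitter shift before returning.
    return np.roll(np.roll(img, -ox, 2), -oy, 1)
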
def make_optimizer(env):
    wrapper = optimizer.OptimizerWrapper()
    lr = optimizer.make_optimizer_variable(
        'learning_rate',
        get_env('trainer.policy_learning_rate'),
        prefix='policy_')
    wrapper.set_base_optimizer(optimizer.base.AdamOptimizer(lr, epsilon=1e-3))
    wrapper.append_grad_modifier(
        optimizer.grad_modifier.LearningRateMultiplier([
            ('*/b', 2.0),
        ]))
    env.set_policy_optimizer(wrapper)

    use_linear_vr = get_env('ppo.use_linear_vr')
    if not use_linear_vr:
        wrapper = optimizer.OptimizerWrapper()
        lr = optimizer.make_optimizer_variable(
            'learning_rate',
            get_env('trainer.value_learning_rate'),
            prefix='value_')
        wrapper.set_base_optimizer(
            optimizer.base.AdamOptimizer(lr, epsilon=1e-3))
        wrapper.append_grad_modifier(
            optimizer.grad_modifier.LearningRateMultiplier([
                ('*/b', 2.0),
            ]))
        env.set_value_optimizer(wrapper)
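
Both optimizer wrappers above register a LearningRateMultiplier that doubles the effective learning rate for variables matching '*/b' (the layer biases). A rough sketch of what such a gradient modifier does, assuming fnmatch-style patterns over variable names; the actual tartist implementation may differ:

from fnmatch import fnmatch

def apply_lr_multipliers(grads_and_names, rules=(('*/b', 2.0),)):
    # Scale each gradient whose variable name matches a pattern.
    scaled = []
    for grad, name in grads_and_names:
        for pattern, mult in rules:
            if fnmatch(name, pattern):
                grad = grad * mult
                break
        scaled.append((grad, name))
    return scaled

# e.g. apply_lr_multipliers([(gw, 'fc1/W'), (gb, 'fc1/b')]) scales only gb.
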
def main_demo(env, func):
    df = iter(make_dataflow_demo(env))
    nr_samples = get_env('demo.nr_samples', 40 * 8)
    grid_desc = get_env('demo.grid_desc', ('20v', '16h'))

    while True:
        all_imgs_ab = []
        all_imgs_ba = []
        for i in range(nr_samples):
            feed_dict = next(df)
            results = func(**feed_dict)
            img_a, img_b = feed_dict['img_a'][0], feed_dict['img_b'][0]
            img_ab = results['img_ab'][0] * 255
            img_ba = results['img_ba'][0] * 255
            img_aba = results['img_aba'][0] * 255
            img_bab = results['img_bab'][0] * 255

            all_imgs_ab.append(np.hstack([img_a, img_ab]).astype('uint8'))
            all_imgs_ba.append(np.hstack([img_b, img_ba]).astype('uint8'))

        all_imgs_ab = image.image_grid(all_imgs_ab, grid_desc)
        all_imgs_ba = image.image_grid(all_imgs_ba, grid_desc)
        sep = np.ones((all_imgs_ab.shape[0], 64, 3), dtype='uint8') * 255
        all_imgs = np.hstack([all_imgs_ab, sep, all_imgs_ba])
        image.imwrite('discogan.png', all_imgs)
        image.imshow('AtoB; BtoA', all_imgs)
Example #6
def make_player(is_train=True, dump_dir=None):
    p = rl.GymRLEnviron(get_env('a3c.env_name'), dump_dir=dump_dir)
    p = rl.HistoryFrameProxyRLEnviron(p, get_env('a3c.nr_history_frames'))
    p = rl.LimitLengthProxyRLEnviron(p, get_env('a3c.max_nr_steps'))
    if is_train:
        p = rl.AutoRestartProxyRLEnviron(p)
    return p
def main_train(trainer):
    from tartist.app.rl.utils.adv import GAEComputer
    from tartist.random.sampler import SimpleBatchSampler
    trainer.set_adv_computer(
        GAEComputer(get_env('ppo.gamma'), get_env('ppo.gae.lambda')))
    trainer.set_batch_sampler(
        SimpleBatchSampler(get_env('trainer.batch_size'),
                           get_env('trainer.data_repeat')))

    # Register plugins.
    from tartist.plugins.trainer_enhancer import summary
    summary.enable_summary_history(trainer,
                                   extra_summary_types={
                                       'inference/score': 'async_scalar',
                                   })
    summary.enable_echo_summary_scalar(
        trainer, summary_spec={'inference/score': ['avg', 'max']})

    from tartist.plugins.trainer_enhancer import progress
    progress.enable_epoch_progress(trainer)

    from tartist.plugins.trainer_enhancer import snapshot
    snapshot.enable_snapshot_saver(trainer, save_interval=1)

    def on_epoch_after(trainer):
        if trainer.epoch > 0 and trainer.epoch % 2 == 0:
            main_inference_play_multithread(trainer)

    # This one should run before monitor.
    trainer.register_event('epoch:after', on_epoch_after, priority=5)

    trainer.train()
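
The GAEComputer registered above takes the discount gamma and the GAE lambda, i.e. generalized advantage estimation. Its interface is not shown here, so the following is only a sketch of the underlying recursion on plain arrays (delta_t = r_t + gamma * V_{t+1} - V_t, A_t = delta_t + gamma * lambda * A_{t+1}):

import numpy as np

def gae_advantages(rewards, values, gamma, lam):
    # values carries one extra entry, V(s_T), used to bootstrap the last step.
    T = len(rewards)
    adv = np.zeros(T, dtype='float32')
    last = 0.0
    for t in reversed(range(T)):
        delta = rewards[t] + gamma * values[t + 1] - values[t]
        last = delta + gamma * lam * last
        adv[t] = last
    return adv
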
Example #8
    def parse_history(history):
        num = len(history)
        if is_over:
            r = 0
            env.players_history[identifier] = []
        elif num == get_env('a3c.nr_td_steps') + 1:
            history, last = history[:-1], history[-1]
            r = last.value
            env.players_history[identifier] = [last]
        else:
            return

        gamma = get_env('a3c.gamma')
        for i in history[::-1]:
            r = np.clip(i.reward, -1, 1) + gamma * r
            try:
                # MJY(20170910):: No wait!!! We need post_state.
                if env.rpredictor.waiting_for_data.is_set():
                    data_queue.put_nowait({
                        'state': i.state,
                        'action': i.action,
                        'future_reward': r
                    })
                else:
                    # Still set a timeout.
                    data_queue.put(
                        {
                            'state': i.state,
                            'action': i.action,
                            'future_reward': r
                        },
                        timeout=1)
            except queue.Full:
                pass
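
parse_history folds the clipped rewards backward into an n-step return, r <- clip(reward, -1, 1) + gamma * r, bootstrapped from the value of the last (kept-back) transition. The same computation on plain arrays, as a standalone sketch:

import numpy as np

def nstep_returns(rewards, bootstrap_value, gamma):
    # Walk the history backward, accumulating discounted clipped rewards.
    r = bootstrap_value
    returns = []
    for reward in reversed(rewards):
        r = np.clip(reward, -1, 1) + gamma * r
        returns.append(r)
    return list(reversed(returns))  # aligned with the original reward order
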
def make_dataflow_train(env):
    batch_size = get_env('trainer.batch_size')
    dfs = [_make_dataflow(batch_size, use_prefetch=True) for i in range(2)]

    df = gan.GANDataFlow(dfs[0], dfs[1], get_env('trainer.nr_g_per_iter', 1),
                         get_env('trainer.nr_d_per_iter', 1))

    return df
Example #10
def make_dataflow_train(env):
    ensure_load()
    batch_size = get_env('trainer.batch_size')

    df = _mnist[0]
    df = flow.DOARandomSampleDataFlow(df)
    df = flow.BatchDataFlow(df, batch_size,
                            sample_dict={'img': np.empty(shape=(batch_size, 28, 28, 1), dtype='float32'), })
    df = gan.GANDataFlow(None, df, get_env('trainer.nr_g_per_iter', 1), get_env('trainer.nr_d_per_iter', 1))

    return df
def main_demo(env, func):
    func.compile(env.network.outputs['q_argmax'])

    dump_dir = get_env('dir.demo', os.path.join(get_env('dir.root'), 'demo'))
    logger.info('Demo dump dir: {}'.format(dump_dir))
    player = make_player(dump_dir=dump_dir)
    repeat_time = get_env('dqn.demo.nr_plays', 1)

    for i in range(repeat_time):
        player.play_one_episode(
            func=lambda state: func(state=state[np.newaxis])[0])
        logger.info('#{} play score={}'.format(i, player.stats['score'][-1]))
def main_demo(env, func):
    dump_dir = get_env('dir.demo', os.path.join(get_env('dir.root'), 'demo'))
    logger.info('Demo dump dir: {}'.format(dump_dir))
    player = make_player(dump_dir=dump_dir)
    repeat_time = get_env('cem.demo.nr_plays', 1)

    def get_action(inp, func=func):
        policy = func(state=inp[np.newaxis])['policy'][0]
        return _policy2action(policy)

    for i in range(repeat_time):
        player.play_one_episode(get_action)
        logger.info('#{} play score={}'.format(i, player.stats['score'][-1]))
Example #13
def make_dataflow_inference(env):
    ensure_load()
    batch_size = get_env('inference.batch_size')
    epoch_size = get_env('inference.epoch_size')

    df = _mnist[1]  # this is actually the validation set
    df = flow.DictOfArrayDataFlow(df)
    df = flow.tools.cycle(df)
    df = flow.BatchDataFlow(df, batch_size,
                            sample_dict={'img': np.empty(shape=(batch_size, 28, 28, 1), dtype='float32'), })
    df = flow.EpochDataFlow(df, epoch_size)

    return df
Example #14
def main_demo(env, func):
    dump_dir = get_env('dir.demo', os.path.join(get_env('dir.root'), 'demo'))
    logger.info('demo dump dir: {}'.format(dump_dir))
    player = make_player(is_train=False, dump_dir=dump_dir)
    repeat_time = get_env('a3c.demo.nr_plays', 1)

    def get_action(inp, func=func):
        action = func(**{'state': [[inp]]})['policy'][0].argmax()
        return action

    for i in range(repeat_time):
        player.play_one_episode(get_action)
        logger.info('#{} play score={}'.format(i, player.stats['score'][-1]))
def make_player(is_train=True, dump_dir=None):
    def resize_state(s):
        return image.resize(s, get_env('a3c.input_shape'), interpolation='NEAREST')

    p = rl.GymRLEnviron(get_env('a3c.env_name'), dump_dir=dump_dir)
    p = rl.MapStateProxyRLEnviron(p, resize_state)
    p = rl.HistoryFrameProxyRLEnviron(p, get_env('a3c.nr_history_frames'))

    p = rl.LimitLengthProxyRLEnviron(p, get_env('a3c.max_nr_steps'))
    if is_train:
        p = rl.AutoRestartProxyRLEnviron(p)
    else:
        p = rl.GymPreventStuckProxyRLEnviron(p, get_env('a3c.inference.max_antistuck_repeat'), 1)
    return p
def make_dataflow_train(env):
    def _outputs2action(outputs):
        return outputs['policy']

    collector = rl.train.SynchronizedExperienceCollector(
        env,
        make_player,
        _outputs2action,
        nr_workers=get_env('ppo.collector.nr_workers'),
        nr_predictors=get_env('ppo.collector.nr_workers'),
        predictor_output_names=get_env('ppo.collector.predictor_output_names'),
        mode='EPISODE-STEP')

    return rl.train.SynchronizedTrajectoryDataFlow(
        collector, target=get_env('ppo.collector.target'), incl_value=True)
Example #17
File: a3c.py Project: cosmic119/DiscoGAN
    def initialize_all_peers(self):
        nr_players = get_env('a3c.nr_players')

        self._player_master.initialize()
        self.initialize_all_variables()
        self._player_master.start(nr_players, daemon=True)
        self._inference_player_master.initialize()
Example #18
File: a3c.py Project: cosmic119/DiscoGAN
    def initialize_a3c(self):
        nr_predictors = get_env('a3c.nr_predictors')

        # making net funcs
        self._net_funcs = []
        all_devices = self.slave_devices
        if len(all_devices) == 0:
            all_devices = self.all_devices
        for i in range(nr_predictors):
            dev = all_devices[i % len(all_devices)]
            func = self._make_predictor_net_func(i, dev)
            self._net_funcs.append(func)

        self._player_master = A3CMaster(self, 'a3c-player', nr_predictors)
        self._inference_player_master = A3CMaster(self, 'a3c-inference-player', nr_predictors)
        self._data_queue = queue.Queue(get_env('trainer.batch_size') * get_env('a3c.data_queue_length_factor', 16))
Example #19
def _predictor_func(pid, router, task_queue, func, is_inference=False):
    batch_size = get_env('a3c.predictor.batch_size')
    batched_state = np.empty((batch_size, ) + get_input_shape(), dtype='float32')

    while True:
        callbacks = []
        nr_total = 0
        for i in range(batch_size):
            if i == 0 or not is_inference:
                identifier, inp, callback = task_queue.get()
            else:
                try:
                    identifier, inp, callback = task_queue.get_nowait()
                except queue.Empty:
                    break

            batched_state[i] = inp[0]
            callbacks.append(callback)
            nr_total += 1

        out = func(state=batched_state[:nr_total])
        for i in range(nr_total):
            if is_inference:
                action = out['policy'][i]
            else:
                action = sample_action(out['policy_explore'][i])

            callbacks[i](action, out['value'][i])
Example #20
def make_optimizer(env):
    wrapper = optimizer.OptimizerWrapper()
    wrapper.set_base_optimizer(optimizer.base.MomentumOptimizer(get_env('trainer.learning_rate'), 0.9))
    wrapper.append_grad_modifier(optimizer.grad_modifier.LearningRateMultiplier([
        ('*/b', 2.0),
    ]))
    env.set_optimizer(wrapper)
def _predictor_func(pid, router, task_queue, func, is_inference=False):
    batch_size = get_env('a3c.predictor.batch_size')
    batched_state = np.empty((batch_size, ) + get_input_shape(), dtype='float32')

    while True:
        callbacks = []
        nr_total = 0
        for i in range(batch_size):
            if i == 0 or not is_inference:
                identifier, inp, callback = task_queue.get()
            else:
                try:
                    identifier, inp, callback = task_queue.get_nowait()
                except queue.Empty:
                    break

            batched_state[i] = inp[0]
            callbacks.append(callback)
            nr_total += 1

        out = func(state=batched_state)
        for i in range(nr_total):
            policy = out['policy_explore'][i]
            if is_inference:
                # During inference the policy should be out['policy'][i],
                # but the two are equivalent under argmax, and only
                # 'policy_explore' is compiled as an output here.
                action = policy.argmax()
            else:
                action = random.choice(len(policy), p=policy)

            callbacks[i](action, out['value'][i])
Example #22
def sample_action(policy):
    space = get_env('a3c.actor_space')
    action = []
    for i, s in enumerate(space):
        a = random.choice(len(s), p=policy[i])
        action.append(a)
    return action
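
sample_action assumes a factored action space: 'a3c.actor_space' is a sequence of per-dimension action sets and policy holds one categorical distribution per dimension. A NumPy sketch of the same sampling; the concrete space below is invented for illustration:

import numpy as np

# Hypothetical factored space: two independent discrete action dimensions.
actor_space = [('left', 'noop', 'right'), ('jump', 'noop')]
policy = [np.array([0.2, 0.5, 0.3]), np.array([0.9, 0.1])]

# One sampled index per dimension, mirroring what sample_action returns.
action = [np.random.choice(len(s), p=policy[i]) for i, s in enumerate(actor_space)]
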
Example #23
def demo(feed_dict, result, extra_info):
    mode = get_env('demo.mode', 'vae')
    assert mode in ('vae', 'draw')

    if mode == 'vae':
        demo_vae(feed_dict, result, extra_info)
    elif mode == 'draw':
        demo_draw(feed_dict, result, extra_info)
Example #24
def make_optimizer(env):
    opt = rl.train.TRPOOptimizer(env,
                                 max_kl=get_env('trpo.max_kl'),
                                 cg_damping=get_env('trpo.cg.damping'))
    env.set_policy_optimizer(opt)

    use_linear_vr = get_env('trpo.use_linear_vr')
    if not use_linear_vr:
        wrapper = optimizer.OptimizerWrapper()
        wrapper.set_base_optimizer(
            optimizer.base.AdamOptimizer(
                get_env('trainer.value_learning_rate'), epsilon=1e-3))
        wrapper.append_grad_modifier(
            optimizer.grad_modifier.LearningRateMultiplier([
                ('*/b', 2.0),
            ]))
        env.set_value_optimizer(wrapper)
Example #25
def json_summary_enable(trainer, js_path=json_path):
    if js_path is None:
        js_path = osp.join(get_env('dir.root'), 'summary.json')
    restored = 'restore_snapshot' in trainer.runtime
    if osp.exists(js_path) and not restored:
        logger.warn('Removing old summary json: {}.'.format(js_path))
        os.remove(js_path)
    trainer.runtime['json_summary_path'] = js_path
Example #26
def main_demo(env, func):
    mode = get_env('demo.mode')
    assert mode is not None

    if mode == 'infogan':
        main_demo_infogan(env, func)
    else:
        assert False, 'Unknown mode {}'.format(mode)
def make_network(env):
    with env.create_network() as net:
        code_length = 20
        h, w, c = 28, 28, 1
        is_reconstruct = get_env('demo.is_reconstruct', False)

        dpc = env.create_dpcontroller()
        with dpc.activate():

            def inputs():
                img = O.placeholder('img', shape=(None, h, w, c))
                return [img]

            def forward(x):
                if is_reconstruct or env.phase is env.Phase.TRAIN:
                    with env.variable_scope('encoder'):
                        _ = x
                        _ = O.fc('fc1', _, 500, nonlin=O.tanh)
                        _ = O.fc('fc2', _, 500, nonlin=O.tanh)
                        mu = O.fc('fc3_mu', _, code_length)
                        log_var = O.fc('fc3_sigma', _, code_length)
                        var = O.exp(log_var)
                        std = O.sqrt(var)
                        epsilon = O.random_normal([x.shape[0], code_length])
                        z_given_x = mu + std * epsilon
                else:
                    z_given_x = O.random_normal([1, code_length])

                with env.variable_scope('decoder'):
                    _ = z_given_x
                    _ = O.fc('fc1', _, 500, nonlin=O.tanh)
                    _ = O.fc('fc2', _, 500, nonlin=O.tanh)
                    _ = O.fc('fc3', _, 784, nonlin=O.sigmoid)
                    _ = _.reshape(-1, h, w, c)
                    x_given_z = _

                if env.phase is env.Phase.TRAIN:
                    with env.variable_scope('loss'):
                        content_loss = O.raw_cross_entropy_prob(
                            'raw_content', x_given_z.flatten2(), x.flatten2())
                        content_loss = content_loss.sum(axis=1).mean(
                            name='content')
                        # distrib_loss = 0.5 * (O.sqr(mu) + O.sqr(std) - 2. * O.log(std + 1e-8) - 1.0).sum(axis=1)
                        distrib_loss = -0.5 * (1. + log_var - O.sqr(mu) -
                                               var).sum(axis=1)
                        distrib_loss = distrib_loss.mean(name='distrib')

                        loss = content_loss + distrib_loss
                    dpc.add_output(loss, name='loss', reduce_method='sum')

                dpc.add_output(x_given_z, name='output')

            dpc.set_input_maker(inputs).set_forward_func(forward)

        net.add_all_dpc_outputs(dpc, loss_name='loss')

        if env.phase is env.Phase.TRAIN:
            summary.inference.scalar('loss', net.loss)
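
The loss branch above is the standard VAE objective: a Bernoulli reconstruction term plus the closed-form KL between N(mu, var) and N(0, I). A NumPy sketch of both terms, assuming raw_cross_entropy_prob computes the usual per-pixel Bernoulli cross-entropy:

import numpy as np

def vae_losses(x, x_recon, mu, log_var, eps=1e-8):
    # Bernoulli cross-entropy, summed over pixels and averaged over the batch.
    x, x_recon = x.reshape(len(x), -1), x_recon.reshape(len(x), -1)
    content = -(x * np.log(x_recon + eps) +
                (1 - x) * np.log(1 - x_recon + eps)).sum(axis=1).mean()
    # KL(N(mu, var) || N(0, 1)) = -0.5 * sum(1 + log_var - mu^2 - var)
    distrib = (-0.5 * (1. + log_var - mu ** 2 - np.exp(log_var)).sum(axis=1)).mean()
    return content, distrib
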
Example #28
def ensure_load(cifar_num_classes):
    global _cifar

    if len(_cifar) == 0:
        for xy in load_cifar(get_env('dir.data'), cifar_num_classes):
            _cifar.append(
                dict(img=xy[0].astype('float32').reshape(
                    -1, _cifar_img_dim, _cifar_img_dim, 3),
                     label=xy[1]))
Example #29
def make_dataflow_train(env):
    num_classes = get_env('dataset.nr_classes')
    ensure_load(num_classes)
    batch_size = get_env('trainer.batch_size')

    df = _cifar[0]
    df = flow.DOARandomSampleDataFlow(df)
    df = flow.BatchDataFlow(df,
                            batch_size,
                            sample_dict={
                                'img':
                                np.empty(shape=(batch_size, _cifar_img_dim,
                                                _cifar_img_dim, 3),
                                         dtype='float32'),
                                'label':
                                np.empty(shape=(batch_size, ), dtype='int32')
                            })

    return df
Example #30
def make_rpredictor_optimizer(env):
    wrapper = optimizer.OptimizerWrapper()
    wrapper.set_base_optimizer(
        optimizer.base.AdamOptimizer(get_env('rpredictor.learning_rate'),
                                     epsilon=1e-3))
    wrapper.append_grad_modifier(
        optimizer.grad_modifier.LearningRateMultiplier([
            ('*/b', 2.0),
        ]))
    env.set_optimizer(wrapper)