Example #1
# training
with closing(ngt.make_transformer()) as transformer:

    train_computation_g = make_bound_computation(transformer,
                                                 gen_train_outputs,
                                                 gen_train_inputs)

    train_computation_d = make_bound_computation(transformer,
                                                 dis_train_outputs,
                                                 dis_train_inputs)

    train_data = {'Discriminator Cost': [],
                  'Generator Cost': [],
                  'Log Gradient Norm': []}

    progress_bar = ProgressBar(unit="iterations", ncols=100, total=args.num_iterations)

    for iteration in progress_bar(range(int(args.num_iterations))):

        # one generator update per iteration
        for iter_g in range(1):
            noise_in = noise_gen.next()
            output_g = train_computation_g({'noise': noise_in})

        # several critic/discriminator updates per generator update
        for iter_d in range(args.num_critic):
            noise_in = noise_gen.next()
            data_in = train_set.next()
            output_d = train_computation_d({'noise': noise_in, 'data': data_in})

        # save loss and gradient data to plot
        if iteration % 10 == 0:
            train_data['Discriminator Cost'].append([iteration, output_d['batch_cost']])
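
All of the examples on this page report progress the same way: build a ProgressBar with a unit, ncols, and total, wrap the loop's iterable with it, and refresh the label via set_description. That surface mirrors tqdm's, so the following is a rough, self-contained illustration of the pattern — an assumption, since these examples use their own ProgressBar class rather than tqdm directly:

from tqdm import tqdm

num_iterations = 1000
progress_bar = tqdm(range(num_iterations), unit="iterations", ncols=100,
                    total=num_iterations)

cost = 1.0
for iteration in progress_bar:
    cost *= 0.999  # stand-in for a real per-iteration cost
    progress_bar.set_description("Cost {:0.4f}".format(cost))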
Example #2
def rl_loop_train(environment, agent, episodes, render=False):
    """
    train an agent inside an environment for a set number of episodes

    This function should be common to all reinforcement learning algorithms.
    New algorithms should be written by implementing the Agent interface. An
    example of agent implementing such an interface can be found in dqn.py.
    """
    total_steps = 0
    rewards = deque(maxlen=500)
    progress_bar = ProgressBar(unit="episodes", ncols=100, total=episodes)
    for episode in progress_bar(range(episodes)):
        state = environment.reset()
        done = False
        step = 0
        total_reward = 0
        trigger_evaluation = False
        while not done:
            if render:
                environment.render()

            action = agent.act(state)
            next_state, reward, done, _ = environment.step(action)
            agent.observe_results(state, action, reward, next_state, done,
                                  total_steps)

            state = next_state
            step += 1
            total_steps += 1
            total_reward += reward

            if total_steps % 50000 == 0:
                trigger_evaluation = True

        agent.end_of_episode()
        rewards.append(total_reward)
        progress_bar.set_description("Average Reward {:0.4f}".format(
            np.mean(rewards)))
        # We would like to evaluate the model at a consistent time measured in
        # update steps, but we can't start an evaluation in the middle of an
        # episode. If enough steps have accumulated to warrant an evaluation,
        # trigger_evaluation is set above and the evaluation runs at the end
        # of the episode.
        if trigger_evaluation:
            trigger_evaluation = False

            logger.info({
                'type': 'training episode',
                'episode': episode,
                'total_steps': total_steps,
                'steps': step,
                'total_reward': total_reward,
                'running_average_reward': np.mean(rewards),
            })

            for epsilon in (0, 0.01, 0.05, 0.1):
                total_reward = evaluate_single_episode(environment, agent,
                                                       render, epsilon)

                logger.info({
                    'type': 'evaluation episode',
                    'epsilon': epsilon,
                    'total_steps': total_steps,
                    'total_reward': total_reward,
                })
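
The docstring above names the Agent interface (see dqn.py) as the extension point for new algorithms. Judging only from the methods rl_loop_train actually calls — act, observe_results, and end_of_episode — a minimal skeleton compatible with this loop could look like the sketch below; the real interface in dqn.py may carry additional methods and state.

import random


class RandomAgent:
    """A stand-in agent exposing only the methods rl_loop_train calls.

    This is a sketch inferred from the loop above, not the actual
    interface defined in dqn.py.
    """

    def __init__(self, num_actions):
        self.num_actions = num_actions

    def act(self, state):
        # Pick an action for the current state; here, uniformly at random.
        return random.randrange(self.num_actions)

    def observe_results(self, state, action, reward, next_state, done,
                        total_steps):
        # A learning agent would store this transition (e.g. in a replay
        # buffer) and update its policy here.
        pass

    def end_of_episode(self):
        # Per-episode bookkeeping, e.g. decaying an exploration schedule.
        pass

With a Gym-style environment whose step() returns the (next_state, reward, done, info) tuple used above, rl_loop_train(env, RandomAgent(env.action_space.n), episodes=10) would exercise the loop end to end.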
Example #3
        eval_computation = ng.computation(eval_output, "all")

    # Now bind the computations we are interested in
    with closing(ngt.make_transformer()) as transformer:
        train_function = transformer.add_computation(train_computation)
        if inference is True:
            eval_function = transformer.add_computation(eval_computation)

        start = time.time()
        transformer.initialize()
        stop = time.time()
        logger.debug("Initializing transformer took {} seconds".format(stop -
                                                                       start))

        progress_bar = ProgressBar(unit="batches",
                                   ncols=100,
                                   total=args.num_iterations)
        interval_cost = 0.0

        timing = list()
        for step, sample in progress_bar(enumerate(train_set)):
            feed_dict = {inputs[k]: sample[k] for k in inputs.keys()}
            start = time.time()
            [cost_val, net_val] = train_function(feed_dict=feed_dict)
            stop = time.time()
            timing.append(stop - start)
            cost_val = cost_val[()]  # pull the Python scalar out of the 0-d array

            progress_bar.set_description("Training {:0.4f}".format(cost_val))
            interval_cost += cost_val
            if (step + 1) % args.iter_interval == 0 and step > 0:
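
Stripped of the timing and progress reporting, the core of this example is the bind-then-run pattern: build an ng.computation, bind it on a transformer inside a closing() context, call initialize(), then invoke the bound function with a feed_dict. Below is a compact sketch under the assumption that loss is an ngraph Op and inputs maps names to the placeholder Ops it depends on, both built elsewhere:

from contextlib import closing

import ngraph as ng
import ngraph.transformers as ngt


def run_training(loss, inputs, train_set, num_iterations):
    # "all" is passed through exactly as in the example above.
    train_computation = ng.computation(loss, "all")
    cost_val = None
    with closing(ngt.make_transformer()) as transformer:
        train_function = transformer.add_computation(train_computation)
        transformer.initialize()
        for step, sample in enumerate(train_set):
            if step >= num_iterations:
                break
            feed_dict = {inputs[k]: sample[k] for k in inputs}
            # Returns the computed value(s) for the requested op(s).
            cost_val = train_function(feed_dict=feed_dict)
    return cost_val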