Example #1
    def evolve(self, population, save=True):
        """
        Evolve agents

        :param population:
        :type population:
        :param save: save agents weights and scores
        :type save: bool
        :return:
        """

        timestamp = datetime.now().strftime('%Y%m%d%H%M%S')
        print("Optimization - started", timestamp)

        agents = population.create_population()

        for i in range(population.max_generations):
            if not self.terminate(population, i):
                try:
                    # snapshot the weights of the current generation
                    population.agents_weights[i] = np.array(
                        [a.model.get_weights() for a in agents],
                        dtype=np.ndarray)

                    for j, agent in enumerate(agents):  # TODO parallelize
                        score = agent.run_agent()
                        population.scores[i][j] = score

                    print_scores(i + 1, population.scores[i])

                    if save and (i + 1) % 50 == 0:
                        save_results(population.agents_weights[:i],
                                     population.scores[:i], timestamp)

                    if i < population.max_generations - 1:
                        self.generate_next_generation(population=population,
                                                      generation=i)

                        # load the next generation's weights into the agents
                        for k, a in enumerate(agents):
                            a.model.set_weights(
                                population.agents_weights[i + 1][k])

                except KeyboardInterrupt:
                    LOGGER.log(environment=ENVIRONMENT.name,
                               timestamp=timestamp,
                               algorithm=self.__class__.__name__,
                               parameters=vars(self),
                               generations=i,
                               score=np.max(population.scores[i - 1]))
                    save_results(population.agents_weights[:i - 1],
                                 population.scores[:i - 1], timestamp)
                    sys.exit()

            else:
                population.agents_weights = population.agents_weights[:i]
                population.scores = population.scores[:i]
                break

        if save:
            LOGGER.log(environment=ENVIRONMENT.name,
                       timestamp=timestamp,
                       algorithm=self.__class__.__name__,
                       parameters=vars(self),
                       generations=i,
                       score=np.max(population.scores[i]))
            save_results(population.agents_weights, population.scores,
                         timestamp)

        return population.agents_weights, population.scores
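The `generate_next_generation` call above belongs to the same class and is not shown here. As a rough illustration of what one generation step can look like for weight-vector genomes, below is a generic, self-contained truncation-selection plus Gaussian-mutation sketch written against plain NumPy; the function name, parameters and selection scheme are illustrative and are not taken from this repository.

    import numpy as np

    def next_generation(weights, scores, elite_frac=0.2, sigma=0.02, rng=None):
        """Generic GA step: keep the best agents, refill with mutated copies.

        weights: (n_agents, n_params) array of flattened agent weights
        scores:  (n_agents,) array with the fitness of each agent
        """
        rng = np.random.default_rng() if rng is None else rng
        n_agents = len(weights)
        n_elite = max(1, int(elite_frac * n_agents))

        # truncation selection: indices of the best-scoring agents
        elite_idx = np.argsort(scores)[-n_elite:]
        elite = weights[elite_idx]

        # elites survive unchanged; the rest are mutated copies of random elites
        children = elite[rng.integers(0, n_elite, n_agents - n_elite)]
        children = children + sigma * rng.standard_normal(children.shape)
        return np.concatenate([elite, children], axis=0)

In the method above the weights are nested lists of per-layer arrays rather than a flat matrix, so a real implementation would either flatten and restore them or mutate layer by layer.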
Example #2
    def run(self, train=True):
        """
        Run DDPG agent

        :param train: train indicator
        :type train: bool
        :return:
        """

        timestamp = datetime.now().strftime('%Y%m%d%H%M%S')

        step = 0

        self.load_weights()

        try:

            for i in range(self.n_episodes):

                # get initial state
                state = self.environment.env.reset()

                total_reward = 0

                for j in range(self.environment.max_time):

                    self.environment.env.render()

                    action = self.actor.model.predict(
                        state.reshape(1, state.shape[0]))
                    action = self.noise.get_noisy_action(action, j)

                    new_state, reward, done, info = self.environment.env.step(
                        action[0])

                    self.buffer.add(state, action[0], reward, new_state, done)

                    s, a, r, new_s, d = self.buffer.get_batch(self.batch_size)

                    # evaluate the target critic at the target actor's action:
                    # Q'(s', mu'(s'))
                    target_q = self.critic.target_model.predict(
                        [new_s, self.actor.target_model.predict(new_s)])

                    # Bellman targets: bootstrap from the target critic
                    # unless the transition is terminal
                    y = np.copy(r)
                    for k in range(len(d)):
                        if d[k]:
                            y[k] = r[k]
                        else:
                            y[k] = r[k] + self.gamma * target_q[k]

                    if train:
                        # critic update: regress Q(s, a) towards the targets y
                        self.critic.model.train_on_batch([s, a], y)

                        # actor update: follow the critic's action gradient
                        # dQ/da evaluated at the actor's own actions
                        actions_for_grad = self.actor.model.predict(s)
                        grads = self.critic.gradients(s, actions_for_grad)
                        self.actor.train(s, grads)

                        # soft-update the target networks
                        self.actor.update_target()
                        self.critic.update_target()

                    total_reward += reward

                    if (i + 1) % 200 == 0:
                        LOGGER.log(environment=self.environment.name,
                                   timestamp=timestamp,
                                   algorithm=self.__class__.__name__,
                                   parameters=self.get_params(),
                                   total_steps=i,
                                   score=total_reward)
                        self.save_weights()

                    if done:  # or np.array_equal(np.around(new_state, 3), np.around(state, 3)):
                        previous_reward = total_reward
                        break

                    state = new_state

                    # print("Episode", i, "Step", step, "Action", action, "Reward", reward, "Loss", loss)

                    step += 1

                print(
                    "Episode: {:<5d}  Total Reward: {:<+10.3f}  Total Steps: {:<10d} "
                    " Replay Buffer size: {}".format(
                        i, total_reward, step, self.buffer.n_experiences))

        except KeyboardInterrupt:
            print("Training interrupted.")

        print("Saving weights...")
        self.save_weights()
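The `self.noise.get_noisy_action(action, j)` call relies on a noise helper that is not shown. DDPG implementations commonly add Ornstein-Uhlenbeck noise to the deterministic action; the class below is a generic, self-contained sketch of that idea. The action bounds and the unused `step` argument are assumptions, and this is not necessarily the noise model used by this agent.

    import numpy as np

    class OUNoise:
        """Ornstein-Uhlenbeck process for temporally correlated exploration."""

        def __init__(self, action_dim, mu=0.0, theta=0.15, sigma=0.2,
                     action_low=-1.0, action_high=1.0):
            self.mu = mu * np.ones(action_dim)
            self.theta = theta
            self.sigma = sigma
            self.low = action_low
            self.high = action_high
            self.state = self.mu.copy()

        def reset(self):
            self.state = self.mu.copy()

        def sample(self):
            # dx = theta * (mu - x) + sigma * N(0, 1)
            dx = self.theta * (self.mu - self.state) \
                + self.sigma * np.random.standard_normal(len(self.state))
            self.state = self.state + dx
            return self.state

        def get_noisy_action(self, action, step=None):
            # `step` only mirrors the call signature used above; this sketch
            # does not decay the noise over time
            noisy = np.asarray(action) + self.sample()
            return np.clip(noisy, self.low, self.high)

A `reset()` call at the start of every episode keeps the noise process from carrying state across episodes.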
Example #3
    def train(self):
        """
        Train BipedaWaker-v2 agent

        :return:
        """

        # get start timestamp
        timestamp = datetime.now().strftime('%Y%m%d%H%M%S')

        # saving paths
        model_path = os.path.join(td3_cfg.models_path,
                                  '{}-{}'.format(env_cfg.name,
                                                 timestamp), 'model.ckpt')
        results_path = os.path.join(td3_cfg.models_path,
                                    '{}-{}'.format(env_cfg.name,
                                                   timestamp), 'results.npy')
        distances_path = os.path.join(td3_cfg.models_path,
                                      '{}-{}'.format(env_cfg.name, timestamp),
                                      'distances.npy')
        weights_path = os.path.join(td3_cfg.models_path,
                                    '{}-{}'.format(env_cfg.name, timestamp),
                                    'weights_init_fin.npy')
        video_dir = os.path.join(td3_cfg.models_path,
                                 '{}-{}'.format(env_cfg.name,
                                                timestamp), 'video')

        # read environment information from config
        env = env_cfg.env

        # record a video every td3_cfg.record_videos episodes
        if td3_cfg.record_videos:
            env = wrappers.Monitor(
                env,
                video_dir,
                video_callable=lambda ep: ep % td3_cfg.record_videos == 0)

        # arrays with rewards, distances for later optimality analysis
        rewards = []
        distances_consecutive = np.zeros(2, dtype=np.ndarray)
        distances_init = np.zeros(2, dtype=np.ndarray)

        with tf.Session() as sess:

            # initialization

            self.agent = TD3(sess)
            saver = tf.train.Saver()
            init = tf.global_variables_initializer()
            sess.run(init)

            self.agent.initialize()
            global_step = 0
            count = 0
            eval_ep_reward = 0

            weights_init = get_actor_weights(sess)
            weights_old = weights_init
            # default so the final logging/saving below still works if
            # training is interrupted before the first episode completes
            weights = weights_init

            try:

                for i in range(td3_cfg.n_episodes):

                    s = env.reset()  # get initial state
                    ep_reward = 0
                    ep_steps = 0
                    noises = []
                    actions = []
                    done = False

                    while not done:

                        env.render()

                        # explore randomly for the first few steps of each
                        # episode, then act with the noisy policy
                        if ep_steps < 10:
                            action = self.agent.get_random_action()
                        else:
                            action, action_org, noise = self.agent.get_noisy_action(
                                s)
                            noises.append(noise)
                            actions.append(action_org)
                        action = action.squeeze()

                        s2, r, done, info = env.step(action.tolist())
                        ep_reward += r
                        ep_steps += 1
                        global_step += 1

                        # store transition in replay buffer
                        self.agent.store_experience(s, action, r, done, s2)

                        # use symmetry of leg 1 and leg 2
                        mirrored_s = mirror_state(s)
                        mirrored_s2 = mirror_state(s2)
                        mirrored_a = mirror_action(action)
                        self.agent.store_experience(mirrored_s, mirrored_a, r,
                                                    done, mirrored_s2)

                        # train agent
                        temp = self.agent.train(global_step)
                        if temp:
                            weights = temp

                        s = s2

                        if done:
                            # end of the episode
                            count = i + 1

                            # get trained weights
                            weights = get_actor_weights(sess)
                            for iw, w in enumerate(weights):
                                # compute the distances with the previous weights and the initial weights
                                con, init = compute_distance_episodes(
                                    weights_init[iw], weights_old[iw],
                                    weights[iw])
                                distances_consecutive[iw] = np.append(
                                    distances_consecutive[iw], con)
                                distances_init[iw] = np.append(
                                    distances_init[iw], init)
                            weights_old = weights

                            # evaluation
                            if count % td3_cfg.test_every == 0:
                                eval_ep_reward, eval_ep_steps = self.evaluate(
                                    env)
                                print(
                                    "Episode: {:<10d} Evaluation Reward: {:<+10.3f}  "
                                    "Total Training Steps: {:10d}".format(
                                        count, eval_ep_reward, global_step))
                                rewards.append(eval_ep_reward)

                            # saving
                            if count % td3_cfg.save_every == 0:
                                saver.save(sess, model_path, global_step=count)
                                np.save(results_path, rewards)
                                np.save(
                                    distances_path,
                                    np.vstack((distances_consecutive,
                                               distances_init)))
                                np.save(weights_path,
                                        np.append(weights_init, weights))
            except KeyboardInterrupt:
                print("Training interrupted.")

            # Finalize training and save results
            print('Total steps:', global_step)
            print("Saving results...")
            LOGGER.log(environment=env_cfg.name,
                       timestamp=timestamp,
                       algorithm=self.agent.__class__.__name__,
                       parameters=vars(td3_cfg),
                       total_steps=global_step,
                       score=eval_ep_reward)
            saver.save(sess, model_path, global_step=count)
            np.save(results_path, rewards)
            np.save(distances_path,
                    np.vstack((distances_consecutive, distances_init)))
            np.save(weights_path, np.append(weights_init, weights))

        env.close()
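`mirror_state` and `mirror_action` are imported helpers that exploit the left/right leg symmetry of the walker to double the replay data; their implementation is not shown. The sketch below is one plausible version, assuming the standard BipedalWalker-v2 layout (observation indices 4-8 describe leg 1, indices 9-13 describe leg 2, the rest is hull state and lidar; actions are ordered hip 1, knee 1, hip 2, knee 2). Verify the index layout against the environment before reusing it.

    import numpy as np

    # Assumed BipedalWalker-v2 observation layout:
    #   0-3   hull angle, angular velocity, horizontal/vertical velocity
    #   4-8   leg 1: hip angle, hip speed, knee angle, knee speed, contact
    #   9-13  leg 2: the same five values
    #   14-23 lidar readings (unchanged by mirroring)
    LEG1 = slice(4, 9)
    LEG2 = slice(9, 14)

    def mirror_state(state):
        """Swap the leg-1 and leg-2 blocks of the observation."""
        s = np.array(state, dtype=np.float32)
        leg1, leg2 = s[LEG1].copy(), s[LEG2].copy()
        s[LEG1], s[LEG2] = leg2, leg1
        return s

    def mirror_action(action):
        """Swap the (hip, knee) torque pairs of leg 1 and leg 2."""
        a = np.array(action, dtype=np.float32)
        a[0:2], a[2:4] = a[2:4].copy(), a[0:2].copy()
        return a

Storing both the original transition and its mirrored counterpart, as the training loop above does, doubles the replay buffer with physically consistent experience.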