Example 1
 def Decide(
     self,
     env: q_base.Environment,
     qfunc: q_base.QFunction,
     state: q_base.State,
     episode_idx: int,
     num_of_episodes: int,
 ) -> q_base.Action:
     factor = numpy.log(0.5) / self._decay_half_life
     e = (self._final_e +
          (self._init_e - self._final_e) * numpy.exp(factor * episode_idx))
     if numpy.random.uniform(0, 1) < e:
         choice = env.GetRandomChoice()
         logging.vlog(
             20,
             'making random decision (current e: %f) for state %s choice: %d',
             e, state, choice)
         return env.GetActionFromChoice(choice)
     else:
         return self._greedy_policy.Decide(
             env=env,
             qfunc=qfunc,
             state=state,
             episode_idx=episode_idx,
             num_of_episodes=num_of_episodes,
         )
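
A note on the snippet above: the exploration rate e decays exponentially from _init_e toward _final_e, and the factor numpy.log(0.5) / self._decay_half_life makes the remaining gap above _final_e halve every _decay_half_life episodes. A minimal standalone sketch of that schedule (the parameter values below are made up for illustration, not taken from the source):

import numpy

# Hypothetical parameters; the real ones come from the policy's constructor.
init_e, final_e, decay_half_life = 1.0, 0.05, 100

factor = numpy.log(0.5) / decay_half_life
for episode_idx in (0, 100, 200, 400):
    e = final_e + (init_e - final_e) * numpy.exp(factor * episode_idx)
    print(episode_idx, round(float(e), 4))
# Prints 0 1.0, 100 0.525, 200 0.2875, 400 0.1094: the gap above
# final_e halves every decay_half_life episodes.
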
Example 2
def DownloadTweets(
    database_file_name: str,
    query: str,
    from_date: datetime.datetime,
    to_date: datetime.datetime = None,
    number_of_tweets_per_day: int = 1000,
) -> storage.StringTable:
  """Download tweets to a database."""
  conn = storage.StringTable(database_file_name)
  if to_date is None:
    to_date = datetime.datetime.now()

  for day in date.DaysBetween(from_date, to_date):
    tweets_df = Search(
        query,
        from_date=day,
        to_date=day + relativedelta.relativedelta(days=1),
        number_of_results=number_of_tweets_per_day,
    )
    logging.vlog(
        2,
        'Processed day %s: downloaded %d tweets',
        day,
        len(tweets_df))
    values = []
    for index, row in tweets_df.iterrows():
      values.append((
          datetime.datetime.fromtimestamp(row['created_at'] / 1000),
          '',
          row['tweet'],
      ))
    conn.AddValuesFull(values)
  return conn
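
One detail worth calling out in the snippet above: row['created_at'] is treated as a millisecond epoch, so it is divided by 1000 before being handed to datetime.datetime.fromtimestamp, which expects seconds. A small self-contained sketch of that conversion (the timestamp value is invented for illustration):

import datetime

created_at_ms = 1577836800000  # hypothetical millisecond epoch from a tweet row
created_at = datetime.datetime.fromtimestamp(created_at_ms / 1000)
print(created_at)  # local-time datetime for the 2020-01-01 00:00:00 UTC epoch
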
Example 3
 def GetValues(
     self,
     states: States,
 ) -> Values:
     """Gets the Q values for states, for all actions."""
     values = self._protected_GetValues(states)
     logging.vlog(26, 'GET: (%s) -> %s', states, values)
     return values
Example 4
 def GetValues(
     self,
     states: base.States,
 ) -> Values:
     """Use Pi values to make decision."""
     self._CheckActive()
     pi_values, v = self._model.predict(states)
     logging.vlog(20, 'GET pi for state %s: %s', states, pi_values)
     return pi_values
Example 5
    def _SetValues(
        self,
        states: States,
        values: Values,
    ) -> None:
        """Sets/trains Q values for states.

    This function is the one subclass uses to update the value storage. The
    runners use UpdateValuesFromTransitions to indirectly set values.

    The number of states and values must equal. Values for all actions are
    set at the same time.
    """
        logging.vlog(26, 'SET: (%s) <- %s', states, values)
        self._protected_SetValues(states, values)
Example 6
 def Decide(
     self,
     env: q_base.Environment,
     qfunc: q_base.QFunction,
     state: q_base.State,
     episode_idx: int,
     num_of_episodes: int,
 ) -> q_base.Action:
     values = qfunc.GetValues(state)
     choice = int(numpy.argmax(values))
     logging.vlog(
         20,
         'making greedy decision for state %s using values: %s; choice: %d',
         state, values, choice)
     return env.GetActionFromChoice(choice)
Example 7
    def OnEpisodeFinishedCallback(self, env: Environment, brain: Brain,
                                  episode_idx: int, num_of_episodes: int,
                                  episode_reward: float, steps: int):
        if self._use_rewards:
            self._values.append(episode_reward)
        else:
            self._values.append(steps)

        new_value = numpy.mean(
            self._values[-self._average_over_num_of_episodes:])
        if new_value <= self._best_value:
            return

        logging.vlog(6, 'saving model for new best value: %s', new_value)
        self._best_value = new_value
        brain.Save(self._save_filepath)
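
The callback above only checkpoints the model when the mean over the last _average_over_num_of_episodes values beats the best mean seen so far. A minimal sketch of that moving-average check, with made-up numbers standing in for episode rewards:

import numpy

values = [10.0, 12.0, 9.0, 15.0, 14.0]  # hypothetical per-episode rewards
average_over_num_of_episodes = 3
best_value = 11.0

new_value = numpy.mean(values[-average_over_num_of_episodes:])
if new_value > best_value:  # mirrors the early-return guard in the callback
    best_value = new_value  # this is the point where brain.Save would run
print(best_value)  # ~12.667
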
Example 8
 def Decide(
     self,
     env: q_base.Environment,
     qfunc: q_base.QFunction,
     state: q_base.State,
     episode_idx: int,
     num_of_episodes: int,
 ) -> q_base.Action:
     if numpy.random.uniform(0, 1) < self._e:
         choice = env.GetRandomChoice()
         logging.vlog(20, 'making random decision for state %s choice: %d',
                      state, choice)
         return env.GetActionFromChoice(choice)
     else:
         return self._greedy_policy.Decide(
             env=env,
             qfunc=qfunc,
             state=state,
             episode_idx=episode_idx,
             num_of_episodes=num_of_episodes,
         )
Example 9
    def Run(
        self,
        env: Environment,
        qfunc: QFunction,
        policy: Policy,
        num_of_episodes: int,
    ):
        """Runs an agent for some episodes.

    For each episode, the environment is reset first, then run until it's
    done. Between episodes, Report function is called to give user feedback.
    """
        for episode_idx in range(num_of_episodes):
            logging.vlog(5, 'Running episode: %d', episode_idx)

            s = env.Reset()
            step_idx = 0
            episode_reward = 0.0
            while True:
                logging.vlog(15, 'Running episode: %d, step: %d', episode_idx,
                             step_idx)
                tran = env.TakeAction(
                    policy.Decide(
                        env=env,
                        qfunc=qfunc,
                        state=s,
                        episode_idx=episode_idx,
                        num_of_episodes=num_of_episodes,
                    ))
                logging.vlog(18, '%s', tran)
                self._protected_ProcessTransition(qfunc=qfunc,
                                                  transition=tran,
                                                  step_idx=step_idx)
                episode_reward += tran.r
                s = tran.sp
                if tran.sp is None:
                    break
                step_idx += 1

            # Handle callback functions.
            for reporter in self._callbacks:
                reporter.OnEpisodeFinishedCallback(
                    env=env,
                    qfunc=qfunc,
                    episode_idx=episode_idx,
                    num_of_episodes=num_of_episodes,
                    episode_reward=episode_reward,
                    steps=step_idx,
                )

        # All runs finished.
        for reporter in self._callbacks:
            reporter.OnCompletionCallback(
                env=env,
                qfunc=qfunc,
                num_of_episodes=num_of_episodes,
            )
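
The Run loop above relies on the convention that env.TakeAction returns a transition whose sp field is None once the episode has terminated, which is what breaks the inner while loop. A self-contained toy that mimics that control flow (the _CountdownEnv class and Transition tuple below are hypothetical stand-ins, not part of the library):

import collections

Transition = collections.namedtuple('Transition', ['s', 'a', 'r', 'sp'])

class _CountdownEnv:
    """Toy environment whose state counts down to zero, then the episode ends."""

    def Reset(self):
        self._state = 3
        return self._state

    def TakeAction(self, action):
        s = self._state
        self._state -= 1
        sp = self._state if self._state > 0 else None  # None marks termination
        return Transition(s=s, a=action, r=1.0, sp=sp)

env = _CountdownEnv()
s = env.Reset()
episode_reward, step_idx = 0.0, 0
while True:
    tran = env.TakeAction('noop')
    episode_reward += tran.r
    s = tran.sp
    if tran.sp is None:
        break
    step_idx += 1
print(episode_reward, step_idx)  # 3.0 2
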
Example 10
    def Run(
        self,
        envs: t.Iterable[base.Environment],
        brain: base.Brain,
        policy: base.Policy,
        num_of_episodes: int,
    ):
        """Runs an agent for some episodes.

    For each episode, the environment is reset first, then run until it's
    done. Between episodes, Report function is called to give user feedback.
    """
        envs_list = list(envs)
        for episode_idx in range(num_of_episodes):
            logging.vlog(10, 'Running episode: %d', episode_idx)

            queue = [(env, env.Reset()) for env in envs_list]
            step_idx = 0
            episode_reward = 0.0
            while queue:
                env, s = queue.pop(0)
                logging.vlog(20,
                             'Running environment %s: episode: %d, step: %d',
                             env, episode_idx, step_idx)
                tran = env.TakeAction(
                    policy.Decide(
                        env=env,
                        brain=brain,
                        state=s,
                        episode_idx=episode_idx,
                        num_of_episodes=num_of_episodes,
                    ))
                logging.vlog(26, '%s', tran)
                self._protected_ProcessTransition(brain=brain,
                                                  transition=tran,
                                                  step_idx=step_idx)
                episode_reward += tran.r
                if tran.sp is not None:
                    queue.append((env, tran.sp))
                step_idx += 1

            # Handle callback functions.
            for reporter in self._callbacks:
                reporter.OnEpisodeFinishedCallback(
                    env=None,
                    brain=brain,
                    episode_idx=episode_idx,
                    num_of_episodes=num_of_episodes,
                    episode_reward=episode_reward / len(envs_list),
                    steps=float(step_idx) / len(envs_list),
                )

        # All runs finished.
        for reporter in self._callbacks:
            reporter.OnCompletionCallback()