def __init__(self, task, alpha=1.0, gamma=0.80, inv_temp=10.0, stop_criterion=0.001,
             mapping_prior=0.001, goal_prior=0.001):
    """Set up the agent with one goal hypothesis and one mapping hypothesis.

    Args:
        task: Forwarded to the parent initializer. ``self.task`` is read
            afterwards for ``n_goals``, ``n_primitive_actions`` and
            ``n_abstract_actions``.
        alpha: Stored as ``self.alpha`` and handed to both hypothesis
            constructors (presumably a clustering concentration
            parameter -- confirm against the hypothesis classes).
        gamma: Stored as ``self.gamma`` (presumably a discount factor).
        inv_temp: Stored as ``self.inv_temp`` (presumably a softmax
            inverse temperature).
        stop_criterion: Stored as ``self.stop_criterion`` (presumably a
            convergence threshold).
        mapping_prior: Last argument given to ``MappingHypothesis``.
        goal_prior: Last argument given to ``GoalHypothesis``.
    """
    super(JointClusteringAgent, self).__init__(task)

    # Hyperparameters kept on the instance.
    self.alpha = alpha
    self.gamma, self.inv_temp, self.stop_criterion = gamma, inv_temp, stop_criterion

    # The hypothesis space starts out with a single hypothesis of each kind;
    # it can be augmented as new contexts are encountered.
    task_spec = self.task
    first_goal_hypothesis = GoalHypothesis(task_spec.n_goals, alpha, goal_prior)
    first_mapping_hypothesis = MappingHypothesis(
        task_spec.n_primitive_actions,
        task_spec.n_abstract_actions,
        alpha,
        mapping_prior,
    )
    self.goal_hypotheses = [first_goal_hypothesis]
    self.mapping_hypotheses = [first_mapping_hypothesis]

    # Belief spaces: three independent length-1 float arrays of zeros.
    self.log_belief, self.map_likelihood, self.goal_likelihood = (
        np.zeros(1, dtype=float) for _ in range(3)
    )
def __init__(self, task, var_i=0.1, var_e=0.1, ucb_weight=1.0, gamma=0.80, inv_temp=10.0,
             stop_criterion=0.001, mapping_prior=0.001, goal_prior=0.001):
    """Set up the agent with one mapping hypothesis and empty per-goal tables.

    Args:
        task: Forwarded to the parent initializer. ``self.task`` is read
            afterwards for the action counts and ``n_goals``.
        var_i: Stored as ``self.var_i`` (innovation gain).
        var_e: Stored as ``self.var_e``.
        ucb_weight: Stored as ``self.ucb_weight``.
        gamma: Stored as ``self.gamma`` (presumably a discount factor).
        inv_temp: Stored as ``self.inv_temp`` (presumably a softmax
            inverse temperature).
        stop_criterion: Stored as ``self.stop_criterion`` (presumably a
            convergence threshold).
        mapping_prior: Last argument given to ``MappingHypothesis``.
        goal_prior: Accepted but not used by this initializer.
    """
    # NOTE(review): the explicit ``FlatAgent`` argument is kept exactly as
    # written; it selects the initializer that follows FlatAgent in the MRO.
    super(FlatAgent, self).__init__(task)

    self.var_i, self.var_e = var_i, var_e
    self.ucb_weight = ucb_weight
    self.gamma, self.inv_temp, self.stop_criterion = gamma, inv_temp, stop_criterion

    # Hypothesis space: a single mapping hypothesis built with a fixed 1.0
    # as its third argument.
    task_spec = self.task
    first_mapping_hypothesis = MappingHypothesis(
        task_spec.n_primitive_actions,
        task_spec.n_abstract_actions,
        1.0,
        mapping_prior,
    )
    self.mapping_hypotheses = [first_mapping_hypothesis]

    # Belief space: only a mapping belief is created here, as a length-1
    # float array of ones.
    self.log_belief_map = np.ones(1, dtype=float)

    # Per-goal tables start out empty.
    self.mus = {}
    self.sigmas = {}
    self.n_goals = task_spec.n_goals
def __init__(self, task, gamma=0.80, inv_temp=10.0, stop_criterion=0.001,
             mapping_prior=0.001, goal_prior=0.001, epsilon=0.02):
    """Set up the agent with one goal hypothesis and one mapping hypothesis.

    Args:
        task: Forwarded to the parent initializer. ``self.task`` is read
            afterwards for ``n_goals`` and the action counts.
        gamma: Stored as ``self.gamma`` (presumably a discount factor).
        inv_temp: Stored as ``self.inv_temp`` (presumably a softmax
            inverse temperature).
        stop_criterion: Stored as ``self.stop_criterion`` (presumably a
            convergence threshold).
        mapping_prior: Last argument given to ``MappingHypothesis``.
        goal_prior: Last argument given to ``GoalHypothesis``.
        epsilon: Accepted but not used by this initializer.
    """
    super(FlatAgent, self).__init__(task)

    self.gamma, self.inv_temp, self.stop_criterion = gamma, inv_temp, stop_criterion

    # Hypothesis space: one hypothesis of each kind, both built with a
    # fixed 1.0 as their second-to-last argument.
    task_spec = self.task
    first_goal_hypothesis = GoalHypothesis(task_spec.n_goals, 1.0, goal_prior)
    first_mapping_hypothesis = MappingHypothesis(
        task_spec.n_primitive_actions,
        task_spec.n_abstract_actions,
        1.0,
        mapping_prior,
    )
    self.goal_hypotheses = [first_goal_hypothesis]
    self.mapping_hypotheses = [first_mapping_hypothesis]

    # Belief spaces: two independent length-1 float arrays of ones.
    self.log_belief_goal, self.log_belief_map = (
        np.ones(1, dtype=float) for _ in range(2)
    )
def __init__(self, task, lr=0.1, gamma=0.80, inv_temp=10.0, stop_criterion=0.001,
             mapping_prior=0.001, goal_prior=0.001):
    """Set up the agent with one mapping hypothesis and uniform goal q-values.

    Args:
        task: Forwarded to the parent initializer. ``self.task`` is read
            afterwards for the action counts and ``n_goals``.
        lr: Stored as ``self.lr`` (presumably the learning rate).
        gamma: Stored as ``self.gamma`` (presumably a discount factor).
        inv_temp: Stored as ``self.inv_temp`` (presumably a softmax
            inverse temperature).
        stop_criterion: Stored as ``self.stop_criterion`` (presumably a
            convergence threshold).
        mapping_prior: Last argument given to ``MappingHypothesis``.
        goal_prior: Accepted but not used by this initializer.
    """
    super(NoCTX_QLearningAgent, self).__init__(task)

    self.lr = lr
    self.gamma, self.inv_temp, self.stop_criterion = gamma, inv_temp, stop_criterion

    # Hypothesis space: a single mapping hypothesis built with a fixed 1.0
    # as its third argument.
    task_spec = self.task
    first_mapping_hypothesis = MappingHypothesis(
        task_spec.n_primitive_actions,
        task_spec.n_abstract_actions,
        1.0,
        mapping_prior,
    )
    self.mapping_hypotheses = [first_mapping_hypothesis]

    # Belief space: only a mapping belief is created here, as a length-1
    # float array of ones.
    self.log_belief_map = np.ones(1, dtype=float)

    # q-values: a float array with one entry per goal, each 1 / n_goals.
    goal_count = task_spec.n_goals
    self.q = np.ones(goal_count, dtype=float) / float(goal_count)
    self.n_goals = goal_count