def __init__(self, morl_problem, gamma=0.9):
    """
    Prepare the bookkeeping structures for the given problem.

    :param morl_problem: problem we train on; its n_states, n_actions and
        reward_dimension attributes are read here
    :param gamma: discount factor
    """
    n_states = morl_problem.n_states
    n_actions = morl_problem.n_actions
    reward_dim = morl_problem.reward_dimension

    self._problem = morl_problem
    self._gamma = gamma
    self._q_shape = (n_states, n_actions, reward_dim)

    # Give every (state, action) pair a consecutive flat index, walking the
    # states in the outer loop and the actions in the inner one.
    pairs = [(state, action)
             for state in xrange(n_states)
             for action in xrange(n_actions)]
    self.s_a_mapping = {pair: index for index, pair in enumerate(pairs)}

    # One entry per (state, action) pair; each entry starts out as a list
    # holding a single all-zero reward vector.
    self._Q_sets = [[[0] * reward_dim] for _ in xrange(len(self.s_a_mapping))]
    # Same layout, but one entry per state.
    self._V = [[[0] * reward_dim] for _ in xrange(n_states)]

    # Reference point (-1.0 in every reward dimension) for the hypervolume
    # calculator.
    reference_point = [-1.0] * reward_dim
    self.hv_calculator = HyperVolumeCalculator(reference_point)

    # Scalar table with one cell per (state, action) pair.
    self._Q = np.zeros((n_states, n_actions))
def setUp(self):
    """Create the reference points, random sample sets and calculators used by the tests."""
    # reference points for the two- and three-dimensional case
    self.ref_point2d = [0.1] * 2
    self.ref_point3d = [0.1] * 3

    # sample sets: 70 points with 2 coordinates and 100 points with 3
    # coordinates, every coordinate taken from random.random()
    self.set2d = np.zeros((70, 2))
    self.set3d = np.zeros((100, 3))
    # Fill the 2-d set first and walk each set row by row, so the values
    # are drawn from the random generator in a fixed order.
    for samples in (self.set2d, self.set3d):
        n_points, n_dims = samples.shape
        for row in range(n_points):
            for col in range(n_dims):
                samples[row, col] = random.random()

    # one calculator per reference point
    self.hv_2d_calc = HyperVolumeCalculator(self.ref_point2d)
    self.hv_3d_calc = HyperVolumeCalculator(self.ref_point3d)
# NOTE(review): `hypervolume` is not used in the part of the script visible
# here - confirm it is needed further down before keeping the import.
from inspyred.ec.analysis import hypervolume
import matplotlib as mpl

# Script: draws `count` random 2-d points, extracts their Pareto front with
# HyperVolumeCalculator, computes its hypervolume and starts a figure.
# NOTE(review): this excerpt is cut off inside the final plt.axis(...) call;
# the remainder of the script is not visible here.
if __name__ == '__main__':
    # LaTeX text rendering with STIX fonts for the figure
    mpl.rc('text', usetex=True)
    mpl.rcParams['mathtext.fontset'] = 'stix'
    mpl.rcParams['font.family'] = 'STIXGeneral'
    # number of random points
    count = 20
    # fixed seed so the same points are drawn on every run
    random.seed(18.9654)
    ref_point2d = [0.0, 0.3]
    set2d = np.zeros((count, 2))
    for i in range(count):
        for u in range(2):
            rand = random.random()
            # NOTE(review): `rand > ref_point2d` compares a float with the
            # whole list. Python 3 raises TypeError for that; CPython 2
            # presumably evaluates it to False, leaving `rand > 0.3` as the
            # only effective test. `ref_point2d[u]` may have been intended -
            # confirm before changing, since it alters the generated points.
            set2d[i, u] = rand if (rand > ref_point2d) or (
                rand > 0.3) else random.random()
    hv_2d_calc = HyperVolumeCalculator(ref_point2d)
    # Pareto front of the random set and its hypervolume
    pf = hv_2d_calc.extract_front(set2d)
    hv = hv_2d_calc.compute_hv(pf)
    # figure width in inches; presumably 0.48 of a document text width - verify
    size = 0.48 * 5.8091048611149611602
    fig = plt.figure(figsize=[size, 0.75 * size])
    # overrides the height given to plt.figure above (0.7 instead of 0.75)
    fig.set_size_inches(size, 0.7 * size)
    ax = fig.add_subplot(111)
    ax.set_axisbelow(True)
    ###########################################################################################
    plt.axis(
        [-0.06, max(set2d[:, 0] * 1.06), 0 - 0.18,
# epsilon = propability that the agent choses a greedy action instead of a random action epsilon = 0.9 # learning rate alfah = 0.65 # how many interactions should the action train per weight? interactions = 1000 # how many episodes in one interactions should be taken? (if the problem gets in a terminal state, it will be # interrupted anyway (episodes = steps in the environment = actions) max_per_interaction = 100 # count of final weighted average reward that don't differ from the last ones to interrupt and signify converge converging_criterium = 20 ref = [ -0.001, ] * problem.reward_dimension # we want to evaluate both policy set with hypervolume indicator hv_calculator = HyperVolumeCalculator(ref) # create agent # agent = MultipleCriteriaH(problem, n_vectors, delta, epsilon, alfa, interactions, max_per_interaction, # converging_criterium) agent = MultipleCriteriaH(problem, n_vectors, deltah, epsilon, alfah, interactions, max_per_interaction, converging_criterium) # start the training agent.weight_training() hvs = [] rho = [i for i in agent.rhos.values()] pf = [] for i in xrange(len(rho)): pf.append(rho[i])
from morlbench.helpers import HyperVolumeCalculator
import matplotlib as mpl

# Script: draws random 2-d points, extracts their Pareto front with
# HyperVolumeCalculator and plots all points (red) with the front (blue).
if __name__ == '__main__':
    # fixed seed so the same points are drawn on every run
    random.seed(3323)
    # LaTeX text rendering with STIX fonts for the figure
    mpl.rc('text', usetex=True)
    mpl.rcParams['mathtext.fontset'] = 'stix'
    mpl.rcParams['font.family'] = 'STIXGeneral'

    # Number of random points. This used to be called `u`, which the inner
    # loop variable below silently overwrote.
    count = 20
    ref_point2d = [0.1, 0.1]
    set2d = np.zeros((count, 2))
    for i in range(count):
        for u in range(2):
            rand = random.random()
            # Keep the draw if it lies above the reference point in this
            # dimension, otherwise draw once more. The comparison has to use
            # the matching coordinate: comparing the float with the whole
            # list raises TypeError on Python 3 and is never true on
            # CPython 2, which threw away every first draw.
            set2d[i, u] = rand if rand > ref_point2d[u] else random.random()

    hv_2d_calc = HyperVolumeCalculator(ref_point2d)
    pf = hv_2d_calc.extract_front(set2d)

    # figure width in inches; presumably 0.48 of a document text width - verify
    size = 0.48 * 5.8091048611149611602
    fig = plt.figure(figsize=[size, 0.75 * size])
    # overrides the height given to plt.figure above (0.7 instead of 0.75)
    fig.set_size_inches(size, 0.7 * size)
    ax = fig.add_subplot(1, 1, 1)
    # axis limits: a small margin below zero and some headroom above the
    # largest coordinate in each direction
    plt.axis(
        [0 - 0.1, max(set2d[:, 0] * 1.21), 0 - 0.1,
         max(set2d[:, 1] * 1.1)])
    plt.setp(ax.get_xticklabels(), fontsize=9)
    plt.setp(ax.get_yticklabels(), fontsize=9)

    # x and y coordinates of the points on the front
    pfx = [point[0] for point in pf]
    pfy = [point[1] for point in pf]
    # all points in red, front points drawn over them in blue
    plt.plot(set2d[:, 0], set2d[:, 1], 'ro', markersize=4)
    plt.plot(pfx, pfy, 'bo', markersize=4)
import matplotlib.pyplot as plt

# Script: trains two MORLScalarizingAgent instances with identical settings
# on the gridworld - one without an explicit `function`, one with
# function='linear'.
if __name__ == '__main__':
    # the problem both agents interact with
    problem = MORLGridworld()

    # weight vector handed to both agents
    scalarization_weights = [1.0, 0.0, 0.0]
    # tau is for the chebyshev agent
    tau = 4.0
    # learning rate
    alfacheb = 0.11
    # probability used for the epsilon greedy selection
    eps = 0.1

    # reference point (-1.0 per reward dimension), given to the hypervolume
    # calculator and to both agents
    ref = [-1.0] * problem.reward_dimension
    hv_calc = HyperVolumeCalculator(ref)

    # keyword arguments that are the same for both agents
    agent_settings = dict(epsilon=eps, alpha=alfacheb, scalarization_weights=scalarization_weights,
                          ref_point=ref, tau=tau)
    # agent using the chebyshev scalarization method (no `function` passed;
    # presumably chebyshev is the constructor's default - verify)
    chebyagent = MORLScalarizingAgent(problem, **agent_settings)
    # agent using linear scalarization
    linearagent = MORLScalarizingAgent(problem, function='linear', **agent_settings)

    # both agents interact this many times, chebyshev agent first
    interactions = 1000
    c_payouts, c_moves, c_states = morl_interact_multiple_episodic(
        chebyagent, problem, interactions, max_episode_length=150)
    l_payouts, l_moves, l_states = morl_interact_multiple_episodic(
        linearagent, problem, interactions, max_episode_length=150)