def GetOption(mdp, k=1, sample=False, matrix=None, intToS=None,
              method='eigen', option_type='subgoal'):
    if matrix is not None:
        A = matrix
    elif sample:
        A, intToS = GetIncidenceMatrix(mdp)
    else:
        A, intToS = GetAdjacencyMatrix(mdp)

    if method == 'eigen':
        B, options, vectors = Eigenoptions(A, k)
    elif method == 'fiedler':
        B, options, _, vectors = FiedlerOptions(A, k)
    elif method == 'bet':
        # TODO: B is empty.
        B, options, vectors = BetweennessOptions(A, k)

    if option_type != 'subgoal':
        return B, options, intToS, vectors

    # Translate the option endpoints from matrix indices back to MDP states.
    # Each slot needs its own list; `[[]] * n` would alias one shared list.
    egoal_list = [[] for _ in range(len(options) * 2)]
    for i, o in enumerate(options):
        if type(o[0]) is list:
            for ss in o[0]:
                egoal_list[i * 2].append(intToS[ss])
            for ss in o[1]:
                egoal_list[i * 2 + 1].append(intToS[ss])
        else:
            egoal_list[i * 2] = [intToS[o[0]]]
            egoal_list[i * 2 + 1] = [intToS[o[1]]]

    # Likewise, each option needs its own state-to-value dictionary.
    evector_list = [dict() for _ in range(len(options) * 2)]
    for i, o in enumerate(options):
        for j in intToS.keys():
            evector_list[i * 2][intToS[j]] = -vectors[i][j]
            evector_list[i * 2 + 1][intToS[j]] = vectors[i][j]
            # TODO: Why were we indexing by hash before?
            # evector_list[i * 2][hash(intToS[j])] = -vectors[i][j]
            # evector_list[i * 2 + 1][hash(intToS[j])] = vectors[i][j]

    return B, egoal_list, intToS, evector_list
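# Usage sketch (illustrative only, not called anywhere; assumes a GridWorldMDP
# built with make_grid_world_from_file as in the tests below). With
# option_type='subgoal' the goal list and vector list hold two entries per
# option: slots 2*i and 2*i+1 are the option's two subgoal state sets, and the
# matching dictionaries map each known state to -/+ the i-th vector value.
def _example_get_option_usage(mdp):
    _, goal_sets, intToS, vec_dicts = GetOption(mdp, k=4, method='fiedler',
                                                option_type='subgoal')
    for i in range(len(goal_sets) // 2):
        print('option', i, ':', len(goal_sets[2 * i]), 'and',
              len(goal_sets[2 * i + 1]), 'subgoal states')

    # Any other option_type returns the raw index pairs and vectors unchanged.
    _, index_pairs, _, vectors = GetOption(mdp, k=4, method='eigen',
                                           option_type='point')
    return goal_sets, index_pairs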
def test_utility(args, mdp):
    # Effect of the number of options on performance.
    # TODO: Compare the utility of point options vs. subgoal options?
    now_ts = str(datetime.now().timestamp())

    origMatrix, intToS = GetAdjacencyMatrix(mdp)
    known_region = list(intToS.values())  # The known region is a set of MDPStates.

    n_ops_list = [2, 4, 8, 16, 32]

    agents = []
    ql_agent = QLearningAgent(actions=mdp.get_actions())
    agents.append(ql_agent)

    method = 'fiedler'
    for n_ops in n_ops_list:
        _, foptions, _, fvectors = GetOption(mdp, n_ops, matrix=origMatrix,
                                             intToS=intToS,
                                             option_type=args.optiontype,
                                             method=method)
        print('#options=', n_ops)
        print(foptions)
        if args.optiontype == 'subgoal':
            known_region = list(intToS.values())
            eigenoption_agent = build_subgoal_option_agent(
                mdp, foptions, known_region, vectors=fvectors,
                name='-' + method + '-' + args.optiontype + '-' + str(n_ops))
        else:
            eigenoption_agent = build_point_option_agent(
                mdp, foptions, agent=QLearningAgent, policy='vi',
                name='-' + method + '-' + args.optiontype + '-' + str(n_ops))
        agents.append(eigenoption_agent)

    run_agents_on_mdp(agents, mdp,
                      instances=args.ninstances,
                      episodes=args.nepisodes,
                      steps=args.nsteps,
                      open_plot=True,
                      track_disc_reward=True,
                      cumulative_plot=True,
                      dir_for_plot="results/")
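# A minimal sketch of the argument namespace test_utility expects. The real
# experiment script presumably builds this with argparse; the values below are
# placeholders, not the settings of any reported run.
def _example_test_utility_args():
    from argparse import Namespace
    return Namespace(optiontype='subgoal',  # or 'point'
                     ninstances=5,          # independent runs per agent
                     nepisodes=100,         # episodes per run
                     nsteps=1000)           # steps per episode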
def TestMatching():
    domain = '5x5grid'
    fname = '../tasks/' + domain + '.txt'
    mdp = make_grid_world_from_file(fname)
    G, intToS = GetAdjacencyMatrix(mdp)
    c = GetCost(G)
    matrix, F, LB = MinimumWeightMatching(G, c)
    print('F\'=', F)
    print('LB=', LB)

    # Draw the matching edges, placing each node at its grid coordinates.
    Gnx = nx.from_edgelist(F)
    dic = dict()
    for i, s in intToS.items():
        dic[i] = (s.x, s.y)
    nx.draw_networkx_nodes(Gnx, pos=dic, node_size=300, node_color='g')
    nx.draw_networkx_edges(Gnx, pos=dic)
    plt.savefig('Matching.pdf')
if __name__ == "__main__":
    TestMatching()
    exit(0)

    # domain = '5x5grid'
    # goals = [(1, 5), (1, 1), (5, 5), (3, 3), (5, 1)]
    domain = '9x9grid'
    goals = [(1, 1), (1, 9), (9, 1), (9, 9), (5, 5)]
    # domain = 'fourroom'
    # goals = [(1, 1), (1, 11), (11, 1), (11, 11), (5, 5), (8, 7), (5, 7)]

    fname = '../../tasks/' + domain + '.txt'
    mdp = make_grid_world_from_file(fname)
    G, intToS = GetAdjacencyMatrix(mdp)
    c = np.ones_like(G, dtype=int)
    d = GetCost(G)

    # TODO
    K = StatesToArray(intToS, goals)
    # K = np.random.binomial(n=1, p=0.2, size=G.shape[0])
    print('K=', K)

    D = 15
    tree, options = DiameterConstrainedSteinerTree(G, c, d, K, D, 0.1)
    print('tree', tree)
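# StatesToArray is assumed (hypothetically) to build a 0/1 indicator over the
# graph nodes marking the goal states; this sketch, not called anywhere, shows
# an equivalent built directly from intToS and the (x, y) goal list above.
def _example_states_to_array(intToS, goals, n_nodes):
    K = np.zeros(n_nodes, dtype=int)
    for i, s in intToS.items():  # node index -> MDP state
        if (s.x, s.y) in goals:
            K[i] = 1
    return K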
def test_offline_agent(args, mdp):
    '''Generate options offline, then visualize and evaluate them.'''
    #########################
    # Parameters for the offline option generation
    #########################
    # Incidence matrix sampling
    smp_n_traj = args.nsepisodes
    smp_steps = args.nssteps
    # Option policy learning
    op_n_episodes = args.noepisodes
    op_n_steps = args.nosteps
    # Final evaluation step
    n_episodes = args.nepisodes
    n_steps = args.nsteps
    n_instances = args.ninstances

    n_options = args.noptions
    option_type = args.optiontype

    now = datetime.now()
    now_ts = str(now.timestamp())

    if args.incidence:
        origMatrix, intToS = GetIncidenceMatrix(mdp, n_traj=smp_n_traj,
                                                eps_len=smp_steps)
    else:
        origMatrix, intToS = GetAdjacencyMatrix(mdp)

    fiedlerMatrix, foptions, _, fvectors = GetOption(mdp, n_options,
                                                     matrix=origMatrix,
                                                     intToS=intToS,
                                                     option_type=option_type,
                                                     method='fiedler')
    eigenMatrix, eoptions, _, evectors = GetOption(mdp, n_options,
                                                   matrix=origMatrix,
                                                   intToS=intToS,
                                                   option_type=option_type,
                                                   method='eigen')
    _, boptions, _, bvectors = GetOption(mdp, n_options,
                                         matrix=origMatrix,
                                         intToS=intToS,
                                         option_type=option_type,
                                         method='bet')

    ######################################
    # Use the options for learning
    ######################################
    # TODO: Generate a set of MDPs with each goal set to a subgoal
    #       discovered by the algorithm.

    # Compute a value map from the first Fiedler vector
    # (distance of each cell's value from the goal cell's value).
    vec = fvectors[0]

    def ffunc(x, y):
        # Return the vector value of the state at grid position (x, y).
        for key, item in vec.items():
            if key.x == x and key.y == y:
                return item
        return 0.0

    xr = mdp.width
    yr = mdp.height
    val = np.zeros((yr, xr))
    for x in range(xr):
        for y in range(yr):
            val[y][x] = ffunc(x + 1, y + 1)

    gpos = mdp.goal_locs[0]
    gval = val[gpos[1] - 1][gpos[0] - 1]
    for x in range(xr):
        for y in range(yr):
            val[y][x] = abs(gval - val[y][x])

    euclid = False
    if euclid:
        # Euclidean distance to the goal instead of the vector-value distance.
        for x in range(xr):
            for y in range(yr):
                val[y][x] = ((x - gpos[0] + 1)**2 + (y - gpos[1] + 1)**2)**0.5
    print('val=', val)

    maxval = np.amax(val)
    minval = np.amin(val)

    cmap = matplotlib.cm.get_cmap('Blues')
    norm = matplotlib.colors.Normalize(vmin=minval, vmax=maxval)
    rgba_ = cmap(norm(val))
    # Start from a white grid; only walls (black) and the goal (white) are drawn.
    rgba = np.ones_like(rgba_)
    for w in mdp.walls:
        rgba[w[1] - 1, w[0] - 1, :3] = 0, 0, 0
    rgba[gpos[1] - 1, gpos[0] - 1, :3] = 1, 1, 1

    plt.imshow(rgba, interpolation='nearest')

    if euclid:
        plt.savefig('euclid.pdf', bbox_inches='tight', pad_inches=0)
    else:
        plt.savefig('eigenfunc.pdf', bbox_inches='tight', pad_inches=0)
    ###############################################################
    # The evaluation below is currently disabled.
    ###############################################################
    exit(0)

    #################################
    # Point options
    #################################
    # eigenoption_agent = build_point_option_agent(mdp, eoptions, name='-eigen-point')
    # fiedleroption_agent = build_point_option_agent(mdp, foptions, name='-fiedler-point')

    #################################
    # Subgoal options
    #################################
    if option_type == 'subgoal':
        # The known region is a set of MDPStates.
        # TODO: How is the state represented in intToS?
        known_region = list(intToS.values())
        eigenoption_agent = build_subgoal_option_agent(mdp, eoptions,
                                                       known_region,
                                                       vectors=evectors,
                                                       name='-eigen',
                                                       n_trajs=op_n_episodes,
                                                       n_steps=op_n_steps)
        fiedleroption_agent = build_subgoal_option_agent(mdp, foptions,
                                                         known_region,
                                                         vectors=fvectors,
                                                         name='-fiedler',
                                                         n_trajs=op_n_episodes,
                                                         n_steps=op_n_steps)
        betoption_agent = build_subgoal_option_agent(mdp, boptions,
                                                     known_region,
                                                     vectors=bvectors,
                                                     name='-bet',
                                                     n_trajs=op_n_episodes,
                                                     n_steps=op_n_steps)
    else:
        eigenoption_agent = build_point_option_agent(mdp, eoptions,
                                                     agent=QLearningAgent,
                                                     policy='vi',
                                                     name='-eigen')
        fiedleroption_agent = build_point_option_agent(mdp, foptions,
                                                       agent=QLearningAgent,
                                                       policy='vi',
                                                       name='-fiedler')
        betoption_agent = build_point_option_agent(mdp, boptions,
                                                   agent=QLearningAgent,
                                                   policy='vi',
                                                   name='-bet')

    ql_agent = QLearningAgent(actions=mdp.get_actions(), default_q=1.0)
    rand_agent = RandomAgent(mdp.get_actions())

    run_agents_on_mdp([fiedleroption_agent, eigenoption_agent, betoption_agent,
                       ql_agent, rand_agent],
                      mdp,
                      instances=n_instances,
                      episodes=n_episodes,
                      steps=n_steps,
                      open_plot=True,
                      cumulative_plot=True,
                      track_disc_reward=True,
                      dir_for_plot="results/",
                      reset_at_terminal=False)
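# A possible variant of the rendering step in test_offline_agent: that function
# computes cmap(norm(val)) but then paints a white grid (only walls and the
# goal get a color). This sketch, not called anywhere and relying on the
# module's existing matplotlib/plt imports, shows how the value heat map itself
# could be drawn with the same names (val, cmap, norm, mdp); the output file
# name is a placeholder.
def _example_plot_value_heatmap(val, cmap, norm, mdp, fname='value_heatmap.pdf'):
    rgba = cmap(norm(val))                       # color each cell by its value
    for w in mdp.walls:                          # paint walls black
        rgba[w[1] - 1, w[0] - 1, :3] = 0, 0, 0
    plt.imshow(rgba, interpolation='nearest')
    plt.colorbar(matplotlib.cm.ScalarMappable(norm=norm, cmap=cmap))
    plt.savefig(fname, bbox_inches='tight', pad_inches=0)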
def GetGraphDrawingOptions(mdp, k=1):
    A, intToS = GetAdjacencyMatrix(mdp)
    B, options = GraphDrawingOptions(A, k)
    return B, options, intToS