def calculate_values(grid, max_iterations=4):
    """Approximate the optimal state-value function by value iteration.

    Repeatedly applies the Bellman optimality backup
        V[s] = max_a sum_{s',r} p(s',r|s,a) * (r + gamma * V[s'])
    until the largest per-state change falls below SMALL_ENOUGH.

    :param grid: gridworld exposing all_states() and non_terminal_states()
    :param max_iterations: cap on full sweeps over the state space.
        Defaults to 4, the value previously hard-coded; the comment in the
        original promised "repeat until convergence", so the cap is now an
        explicit, tunable parameter instead of a buried magic number.
    :return: dict mapping each state to its estimated value
    """
    # initialize V(s) to zero for every state
    V = {s: 0 for s in grid.all_states()}

    i = 0
    while i < max_iterations:
        # biggest_change is referred to by the mathematical symbol delta
        # in the equations
        biggest_change = 0
        for s in grid.non_terminal_states():
            old_v = V[s]
            _, new_v = best_action_value(grid, V, s)
            V[s] = new_v
            biggest_change = max(biggest_change, np.abs(old_v - new_v))
        # Converged: no state moved by more than the tolerance.
        if biggest_change < SMALL_ENOUGH:
            break
        print("values:")
        print_values(V, grid)
        i += 1
    return V
def main():
    """Build the standard grid, run Monte Carlo control, and report results.

    Prints the reward layout and the learned values, shows the learned
    policy, then plots the per-episode convergence deltas.
    """
    world = standard_grid(obey_prob=1.0, step_cost=None)
    # Show the immediate rewards before any learning happens.
    print_values(world.rewards, world)

    values, learned_policy, deltas = monte_carlo(world)

    print_values(values, world)
    print_policy(learned_policy, world)

    # Visualize how quickly the value estimates settled down.
    plt.plot(deltas)
    plt.show()
def visit(inst, s, solved, values):
    # TODO: add your code here.
    # Make use of compute_greedy_action_and_value, sample_successor, and
    # check_solved.
    # Return updated labeling solved and updated value function values.
    # NOTE(review): the string below is the only statement in this stub, so
    # it parses as visit()'s docstring even though its text describes
    # lrtdp() — presumably misplaced; confirm intent before moving it.
    """
    Run the LRTDP algorithm until it converges.
    """


def lrtdp(inst, values):
    # Labeled RTDP: no state is labeled solved before any trial has run.
    solved = { s: False for s in inst.states }
    iteration = 1
    # LRTDP terminates exactly when the initial state is labeled solved.
    while not solved[inst.init]:
        # NOTE(review): .format(iteration) is a no-op here — the message
        # contains no placeholder.
        wait_for_input("Press enter for another iteration of LRTDP...".format(iteration))
        solved, values = visit(inst, inst.init, solved, values)
        print("Values after iteration {}: ".format(iteration))
        print_values(inst, values)
        print("Solved after iteration {}: ".format(iteration))
        print_solved(inst, solved)
        iteration += 1
    return values


if __name__ == "__main__":
    # Command line selects the algorithm: plain RTDP or labeled RTDP.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'algorithm',
        choices=['rtdp', 'lrtdp'],
        help="Choose the algorithm."
    )
    args = parser.parse_args()

    inst = instance.get_example_instance()
    print(inst)

    # Seed the value function with the heuristic — RTDP's starting point.
    values = { s : heuristic(inst, s) for s in inst.states }
    print("")
    print("Initial state-values:")
    print_values(inst, values)

    if args.algorithm == 'rtdp':
        values = rtdp(inst, values)
    elif args.algorithm == 'lrtdp':
        values = lrtdp(inst, values)
    else:
        # Unreachable in practice: argparse `choices` already rejects
        # anything but 'rtdp'/'lrtdp'.
        sys.exit("Unknown algorithm")

    print("")
    print("Final values:")
    print_values(inst, values)

    # Derive and show the policy that is greedy w.r.t. the final values.
    policy = get_greedy_policy(inst, values)
    print("Corresponding final policy:")
    print_policy(inst, policy)
def lrtdp(inst, values):
    """Labeled RTDP: run trials from the initial state until it is solved.

    :param inst: problem instance exposing `states` and `init`
    :param values: state-value estimates, refined by each trial
    :return: the value function after the initial state is labeled solved
    """
    # No state is labeled solved before any trial has run.
    solved = {s: False for s in inst.states}
    iteration = 1
    # LRTDP terminates exactly when the initial state is labeled solved.
    while not solved[inst.init]:
        # BUG FIX: the original called .format(iteration) on a message with
        # no placeholder — a silent no-op; the dead call is removed.
        wait_for_input("Press enter for another iteration of LRTDP...")
        solved, values = visit(inst, inst.init, solved, values)
        print("Values after iteration {}: ".format(iteration))
        print_values(inst, values)
        print("Solved after iteration {}: ".format(iteration))
        print_solved(inst, solved)
        iteration += 1
    return values
def main():
    """Entry point: solve the standard grid via Monte Carlo and report.

    Shows the reward layout, the final value function, the final policy,
    and a plot of the per-episode convergence deltas.
    """
    world = standard_grid(obey_prob=1.0, step_cost=None)

    # print rewards
    print("rewards:")
    print_values(world.rewards, world)

    final_values, final_policy, per_episode_deltas = monte_carlo(world)

    print("final values:")
    print_values(final_values, world)
    print("final policy:")
    print_policy(final_policy, world)

    plt.plot(per_episode_deltas)
    plt.show()
def test_data_store_variable(variable, name, indent=''):
    """Print a data-store variable's metadata and probe its accessors.

    Deliberately probes out-of-range value indices (-1 and values_count)
    and out-of-range runs (-2 through runs_count) to exercise boundary
    handling.
    """
    print(f'{indent} - Test {name}:')
    print(f'{indent}    - Name: {variable.name()}')
    print(f'{indent}    - Unit: {variable.unit()}')
    print(f'{indent}    - URI: {variable.uri()}')

    # Probe just-below-range, first, last, and just-above-range indices.
    n_values = variable.values_count()
    for index in (-1, 0, n_values - 1, n_values):
        test_data_store_variable_index(variable, index, indent)

    # Probe runs from -2 (invalid) up to runs_count() (one past the end).
    for run_index in range(-2, variable.runs_count() + 1):
        print(f'{indent}    - values({run_index}): ', end='')
        utils.print_values(variable.values(run_index))
def rtdp(inst, values):
    """Plain RTDP: repeat trials until the value function stops changing.

    Convergence is declared once the maximum per-state difference between
    consecutive iterations drops below EPSILON.
    """
    iteration = 1
    converged = False
    while not converged:
        wait_for_input("Press enter for another iteration of RTDP...")
        # Snapshot the current estimates so the trial's effect is measurable.
        previous = dict(values)
        values = perform_trial(inst, values)
        print("Values after iteration {}: ".format(iteration))
        print_values(inst, values)
        if compute_max_difference(previous, values) < EPSILON:
            print("Converged in iteration {}".format(iteration))
            converged = True
        else:
            iteration += 1
    return values
def forward(self, x):
    """Forward pass: plain conv stack, or a noise-perturbation branch.

    :param x: input image batch — the original shape comments suggest NCHW,
        e.g. (10, 3, 32, 32); TODO confirm against the caller.
    :return: the network output tensor
    """
    if self.filter_size > 0:
        # Standard path.
        return self.layers(x) #image, conv, batchnorm, relu
    else:
        # Broadcast-add a bank of scaled noise masks along a new axis 2:
        # (10, 3, 1, 32, 32) + (1, 3, 128, 32, 32) --> (10, 3, 128, 32, 32)
        y = torch.add(x.unsqueeze(2), self.noise * self.level)
        if self.debug:
            print_values(x, self.noise, y, self.unique_masks)
        # Fold the mask axis into the channel axis before the conv stack.
        y = y.view(-1, self.in_channels * self.nmasks, self.input_size, self.input_size)
        y = self.layers(y)
        if self.mix_maps:
            # Optional extra map-mixing stage.
            y = self.mix_layers(y)
        return y #image, perturb, (relu?), conv1x1, batchnorm, relu + mix_maps (conv1x1, batchnorm relu)
from neutronclient.v2_0 import client
from credentials import get_credentials
from utils import print_values

# Authenticate against Neutron with the shared credential helper, then
# dump every known port.
auth_kwargs = get_credentials()
neutron_client = client.Client(**auth_kwargs)
port_listing = neutron_client.list_ports()
print_values(port_listing, 'ports')
    # Tail of get_keystone_client(...) — the function header and the start
    # of `params` are outside this view.
    }
    return keystone_client.Client(**params)


# Credential combinations to probe against Keystone.
tests = [
    {"user": "******", "password": "******", "tenant": "admin"},
]

for test in tests:
    # Python 2 print; the trailing comma keeps "Authorized"/"Denied" on
    # the same line as the attempt message.
    print "Attempting authentication for tenant %s by user %s" % (
        test['tenant'], test['user']
    ),
    try:
        ks = get_keystone_client(**test)
        print "Authorized"
    # NOTE(review): bare `except:` also swallows SystemExit and
    # KeyboardInterrupt — consider narrowing to `except Exception:`.
    except:
        print "Denied"

#
#
#
#Listing All the networks from OpenStack environment.
#
#
print "Listing All Networks."
credentials = get_credentials()
neutron = client.Client(**credentials)
list_network = neutron.list_networks()
print_values(list_network, 'networks')
from neutronclient.v2_0 import client
from credentials import get_credentials_tenant_one
from utils import print_values

# Log in as user1 scoped to user1-project and list that tenant's networks.
tenant_credentials = get_credentials_tenant_one("user1", "user1", "user1-project")
neutron_client = client.Client(**tenant_credentials)
network_listing = neutron_client.list_networks()
print_values(network_listing, 'networks')
tests = [ { "user": "******", "password": "******", "tenant": "admin" }, ] for test in tests: print "Attempting authentication for tenant %s by user %s" % ( test['tenant'], test['user']), try: ks = get_keystone_client(**test) print "Authorized" except: print "Denied" # # # #Listing All the networks from OpenStack environment. # # print "Listing All Networks." credentials = get_credentials() neutron = client.Client(**credentials) list_network = neutron.list_networks() print_values(list_network, 'networks')
from neutronclient.v2_0 import client from credentials import get_credentials from utils import print_values try: credentials = get_credentials() neutron = client.Client(**credentials) routers_list = neutron.list_routers(retrieve_all=True) print_values(routers_list, 'routers') finally: print 'Execution Completed'
def list_the_ports():
    """Fetch every port from the module-level Neutron client and print it,
    preceded by a blank line."""
    fetched_ports = neutron.list_ports()
    # Bare Python 2 print statement: emits a blank separator line.
    print
    print_values(fetched_ports, 'ports')
Q[s][a] = np.mean(returns[sa]) biggest_change = max(biggest_change, np.abs(old_q - Q[s][a])) seen_state_action_pairs.add(sa) deltas.append(biggest_change) for s in policy.keys(): a, _ = max_dict(Q[s]) policy[s] = a V = {} for s in policy.keys(): V[s] = max_dict(Q[s])[1] return V, policy, deltas if __name__ == '__main__': grid = standard_grid(obey_prob=1.0, step_cost=None) print("rewards:") print_values(grid.rewards, grid) V, policy, deltas = monte_carlo(grid) print("final values:") print_values(V, grid) print("final policy:") print_policy(policy, grid) plt.plot(deltas) plt.show()
for i in range(N): visited_states = set() states_and_returns = play_episode(standard_grid(), pi) for s, g in states_and_returns: if s not in visited_states: visited_states.add(s) if s not in all_returns: all_returns[s] = [] all_returns[s].append(g) V[s] = np.mean(all_returns[s]) return V ALL_POSSIBLE_ACTIONS = ['U', 'D', 'L', 'R'] if __name__ == '__main__': pi = { (0, 0): 'R', (0, 1): 'R', (0, 2): 'R', (1, 2): 'U', (2, 2): 'U', (2, 1): 'L', (2, 0): 'U', (1, 0): 'U', (2, 3): 'L', } grid = standard_grid() print_policy(pi, grid) V = first_visit_monte_carlo_prediction(pi, 100) print_values(V, grid)
#!/usr/bin/env python
from neutronclient.v2_0 import client
from credentials import get_credentials
from utils import print_values

#List the routers created by the create-router code
# The completion message is printed even if the lookup raises.
try:
    auth = get_credentials()
    neutron_client = client.Client(**auth)
    router_listing = neutron_client.list_routers(retrieve_all=True)
    print_values(router_listing, 'routers')
finally:
    print("Execution completed")

#List the subnets which queries the neutron.list method
subnet_auth = get_credentials()
subnet_client = client.Client(**subnet_auth)
subnets = subnet_client.list_subnets()
print(subnets)
if __name__ == "__main__":
    # Command line selects the algorithm: plain RTDP or labeled RTDP.
    parser = argparse.ArgumentParser()
    parser.add_argument('algorithm',
                        choices=['rtdp', 'lrtdp'],
                        help="Choose the algorithm.")
    cli_args = parser.parse_args()

    problem = instance.get_example_instance()
    print(problem)

    # Seed the value function with the heuristic — RTDP's starting point.
    state_values = {state: heuristic(problem, state) for state in problem.states}
    print("")
    print("Initial state-values:")
    print_values(problem, state_values)

    # Dispatch table replaces the if/elif chain; argparse `choices`
    # already guarantees a hit, the guard mirrors the original fallback.
    solvers = {'rtdp': rtdp, 'lrtdp': lrtdp}
    solver = solvers.get(cli_args.algorithm)
    if solver is None:
        sys.exit("Unknown algorithm")
    state_values = solver(problem, state_values)

    print("")
    print("Final values:")
    print_values(problem, state_values)

    # Derive and show the policy that is greedy w.r.t. the final values.
    greedy = get_greedy_policy(problem, state_values)
    print("Corresponding final policy:")
    print_policy(problem, greedy)