def test(test_network):
    """Evaluate a saved DDQN Q-network on the given network environment.

    Loads a fixed checkpoint, then runs greedy (argmax) action selection
    for 50 episodes, printing the score and a running average every 5
    episodes.

    Args:
        test_network: network config name/path accepted by
            network_parser.parse.
    """
    network = network_parser.parse(test_network)
    net = Net(network.state_dim, network.action_dim)
    net.load_state_dict(
        torch.load(os.path.join('saved_model',
                                'subnet-10-machine-30-service-6-v0-ddqn-277.pt')))
    net.eval()
    print('state_dim:', network.state_dim, 'action_dim: ', network.action_dim)
    num_episodes = 50
    interval = 5
    score_history = list()
    # Pure inference: disable autograd so no graph is built every step.
    with torch.no_grad():
        for i in range(num_episodes):
            done = False
            score = 0
            state = network.reset()
            while not done:
                state = torch.tensor(state)
                state = state.unsqueeze(0)  # add batch dimension
                action_values = net(state, model='online')
                # Greedy action; `dim=` is the canonical name (`axis=` is an alias).
                action = torch.argmax(action_values, dim=1).item()
                # print(action)
                next_state, reward, done, info = network.step(action)
                state = next_state
                score += reward
            score_history.append(score)
            avg_score = np.mean(score_history[-100:])
            if i % interval == 0:
                # Fixed typo in output: 'eipode' -> 'episode'.
                print('episode ', i,
                      'score %.2f average score %.2f' % (score, avg_score))
def test(test_network):
    """Evaluate a saved actor (policy) network on the given environment.

    Loads a fixed actor checkpoint, samples actions from the categorical
    policy for 50 episodes, and prints the score and a running average
    every 5 episodes.

    Args:
        test_network: network config name/path accepted by
            network_parser.parse.
    """
    network = network_parser.parse(test_network)
    actor = Actor(network.state_dim, network.action_dim)
    actor.load_state_dict(
        torch.load(
            os.path.join('saved_model',
                         'subnet-10-machine-30-service-6-v0-actor-187.pt')))
    actor.eval()
    print('state_dim:', network.state_dim, 'action_dim: ', network.action_dim)
    num_episodes = 50
    interval = 5
    score_history = list()
    # Pure inference: disable autograd so no graph is built every step.
    with torch.no_grad():
        for i in range(num_episodes):
            done = False
            score = 0
            state = network.reset()
            while not done:
                state = torch.tensor(state)
                state = state.unsqueeze(0)  # add batch dimension
                prob = actor(state)
                # Sample stochastically from the policy distribution.
                prob = torch.distributions.Categorical(prob)
                action = prob.sample().item()
                # action = torch.argmax(prob).item()
                # print(action)
                next_state, reward, done, info = network.step(action)
                state = next_state
                score += reward
            score_history.append(score)
            avg_score = np.mean(score_history[-100:])
            if i % interval == 0:
                # Fixed typo in output: 'eipode' -> 'episode'.
                print('episode ', i,
                      'score %.2f average score %.2f' % (score, avg_score))
# Parse CLI args, load settings, and pick a compute device.
try:
    args = parser.parse_args()
except SystemExit:
    # argparse signals bad arguments via SystemExit; show help and stop.
    # (Was a bare `except:`, which would also swallow unrelated errors.)
    parser.print_help()
    exit(0)
if args.config is None:
    raise Exception('Unrecognized config file.')
else:
    config_path = args.config
logging.basicConfig(filename='result.log', level=logging.INFO)
logging.info("start parsing settings")
params = parse(config_path)
logging.info("finish parsing settings")
dtype = torch.float32
# Check whether a GPU is available
if torch.cuda.is_available():
    # NOTE(review): hard-coded device index 2 with torch.device("cuda")
    # commented out — looks like a debug leftover; confirm intended device.
    device = 2  # torch.device("cuda")
    cuda.init()
    c_device = aboutCudaDevices()
    print(c_device.info())
    print("selected device: ", device)
else:
    device = torch.device("cpu")
    print("No GPU is found")
def __init__(self, network_name, agents, plot_dir):
    """Initialize the runner: parse the network config and record settings.

    Args:
        network_name: config name/path passed to network_parser.parse.
        agents: agent specification(s) to run.
        plot_dir: directory where plots will be written.
    """
    super().__init__()
    self.agents = agents
    self.plot_dir = plot_dir
    self.network = network_parser.parse(network_name)
    # Lookup table from agent-type key to its implementing class.
    self.agent_dict = {'a2c': A2C, 'ddqn': DDQN}
print('state_dim:', network.state_dim, 'action_dim: ', network.action_dim) num_episodes = 50 interval = 5 score_history = list() for i in range(num_episodes): done = False score = 0 state = network.reset() while not done: state = torch.tensor(state) state = state.unsqueeze(0) action_values = net(state, model='online') action = torch.argmax(action_values, axis=1).item() # print(action) next_state, reward, done, info = network.step(action) state = next_state score += reward score_history.append(score) avg_score = np.mean(score_history[-100:]) if i % interval == 0: print('eipode ', i, 'score %.2f average score %.2f' % (score, avg_score)) if __name__ == '__main__': network = network_parser.parse('subnet-10-machine-30-service-6-v0.json') agent = DDQN(network, './plots', './saved_model') agent.run(2000, 0) agent.plot() # test('subnet-10-machine-30-service-6-v0.json')
def parse_all(self, network_list):
    """Parse every entry of *network_list* into a network object.

    Args:
        network_list: iterable of network config names/paths accepted by
            network_parser.parse.

    Returns:
        list of parsed network objects, in input order.
    """
    # Comprehension replaces the manual append loop (same order, same behavior).
    return [network_parser.parse(n) for n in network_list]