Example #1
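Loads a trained DDQN network for the parsed test environment and evaluates it for 50 episodes with a greedy policy, printing the score and running average every 5 episodes.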
import os

import numpy as np
import torch

# Net and network_parser are project-local modules used throughout these examples.


def test(test_network):
    network = network_parser.parse(test_network)
    net = Net(network.state_dim, network.action_dim)
    # Restore the trained DDQN weights from disk.
    net.load_state_dict(
        torch.load(
            os.path.join('saved_model',
                         'subnet-10-machine-30-service-6-v0-ddqn-277.pt')))
    net.eval()

    print('state_dim:', network.state_dim, 'action_dim:', network.action_dim)
    num_episodes = 50
    interval = 5
    score_history = list()
    for i in range(num_episodes):
        done = False
        score = 0
        state = network.reset()
        
        while not done:
            state = torch.tensor(state).unsqueeze(0)  # add a batch dimension
            action_values = net(state, model='online')
            action = torch.argmax(action_values, dim=1).item()  # greedy action

            # print(action)
            next_state, reward, done, info = network.step(action)
            state = next_state
            score += reward

        score_history.append(score)
        avg_score = np.mean(score_history[-100:])
        if i % interval == 0:
            print('episode', i, 'score %.2f average score %.2f' % (score, avg_score))
Example #2
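The same evaluation harness, but for a trained actor network: at each step an action is sampled from the categorical distribution the policy outputs.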
# Assumes the same imports as Example #1, plus the project-local Actor class.
def test(test_network):
    network = network_parser.parse(test_network)
    actor = Actor(network.state_dim, network.action_dim)
    # Restore the trained actor (policy) weights from disk.
    actor.load_state_dict(
        torch.load(
            os.path.join('saved_model',
                         'subnet-10-machine-30-service-6-v0-actor-187.pt')))
    actor.eval()

    print('state_dim:', network.state_dim, 'action_dim:', network.action_dim)
    num_episodes = 50
    interval = 5
    score_history = list()
    for i in range(num_episodes):
        done = False
        score = 0
        state = network.reset()

        while not done:
            state = torch.tensor(state).unsqueeze(0)  # add a batch dimension
            prob = actor(state)

            # Sample an action from the categorical policy distribution.
            dist = torch.distributions.Categorical(prob)
            action = dist.sample().item()

            # action = torch.argmax(prob).item()  # greedy alternative

            # print(action)
            next_state, reward, done, info = network.step(action)
            state = next_state
            score += reward

        score_history.append(score)
        avg_score = np.mean(score_history[-100:])
        if i % interval == 0:
            print('episode', i,
                  'score %.2f average score %.2f' % (score, avg_score))
Example #3
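Script setup: parse the command-line arguments, load the config file, configure logging, and select a compute device, printing CUDA device details when a GPU is available.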
    # parser (argparse), parse(), logging, torch, and the CUDA helpers used
    # below are imported/defined earlier in the file.
    try:
        args = parser.parse_args()
    except SystemExit:  # argparse exits on bad arguments; print full help instead
        parser.print_help()
        exit(0)

    if args.config is None:
        raise Exception('No config file specified.')
    else:
        config_path = args.config

    logging.basicConfig(filename='result.log', level=logging.INFO)

    logging.info("start parsing settings")

    params = parse(config_path)

    logging.info("finish parsing settings")

    dtype = torch.float32

    # Check whether a GPU is available
    if torch.cuda.is_available():
        device = torch.device("cuda:2")  # hard-coded to GPU index 2
        cuda.init()  # presumably pycuda ("import pycuda.driver as cuda")
        c_device = aboutCudaDevices()  # helper that describes the CUDA devices
        print(c_device.info())
        print("selected device: ", device)
    else:
        device = torch.device("cpu")
        print("No GPU is found")
Example #4
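Constructor of a runner class: it parses the named network once and keeps a registry mapping agent names to their implementations.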
def __init__(self, network_name, agents, plot_dir):
    super().__init__()
    self.network = network_parser.parse(network_name)
    self.plot_dir = plot_dir
    self.agents = agents
    self.agent_dict = {'a2c': A2C, 'ddqn': DDQN}  # agent name -> agent class
Example #5
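A larger fragment of the evaluation loop from Example #1, followed by the __main__ entry point that trains a DDQN agent on the parsed network.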
    print('state_dim:', network.state_dim, 'action_dim:', network.action_dim)
    num_episodes = 50
    interval = 5
    score_history = list()
    for i in range(num_episodes):
        done = False
        score = 0
        state = network.reset()
        
        while not done:
            state = torch.tensor(state).unsqueeze(0)  # add a batch dimension
            action_values = net(state, model='online')
            action = torch.argmax(action_values, dim=1).item()  # greedy action

            # print(action)
            next_state, reward, done, info = network.step(action)
            state = next_state
            score += reward

        score_history.append(score)
        avg_score = np.mean(score_history[-100:])
        if i % interval == 0:
            print('episode', i, 'score %.2f average score %.2f' % (score, avg_score))

if __name__ == '__main__':
    network = network_parser.parse('subnet-10-machine-30-service-6-v0.json')
    agent = DDQN(network, './plots', './saved_model')
    agent.run(2000, 0)  # train (presumably 2000 episodes), then plot the scores
    agent.plot()
    # test('subnet-10-machine-30-service-6-v0.json')
Example #6
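Helper that parses a list of network definitions and returns the resulting network objects.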
def parse_all(self, network_list):
    # Parse each network definition and collect the resulting network objects.
    networks = list()
    for n in network_list:
        network = network_parser.parse(n)
        networks.append(network)
    return networks
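
A hypothetical call, reusing the file-name pattern from the other examples (the runner object and the list contents are assumptions, not from the source):

    networks = runner.parse_all(['subnet-10-machine-30-service-6-v0.json'])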