import epi_env
import numpy as np

EPISODES = 11500

env = epi_env.Epi_Env()
running_reward = 0
# Log the per-episode running reward (CSV opened for writing; extension fixed from .cvs).
f = open("Trivial-Karate-Time.csv", "w")
for episode in range(EPISODES):
    env.reset()
    # Fix every node at the low infection rate and the high recovery rate.
    for node in range(len(env.graph)):
        env.sim.change_inf_rate(node, env.beta_low)
        env.sim.change_rec_rate(node, env.delta_hi)

    done = False
    # Trivial baseline: a constant all-ones action over the whole action space.
    trivial_action = np.ones(env.action_space)

    sum_reward = 0
    while not done:
        state, reward, done, _ = env.step(trivial_action)
        sum_reward += reward
    # Exponentially weighted moving average of the episode return.
    running_reward = (running_reward * 0.995) + (sum_reward * 0.005)
    f.write("{},{}\n".format(episode, running_reward))

    print(running_reward)

f.close()
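
The example above only prints the running reward; with the CSV written as episode,running_reward pairs (the logging format used here), the curve can be inspected afterwards. The snippet below is an illustrative companion, not part of the original example, and assumes matplotlib is available.

import csv
import matplotlib.pyplot as plt

episodes, rewards = [], []
with open("Trivial-Karate-Time.csv") as log:
    for row in csv.reader(log):
        episodes.append(int(row[0]))
        rewards.append(float(row[1]))

plt.plot(episodes, rewards)
plt.xlabel("episode")
plt.ylabel("running reward")
plt.title("Trivial baseline on the Karate graph")
plt.show()
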
Example #2
parser.add_argument('--clip-epsilon',
                    type=float,
                    default=0.2,
                    metavar='N',
                    help='clip epsilon for the PPO surrogate objective')
parser.add_argument('--use-joint-pol-val',
                    action='store_true',
                    help='whether to use combined policy and value nets')
args = parser.parse_args()

import pickle
with open("Standard_Watts100.pkl", "rb") as graph_file:
    graph = pickle.load(graph_file)
env = epi_env.Epi_Env(B=0.3,
                      graph=graph,
                      beta_hi=0.177,
                      beta_low=0.071,
                      delta_low=0.20,
                      delta_hi=0.285,
                      action_clusters=10)

print(env.observation_space)
num_inputs = env.observation_space
num_actions = env.action_space

env.seed(args.seed)
torch.manual_seed(args.seed)

if args.use_joint_pol_val:
    ac_net = ActorCritic(num_inputs, num_actions)
    opt_ac = optim.Adam(ac_net.parameters(), lr=0.001)
else:
    # Separate policy and value networks, as in the example below.
    policy_net = Policy(num_inputs, num_actions)
    value_net = Value(num_inputs)
    opt_policy = optim.Adam(policy_net.parameters(), lr=0.001)
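
Neither example shows how the --clip-epsilon and --entropy-coeff flags are consumed. The function below is only a generic sketch of the PPO clipped surrogate loss they usually feed into, not code from this project; the name ppo_surrogate_loss and its arguments are illustrative.

import torch

def ppo_surrogate_loss(log_probs, old_log_probs, advantages,
                       clip_epsilon=0.2, entropy=None, entropy_coeff=0.0):
    # Probability ratio between the current policy and the policy that collected the data.
    ratio = torch.exp(log_probs - old_log_probs)
    unclipped = ratio * advantages
    # Clip the ratio to [1 - epsilon, 1 + epsilon] and take the pessimistic bound.
    clipped = torch.clamp(ratio, 1.0 - clip_epsilon, 1.0 + clip_epsilon) * advantages
    loss = -torch.min(unclipped, clipped).mean()
    if entropy is not None:
        # Optional entropy bonus, weighted by --entropy-coeff.
        loss = loss - entropy_coeff * entropy.mean()
    return loss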

Example #3

parser.add_argument('--batch-size', type=int, default=5000, metavar='N',
                    help='batch size (default: 5000)')
parser.add_argument('--render', action='store_true',
                    help='render the environment')
parser.add_argument('--log-interval', type=int, default=20, metavar='N',
                    help='interval between training status logs (default: 20)')
parser.add_argument('--entropy-coeff', type=float, default=0.0, metavar='N',
                    help='coefficient for entropy cost')
parser.add_argument('--clip-epsilon', type=float, default=0.2, metavar='N',
                    help='clip epsilon for the PPO surrogate objective')
parser.add_argument('--use-joint-pol-val', action='store_true',
                    help='whether to use combined policy and value nets')
args = parser.parse_args()

import pickle
with open("Standard_Watts100.pkl", "rb") as graph_file:
    graph = pickle.load(graph_file)
env = epi_env.Epi_Env(B=0.3, graph=graph, beta_hi=0.177, beta_low=0.071, delta_low=0.20, delta_hi=0.285)

print(env.observation_space)
num_inputs = env.observation_space
num_actions = env.action_space

env.seed(args.seed)
torch.manual_seed(args.seed)

if args.use_joint_pol_val:
    ac_net = ActorCritic(num_inputs, num_actions)
    opt_ac = optim.Adam(ac_net.parameters(), lr=0.001)
else:
    policy_net = Policy(num_inputs, num_actions)
    value_net = Value(num_inputs)
    opt_policy = optim.Adam(policy_net.parameters(), lr=0.001)
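
The Policy, Value, and ActorCritic classes used above are not shown on this page. As a rough orientation only, here is a minimal sketch of what the separate policy/value pair could look like for a continuous action vector of size num_actions; the layer sizes and the Gaussian (mean, log-std) parameterisation are assumptions, not the project's actual definitions.

import torch
import torch.nn as nn

class Policy(nn.Module):
    """Gaussian policy: maps a state to an action mean and a learned log-std."""

    def __init__(self, num_inputs, num_actions):
        super().__init__()
        self.hidden = nn.Sequential(nn.Linear(num_inputs, 64), nn.Tanh(),
                                    nn.Linear(64, 64), nn.Tanh())
        self.action_mean = nn.Linear(64, num_actions)
        self.action_log_std = nn.Parameter(torch.zeros(1, num_actions))

    def forward(self, x):
        h = self.hidden(x)
        mean = self.action_mean(h)
        log_std = self.action_log_std.expand_as(mean)
        return mean, log_std, log_std.exp()

class Value(nn.Module):
    """State-value estimate used as the baseline for advantage computation."""

    def __init__(self, num_inputs):
        super().__init__()
        self.net = nn.Sequential(nn.Linear(num_inputs, 64), nn.Tanh(),
                                 nn.Linear(64, 64), nn.Tanh(),
                                 nn.Linear(64, 1))

    def forward(self, x):
        return self.net(x)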