import epi_env
import numpy as np

EPISODES = 11500

env = epi_env.Epi_Env()
running_reward = 0

# Results log: the running reward is written out once per episode.
f = open("Trivial-Karate-Time.cvs", "w")

for episode in range(EPISODES):
    env.reset()

    # Trivial baseline: give every node the low infection rate and the high recovery rate.
    for node in range(len(env.graph)):
        env.sim.change_inf_rate(node, env.beta_low)
        env.sim.change_rec_rate(node, env.delta_hi)

    done = False
    trivial_action = np.ones(env.action_space)
    sum_reward = 0

    # Apply the same all-ones action at every step until the episode ends.
    while not done:
        state, reward, done, _ = env.step(trivial_action)
        sum_reward += reward

    # Exponential moving average of the episode return.
    running_reward = (running_reward * 0.995) + (sum_reward * 0.005)
    print(running_reward)
    f.write(f"{running_reward}\n")

f.close()
parser.add_argument('--clip-epsilon', type=float, default=0.2, metavar='N',
                    help='Clipping for PPO grad')
parser.add_argument('--use-joint-pol-val', action='store_true',
                    help='whether to use combined policy and value nets')
args = parser.parse_args()

import pickle

# Load the Watts-Strogatz graph and build the epidemic environment
# with 10 action clusters.
graph = pickle.load(open("Standard_Watts100.pkl", "rb"))
env = epi_env.Epi_Env(B=0.3, graph=graph, beta_hi=0.177, beta_low=0.071,
                      delta_low=0.20, delta_hi=0.285, action_clusters=10)
print(env.observation_space)

num_inputs = env.observation_space
num_actions = env.action_space

env.seed(args.seed)
torch.manual_seed(args.seed)

# Either a joint actor-critic network or separate policy and value networks.
if args.use_joint_pol_val:
    ac_net = ActorCritic(num_inputs, num_actions)
    opt_ac = optim.Adam(ac_net.parameters(), lr=0.001)
else:
    policy_net = Policy(num_inputs, num_actions)
    value_net = Value(num_inputs)
    opt_policy = optim.Adam(policy_net.parameters(), lr=0.001)
    # Assumed: optimizer for the value net, mirroring opt_policy.
    opt_value = optim.Adam(value_net.parameters(), lr=0.001)
parser.add_argument('--batch-size', type=int, default=5000, metavar='N',
                    help='batch size (default: 5000)')
parser.add_argument('--render', action='store_true',
                    help='render the environment')
parser.add_argument('--log-interval', type=int, default=20, metavar='N',
                    help='interval between training status logs (default: 20)')
parser.add_argument('--entropy-coeff', type=float, default=0.0, metavar='N',
                    help='coefficient for entropy cost')
parser.add_argument('--clip-epsilon', type=float, default=0.2, metavar='N',
                    help='Clipping for PPO grad')
parser.add_argument('--use-joint-pol-val', action='store_true',
                    help='whether to use combined policy and value nets')
args = parser.parse_args()

import pickle

# Load the Watts-Strogatz graph and build the epidemic environment
# (no action clustering in this variant).
graph = pickle.load(open("Standard_Watts100.pkl", "rb"))
env = epi_env.Epi_Env(B=0.3, graph=graph, beta_hi=0.177, beta_low=0.071,
                      delta_low=0.20, delta_hi=0.285)
print(env.observation_space)

num_inputs = env.observation_space
num_actions = env.action_space

env.seed(args.seed)
torch.manual_seed(args.seed)

# Either a joint actor-critic network or separate policy and value networks.
if args.use_joint_pol_val:
    ac_net = ActorCritic(num_inputs, num_actions)
    opt_ac = optim.Adam(ac_net.parameters(), lr=0.001)
else:
    policy_net = Policy(num_inputs, num_actions)
    value_net = Value(num_inputs)
    opt_policy = optim.Adam(policy_net.parameters(), lr=0.001)
    # Assumed: optimizer for the value net, mirroring opt_policy.
    opt_value = optim.Adam(value_net.parameters(), lr=0.001)