def atari(env, default_seed=False, **kwargs):
    if default_seed:
        seed = 2
        set_seed(seed, env)  # reproducible
    in_dim = env.observation_space.shape
    act_dim = env.action_space.n
    params = dict(
        number_timesteps=int(1e7),  # for raw-pixel
        test_episodes=10,
        save_path=None,
        save_interval=1e4,
        batch_size=32,
        double_q=True,
        buffer_size=10000,
        exploration_rate=0.1,
        exploration_final_eps=0.01,
        train_freq=4,
        learning_starts=10000,
        target_network_update_freq=1000,
        gamma=0.99,
        prioritized_replay=True,
        prioritized_alpha=0.6,
        prioritized_beta0=0.4,
        dueling=True,
    )
    params.update(kwargs)
    if params.get('network') is None:
        params['network'] = CNNQNet(in_dim, act_dim, params.pop('dueling'))
    if params.get('optimizer') is None:
        params['optimizer'] = tf.optimizers.Adam(1e-4, epsilon=1e-5, clipnorm=10)
    return dict(), params
def classic_control(env, default_seed=False, **kwargs):
    if default_seed:
        seed = 2
        set_seed(seed, env)  # reproducible
    in_dim = env.observation_space.shape[0]
    act_dim = env.action_space.n
    params = dict(
        number_timesteps=int(1e4),
        test_episodes=10,
        save_path=None,
        save_interval=1e3,
        batch_size=32,
        double_q=True,
        buffer_size=1000,
        exploration_rate=0.2,
        exploration_final_eps=0.01,
        train_freq=4,
        learning_starts=200,
        target_network_update_freq=50,
        gamma=0.99,
        prioritized_replay=False,
        prioritized_alpha=0.6,
        prioritized_beta0=0.4,
        dueling=True,
    )
    params.update(kwargs)
    if params.get('network') is None:
        params['network'] = MLPQNet(in_dim, act_dim, params.pop('dueling'))
    if params.get('optimizer') is None:
        params['optimizer'] = tf.optimizers.Adam(5e-3, epsilon=1e-5)
    return dict(), params
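# Usage sketch for the DQN default-parameter helpers above. Assumptions: gym
# is installed; downstream, a DQN learner consumes the remaining entries of
# `params` (that learner is not shown in this file).
import gym

env = gym.make('CartPole-v0').unwrapped
_, params = classic_control(env, default_seed=True, batch_size=64)  # kwargs override defaults
q_network = params.pop('network')    # MLPQNet built from the env spaces
optimizer = params.pop('optimizer')  # Adam(5e-3) unless one was passed in
print(sorted(params))                # the remaining training hyperparameters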
def box2d(env, default_seed=True):
    if default_seed:
        seed = 2
        set_seed(seed, env)  # reproducible
    alg_params = dict()
    if alg_params.get('net_list') is None:
        num_hidden_layer = 1  # number of hidden layers for the networks
        hidden_dim = 64  # dimension of hidden layers for the networks
        with tf.name_scope('PG'):
            with tf.name_scope('Policy'):
                policy_net = StochasticPolicyNetwork(env.observation_space,
                                                     env.action_space,
                                                     num_hidden_layer * [hidden_dim])
        net_list = [policy_net]
        alg_params['net_list'] = net_list
    if alg_params.get('optimizers_list') is None:
        learning_rate = 0.02
        policy_optimizer = tf.optimizers.Adam(learning_rate)
        optimizers_list = [policy_optimizer]
        alg_params['optimizers_list'] = optimizers_list
    learn_params = dict(
        train_episodes=200,
        test_episodes=100,
        max_steps=200,
        save_interval=100,
        gamma=0.95,
    )
    return alg_params, learn_params
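# Usage sketch for the (alg_params, learn_params) convention shared by these
# default-hyperparameter helpers. Assumptions: gym with Box2D support is
# installed; the algorithm object that consumes these dicts is not shown here.
import gym

env = gym.make('LunarLanderContinuous-v2').unwrapped
alg_params, learn_params = box2d(env)
# alg_params carries the networks and optimizers; learn_params carries the
# training-loop settings, e.g. learn_params['train_episodes'] == 200.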
def main():
    warnings.simplefilter(action='ignore', category=RuntimeWarning)
    args = parse_args()
    save_args(args)
    set_seed(args.seed)
    if args.heuristic == 'bbr':
        heuristic = BBR(False)
    elif args.heuristic == 'bbr_old':
        heuristic = BBR_old(False)
    elif args.heuristic == 'cubic':
        heuristic = Cubic(False)
    elif args.heuristic == 'optimal':
        heuristic = None
    else:
        raise ValueError("unknown heuristic: {}".format(args.heuristic))
    genet = Genet(args.config_file, args.save_dir, black_box_function,
                  heuristic, args.model_path, args.nproc, seed=args.seed,
                  validation=args.validation, n_init_pts=args.n_init_pts,
                  n_iter=args.n_iter, model_select=args.model_select,
                  train_trace_file=args.train_trace_file,
                  real_trace_prob=args.real_trace_prob, bo_only=args.bo_only,
                  param_select=args.param_select)
    genet.train(args.bo_rounds, args.bo_steps)
def main():
    args = parse_args()
    assert args.pretrained_model_path is None or \
        args.pretrained_model_path.endswith(".ckpt")
    os.makedirs(args.save_dir, exist_ok=True)
    save_args(args)
    set_seed(args.seed + COMM_WORLD.Get_rank() * 100)
    nprocs = COMM_WORLD.Get_size()

    # Initialize model and agent policy
    aurora = Aurora(args.seed + COMM_WORLD.Get_rank() * 100, args.save_dir,
                    int(7200 / nprocs), args.pretrained_model_path,
                    tensorboard_log=args.tensorboard_log)

    # Load training and validation traces.
    training_traces = []
    val_traces = []
    if args.train_trace_file:
        with open(args.train_trace_file, 'r') as f:
            for line in f:
                line = line.strip()
                if args.dataset == 'pantheon':
                    queue = 100  # dummy value
                    training_traces.append(Trace.load_from_pantheon_file(
                        line, queue=queue, loss=0))
                elif args.dataset == 'synthetic':
                    training_traces.append(Trace.load_from_file(line))
                else:
                    raise ValueError("unknown dataset: {}".format(args.dataset))
    if args.val_trace_file:
        with open(args.val_trace_file, 'r') as f:
            for line in f:
                line = line.strip()
                if args.dataset == 'pantheon':
                    queue = 100  # dummy value
                    val_traces.append(Trace.load_from_pantheon_file(
                        line, queue=queue, loss=0))
                elif args.dataset == 'synthetic':
                    val_traces.append(Trace.load_from_file(line))
                else:
                    raise ValueError("unknown dataset: {}".format(args.dataset))
    print(args.randomization_range_file)

    aurora.train(args.randomization_range_file, args.total_timesteps,
                 tot_trace_cnt=args.total_trace_count,
                 tb_log_name=args.exp_name, validation_flag=args.validation,
                 training_traces=training_traces, validation_traces=val_traces)
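# This script shards work across MPI ranks: each rank derives its own seed and
# its share of the 7200 validation-frequency constant from COMM_WORLD. A
# typical launch, assuming mpi4py and an MPI runtime are available (the script
# name and flag spellings below are illustrative; the real ones come from
# parse_args):
#
#   mpiexec -n 4 python train_aurora.py --dataset synthetic \
#       --train-trace-file traces.txt --save-dir results/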
def main():
    args = parse_args()
    set_seed(args.seed)
    assert args.count < 100000  # 'trace_{:05d}' supports at most 5-digit indices
    os.makedirs(args.save_dir, exist_ok=True)
    for i in range(args.count):
        trace = generate_trace_from_config_file(args.config_file)
        trace_file = os.path.join(args.save_dir, 'trace_{:05d}.json'.format(i))
        trace.dump(trace_file)
def main():
    args = parse_args()
    assert not args.model_path or args.model_path.endswith(".ckpt")
    os.makedirs(args.save_dir, exist_ok=True)
    save_args(args, args.save_dir)
    set_seed(args.seed)

    # Initialize model and agent policy
    if args.jump_action:
        pensieve = Pensieve(args.model_path, 6, 6, 3)
    else:
        pensieve = Pensieve(args.model_path)

    # Load training and validation traces.
    training_traces = []
    val_traces = []
    if args.curriculum == "udr":
        config_file = args.config_file
        if args.train_trace_dir:
            all_time, all_bw, all_file_names = load_traces(args.train_trace_dir)
            training_traces = [
                AbrTrace(t, bw, link_rtt=80, buffer_thresh=60, name=name)
                for t, bw, name in zip(all_time, all_bw, all_file_names)
            ]
        if args.val_trace_dir:
            all_time, all_bw, all_file_names = load_traces(args.val_trace_dir)
            val_traces = [
                AbrTrace(t, bw, link_rtt=80, buffer_thresh=60, name=name)
                for t, bw, name in zip(all_time, all_bw, all_file_names)
            ]
        train_scheduler = UDRTrainScheduler(
            config_file,
            training_traces,
            percent=args.real_trace_prob,
        )
    elif args.curriculum == "cl1":
        # CL1TrainScheduler is not supported for Pensieve yet.
        raise NotImplementedError
    elif args.curriculum == "cl2":
        # CL2TrainScheduler is not supported for Pensieve yet.
        raise NotImplementedError
    else:
        raise NotImplementedError
    pensieve.train(train_scheduler, val_traces, args.save_dir, args.nagent,
                   args.total_epoch, args.video_size_file_dir)
def box2d(env, default_seed=True):
    if default_seed:
        seed = 2
        set_seed(seed, env)  # reproducible
    action_shape = env.action_space.shape  # only continuous action spaces are supported
    state_shape = env.observation_space.shape
    alg_params = dict(
        state_dim=state_shape[0],
        action_dim=action_shape[0],
        replay_buffer_capacity=5e5,
        action_range=1.,
    )
    if alg_params.get('net_list') is None:
        num_hidden_layer = 4  # number of hidden layers for the networks
        hidden_dim = 64  # dimension of each hidden layer, the same for every layer here
        with tf.name_scope('SAC'):
            with tf.name_scope('Q_Net1'):
                soft_q_net1 = QNetwork(env.observation_space, env.action_space,
                                       hidden_dim_list=num_hidden_layer * [hidden_dim])
            with tf.name_scope('Q_Net2'):
                soft_q_net2 = QNetwork(env.observation_space, env.action_space,
                                       hidden_dim_list=num_hidden_layer * [hidden_dim])
            with tf.name_scope('Target_Q_Net1'):
                target_soft_q_net1 = QNetwork(env.observation_space, env.action_space,
                                              hidden_dim_list=num_hidden_layer * [hidden_dim])
            with tf.name_scope('Target_Q_Net2'):
                target_soft_q_net2 = QNetwork(env.observation_space, env.action_space,
                                              hidden_dim_list=num_hidden_layer * [hidden_dim])
            with tf.name_scope('Policy'):
                policy_net = StochasticPolicyNetwork(env.observation_space, env.action_space,
                                                     hidden_dim_list=num_hidden_layer * [hidden_dim])
        net_list = [soft_q_net1, soft_q_net2, target_soft_q_net1,
                    target_soft_q_net2, policy_net]
        alg_params['net_list'] = net_list
    if alg_params.get('optimizers_list') is None:
        # soft_q_lr / policy_lr / alpha_lr: learning rates of the Q networks,
        # the policy network, and the entropy temperature variable alpha.
        soft_q_lr, policy_lr, alpha_lr = 3e-4, 3e-4, 3e-4
        soft_q_optimizer1 = tf.optimizers.Adam(soft_q_lr)
        soft_q_optimizer2 = tf.optimizers.Adam(soft_q_lr)
        policy_optimizer = tf.optimizers.Adam(policy_lr)
        alpha_optimizer = tf.optimizers.Adam(alpha_lr)
        optimizers_list = [soft_q_optimizer1, soft_q_optimizer2,
                           policy_optimizer, alpha_optimizer]
        alg_params['optimizers_list'] = optimizers_list
    learn_params = dict(
        max_steps=150,
        batch_size=64,
        explore_steps=200,
        update_itr=3,
        policy_target_update_interval=3,
        reward_scale=1.,
        AUTO_ENTROPY=True,
        DETERMINISTIC=False,
        train_episodes=1000,
        test_episodes=10,
        save_interval=100,
    )
    return alg_params, learn_params
def atari(env, default_seed=True):
    if default_seed:
        assert isinstance(env, list)
        seed = np.arange(len(env)).tolist()  # a list of seeds, one per env
        set_seed(seed, env)  # reproducible
    # For multi-threading: if multiple envs are passed in for parallel
    # computing, they are assumed identical and one is kept as a template.
    if isinstance(env, list):
        num_env = len(env)  # number of envs passed in
        env = env[0]
    else:
        num_env = 1
    alg_params = dict(entropy_beta=0.005)
    if alg_params.get('net_list') is None:
        num_hidden_layer = 4  # number of hidden layers for the networks
        hidden_dim = 64  # dimension of hidden layers for the networks
        net_list2 = []  # list of network lists, one per worker thread/process
        for _ in range(num_env + 1):  # one additional set for the global nets
            with tf.name_scope('AC'):
                with tf.name_scope('Critic'):
                    critic = ValueNetwork(env.observation_space,
                                          hidden_dim_list=num_hidden_layer * [hidden_dim])
                with tf.name_scope('Actor'):
                    actor = StochasticPolicyNetwork(env.observation_space, env.action_space,
                                                    hidden_dim_list=num_hidden_layer * [hidden_dim])
            net_list = [actor, critic]
            net_list2.append(net_list)
        alg_params['net_list'] = net_list2
    if alg_params.get('optimizers_list') is None:
        a_lr, c_lr = 1e-3, 1e-3  # learning rates of the actor and the critic
        a_optimizer = tf.optimizers.RMSprop(a_lr, name='RMS_optimizer_actor')
        c_optimizer = tf.optimizers.RMSprop(c_lr, name='RMS_optimizer_critic')
        optimizers_list = [a_optimizer, c_optimizer]
        alg_params['optimizers_list'] = optimizers_list
    learn_params = dict(max_steps=1000,
                        gamma=0.9,
                        train_episodes=1000,
                        test_episodes=10,
                        save_interval=100,
                        update_itr=10,
                        n_workers=num_env)
    return alg_params, learn_params
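# Usage sketch for the multi-env (A3C-style) helper above: pass a list of
# identical envs, one per worker, and an extra network set is built for the
# global model. Assumption: gym with the Atari extras is installed.
import gym

n_workers = 4
envs = [gym.make('PongNoFrameskip-v4') for _ in range(n_workers)]
alg_params, learn_params = atari(envs)
assert len(alg_params['net_list']) == n_workers + 1  # one set per worker + global
assert learn_params['n_workers'] == n_workers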
def classic_control(env, default_seed=True):
    if default_seed:
        assert isinstance(env, list)
        seed = np.arange(len(env)).tolist()  # a list of seeds, one per env
        set_seed(seed, env)  # reproducible
    # For multi-threading: if multiple envs are passed in for parallel
    # computing, they are assumed identical and one is kept as a template.
    if isinstance(env, list):
        num_env = len(env)  # number of envs passed in
        env = env[0]
    else:
        num_env = 1
    alg_params = dict(
        epsilon=0.2,     # for method 'clip'
        kl_target=0.01,  # for method 'penalty'
        lam=0.5,         # for method 'penalty'
    )
    if alg_params.get('net_list') is None:
        num_hidden_layer = 1  # number of hidden layers for the networks
        hidden_dim = 100  # dimension of hidden layers for the networks
        with tf.name_scope('DPPO'):
            with tf.name_scope('V_Net'):
                v_net = ValueNetwork(env.observation_space,
                                     [hidden_dim] * num_hidden_layer)
            with tf.name_scope('Policy'):
                policy_net = StochasticPolicyNetwork(env.observation_space, env.action_space,
                                                     [hidden_dim] * num_hidden_layer)
        net_list = [v_net, policy_net]
        alg_params['net_list'] = net_list
    if alg_params.get('optimizers_list') is None:
        actor_lr = 1e-4
        critic_lr = 2e-4
        optimizers_list = [tf.optimizers.Adam(critic_lr),
                           tf.optimizers.Adam(actor_lr)]
        alg_params['optimizers_list'] = optimizers_list
    learn_params = dict(train_episodes=1000,
                        test_episodes=10,
                        max_steps=200,
                        save_interval=10,
                        gamma=0.9,
                        a_update_steps=10,
                        c_update_steps=10,
                        n_workers=num_env,
                        batch_size=32)
    return alg_params, learn_params
def __init__(self, count: int, config_file: Optional[str], config=None,
             seed: int = 42):
    set_seed(seed)
    self.count = count
    self.traces = []
    self.config_file = config_file
    self.config = config
    if self.config_file:
        self.traces = generate_traces(self.config_file, self.count, 30)
    elif self.config:
        self.traces = generate_traces_from_config(self.config, self.count, 30)
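# Usage sketch, assuming this __init__ belongs to a trace-provider class; the
# class name `TraceSet` below is hypothetical, since only __init__ appears in
# this file. Exactly one of config_file / config is expected, and the third
# argument to generate_traces (30) is the trace duration in seconds.
trace_set = TraceSet(count=100, config_file='config.json', seed=42)
# or: TraceSet(count=100, config_file=None, config=my_config_dict)
print(len(trace_set.traces))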
def box2d(env, default_seed=True):
    if default_seed:
        # reproducible
        seed = 7
        set_seed(seed, env)
    alg_params = dict(
        replay_buffer_size=10000,
        tau=0.01,
    )
    if alg_params.get('net_list') is None:
        num_hidden_layer = 1  # number of hidden layers for the networks
        hidden_dim = 30  # dimension of hidden layers for the networks
        with tf.name_scope('DDPG'):
            with tf.name_scope('Q_Net'):
                q_net = QNetwork(env.observation_space, env.action_space,
                                 num_hidden_layer * [hidden_dim])
            with tf.name_scope('Target_Q_Net'):
                target_q_net = QNetwork(env.observation_space, env.action_space,
                                        num_hidden_layer * [hidden_dim])
            with tf.name_scope('Policy'):
                policy_net = DeterministicPolicyNetwork(env.observation_space,
                                                        env.action_space,
                                                        num_hidden_layer * [hidden_dim])
            with tf.name_scope('Target_Policy'):
                target_policy_net = DeterministicPolicyNetwork(env.observation_space,
                                                               env.action_space,
                                                               num_hidden_layer * [hidden_dim])
        net_list = [q_net, target_q_net, policy_net, target_policy_net]
        alg_params['net_list'] = net_list
    if alg_params.get('optimizers_list') is None:
        actor_lr = 1e-3
        critic_lr = 2e-3
        optimizers_list = [tf.optimizers.Adam(critic_lr),
                           tf.optimizers.Adam(actor_lr)]
        alg_params['optimizers_list'] = optimizers_list
    learn_params = dict(
        train_episodes=200,
        test_episodes=100,
        max_steps=200,
        save_interval=10,
        explore_steps=500,
        batch_size=32,
        gamma=0.9,
        noise_scale=1.,
        noise_scale_decay=0.995,
    )
    return alg_params, learn_params
def classic_control(env, default_seed=True):
    if default_seed:
        # reproducible
        seed = 1
        set_seed(seed, env)
    alg_params = dict(
        epsilon=0.2,     # for method 'clip'
        kl_target=0.01,  # for method 'penalty'
        lam=0.5,         # for method 'penalty'
    )
    if alg_params.get('net_list') is None:
        num_hidden_layer = 1  # number of hidden layers for the networks
        hidden_dim = 100  # dimension of hidden layers for the networks
        with tf.name_scope('PPO'):
            with tf.name_scope('V_Net'):
                v_net = ValueNetwork(env.observation_space,
                                     [hidden_dim] * num_hidden_layer)
            with tf.name_scope('Policy'):
                policy_net = StochasticPolicyNetwork(
                    env.observation_space, env.action_space,
                    [hidden_dim] * num_hidden_layer,
                    output_activation=tf.nn.tanh, trainable=True)
        net_list = [v_net, policy_net]
        alg_params['net_list'] = net_list
    if alg_params.get('optimizers_list') is None:
        actor_lr = 1e-4
        critic_lr = 2e-4
        optimizers_list = [tf.optimizers.Adam(critic_lr),
                           tf.optimizers.Adam(actor_lr)]
        alg_params['optimizers_list'] = optimizers_list
    learn_params = dict(train_episodes=1000,
                        test_episodes=100,
                        max_steps=200,
                        save_interval=10,
                        gamma=0.9,
                        batch_size=32,
                        a_update_steps=10,
                        c_update_steps=10)
    return alg_params, learn_params
def main():
    args = parse_args()
    set_seed(args.seed)
    if args.heuristic == 'mpc':
        heuristic = RobustMPC()
    else:
        raise NotImplementedError
    genet = Genet(args.config_file, args.save_dir, black_box_function,
                  heuristic, args.model_path, args.video_size_file_dir,
                  jump_action=args.jump_action)
    genet.train(args.bo_rounds, epoch_per_round=5000,
                val_dir=args.val_trace_dir)
def main():
    args = parse_args()
    set_seed(args.seed)
    if args.heuristic == 'cubic':
        cc = Cubic()
    elif args.heuristic == 'bbr_old':
        cc = BBR_old()
    elif args.heuristic == 'bbr':
        cc = BBR()
    else:
        raise NotImplementedError
    if not args.config_file:
        dataset = PantheonDataset('../../data', 'all')
        traces = dataset.get_traces(0, 50)
        save_dirs = [
            os.path.join(args.save_dir, link_conn_type, link_name, trace_name)
            for link_conn_type, (link_name, trace_name) in zip(
                dataset.link_conn_types, dataset.trace_names)]
    else:
        traces = generate_traces(args.config_file, 50, 30)
        save_dirs = [os.path.join(args.save_dir, "trace_{:02d}".format(i))
                     for i in range(len(traces))]
    cc_save_dirs = [os.path.join(save_dir, cc.cc_name)
                    for save_dir in save_dirs]
    cc_res = cc.test_on_traces(traces, cc_save_dirs, plot_flag=False,
                               n_proc=16)
    aurora_save_dirs = [os.path.join(save_dir, 'aurora')
                        for save_dir in save_dirs]
    aurora_res = test_on_traces(args.model_path, traces, aurora_save_dirs,
                                nproc=16, seed=42, record_pkt_log=False,
                                plot_flag=False)
    print(cc.cc_name, np.mean([res[1] for res in cc_res]))
    print('aurora', np.mean([res[1] for res in aurora_res]))
    for i, (trace, save_dir) in enumerate(zip(traces, save_dirs)):
        trace.dump(os.path.join(save_dir, 'trace_{:02d}.json'.format(i)))
def main():
    set_seed(42)
    dummy_trace = generate_trace(duration_range=(10, 10),
                                 bandwidth_lower_bound_range=(0.1, 0.1),
                                 bandwidth_upper_bound_range=(12, 12),
                                 delay_range=(25, 25),
                                 loss_rate_range=(0.0, 0.0),
                                 queue_size_range=(1, 1),
                                 T_s_range=(3, 3),
                                 delay_noise_range=(0, 0))
    dummy_trace.dump(os.path.join(SAVE_DIR, "test_trace.json"))
    aurora = Aurora(seed=20, log_dir=SAVE_DIR,
                    pretrained_model_path=MODEL_PATH,
                    timesteps_per_actorbatch=10, record_pkt_log=True)
    t_start = time.time()
    print(aurora.test(dummy_trace, SAVE_DIR, True, saliency=True))
    print("aurora", time.time() - t_start)
def classic_control(env, default_seed=True):
    if default_seed:
        seed = 2
        set_seed(seed, env)  # reproducible
    alg_params = dict(
        gamma=0.9,
        action_range=1,  # an integer, because some envs in classic_control are discrete
    )
    if alg_params.get('net_list') is None:
        num_hidden_layer = 1  # number of hidden layers for the networks
        hidden_dim = 32  # dimension of hidden layers for the networks
        with tf.name_scope('AC'):
            with tf.name_scope('Critic'):
                critic = ValueNetwork(env.observation_space,
                                      hidden_dim_list=num_hidden_layer * [hidden_dim])
            with tf.name_scope('Actor'):
                actor = StochasticPolicyNetwork(
                    env.observation_space, env.action_space,
                    hidden_dim_list=num_hidden_layer * [hidden_dim],
                    output_activation=tf.nn.tanh)
        net_list = [actor, critic]
        alg_params['net_list'] = net_list
    if alg_params.get('optimizers_list') is None:
        a_lr, c_lr = 1e-4, 1e-2  # learning rates of the actor and the critic
        a_optimizer = tf.optimizers.Adam(a_lr)
        c_optimizer = tf.optimizers.Adam(c_lr)
        optimizers_list = [a_optimizer, c_optimizer]
        alg_params['optimizers_list'] = optimizers_list
    learn_params = dict(
        max_steps=200,
        train_episodes=1000,
        test_episodes=10,
        save_interval=100,
    )
    return alg_params, learn_params
def classic_control(env, default_seed=True):
    if default_seed:
        # reproducible
        seed = 1
        set_seed(seed, env)
    alg_params = dict(damping_coeff=0.1, cg_iters=10, delta=0.01)
    if alg_params.get('net_list') is None:
        num_hidden_layer = 2  # number of hidden layers for the networks
        hidden_dim = 64  # dimension of hidden layers for the networks
        with tf.name_scope('TRPO'):
            with tf.name_scope('V_Net'):
                v_net = ValueNetwork(env.observation_space,
                                     [hidden_dim] * num_hidden_layer)
            with tf.name_scope('Policy'):
                policy_net = StochasticPolicyNetwork(
                    env.observation_space, env.action_space,
                    [hidden_dim] * num_hidden_layer,
                    output_activation=tf.nn.tanh)
        net_list = [v_net, policy_net]
        alg_params['net_list'] = net_list
    if alg_params.get('optimizers_list') is None:
        # TRPO updates the policy with conjugate gradient plus a backtracking
        # line search, so only the critic needs a gradient-descent optimizer.
        critic_lr = 1e-3
        optimizers_list = [tf.optimizers.Adam(critic_lr)]
        alg_params['optimizers_list'] = optimizers_list
    learn_params = dict(train_episodes=10000,
                        test_episodes=100,
                        max_steps=200,
                        save_interval=10,
                        gamma=0.9,
                        batch_size=32,
                        backtrack_iters=10,
                        backtrack_coeff=0.8,
                        train_critic_iters=80)
    return alg_params, learn_params
def main():
    args = parse_args()
    set_seed(args.seed)
    if args.save_dir:
        os.makedirs(args.save_dir, exist_ok=True)
    if args.trace_file is not None and args.trace_file.endswith('.json'):
        test_traces = [Trace.load_from_file(args.trace_file)]
    elif args.trace_file is not None and args.trace_file.endswith('.log'):
        test_traces = [Trace.load_from_pantheon_file(args.trace_file,
                                                     args.delay, args.loss,
                                                     args.queue)]
    elif args.config_file is not None:
        test_traces = generate_traces(args.config_file, 1, args.duration,
                                      constant_bw=not args.time_variant_bw)
    else:
        test_traces = [generate_trace((args.duration, args.duration),
                                      (args.bandwidth, args.bandwidth),
                                      (args.delay, args.delay),
                                      (args.loss, args.loss),
                                      (args.queue, args.queue),
                                      (60, 60), (60, 60),
                                      constant_bw=not args.time_variant_bw)]
    aurora = Aurora(seed=args.seed, timesteps_per_actorbatch=10,
                    log_dir=args.save_dir,
                    pretrained_model_path=args.model_path,
                    delta_scale=args.delta_scale)
    results, pkt_logs = aurora.test_on_traces(test_traces, [args.save_dir])
    for pkt_log in pkt_logs:
        with open(os.path.join(args.save_dir, "aurora_packet_log.csv"),
                  'w', 1) as f:
            pkt_logger = csv.writer(f, lineterminator='\n')
            pkt_logger.writerows(pkt_log)
import gym

from common.utils import make_env, set_seed
from algorithms.dppo_clip.dppo_clip import DPPO_CLIP
from common.value_networks import *
from common.policy_networks import *

n_workers = 4

''' load environment '''
env = [gym.make('Pendulum-v0').unwrapped for _ in range(n_workers)]

# reproducible
seed = 1
set_seed(seed)

''' build networks for the algorithm '''
name = 'DPPO_CLIP'
hidden_dim = 100
num_hidden_layer = 1
critic = ValueNetwork(env[0].observation_space,
                      [hidden_dim] * num_hidden_layer,
                      name=name + '_value')
actor = StochasticPolicyNetwork(env[0].observation_space,
                                env[0].action_space,
                                [hidden_dim] * num_hidden_layer,
                                trainable=True,
                                name=name + '_policy')
net_list = [critic, actor]

''' create model '''
actor_lr = 1e-4
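# The script is truncated after `actor_lr = 1e-4`. A plausible continuation,
# mirroring the optimizer setup in the DPPO defaults elsewhere in this repo
# (the DPPO_CLIP constructor call below is an assumption, not confirmed by
# this file):
critic_lr = 2e-4
optimizers_list = [tf.optimizers.Adam(critic_lr), tf.optimizers.Adam(actor_lr)]
model = DPPO_CLIP(net_list, optimizers_list)  # hypothetical constructor signature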
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from common.utils import set_seed

plt.style.use('seaborn-deep')
set_seed(10)

df_genet_bbr = pd.read_csv('training_curve_genet_bbr.csv')
df_udr = pd.read_csv('training_curve_udr.csv')
assert isinstance(df_genet_bbr, pd.DataFrame)
assert isinstance(df_udr, pd.DataFrame)

genet_steps = df_genet_bbr['genet_steps'] / 1e3
steps = df_udr['steps'] / 1e3
udr1_avg_rewards = df_udr['udr1_avg_rewards']
udr2_avg_rewards = df_udr['udr2_avg_rewards']
udr3_avg_rewards = df_udr['udr3_avg_rewards']
genet_avg_rewards = df_genet_bbr['genet_avg_rewards']

plt.plot(genet_steps, df_genet_bbr['genet_avg_rewards'], c='r')

genet_reward_errs = np.concatenate(
    [((df_udr['udr1_up_bnd'] - df_udr['udr1_low_bnd']) / 2).to_numpy(),
     ((df_udr['udr3_up_bnd'] - df_udr['udr3_low_bnd']) / 2).to_numpy()])
print(genet_reward_errs)
genet_reward_errs = genet_reward_errs[:36]
print(len(genet_reward_errs))
assert len(genet_avg_rewards) == len(genet_reward_errs)
genet_low_bnd = genet_avg_rewards.to_numpy() - genet_reward_errs
genet_up_bnd = genet_avg_rewards.to_numpy() + genet_reward_errs
print(genet_up_bnd)
print(genet_low_bnd)
plt.fill_between(genet_steps, np.array(genet_low_bnd),
                 np.array(genet_up_bnd), color='r', alpha=0.1)

udr1_low_bnd = df_udr['udr1_low_bnd']
udr1_up_bnd = df_udr['udr1_up_bnd']
def main():
    args = parse_args()
    set_seed(args.seed)
    if args.save_dir:
        os.makedirs(args.save_dir, exist_ok=True)
    df = pd.read_csv(args.log_file, sep='\t')
    assert isinstance(df, pd.DataFrame)
    latest_step = int(df['num_timesteps'].iloc[-1])
    assert os.path.exists(
        os.path.join(os.path.dirname(args.log_file),
                     "model_step_{}.ckpt.meta".format(latest_step)))
    latest_model_path = os.path.join(os.path.dirname(args.log_file),
                                     "model_step_{}.ckpt".format(latest_step))
    aurora = Aurora(seed=args.seed, timesteps_per_actorbatch=10, log_dir="",
                    pretrained_model_path=latest_model_path)
    bbr = BBR(True)
    cubic = Cubic(True)

    test_traces = []
    trace_dirs = []
    for noise in [0, 20]:
        for bw in [20, 50]:
            tr = generate_trace((30, 30), (bw, bw), (bw, bw), (25, 25),
                                (0, 0), (0.1, 0.1), (60, 60), (noise, noise))
            test_traces.append(tr)
    for _ in range(5):
        test_traces.append(generate_trace((30, 30), (0.1, 0.1), (20, 20),
                                          (50, 100), (0, 0), (0.5, 1),
                                          (10, 10), (0, 10)))
        test_traces.append(generate_trace((30, 30), (10, 10), (100, 100),
                                          (50, 100), (0, 0), (0.5, 1),
                                          (10, 10), (0, 10)))
    for i, tr in enumerate(test_traces):
        trace_dir = os.path.join(args.save_dir, 'trace_{}'.format(i))
        os.makedirs(trace_dir, exist_ok=True)
        tr.dump(os.path.join(trace_dir, 'trace.json'))
        trace_dirs.append(trace_dir)

    t_start = time.time()
    aurora_pkt_level_rewards = []
    for tr, save_dir in zip(test_traces, trace_dirs):
        _, pkt_level_reward = aurora.test(tr, save_dir, True)
        aurora_pkt_level_rewards.append(pkt_level_reward)
    print('aurora', time.time() - t_start)

    t_start = time.time()
    bbr_results = bbr.test_on_traces(test_traces, trace_dirs, True)
    print('bbr', time.time() - t_start)

    t_start = time.time()
    cubic_results = cubic.test_on_traces(test_traces, trace_dirs, True)
    print('cubic', time.time() - t_start)

    bbr_pkt_level_rewards = [val for _, val in bbr_results]
    cubic_pkt_level_rewards = [val for _, val in cubic_results]
    mean_rewards = [np.mean(aurora_pkt_level_rewards),
                    np.mean(bbr_pkt_level_rewards),
                    np.mean(cubic_pkt_level_rewards)]
    reward_errs = [np.std(aurora_pkt_level_rewards),
                   np.std(bbr_pkt_level_rewards),
                   np.std(cubic_pkt_level_rewards)]
    plt.bar([1, 2, 3], mean_rewards, yerr=reward_errs, width=0.5)
    plt.xticks([1, 2, 3], ['aurora', 'bbr', 'cubic'])
    plt.ylabel('Test Reward')
    plt.tight_layout()
    plt.savefig(os.path.join(args.save_dir, 'test_cc.jpg'))
def main():
    args = parse_args()
    set_seed(args.seed)
    dim0, dim1 = args.dims
    config = read_json_file(args.config_file)[0]
    assert dim0 in config and dim1 in config
    dim0_vals = get_dim_vals(dim0)
    dim1_vals = get_dim_vals(dim1)
    print(dim0_vals)
    print(dim1_vals)

    traces = []
    save_dirs = []
    # cnt_ratio was used to scale per-cell trace counts; currently unused.
    with open('heatmap_trace_cnt_ratio.npy', 'rb') as f:
        cnt_ratio = np.load(f)
    for dim0_idx, dim0_val in enumerate(dim0_vals):
        for dim1_idx, dim1_val in enumerate(dim1_vals):
            dim_vals = copy.copy(DEFAULT_VALUES)
            dim_vals[dim0] = dim0_val
            dim_vals[dim1] = dim1_val
            cnt = 10
            for trace_idx in range(cnt):
                trace = generate_trace(
                    duration_range=(dim_vals['duration'],
                                    dim_vals['duration']),
                    bandwidth_lower_bound_range=(
                        dim_vals['bandwidth_lower_bound'],
                        dim_vals['bandwidth_lower_bound']),
                    bandwidth_upper_bound_range=(
                        dim_vals['bandwidth_upper_bound'],
                        dim_vals['bandwidth_upper_bound']),
                    delay_range=(dim_vals['delay'], dim_vals['delay']),
                    loss_rate_range=(dim_vals['loss'], dim_vals['loss']),
                    queue_size_range=(dim_vals['queue'], dim_vals['queue']),
                    T_s_range=(dim_vals['T_s'], dim_vals['T_s']),
                    delay_noise_range=(dim_vals['delay_noise'],
                                       dim_vals['delay_noise']))
                traces.append(trace)
                save_dir = os.path.join(
                    args.save_dir, 'pair_{}_{}'.format(dim0_idx, dim1_idx),
                    'trace_{}'.format(trace_idx))
                save_dirs.append(save_dir)
                os.makedirs(save_dir, exist_ok=True)
                trace.dump(os.path.join(save_dir,
                                        'trace_{}.json'.format(trace_idx)))

    if args.cc in ('genet_bbr', 'genet_cubic', 'genet_bbr_old'):
        genet_seed = ''
        for s in args.models_path.split('/'):
            if 'seed' in s:
                genet_seed = s
        for bo in range(0, 30, 3):
            bo_dir = os.path.join(args.models_path, "bo_{}".format(bo))
            step = 64800
            model_path = os.path.join(bo_dir,
                                      'model_step_{}.ckpt'.format(step))
            if not os.path.exists(model_path + '.meta'):
                print(model_path, 'does not exist')
                continue
            print(model_path)
            genet_save_dirs = [
                os.path.join(save_dir, args.cc, genet_seed,
                             "bo_{}".format(bo), "step_{}".format(step))
                for save_dir in save_dirs]
            t_start = time.time()
            test_on_traces(model_path, traces, genet_save_dirs, args.nproc,
                           42, False, False)
            print('bo {}: {:.3f}'.format(bo, time.time() - t_start))
    elif args.cc == 'pretrained':
        pretrained_save_dirs = [os.path.join(save_dir, args.cc)
                                for save_dir in save_dirs]
        t_start = time.time()
        test_on_traces(args.models_path, traces, pretrained_save_dirs,
                       args.nproc, 42, False, False)
        print('pretrained: {:.3f}'.format(time.time() - t_start))
    elif args.cc == 'overfit_config':
        overfit_config_save_dirs = [os.path.join(save_dir, args.cc)
                                    for save_dir in save_dirs]
        t_start = time.time()
        test_on_traces(args.models_path, traces, overfit_config_save_dirs,
                       args.nproc, 42, False, False)
        print('overfit_config: {:.3f}'.format(time.time() - t_start))
    else:
        if args.cc == 'bbr':
            cc = BBR(False)
        elif args.cc == 'cubic':
            cc = Cubic(False)
        elif args.cc == 'bbr_old':
            cc = BBR_old(False)
        else:
            raise NotImplementedError
        heuristic_save_dirs = [os.path.join(save_dir, cc.cc_name)
                               for save_dir in save_dirs]
        t_start = time.time()
        cc.test_on_traces(traces, heuristic_save_dirs, False, args.nproc)
        print('{}: {:.3f}'.format(args.cc, time.time() - t_start))
plt.rcParams['font.size'] = 16
plt.rcParams['axes.labelsize'] = 18
plt.rcParams['legend.fontsize'] = 18
plt.rcParams["figure.figsize"] = (10, 6)

MODEL_PATH = "../../results_0503/udr_7_dims/udr_mid/seed_50/model_step_360000.ckpt"
SAVE_DIR = '../../figs'
REAL_TRACE_DIR = "/tank/zxxia/PCC-RL/data/cellular/2018-12-02T13-03-India-cellular-to-AWS-India-1-3-runs-3-flows"

metric = 'bandwidth'
set_seed(20)
vals2test = {
    "bandwidth": [0, 1, 2, 3, 4, 5, 6],
    "delay": [5, 50, 100, 150, 200],
    "loss": [0, 0.01, 0.02, 0.03, 0.04, 0.05],
    "queue": [2, 10, 50, 100, 150, 200],
    "T_s": [0, 1, 2, 3, 4, 5, 6],
    "delay_noise": [0, 20, 40, 60, 80, 100],
}
def main():
    args = parse_args()
    assert (not args.pretrained_model_path
            or args.pretrained_model_path.endswith(".ckpt"))
    os.makedirs(args.save_dir, exist_ok=True)
    save_args(args, args.save_dir)
    set_seed(args.seed + COMM_WORLD.Get_rank() * 100)
    nprocs = COMM_WORLD.Get_size()

    # Initialize model and agent policy
    aurora = Aurora(
        args.seed + COMM_WORLD.Get_rank() * 100,
        args.save_dir,
        int(args.val_freq / nprocs),
        args.pretrained_model_path,
        tensorboard_log=args.tensorboard_log,
    )

    # Load training and validation traces.
    training_traces = []
    val_traces = []
    if args.curriculum == "udr":
        config_file = args.config_file
        if args.train_trace_file:
            with open(args.train_trace_file, "r") as f:
                for line in f:
                    training_traces.append(Trace.load_from_file(line.strip()))
        if args.validation and args.val_trace_file:
            with open(args.val_trace_file, "r") as f:
                for line in f:
                    line = line.strip()
                    if args.dataset == "pantheon":
                        queue = 100  # dummy value
                        val_traces.append(Trace.load_from_pantheon_file(
                            line, queue=queue, loss=0))
                    elif args.dataset == "synthetic":
                        val_traces.append(Trace.load_from_file(line))
                    else:
                        raise ValueError(
                            "unknown dataset: {}".format(args.dataset))
        train_scheduler = UDRTrainScheduler(
            config_file,
            training_traces,
            percent=args.real_trace_prob,
        )
    elif args.curriculum == "cl1":
        config_file = args.config_files[0]
        train_scheduler = CL1TrainScheduler(args.config_files, aurora)
    elif args.curriculum == "cl2":
        config_file = args.config_file
        train_scheduler = CL2TrainScheduler(config_file, aurora, args.baseline)
    else:
        raise NotImplementedError
    aurora.train(
        config_file,
        args.total_timesteps,
        train_scheduler,
        tb_log_name=args.exp_name,
        validation_traces=val_traces,
    )
def rlbench(env, default_seed=True):
    if default_seed:
        seed = 2
        set_seed(seed, env)  # reproducible
    state_shape = env.observation_space.shape
    action_shape = env.action_space.shape
    alg_params = dict(state_dim=state_shape[0],
                      action_dim=action_shape[0],
                      replay_buffer_capacity=5e5,
                      policy_target_update_interval=5,
                      action_range=0.1)
    if alg_params.get('net_list') is None:
        num_hidden_layer = 4  # number of hidden layers for the networks
        hidden_dim = 64  # dimension of hidden layers for the networks
        with tf.name_scope('TD3'):
            with tf.name_scope('Q_Net1'):
                q_net1 = QNetwork(env.observation_space, env.action_space,
                                  hidden_dim_list=num_hidden_layer * [hidden_dim])
            with tf.name_scope('Q_Net2'):
                q_net2 = QNetwork(env.observation_space, env.action_space,
                                  hidden_dim_list=num_hidden_layer * [hidden_dim])
            with tf.name_scope('Target_Q_Net1'):
                target_q_net1 = QNetwork(env.observation_space, env.action_space,
                                         hidden_dim_list=num_hidden_layer * [hidden_dim])
            with tf.name_scope('Target_Q_Net2'):
                target_q_net2 = QNetwork(env.observation_space, env.action_space,
                                         hidden_dim_list=num_hidden_layer * [hidden_dim])
            with tf.name_scope('Policy'):
                policy_net = DeterministicPolicyNetwork(
                    env.observation_space, env.action_space,
                    hidden_dim_list=num_hidden_layer * [hidden_dim])
            with tf.name_scope('Target_Policy'):
                target_policy_net = DeterministicPolicyNetwork(
                    env.observation_space, env.action_space,
                    hidden_dim_list=num_hidden_layer * [hidden_dim])
        net_list = [q_net1, q_net2, target_q_net1, target_q_net2,
                    policy_net, target_policy_net]
        alg_params['net_list'] = net_list
    if alg_params.get('optimizers_list') is None:
        # q_lr / policy_lr: learning rates of the Q networks and the policy network
        q_lr, policy_lr = 3e-4, 3e-4
        q_optimizer1 = tf.optimizers.Adam(q_lr)
        q_optimizer2 = tf.optimizers.Adam(q_lr)
        policy_optimizer = tf.optimizers.Adam(policy_lr)
        optimizers_list = [q_optimizer1, q_optimizer2, policy_optimizer]
        alg_params['optimizers_list'] = optimizers_list
    learn_params = dict(
        max_steps=150,
        batch_size=64,
        explore_steps=500,
        update_itr=3,
        reward_scale=1.,
        explore_noise_scale=1.0,
        eval_noise_scale=0.5,
        train_episodes=1000,
        test_episodes=10,
        save_interval=100,
    )
    return alg_params, learn_params
def main(argv):
    set_seed(FLAGS.seed)
    if FLAGS.generate:
        generate()
    else:
        train()
def init_train_env(args, tbert_type):
    # Setup CUDA, GPU & distributed training
    if args.local_rank == -1 or args.no_cuda:
        device = torch.device("cuda" if torch.cuda.is_available()
                              and not args.no_cuda else "cpu")
        args.n_gpu = 0 if args.no_cuda else torch.cuda.device_count()
    else:
        # Initializes the distributed backend which will take care of
        # synchronizing nodes/GPUs
        torch.cuda.set_device(args.local_rank)
        device = torch.device("cuda", args.local_rank)
        torch.distributed.init_process_group(backend="nccl")
        args.n_gpu = 1
    args.device = device

    # Setup logging
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        level=logging.INFO if args.local_rank in [-1, 0] else logging.WARN,
    )
    logger.warning(
        "Process rank: %s, device: %s, n_gpu: %s, distributed training: %s, "
        "16-bits training: %s",
        args.local_rank, device, args.n_gpu, bool(args.local_rank != -1),
        args.fp16,
    )

    # Set seed
    set_seed(args.seed, args.n_gpu)

    # Load pretrained model and tokenizer
    if args.local_rank not in [-1, 0]:
        # Make sure only the first process in distributed training will
        # download model & vocab
        torch.distributed.barrier()
    if tbert_type == 'twin' or tbert_type == "T":
        model = TBertT(BertConfig(), args.code_bert)
    elif tbert_type == 'siamese' or tbert_type == "I":
        model = TBertI(BertConfig(), args.code_bert)
    elif tbert_type == 'siamese2' or tbert_type == "I2":
        model = TBertI2(BertConfig(), args.code_bert)
    elif tbert_type == 'single' or tbert_type == "S":
        model = TBertS(BertConfig(), args.code_bert)
    else:
        raise Exception("TBERT type not found")
    args.tbert_type = tbert_type
    if args.local_rank == 0:
        # Make sure only the first process in distributed training will
        # download model & vocab
        torch.distributed.barrier()

    model.to(args.device)
    logger.info("Training/evaluation parameters %s", args)

    # Before we do anything with models, we want to ensure that we get fp16
    # execution of torch.einsum if args.fp16 is set. Otherwise it'll default
    # to "promote" mode, and we'll get fp32 operations. Note that running
    # `--fp16_opt_level="O2"` will remove the need for this code, but it is
    # still valid.
    if args.fp16:
        try:
            import apex
            apex.amp.register_half_function(torch, "einsum")
        except ImportError:
            raise ImportError(
                "Please install apex from https://www.github.com/nvidia/apex "
                "to use fp16 training.")
    return model
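# Minimal usage sketch for init_train_env. The full argument set comes from
# this project's arg parser; the Namespace below fills only the fields that
# init_train_env itself reads, with illustrative values (the code_bert model
# name is an assumption).
from argparse import Namespace

args = Namespace(local_rank=-1, no_cuda=True, fp16=False, seed=42,
                 code_bert='microsoft/codebert-base')
model = init_train_env(args, tbert_type='twin')  # single-process CPU setup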
def generate_trace(duration_range: Tuple[float, float],
                   bandwidth_lower_bound_range: Tuple[float, float],
                   bandwidth_upper_bound_range: Tuple[float, float],
                   delay_range: Tuple[float, float],
                   loss_rate_range: Tuple[float, float],
                   queue_size_range: Tuple[float, float],
                   T_s_range: Optional[Tuple[float, float]] = None,
                   delay_noise_range: Optional[Tuple[float, float]] = None,
                   seed: Optional[int] = None,
                   dt: float = 0.1):
    """Generate a trace for a network flow.

    Args:
        duration_range: duration range in seconds.
        bandwidth_lower_bound_range: range of the link bandwidth lower bound in Mbps.
        bandwidth_upper_bound_range: range of the link bandwidth upper bound in Mbps.
        delay_range: link one-way propagation delay range in ms.
        loss_rate_range: uplink loss rate range.
        queue_size_range: queue size range, as a multiple of the BDP.
        T_s_range: range of the bandwidth variation interval T_s in seconds.
        delay_noise_range: delay noise range in ms.
        seed: random seed.
        dt: sampling interval of the generated series in seconds.
    """
    if seed:
        set_seed(seed)
    assert len(duration_range) == 2 and \
        duration_range[0] <= duration_range[1] and duration_range[0] > 0
    assert len(bandwidth_lower_bound_range) == 2 and \
        bandwidth_lower_bound_range[0] <= bandwidth_lower_bound_range[1] and \
        bandwidth_lower_bound_range[0] > 0
    assert len(bandwidth_upper_bound_range) == 2 and \
        bandwidth_upper_bound_range[0] <= bandwidth_upper_bound_range[1] and \
        bandwidth_upper_bound_range[0] > 0
    assert len(delay_range) == 2 and delay_range[0] <= delay_range[1] and \
        delay_range[0] > 0
    assert len(loss_rate_range) == 2 and \
        loss_rate_range[0] <= loss_rate_range[1] and loss_rate_range[0] >= 0

    # Sample the loss rate on a log scale; exponents below -4 round down to 0.
    loss_rate_exponent = float(
        np.random.uniform(np.log10(loss_rate_range[0] + 1e-5),
                          np.log10(loss_rate_range[1] + 1e-5), 1))
    if loss_rate_exponent < -4:
        loss_rate = 0
    else:
        loss_rate = 10**loss_rate_exponent

    duration = float(np.random.uniform(duration_range[0],
                                       duration_range[1], 1))

    # Use the bandwidth generator.
    assert T_s_range is not None and len(T_s_range) == 2 and \
        T_s_range[0] <= T_s_range[1]
    assert delay_noise_range is not None and len(delay_noise_range) == 2 and \
        delay_noise_range[0] <= delay_noise_range[1]
    T_s = float(np.random.uniform(T_s_range[0], T_s_range[1], 1))
    delay_noise = float(np.random.uniform(delay_noise_range[0],
                                          delay_noise_range[1], 1))

    timestamps, bandwidths, delays = generate_bw_delay_series(
        T_s, duration,
        bandwidth_lower_bound_range[0], bandwidth_lower_bound_range[1],
        bandwidth_upper_bound_range[0], bandwidth_upper_bound_range[1],
        delay_range[0], delay_range[1], dt=dt)

    # Sample a BDP multiplier and convert it to a queue size in packets
    # (2x the max delay approximates the RTT; keep at least 2 packets).
    queue_size = np.random.uniform(queue_size_range[0], queue_size_range[1])
    bdp = np.max(bandwidths) / BYTES_PER_PACKET / BITS_PER_BYTE * 1e6 * \
        np.max(delays) * 2 / 1000
    queue_size = max(2, int(bdp * queue_size))

    return Trace(timestamps, bandwidths, delays, loss_rate, queue_size,
                 delay_noise, T_s)
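# Usage sketch: fixing both ends of every range yields a deterministic trace
# (as in the saliency test above); widening a range samples uniformly inside
# it. The output file name below is arbitrary.
trace = generate_trace(duration_range=(30, 30),
                       bandwidth_lower_bound_range=(1, 1),
                       bandwidth_upper_bound_range=(10, 100),  # sampled in [10, 100] Mbps
                       delay_range=(25, 25),
                       loss_rate_range=(0.0, 0.0),
                       queue_size_range=(1, 2),  # 1-2x the BDP in packets
                       T_s_range=(3, 3),
                       delay_noise_range=(0, 0),
                       seed=42)
trace.dump('example_trace.json')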
import gym

from common.utils import make_env, set_seed
from algorithms.ac.ac import AC
from common.value_networks import *
from common.policy_networks import *

''' load environment '''
# env = gym.make('CartPole-v0').unwrapped
env = gym.make('Pendulum-v0').unwrapped
obs_space = env.observation_space
act_space = env.action_space

# reproducible
seed = 2
set_seed(seed, env)

''' build networks for the algorithm '''
num_hidden_layer = 1  # number of hidden layers for the networks
hidden_dim = 32  # dimension of hidden layers for the networks
with tf.name_scope('AC'):
    with tf.name_scope('Critic'):
        critic = ValueNetwork(obs_space,
                              hidden_dim_list=num_hidden_layer * [hidden_dim])
    with tf.name_scope('Actor'):
        actor = StochasticPolicyNetwork(obs_space, act_space,
                                        hidden_dim_list=num_hidden_layer * [hidden_dim],
                                        output_activation=tf.nn.tanh)
net_list = [actor, critic]

''' choose optimizers '''
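# The script is truncated after the section marker above. A plausible
# continuation, mirroring the AC defaults elsewhere in this repo (the exact
# values used by the original script are not shown here):
a_lr, c_lr = 1e-4, 1e-2  # learning rates of the actor and the critic
a_optimizer = tf.optimizers.Adam(a_lr)
c_optimizer = tf.optimizers.Adam(c_lr)
optimizers_list = [a_optimizer, c_optimizer]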