Example #1
def atari(env, default_seed=False, **kwargs):
    if default_seed:
        seed = 2
        set_seed(seed, env)  # reproducible

    in_dim = env.observation_space.shape
    act_dim = env.action_space.n
    params = dict(
        number_timesteps=int(1e7),  # for raw-pixel
        test_episodes=10,
        save_path=None,
        save_interval=1e4,
        batch_size=32,
        double_q=True,
        buffer_size=10000,
        exploration_rate=0.1,
        exploration_final_eps=0.01,
        train_freq=4,
        learning_starts=10000,
        target_network_update_freq=1000,
        gamma=0.99,
        prioritized_replay=True,
        prioritized_alpha=0.6,
        prioritized_beta0=0.4,
        dueling=True)
    params.update(kwargs)

    if params.get('network') is None:
        params['network'] = CNNQNet(in_dim, act_dim, params.pop('dueling'))
    if params.get('optimizer') is None:
        params['optimizer'] = tf.optimizers.Adam(1e-4,
                                                 epsilon=1e-5,
                                                 clipnorm=10)
    return dict(), params
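These helpers call a shared set_seed utility (imported from common.utils in the later examples) without showing its body. As a rough, hypothetical sketch only, such a helper typically seeds Python's random module, NumPy, TensorFlow, and the Gym environment(s); the multi-env examples further below pass parallel lists of seeds and envs, which this sketch also accepts:

# Hypothetical sketch of set_seed; the project's actual implementation may differ.
import random
import numpy as np
import tensorflow as tf

def set_seed(seed, env=None):
    # Accept a single seed/env or parallel lists of seeds and envs.
    seeds = seed if isinstance(seed, list) else [seed]
    envs = env if isinstance(env, list) else [env]
    random.seed(seeds[0])
    np.random.seed(seeds[0])
    tf.random.set_seed(seeds[0])
    for s, e in zip(seeds, envs):
        if e is not None:
            e.seed(s)  # seed the Gym environment (pre-0.26 Gym API)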
Example #2
def classic_control(env, default_seed=False, **kwargs):
    if default_seed:
        seed = 2
        set_seed(seed, env)  # reproducible

    in_dim = env.observation_space.shape[0]
    act_dim = env.action_space.n
    params = dict(
        number_timesteps=int(1e4),
        test_episodes=10,
        save_path=None,
        save_interval=1e3,
        batch_size=32,
        double_q=True,
        buffer_size=1000,
        exploration_rate=0.2,
        exploration_final_eps=0.01,
        train_freq=4,
        learning_starts=200,
        target_network_update_freq=50,
        gamma=0.99,
        prioritized_replay=False,
        prioritized_alpha=0.6,
        prioritized_beta0=0.4,
        dueling=True,
    )
    params.update(kwargs)
    if params.get('network') is None:
        params['network'] = MLPQNet(in_dim, act_dim, params.pop('dueling'))
    if params.get('optimizer') is None:
        params['optimizer'] = tf.optimizers.Adam(5e-3, epsilon=1e-5)
    return dict(), params
Example #3
def box2d(env, default_seed=True):
    if default_seed:
        seed = 2
        set_seed(seed, env) # reproducible

    alg_params = dict()

    if alg_params.get('net_list') is None:
        num_hidden_layer = 1  # number of hidden layers for the networks
        hidden_dim = 64  # dimension of hidden layers for the networks
        with tf.name_scope('PG'):
            with tf.name_scope('Policy'):
                policy_net = StochasticPolicyNetwork(env.observation_space, env.action_space,
                                                     num_hidden_layer * [hidden_dim])
        net_list = [policy_net]
        alg_params['net_list'] = net_list

    if alg_params.get('optimizers_list') is None:
        learning_rate = 0.02
        policy_optimizer = tf.optimizers.Adam(learning_rate)
        optimizers_list = [policy_optimizer]
        alg_params['optimizers_list'] = optimizers_list

    learn_params = dict(
        train_episodes=200,
        test_episodes=100,
        max_steps=200,
        save_interval=100,
        gamma=0.95
    )

    return alg_params, learn_params
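The (alg_params, learn_params) pair returned by these configuration helpers is presumably unpacked by the training script when constructing and fitting the algorithm. Below is a hedged sketch of such a caller for the PG-style box2d helper above; the PG class, its import path, and the learn() signature are assumptions modeled on the algorithms.ac.ac / algorithms.dppo_clip.dppo_clip imports later in this listing, not confirmed by this snippet:

# Hypothetical caller; PG, its import path, and learn()'s signature are assumed, not confirmed.
import gym
from algorithms.pg.pg import PG  # assumed path, mirroring the AC / DPPO_CLIP imports below

env = gym.make('LunarLander-v2').unwrapped  # illustrative Box2D environment
alg_params, learn_params = box2d(env, default_seed=True)
agent = PG(**alg_params)              # consumes net_list and optimizers_list
agent.learn(env=env, **learn_params)  # consumes train_episodes, max_steps, gamma, ...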
Example #4
def main():
    warnings.simplefilter(action='ignore', category=RuntimeWarning)
    args = parse_args()
    save_args(args)
    set_seed(args.seed)

    if args.heuristic == 'bbr':
        heuristic = BBR(False)
    elif args.heuristic == 'bbr_old':
        heuristic = BBR_old(False)
    elif args.heuristic == 'cubic':
        heuristic = Cubic(False)
    elif args.heuristic == 'optimal':
        heuristic = None
    else:
        raise ValueError
    genet = Genet(args.config_file, args.save_dir, black_box_function,
                  heuristic, args.model_path, args.nproc, seed=args.seed,
                  validation=args.validation,
                  n_init_pts=args.n_init_pts, n_iter=args.n_iter,
                  model_select=args.model_select,
                  train_trace_file=args.train_trace_file,
                  real_trace_prob=args.real_trace_prob, bo_only=args.bo_only,
                  param_select=args.param_select)
    genet.train(args.bo_rounds, args.bo_steps)
Example #5
def main():
    args = parse_args()
    assert args.pretrained_model_path is None or args.pretrained_model_path.endswith(
        ".ckpt")
    os.makedirs(args.save_dir, exist_ok=True)
    save_args(args)
    set_seed(args.seed + COMM_WORLD.Get_rank() * 100)
    nprocs = COMM_WORLD.Get_size()

    # Initialize model and agent policy
    aurora = Aurora(args.seed + COMM_WORLD.Get_rank() * 100, args.save_dir,
                    int(7200 / nprocs), args.pretrained_model_path,
                    tensorboard_log=args.tensorboard_log)
    # training_traces, validation_traces,
    training_traces = []
    val_traces = []
    if args.train_trace_file:
        with open(args.train_trace_file, 'r') as f:
            for line in f:
                line = line.strip()
                if args.dataset == 'pantheon':
                    queue = 100  # dummy value
                    # if "ethernet" in line:
                    #     queue = 500
                    # elif "cellular" in line:
                    #     queue = 50
                    # else:
                    #     queue = 100
                    training_traces.append(Trace.load_from_pantheon_file(
                        line, queue=queue, loss=0))
                elif args.dataset == 'synthetic':
                    training_traces.append(Trace.load_from_file(line))
                else:
                    raise ValueError

    if args.val_trace_file:
        with open(args.val_trace_file, 'r') as f:
            for line in f:
                line = line.strip()
                if args.dataset == 'pantheon':
                    queue = 100  # dummy value
                    # if "ethernet" in line:
                    #     queue = 500
                    # elif "cellular" in line:
                    #     queue = 50
                    # else:
                    #     queue = 100
                    val_traces.append(Trace.load_from_pantheon_file(
                        line, queue=queue, loss=0))
                elif args.dataset == 'synthetic':
                    val_traces.append(Trace.load_from_file(line))
                else:
                    raise ValueError
    print(args.randomization_range_file)

    aurora.train(args.randomization_range_file,
                 args.total_timesteps, tot_trace_cnt=args.total_trace_count,
                 tb_log_name=args.exp_name, validation_flag=args.validation,
                 training_traces=training_traces,
                 validation_traces=val_traces)
Example #6
def main():
    args = parse_args()
    set_seed(args.seed)
    assert args.count < 100000
    for i in range(args.count):
        trace = generate_trace_from_config_file(args.config_file)
        trace_file = os.path.join(args.save_dir, 'trace_{:05d}.json'.format(i))
        os.makedirs(args.save_dir, exist_ok=True)
        trace.dump(trace_file)
Example #7
def main():
    args = parse_args()
    assert (not args.model_path or args.model_path.endswith(".ckpt"))
    os.makedirs(args.save_dir, exist_ok=True)
    save_args(args, args.save_dir)
    set_seed(args.seed)

    # Initialize model and agent policy
    if args.jump_action:
        pensieve = Pensieve(args.model_path, 6, 6, 3)
    else:
        pensieve = Pensieve(args.model_path)
        # args.seed,
        # args.save_dir,
        # int(args.val_freq / nagents),
        # tensorboard_log=args.tensorboard_log,
    # training_traces, validation_traces,
    training_traces = []
    val_traces = []
    if args.curriculum == "udr":
        config_file = args.config_file
        if args.train_trace_dir:
            all_time, all_bw, all_file_names = load_traces(
                args.train_trace_dir)
            training_traces = [
                AbrTrace(t, bw, link_rtt=80, buffer_thresh=60, name=name)
                for t, bw, name in zip(all_time, all_bw, all_file_names)
            ]

        if args.val_trace_dir:
            all_time, all_bw, all_file_names = load_traces(args.val_trace_dir)
            val_traces = [
                AbrTrace(t, bw, link_rtt=80, buffer_thresh=60, name=name)
                for t, bw, name in zip(all_time, all_bw, all_file_names)
            ]
        train_scheduler = UDRTrainScheduler(
            config_file,
            training_traces,
            percent=args.real_trace_prob,
        )
    elif args.curriculum == "cl1":
        # config_file = args.config_files[0]
        # train_scheduler = CL1TrainScheduler(args.config_files, aurora)
        raise NotImplementedError
    elif args.curriculum == "cl2":
        # config_file = args.config_file
        # train_scheduler = CL2TrainScheduler(
        #     config_file, aurora, args.baseline
        # )
        raise NotImplementedError
    else:
        raise NotImplementedError

    pensieve.train(train_scheduler, val_traces, args.save_dir, args.nagent,
                   args.total_epoch, args.video_size_file_dir)
Example #8
def box2d(env, default_seed=True):
    if default_seed:
        seed = 2 
        set_seed(seed, env) # reproducible

    action_shape = env.action_space.shape  # only continuous
    state_shape = env.observation_space.shape


    alg_params = dict(
        state_dim=state_shape[0],
        action_dim=action_shape[0],
        replay_buffer_capacity=5e5,
        action_range=1.
    )
    if alg_params.get('net_list') is None:
        num_hidden_layer = 4  # number of hidden layers for the networks
        hidden_dim = 64  # dimension of hidden layers for the networks; the same for each layer here
        with tf.name_scope('SAC'):
            with tf.name_scope('Q_Net1'):
                soft_q_net1 = QNetwork(env.observation_space, env.action_space,
                                       hidden_dim_list=num_hidden_layer * [hidden_dim])
            with tf.name_scope('Q_Net2'):
                soft_q_net2 = QNetwork(env.observation_space, env.action_space,
                                       hidden_dim_list=num_hidden_layer * [hidden_dim])
            with tf.name_scope('Target_Q_Net1'):
                target_soft_q_net1 = QNetwork(env.observation_space, env.action_space,
                                              hidden_dim_list=num_hidden_layer * [hidden_dim])
            with tf.name_scope('Target_Q_Net2'):
                target_soft_q_net2 = QNetwork(env.observation_space, env.action_space,
                                              hidden_dim_list=num_hidden_layer * [hidden_dim])
            with tf.name_scope('Policy'):
                policy_net = StochasticPolicyNetwork(env.observation_space, env.action_space,
                                                     hidden_dim_list=num_hidden_layer * [hidden_dim])
        net_list = [soft_q_net1, soft_q_net2, target_soft_q_net1, target_soft_q_net2, policy_net]
        alg_params['net_list'] = net_list
    if alg_params.get('optimizers_list') is None:
        soft_q_lr, policy_lr, alpha_lr = 3e-4, 3e-4, 3e-4  # soft_q_lr: learning rate of the Q network; policy_lr: learning rate of the policy network; alpha_lr: learning rate of the variable alpha
        soft_q_optimizer1 = tf.optimizers.Adam(soft_q_lr)
        soft_q_optimizer2 = tf.optimizers.Adam(soft_q_lr)
        policy_optimizer = tf.optimizers.Adam(policy_lr)
        alpha_optimizer = tf.optimizers.Adam(alpha_lr)
        optimizers_list = [soft_q_optimizer1, soft_q_optimizer2, policy_optimizer, alpha_optimizer]
        alg_params['optimizers_list'] = optimizers_list

    learn_params = dict(
        max_steps=150,
        batch_size=64,
        explore_steps=200,
        update_itr=3,
        policy_target_update_interval=3,
        reward_scale=1.,
        AUTO_ENTROPY=True,
        DETERMINISTIC=False,
        train_episodes=1000,
        test_episodes=10,
        save_interval=100,
    )

    return alg_params, learn_params
Example #9
def atari(env, default_seed=True):
    if default_seed:
        assert isinstance(env, list)
        seed = np.arange(len(env)).tolist()  # a list of seeds for each env
        set_seed(seed, env)  # reproducible

    # for multi-threading
    if isinstance(env, list):  # check whether multiple envs are passed in for parallel computing
        num_env = len(env)  # number of envs passed in
        env = env[0]  # take one of the env as they are all the same
    else:
        num_env = 1

    alg_params = dict(entropy_beta=0.005)

    if alg_params.get('net_list') is None:
        num_hidden_layer = 4  # number of hidden layers for the networks
        hidden_dim = 64  # dimension of hidden layers for the networks
        net_list2 = []  # list of network lists, one per worker thread/process (plus one global)
        for _ in range(num_env + 1):  # additional one for global
            with tf.name_scope('AC'):
                with tf.name_scope('Critic'):
                    critic = ValueNetwork(env.observation_space,
                                          hidden_dim_list=num_hidden_layer *
                                          [hidden_dim])
                with tf.name_scope('Actor'):
                    actor = StochasticPolicyNetwork(
                        env.observation_space,
                        env.action_space,
                        hidden_dim_list=num_hidden_layer * [hidden_dim])
            net_list = [actor, critic]
            net_list2.append(net_list)
        alg_params['net_list'] = net_list2
    if alg_params.get('optimizers_list') is None:
        a_lr, c_lr = 1e-3, 1e-3  # a_lr: learning rate of the actor; c_lr: learning rate of the critic
        a_optimizer = tf.optimizers.RMSprop(a_lr, name='RMS_optimizer_actor')
        c_optimizer = tf.optimizers.RMSprop(c_lr, name='RMS_optimizer_critic')
        optimizers_list = [a_optimizer, c_optimizer]
        alg_params['optimizers_list'] = optimizers_list

    learn_params = dict(max_steps=1000,
                        gamma=0.9,
                        train_episodes=1000,
                        test_episodes=10,
                        save_interval=100,
                        update_itr=10,
                        n_workers=num_env)

    return alg_params, learn_params
Example #10
def classic_control(env, default_seed=True):
    if default_seed:
        assert isinstance(env, list)
        seed = np.arange(len(env)).tolist()  # a list of seeds for each env
        set_seed(seed, env)  # reproducible

    # for multi-threading
    if isinstance(env, list):  # check whether multiple envs are passed in for parallel computing
        num_env = len(env)  # number of envs passed in
        env = env[0]  # take one of the env as they are all the same
    else:
        num_env = 1

    alg_params = dict(
        epsilon=0.2,  # for method 'clip'
        kl_target=0.01,  # for method 'penalty'
        lam=0.5  # for method 'penalty'
    )

    if alg_params.get('net_list') is None:
        num_hidden_layer = 1  # number of hidden layers for the networks
        hidden_dim = 100  # dimension of hidden layers for the networks
        with tf.name_scope('DPPO'):
            with tf.name_scope('V_Net'):
                v_net = ValueNetwork(env.observation_space, [hidden_dim] * num_hidden_layer)
            with tf.name_scope('Policy'):
                policy_net = StochasticPolicyNetwork(env.observation_space, env.action_space,
                                                     [hidden_dim] * num_hidden_layer)

        net_list = v_net, policy_net
        alg_params['net_list'] = net_list

    if alg_params.get('optimizers_list') is None:
        actor_lr = 1e-4
        critic_lr = 2e-4
        optimizers_list = [tf.optimizers.Adam(critic_lr), tf.optimizers.Adam(actor_lr)]
        alg_params['optimizers_list'] = optimizers_list

    learn_params = dict(train_episodes=1000,
                        test_episodes=10,
                        max_steps=200,
                        save_interval=10,
                        gamma=0.9,
                        a_update_steps=10,
                        c_update_steps=10,
                        n_workers=num_env,
                        batch_size=32)

    return alg_params, learn_params
Example #11
 def __init__(self,
              count: int,
              config_file: Optional[str],
              config=None,
              seed: int = 42):
     set_seed(seed)
     self.count = count
     self.traces = []
     self.config_file = config_file
     self.config = config
     if self.config_file:
         self.traces = generate_traces(self.config_file, self.count, 30)
     elif self.config:
         self.traces = generate_traces_from_config(self.config, self.count,
                                                   30)
Example #12
def box2d(env, default_seed=True):
    if default_seed:
        # reproducible
        seed = 7
        set_seed(seed, env)

    alg_params = dict(
        replay_buffer_size=10000,
        tau=0.01,
    )

    if alg_params.get('net_list') is None:
        num_hidden_layer = 1  # number of hidden layers for the networks
        hidden_dim = 30  # dimension of hidden layers for the networks
        with tf.name_scope('DDPG'):
            with tf.name_scope('Q_Net'):
                q_net = QNetwork(env.observation_space, env.action_space, num_hidden_layer * [hidden_dim])
            with tf.name_scope('Target_Q_Net'):
                target_q_net = QNetwork(env.observation_space, env.action_space, num_hidden_layer * [hidden_dim])
            with tf.name_scope('Policy'):
                policy_net = DeterministicPolicyNetwork(env.observation_space, env.action_space,
                                                        num_hidden_layer * [hidden_dim])
            with tf.name_scope('Target_Policy'):
                target_policy_net = DeterministicPolicyNetwork(env.observation_space, env.action_space,
                                                               num_hidden_layer * [hidden_dim])

        net_list = [q_net, target_q_net, policy_net, target_policy_net]
        alg_params['net_list'] = net_list

    if alg_params.get('optimizers_list') is None:
        actor_lr = 1e-3
        critic_lr = 2e-3
        optimizers_list = [tf.optimizers.Adam(critic_lr), tf.optimizers.Adam(actor_lr)]
        alg_params['optimizers_list'] = optimizers_list

    learn_params = dict(
        train_episodes=200,
        test_episodes=100,
        max_steps=200,
        save_interval=10,
        explore_steps=500,
        batch_size=32,
        gamma=0.9,
        noise_scale=1.,
        noise_scale_decay=0.995
    )

    return alg_params, learn_params
Example #13
def classic_control(env, default_seed=True):
    if default_seed:
        # reproducible
        seed = 1
        set_seed(seed, env)

    alg_params = dict(
        epsilon=0.2,  # for method 'clip'
        kl_target=0.01,  # for method 'penalty'
        lam=0.5)  # for method 'penalty'

    if alg_params.get('net_list') is None:
        num_hidden_layer = 1  # number of hidden layers for the networks
        hidden_dim = 100  # dimension of hidden layers for the networks
        with tf.name_scope('PPO'):
            with tf.name_scope('V_Net'):
                v_net = ValueNetwork(env.observation_space,
                                     [hidden_dim] * num_hidden_layer)
            with tf.name_scope('Policy'):
                policy_net = StochasticPolicyNetwork(
                    env.observation_space,
                    env.action_space, [hidden_dim] * num_hidden_layer,
                    output_activation=tf.nn.tanh,
                    trainable=True)
        net_list = [v_net, policy_net]
        alg_params['net_list'] = net_list

    if alg_params.get('optimizers_list') is None:
        actor_lr = 1e-4
        critic_lr = 2e-4
        optimizers_list = [
            tf.optimizers.Adam(critic_lr),
            tf.optimizers.Adam(actor_lr)
        ]
        alg_params['optimizers_list'] = optimizers_list

    learn_params = dict(train_episodes=1000,
                        test_episodes=100,
                        max_steps=200,
                        save_interval=10,
                        gamma=0.9,
                        batch_size=32,
                        a_update_steps=10,
                        c_update_steps=10)

    return alg_params, learn_params
Example #14
def main():
    args = parse_args()
    set_seed(args.seed)
    if args.heuristic == 'mpc':
        heuristic = RobustMPC()
    else:
        raise NotImplementedError

    genet = Genet(args.config_file,
                  args.save_dir,
                  black_box_function,
                  heuristic,
                  args.model_path,
                  args.video_size_file_dir,
                  jump_action=args.jump_action)

    genet.train(args.bo_rounds,
                epoch_per_round=5000,
                val_dir=args.val_trace_dir)
Example #15
def main():
    args = parse_args()
    set_seed(args.seed)
    if args.heuristic == 'cubic':
        cc = Cubic()
    elif args.heuristic == 'bbr_old':
        cc = BBR_old()
    elif args.heuristic == 'bbr':
        cc = BBR()
    else:
        raise NotImplementedError
    # if 'large' in args.config_file:
    #     config = read_json_file(args.config_file)
    #     config[0]['bandwidth_lower_bound'] = (1, 1)
    #     config[0]['bandwidth_upper_bound'] = (1, 100)
    #     traces = generate_traces_from_config(config, 50, 30)
    # else:
    if not args.config_file:
        dataset = PantheonDataset('../../data', 'all')
        traces = dataset.get_traces(0, 50)
        save_dirs = [os.path.join(
            args.save_dir, link_conn_type, link_name, trace_name)
            for link_conn_type, (link_name, trace_name) in
            zip(dataset.link_conn_types, dataset.trace_names)]
    else:
        traces = generate_traces(args.config_file, 50, 30)
        save_dirs = [os.path.join(args.save_dir, "trace_{:02d}".format(i))
                     for i in range(len(traces))]
    cc_save_dirs = [os.path.join(save_dir, cc.cc_name)
                    for save_dir in save_dirs]
    cc_res = cc.test_on_traces(traces, cc_save_dirs, plot_flag=False, n_proc=16)

    aurora_save_dirs = [os.path.join(save_dir, 'aurora')
                        for save_dir in save_dirs]
    aurora_res = test_on_traces(args.model_path, traces, aurora_save_dirs,
                                nproc=16, seed=42, record_pkt_log=False,
                                plot_flag=False)
    print(cc.cc_name, np.mean([res[1] for res in cc_res]))
    print('aurora', np.mean([res[1] for res in aurora_res]))

    for i, (trace, save_dir) in enumerate(zip(traces, save_dirs)):
        trace.dump(os.path.join(save_dir, 'trace_{:02d}.json'.format(i)))
Example #16
def main():
    set_seed(42)
    dummy_trace = generate_trace(duration_range=(10, 10),
                                 bandwidth_lower_bound_range=(0.1, 0.1),
                                 bandwidth_upper_bound_range=(12, 12),
                                 delay_range=(25, 25),
                                 loss_rate_range=(0.0, 0.0),
                                 queue_size_range=(1, 1),
                                 T_s_range=(3, 3),
                                 delay_noise_range=(0, 0))
    dummy_trace.dump(os.path.join(SAVE_DIR, "test_trace.json"))

    genet = Aurora(seed=20,
                   log_dir=SAVE_DIR,
                   pretrained_model_path=MODEL_PATH,
                   timesteps_per_actorbatch=10,
                   record_pkt_log=True)
    t_start = time.time()
    print(genet.test(dummy_trace, SAVE_DIR, True, saliency=True))
    print("aurora", time.time() - t_start)
Example #17
def classic_control(env, default_seed=True):
    if default_seed:
        seed = 2
        set_seed(seed, env)  # reproducible

    alg_params = dict(
        gamma=0.9,
        action_range=1  # integer because some envs in classic_control are discrete
    )
    if alg_params.get('net_list') is None:
        num_hidden_layer = 1  # number of hidden layers for the networks
        hidden_dim = 32  # dimension of hidden layers for the networks
        with tf.name_scope('AC'):
            with tf.name_scope('Critic'):
                critic = ValueNetwork(env.observation_space,
                                      hidden_dim_list=num_hidden_layer *
                                      [hidden_dim])
            with tf.name_scope('Actor'):
                actor = StochasticPolicyNetwork(
                    env.observation_space,
                    env.action_space,
                    hidden_dim_list=num_hidden_layer * [hidden_dim],
                    output_activation=tf.nn.tanh)
        net_list = [actor, critic]
        alg_params['net_list'] = net_list
    if alg_params.get('optimizers_list') is None:
        a_lr, c_lr = 1e-4, 1e-2  # a_lr: learning rate of the actor; c_lr: learning rate of the critic
        a_optimizer = tf.optimizers.Adam(a_lr)
        c_optimizer = tf.optimizers.Adam(c_lr)
        optimizers_list = [a_optimizer, c_optimizer]
        alg_params['optimizers_list'] = optimizers_list

    learn_params = dict(
        max_steps=200,
        train_episodes=1000,
        test_episodes=10,
        save_interval=100,
    )

    return alg_params, learn_params
Example #18
def classic_control(env, default_seed=True):

    if default_seed:
        # reproducible
        seed = 1
        set_seed(seed, env)

    alg_params = dict(damping_coeff=0.1, cg_iters=10, delta=0.01)

    if alg_params.get('net_list') is None:
        num_hidden_layer = 2  # number of hidden layers for the networks
        hidden_dim = 64  # dimension of hidden layers for the networks
        with tf.name_scope('TRPO'):
            with tf.name_scope('V_Net'):
                v_net = ValueNetwork(env.observation_space,
                                     [hidden_dim] * num_hidden_layer)
            with tf.name_scope('Policy'):
                policy_net = StochasticPolicyNetwork(
                    env.observation_space,
                    env.action_space, [hidden_dim] * num_hidden_layer,
                    output_activation=tf.nn.tanh)

        net_list = [v_net, policy_net]
        alg_params['net_list'] = net_list

    if alg_params.get('optimizers_list') is None:
        critic_lr = 1e-3
        optimizers_list = [tf.optimizers.Adam(critic_lr)]
        alg_params['optimizers_list'] = optimizers_list

    learn_params = dict(train_episodes=10000,
                        test_episodes=100,
                        max_steps=200,
                        save_interval=10,
                        gamma=0.9,
                        batch_size=32,
                        backtrack_iters=10,
                        backtrack_coeff=0.8,
                        train_critic_iters=80)

    return alg_params, learn_params
Example #19
def main():
    args = parse_args()
    set_seed(args.seed)
    if args.save_dir:
        os.makedirs(args.save_dir, exist_ok=True)

    if args.trace_file is not None and args.trace_file.endswith('.json'):
        test_traces = [Trace.load_from_file(args.trace_file)]
    elif args.trace_file is not None and args.trace_file.endswith('.log'):
        test_traces = [
            Trace.load_from_pantheon_file(args.trace_file, args.delay,
                                          args.loss, args.queue)
        ]
    elif args.config_file is not None:
        test_traces = generate_traces(args.config_file,
                                      1,
                                      args.duration,
                                      constant_bw=not args.time_variant_bw)
    else:
        test_traces = [
            generate_trace((args.duration, args.duration),
                           (args.bandwidth, args.bandwidth),
                           (args.delay, args.delay), (args.loss, args.loss),
                           (args.queue, args.queue), (60, 60), (60, 60),
                           constant_bw=not args.time_variant_bw)
        ]
    # print(test_traces[0].bandwidths)

    aurora = Aurora(seed=args.seed,
                    timesteps_per_actorbatch=10,
                    log_dir=args.save_dir,
                    pretrained_model_path=args.model_path,
                    delta_scale=args.delta_scale)
    results, pkt_logs = aurora.test_on_traces(test_traces, [args.save_dir])

    for pkt_log in pkt_logs:
        with open(os.path.join(args.save_dir, "aurora_packet_log.csv"), 'w',
                  1) as f:
            pkt_logger = csv.writer(f, lineterminator='\n')
            pkt_logger.writerows(pkt_log)
Example #20
import gym

# from common.env_wrappers import DummyVecEnv
from common.utils import make_env, set_seed
from algorithms.dppo_clip.dppo_clip import DPPO_CLIP
from common.value_networks import *
from common.policy_networks import *

n_workers = 4
''' load environment '''
env = [gym.make('Pendulum-v0').unwrapped for i in range(n_workers)]

# reproducible
seed = 1
set_seed(seed)
''' build networks for the algorithm '''
name = 'DPPO_CLIP'
hidden_dim = 100
num_hidden_layer = 1
critic = ValueNetwork(env[0].observation_space,
                      [hidden_dim] * num_hidden_layer,
                      name=name + '_value')

actor = StochasticPolicyNetwork(env[0].observation_space,
                                env[0].action_space,
                                [hidden_dim] * num_hidden_layer,
                                trainable=True,
                                name=name + '_policy')
net_list = critic, actor
''' create model '''
actor_lr = 1e-4
Example #21
from common.utils import set_seed
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

plt.style.use('seaborn-deep')
set_seed(10)

df_genet_bbr = pd.read_csv('training_curve_genet_bbr.csv')
df_udr = pd.read_csv('training_curve_udr.csv')
assert isinstance(df_genet_bbr, pd.DataFrame)
assert isinstance(df_udr, pd.DataFrame)
genet_steps = df_genet_bbr['genet_steps'] / 1e3
steps = df_udr['steps'] / 1e3
udr1_avg_rewards = df_udr['udr1_avg_rewards']
udr2_avg_rewards = df_udr['udr2_avg_rewards']
udr3_avg_rewards = df_udr['udr3_avg_rewards']
genet_avg_rewards = df_genet_bbr['genet_avg_rewards']
plt.plot(genet_steps, df_genet_bbr['genet_avg_rewards'], c='r')
genet_reward_errs = np.concatenate([
    ((df_udr['udr1_up_bnd'] - df_udr['udr1_low_bnd']) / 2).to_numpy(),
    ((df_udr['udr3_up_bnd'] - df_udr['udr3_low_bnd']) / 2).to_numpy()
])
print(genet_reward_errs)
genet_reward_errs = genet_reward_errs[:36]
print(len(genet_reward_errs))
assert len(genet_avg_rewards) == len(genet_reward_errs)
genet_low_bnd = genet_avg_rewards.to_numpy() - genet_reward_errs
genet_up_bnd = genet_avg_rewards.to_numpy() + genet_reward_errs
print(genet_up_bnd)
print(genet_low_bnd)
plt.fill_between(genet_steps, np.array(genet_low_bnd), np.array(genet_up_bnd), color='r', alpha=0.1)
udr1_low_bnd = df_udr['udr1_low_bnd']
udr1_up_bnd = df_udr['udr1_up_bnd']
Example #22
def main():
    args = parse_args()
    set_seed(args.seed)
    if args.save_dir:
        os.makedirs(args.save_dir, exist_ok=True)

    df = pd.read_csv(args.log_file, sep='\t')
    assert isinstance(df, pd.DataFrame)

    latest_step = int(df['num_timesteps'].iloc[-1])
    assert os.path.exists(
        os.path.join(os.path.dirname(args.log_file),
                     "model_step_{}.ckpt.meta".format(latest_step)))
    latest_model_path = os.path.join(os.path.dirname(args.log_file),
                                     "model_step_{}.ckpt".format(latest_step))

    aurora = Aurora(seed=args.seed,
                    timesteps_per_actorbatch=10,
                    log_dir="",
                    pretrained_model_path=latest_model_path)
    bbr = BBR(True)
    cubic = Cubic(True)

    test_traces = []
    trace_dirs = []
    for noise in [0, 20]:
        for bw in [20, 50]:
            tr = generate_trace((30, 30), (bw, bw), (bw, bw), (25, 25), (0, 0),
                                (0.1, 0.1), (60, 60), (noise, noise))
            test_traces.append(tr)

    for _ in range(5):
        test_traces.append(
            generate_trace((30, 30), (0.1, 0.1), (20, 20), (50, 100), (0, 0),
                           (0.5, 1), (10, 10), (0, 10)))
        test_traces.append(
            generate_trace((30, 30), (10, 10), (100, 100), (50, 100), (0, 0),
                           (0.5, 1), (10, 10), (0, 10)))

    for i, tr in enumerate(test_traces):
        os.makedirs(os.path.join(args.save_dir, 'trace_{}'.format(i)),
                    exist_ok=True)
        tr.dump(os.path.join(args.save_dir, 'trace_{}'.format(i),
                             'trace.json'))
        trace_dirs.append(os.path.join(args.save_dir, 'trace_{}'.format(i)))

    t_start = time.time()
    aurora_pkt_level_rewards = []
    for tr, save_dir in zip(test_traces, trace_dirs):
        _, pkt_level_reward = aurora.test(tr, save_dir, True)
        aurora_pkt_level_rewards.append(pkt_level_reward)
    print('aurora', time.time() - t_start)
    t_start = time.time()
    bbr_results = bbr.test_on_traces(test_traces, trace_dirs, True)
    print('bbr', time.time() - t_start)
    t_start = time.time()
    cubic_results = cubic.test_on_traces(test_traces, trace_dirs, True)
    print('cubic', time.time() - t_start)

    bbr_pkt_level_rewards = [val for _, val in bbr_results]
    cubic_pkt_level_rewards = [val for _, val in cubic_results]
    mean_rewards = [
        np.mean(aurora_pkt_level_rewards),
        np.mean(bbr_pkt_level_rewards),
        np.mean(cubic_pkt_level_rewards)
    ]
    reward_errs = [
        np.std(aurora_pkt_level_rewards),
        np.std(bbr_pkt_level_rewards),
        np.std(cubic_pkt_level_rewards)
    ]
    plt.bar([1, 2, 3], mean_rewards, yerr=reward_errs, width=0.5)
    plt.xticks([1, 2, 3], ['aurora', 'bbr', 'cubic'])
    plt.ylabel('Test Reward')
    plt.tight_layout()
    plt.savefig(os.path.join(args.save_dir, 'test_cc.jpg'))
Example #23
def main():
    args = parse_args()
    set_seed(args.seed)
    # tokens = os.path.basename(os.path.dirname(os.path.dirname(args.save_dir))).split('_')
    # config0_dim0_idx = int(tokens[1])
    # config0_dim1_idx = int(tokens[2])
    # config1_dim0_idx = int(tokens[4])
    # config1_dim1_idx = int(tokens[5])

    dim0, dim1 = args.dims
    config = read_json_file(args.config_file)[0]
    assert dim0 in config and dim1 in config

    # dim0_vals = np.linspace(config[dim0][0], config[dim0][1], 10)
    # dim1_vals = np.linspace(config[dim1][0], config[dim1][1], 10)
    dim0_vals = get_dim_vals(dim0)
    dim1_vals = get_dim_vals(dim1)
    print(dim0_vals)
    print(dim1_vals)
    traces = []
    save_dirs = []
    with open('heatmap_trace_cnt_ratio.npy', 'rb') as f:
        cnt_ratio = np.load(f)
    for dim0_idx, dim0_val in enumerate(dim0_vals):
        for dim1_idx, dim1_val in enumerate(dim1_vals):
            dim_vals = copy.copy(DEFAULT_VALUES)
            dim_vals[dim0] = dim0_val
            dim_vals[dim1] = dim1_val
            # print(i, dim0_val, dim1_val, dim_vals)
            cnt = 10
            # if cnt_ratio[dim0_idx, dim1_idx] > 1:
            #     cnt *= int(cnt_ratio[dim0_idx, dim1_idx])
            # print(cnt)
            for trace_idx in range(cnt):
                trace = generate_trace(
                    duration_range=(dim_vals['duration'],
                                    dim_vals['duration']),
                    bandwidth_lower_bound_range=(
                        dim_vals['bandwidth_lower_bound'],
                        dim_vals['bandwidth_lower_bound']),
                    bandwidth_upper_bound_range=(
                        dim_vals['bandwidth_upper_bound'],
                        dim_vals['bandwidth_upper_bound']),
                    delay_range=(dim_vals['delay'], dim_vals['delay']),
                    loss_rate_range=(dim_vals['loss'], dim_vals['loss']),
                    queue_size_range=(dim_vals['queue'], dim_vals['queue']),
                    T_s_range=(dim_vals['T_s'], dim_vals['T_s']),
                    delay_noise_range=(dim_vals['delay_noise'],
                                       dim_vals['delay_noise']))
                traces.append(trace)
                save_dir = os.path.join(
                    args.save_dir, 'pair_{}_{}'.format(dim0_idx, dim1_idx),
                    'trace_{}'.format(trace_idx))
                save_dirs.append(save_dir)
                os.makedirs(save_dir, exist_ok=True)
                trace.dump(
                    os.path.join(save_dir, 'trace_{}.json'.format(trace_idx)))
    if args.cc == 'genet_bbr' or args.cc == 'genet_cubic' or args.cc == 'genet_bbr_old':
        genet_seed = ''
        for s in args.models_path.split('/'):
            if 'seed' in s:
                genet_seed = s
        for bo in range(0, 30, 3):
            # for bo_dir in natural_sort(glob.glob(os.path.join(args.models_path, "bo_*/"))):
            bo_dir = os.path.join(args.models_path, "bo_{}".format(bo))
            step = 64800
            model_path = os.path.join(bo_dir,
                                      'model_step_{}.ckpt'.format(step))
            if not os.path.exists(model_path + '.meta'):
                print(model_path, 'does not exist')
                continue
            print(model_path)
            genet_save_dirs = [
                os.path.join(save_dir, args.cc, genet_seed, "bo_{}".format(bo),
                             "step_{}".format(step)) for save_dir in save_dirs
            ]
            t_start = time.time()
            test_on_traces(model_path, traces, genet_save_dirs, args.nproc, 42,
                           False, False)
            print('bo {}: {:.3f}'.format(bo, time.time() - t_start))
    elif args.cc == 'pretrained':
        pretrained_save_dirs = [
            os.path.join(save_dir, args.cc) for save_dir in save_dirs
        ]
        t_start = time.time()
        test_on_traces(args.models_path, traces, pretrained_save_dirs,
                       args.nproc, 42, False, False)
        print('pretrained: {:.3f}'.format(time.time() - t_start))
    elif args.cc == 'overfit_config':
        overfit_config_save_dirs = [
            os.path.join(save_dir, args.cc) for save_dir in save_dirs
        ]
        t_start = time.time()
        test_on_traces(args.models_path, traces, overfit_config_save_dirs,
                       args.nproc, 42, False, False)
        print('overfit_config: {:.3f}'.format(time.time() - t_start))
    else:
        if args.cc == 'bbr':
            cc = BBR(False)
        elif args.cc == 'cubic':
            cc = Cubic(False)
        elif args.cc == 'bbr_old':
            cc = BBR_old(False)
        else:
            raise NotImplementedError
        heuristic_save_dirs = [
            os.path.join(save_dir, cc.cc_name) for save_dir in save_dirs
        ]
        t_start = time.time()
        cc.test_on_traces(traces, heuristic_save_dirs, False, args.nproc)
        print('{}: {:.3f}'.format(args.cc, time.time() - t_start))
Example #24
plt.rcParams['font.size'] = 16
plt.rcParams['axes.labelsize'] = 18
plt.rcParams['legend.fontsize'] = 18
plt.rcParams["figure.figsize"] = (10, 6)

# MODEL_PATH = "/tank/zxxia/PCC-RL/results_0503/udr_7_dims/udr_large/seed_50/model_step_396000.ckpt"
MODEL_PATH = "../../results_0503/udr_7_dims/udr_mid/seed_50/model_step_360000.ckpt"
# \"/tank/zxxia/PCC-RL/results_0503/udr_7_dims/udr_small/seed_50/model_step_396000.ckpt"
SAVE_DIR = '../../figs'
REAL_TRACE_DIR = "/tank/zxxia/PCC-RL/data/cellular/2018-12-02T13-03-India-cellular-to-AWS-India-1-3-runs-3-flows"

# REAL_TRACE_DIR = "/tank/zxxia/PCC-RL/data/cellular/2018-12-10T20-36-AWS-Brazil-2-to-Colombia-cellular-3-runs"

metric = 'bandwidth'

set_seed(20)

vals2test = {
    "bandwidth": [0, 1, 2, 3, 4, 5, 6],
    "delay": [5, 50, 100, 150, 200],
    "loss": [0, 0.01, 0.02, 0.03, 0.04, 0.05],
    "queue": [2, 10, 50, 100, 150, 200],
    "T_s": [0, 1, 2, 3, 4, 5, 6],
    "delay_noise": [0, 20, 40, 60, 80, 100],
}

# real_traces = []
# for trace_file in glob.glob(os.path.join(REAL_TRACE_DIR, "*datalink_run*.log")):
#     if 'bbr' not in trace_file and 'cubic' not in trace_file and \
#             'vegas' not in trace_file and 'pcc' not in trace_file and 'copa' not in trace_file:
#         continue
Example #25
def main():
    args = parse_args()
    assert (not args.pretrained_model_path
            or args.pretrained_model_path.endswith(".ckpt"))
    os.makedirs(args.save_dir, exist_ok=True)
    save_args(args, args.save_dir)
    set_seed(args.seed + COMM_WORLD.Get_rank() * 100)
    nprocs = COMM_WORLD.Get_size()

    # Initialize model and agent policy
    aurora = Aurora(
        args.seed + COMM_WORLD.Get_rank() * 100,
        args.save_dir,
        int(args.val_freq / nprocs),
        args.pretrained_model_path,
        tensorboard_log=args.tensorboard_log,
    )
    # training_traces, validation_traces,
    training_traces = []
    val_traces = []
    if args.curriculum == "udr":
        config_file = args.config_file
        if args.train_trace_file:
            with open(args.train_trace_file, "r") as f:
                for line in f:
                    line = line.strip()
                    training_traces.append(Trace.load_from_file(line))

        if args.validation and args.val_trace_file:
            with open(args.val_trace_file, "r") as f:
                for line in f:
                    line = line.strip()
                    if args.dataset == "pantheon":
                        queue = 100  # dummy value
                        val_traces.append(
                            Trace.load_from_pantheon_file(line,
                                                          queue=queue,
                                                          loss=0))
                    elif args.dataset == "synthetic":
                        val_traces.append(Trace.load_from_file(line))
                    else:
                        raise ValueError
        train_scheduler = UDRTrainScheduler(
            config_file,
            training_traces,
            percent=args.real_trace_prob,
        )
    elif args.curriculum == "cl1":
        config_file = args.config_files[0]
        train_scheduler = CL1TrainScheduler(args.config_files, aurora)
    elif args.curriculum == "cl2":
        config_file = args.config_file
        train_scheduler = CL2TrainScheduler(config_file, aurora, args.baseline)
    else:
        raise NotImplementedError

    aurora.train(
        config_file,
        args.total_timesteps,
        train_scheduler,
        tb_log_name=args.exp_name,
        validation_traces=val_traces,
    )
Example #26
def rlbench(env, default_seed=True):
    if default_seed:
        seed = 2
        set_seed(seed, env)  # reproducible

    state_shape = env.observation_space.shape
    action_shape = env.action_space.shape

    alg_params = dict(state_dim=state_shape[0],
                      action_dim=action_shape[0],
                      replay_buffer_capacity=5e5,
                      policy_target_update_interval=5,
                      action_range=0.1)
    if alg_params.get('net_list') is None:
        num_hidden_layer = 4  #number of hidden layers for the networks
        hidden_dim = 64  # dimension of hidden layers for the networks
        with tf.name_scope('TD3'):
            with tf.name_scope('Q_Net1'):
                q_net1 = QNetwork(env.observation_space,
                                  env.action_space,
                                  hidden_dim_list=num_hidden_layer *
                                  [hidden_dim])
            with tf.name_scope('Q_Net2'):
                q_net2 = QNetwork(env.observation_space,
                                  env.action_space,
                                  hidden_dim_list=num_hidden_layer *
                                  [hidden_dim])
            with tf.name_scope('Target_Q_Net1'):
                target_q_net1 = QNetwork(env.observation_space,
                                         env.action_space,
                                         hidden_dim_list=num_hidden_layer *
                                         [hidden_dim])
            with tf.name_scope('Target_Q_Net2'):
                target_q_net2 = QNetwork(env.observation_space,
                                         env.action_space,
                                         hidden_dim_list=num_hidden_layer *
                                         [hidden_dim])
            with tf.name_scope('Policy'):
                policy_net = DeterministicPolicyNetwork(
                    env.observation_space,
                    env.action_space,
                    hidden_dim_list=num_hidden_layer * [hidden_dim])
            with tf.name_scope('Target_Policy'):
                target_policy_net = DeterministicPolicyNetwork(
                    env.observation_space,
                    env.action_space,
                    hidden_dim_list=num_hidden_layer * [hidden_dim])
        net_list = [
            q_net1, q_net2, target_q_net1, target_q_net2, policy_net,
            target_policy_net
        ]
        alg_params['net_list'] = net_list
    if alg_params.get('optimizers_list') is None:
        q_lr, policy_lr = 3e-4, 3e-4  # q_lr: learning rate of the Q network; policy_lr: learning rate of the policy network
        q_optimizer1 = tf.optimizers.Adam(q_lr)
        q_optimizer2 = tf.optimizers.Adam(q_lr)
        policy_optimizer = tf.optimizers.Adam(policy_lr)
        optimizers_list = [q_optimizer1, q_optimizer2, policy_optimizer]
        alg_params['optimizers_list'] = optimizers_list

    learn_params = dict(
        max_steps=150,
        batch_size=64,
        explore_steps=500,
        update_itr=3,
        reward_scale=1.,
        explore_noise_scale=1.0,
        eval_noise_scale=0.5,
        train_episodes=1000,
        test_episodes=10,
        save_interval=100,
    )

    return alg_params, learn_params
Example #27
def main(argv):
    set_seed(FLAGS.seed)
    if FLAGS.generate:
        generate()
    else:
        train()
Example #28
def init_train_env(args, tbert_type):
    # Setup CUDA, GPU & distributed training
    if args.local_rank == -1 or args.no_cuda:
        device = torch.device("cuda" if torch.cuda.is_available()
                              and not args.no_cuda else "cpu")
        args.n_gpu = 0 if args.no_cuda else torch.cuda.device_count()
    else:  # Initializes the distributed backend which will take care of synchronizing nodes/GPUs
        torch.cuda.set_device(args.local_rank)
        device = torch.device("cuda", args.local_rank)
        torch.distributed.init_process_group(backend="nccl")
        args.n_gpu = 1
    args.device = device

    # Setup logging
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s -   %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        level=logging.INFO if args.local_rank in [-1, 0] else logging.WARN,
    )
    logger.warning(
        "Process rank: %s, device: %s, n_gpu: %s, distributed training: %s, 16-bits training: %s",
        args.local_rank,
        device,
        args.n_gpu,
        bool(args.local_rank != -1),
        args.fp16,
    )
    # Set seed
    set_seed(args.seed, args.n_gpu)
    # Load pretrained model and tokenizer
    if args.local_rank not in [-1, 0]:
        # Make sure only the first process in distributed training will download model & vocab
        torch.distributed.barrier()
    if tbert_type == 'twin' or tbert_type == "T":
        model = TBertT(BertConfig(), args.code_bert)
    elif tbert_type == 'siamese' or tbert_type == "I":
        model = TBertI(BertConfig(), args.code_bert)
    elif tbert_type == 'siamese2' or tbert_type == "I2":
        model = TBertI2(BertConfig(), args.code_bert)
    elif tbert_type == 'single' or tbert_type == "S":
        model = TBertS(BertConfig(), args.code_bert)
    else:
        raise Exception("TBERT type not found")
    args.tbert_type = tbert_type
    if args.local_rank == 0:
        # Make sure only the first process in distributed training will download model & vocab
        torch.distributed.barrier()

    model.to(args.device)
    logger.info("Training/evaluation parameters %s", args)

    # Before we do anything with models, we want to ensure that we get fp16 execution of torch.einsum if args.fp16 is set.
    # Otherwise it'll default to "promote" mode, and we'll get fp32 operations. Note that running `--fp16_opt_level="O2"` will
    # remove the need for this code, but it is still valid.
    if args.fp16:
        try:
            import apex
            apex.amp.register_half_function(torch, "einsum")
        except ImportError:
            raise ImportError(
                "Please install apex from https://www.github.com/nvidia/apex to use fp16 training."
            )
    return model
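This PyTorch-based example calls set_seed(args.seed, args.n_gpu) rather than the (seed, env) form used in the TensorFlow snippets above. A plausible, hypothetical sketch of such a helper (not necessarily the project's actual code) seeds Python, NumPy, and torch, and additionally seeds every CUDA device when GPUs are in use:

# Hypothetical sketch of set_seed(seed, n_gpu); the project's actual helper may differ.
import random
import numpy as np
import torch

def set_seed(seed, n_gpu):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if n_gpu > 0:
        torch.cuda.manual_seed_all(seed)  # seed all visible GPUs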
Example #29
def generate_trace(duration_range: Tuple[float, float],
                   bandwidth_lower_bound_range: Tuple[float, float],
                   bandwidth_upper_bound_range: Tuple[float, float],
                   delay_range: Tuple[float, float],
                   loss_rate_range: Tuple[float, float],
                   queue_size_range: Tuple[float, float],
                   T_s_range: Optional[Tuple[float, float]] = None,
                   delay_noise_range: Optional[Tuple[float, float]] = None,
                   seed: Optional[int] = None,
                   dt: float = 0.1):
    """Generate trace for a network flow.

    Args:
        duration_range: duraiton range in second.
        bandwidth_range: link bandwidth range in Mbps.
        delay_range: link one-way propagation delay in ms.
        loss_rate_range: Uplink loss rate range.
        queue_size_range: queue size range in packets.
    """
    if seed:
        set_seed(seed)
    assert len(duration_range) == 2 and \
            duration_range[0] <= duration_range[1] and duration_range[0] > 0
    assert len(bandwidth_lower_bound_range) == 2 and \
            bandwidth_lower_bound_range[0] <= bandwidth_lower_bound_range[1] and bandwidth_lower_bound_range[0] > 0
    assert len(bandwidth_upper_bound_range) == 2 and \
            bandwidth_upper_bound_range[0] <= bandwidth_upper_bound_range[1] and bandwidth_upper_bound_range[0] > 0
    assert len(delay_range) == 2 and delay_range[0] <= delay_range[1] and \
            delay_range[0] > 0
    assert len(loss_rate_range) == 2 and \
            loss_rate_range[0] <= loss_rate_range[1] and loss_rate_range[0] >= 0

    loss_rate_exponent = float(
        np.random.uniform(np.log10(loss_rate_range[0] + 1e-5),
                          np.log10(loss_rate_range[1] + 1e-5), 1))
    if loss_rate_exponent < -4:
        loss_rate = 0
    else:
        loss_rate = 10**loss_rate_exponent

    duration = float(np.random.uniform(duration_range[0], duration_range[1],
                                       1))

    # use bandwidth generator.
    assert T_s_range is not None and len(
        T_s_range) == 2 and T_s_range[0] <= T_s_range[1]
    assert delay_noise_range is not None and len(
        delay_noise_range
    ) == 2 and delay_noise_range[0] <= delay_noise_range[1]
    T_s = float(np.random.uniform(T_s_range[0], T_s_range[1], 1))
    delay_noise = float(
        np.random.uniform(delay_noise_range[0], delay_noise_range[1], 1))

    timestamps, bandwidths, delays = generate_bw_delay_series(
        T_s,
        duration,
        bandwidth_lower_bound_range[0],
        bandwidth_lower_bound_range[1],
        bandwidth_upper_bound_range[0],
        bandwidth_upper_bound_range[1],
        delay_range[0],
        delay_range[1],
        dt=dt)

    queue_size = np.random.uniform(queue_size_range[0], queue_size_range[1])
    bdp = np.max(bandwidths) / BYTES_PER_PACKET / BITS_PER_BYTE * 1e6 * np.max(
        delays) * 2 / 1000
    queue_size = max(2, int(bdp * queue_size))

    ret_trace = Trace(timestamps, bandwidths, delays, loss_rate, queue_size,
                      delay_noise, T_s)
    return ret_trace
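Since every *_range argument is a (low, high) pair, passing degenerate ranges (low == high) fixes each sampled parameter, and an explicit seed makes the generated bandwidth series reproducible; this is how Example #16 and Example #23 pin down test conditions. A minimal usage sketch with illustrative values (not taken from the project):

# Usage sketch; the argument values below are illustrative only.
trace = generate_trace(duration_range=(30, 30),
                       bandwidth_lower_bound_range=(1, 1),
                       bandwidth_upper_bound_range=(10, 10),
                       delay_range=(25, 25),
                       loss_rate_range=(0, 0),
                       queue_size_range=(1, 1),
                       T_s_range=(3, 3),
                       delay_noise_range=(0, 0),
                       seed=42)
trace.dump('example_trace.json')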
Example #30
import gym
# from common.env_wrappers import DummyVecEnv
from common.utils import make_env, set_seed
from algorithms.ac.ac import AC
from common.value_networks import *
from common.policy_networks import *
''' load environment '''
# env = gym.make('CartPole-v0').unwrapped
env = gym.make('Pendulum-v0').unwrapped
obs_space = env.observation_space
act_space = env.action_space
# reproducible
seed = 2
set_seed(seed, env)

# env = DummyVecEnv([lambda: env])  # The algorithms require a vectorized/wrapped environment to run
''' build networks for the algorithm '''
num_hidden_layer = 1  # number of hidden layers for the networks
hidden_dim = 32  # dimension of hidden layers for the networks
with tf.name_scope('AC'):
    with tf.name_scope('Critic'):
        critic = ValueNetwork(obs_space,
                              hidden_dim_list=num_hidden_layer * [hidden_dim])
    with tf.name_scope('Actor'):
        actor = StochasticPolicyNetwork(obs_space,
                                        act_space,
                                        hidden_dim_list=num_hidden_layer *
                                        [hidden_dim],
                                        output_activation=tf.nn.tanh)
net_list = [actor, critic]
''' choose optimizers '''