Example #1
0
def main():
    """Run a local mahimahi emulation test over the chosen schemes, then analyze."""
    parser = argparse.ArgumentParser()

    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument(
        '--all', action='store_true',
        help='test all the schemes specified in src/config.yml')
    group.add_argument(
        '--schemes', metavar='"SCHEME1 SCHEME2..."',
        help='test a space-separated list of schemes')

    args = parser.parse_args()

    # exactly one of the two flags is present (required exclusive group)
    if args.all:
        schemes = parse_config()['schemes'].keys()
    elif args.schemes is not None:
        schemes = args.schemes.split()

    # recreate an empty data directory next to this script
    curr_dir = path.abspath(path.dirname(__file__))
    data_dir = path.join(curr_dir, 'data')
    shutil.rmtree(data_dir, ignore_errors=True)
    make_sure_path_exists(data_dir)

    test_py = path.join(project_root.DIR, 'test', 'test.py')
    analyze_py = path.join(project_root.DIR, 'analysis', 'analyze.py')

    test_cmd = ['python', test_py, 'local',
                '--schemes', ' '.join(schemes),
                '-t', '10',
                '--data-dir', data_dir,
                '--pkill-cleanup',
                '--prepend-mm-cmds', 'mm-delay 20',
                '--extra-mm-link-args',
                '--uplink-queue=droptail --uplink-queue-args=packets=200']
    check_call(test_cmd)

    check_call(['python', analyze_py, '--data-dir', data_dir])
Example #2
0
    def __init__(self, args):
        """Create a training sender and the learner selected by args.algorithm."""
        self.sender = Sender(args.port, train=True)
        self.algorithm = args.algorithm

        # models are kept next to this file under saved_models/<algorithm>
        base_dir = os.path.dirname(os.path.abspath(__file__))
        models_dir = os.path.join(base_dir, 'saved_models')
        make_sure_path_exists(models_dir)

        if self.algorithm == 'dagger':
            self.learner = Dagger(
                state_dim=self.sender.state_dim,
                action_cnt=self.sender.action_cnt,
                train=True,
                save_vars=os.path.join(models_dir, 'dagger'),
                restore_vars=None,
                debug=True)
        elif self.algorithm == 'reinforce':
            self.learner = Reinforce(
                state_dim=self.sender.state_dim,
                action_cnt=self.sender.action_cnt,
                train=True,
                save_vars=os.path.join(models_dir, 'reinforce'),
                restore_vars=None,
                debug=True)

        # the sender queries the learner for every action it takes
        self.sender.set_sample_action(self.learner.sample_action)

        self.max_batches = 2000
        self.episodes_per_batch = 1
Example #3
0
def parse_test():
    """Parse and validate command line arguments for the test driver."""
    parser = argparse.ArgumentParser(
        description='perform congestion control tests')

    subparsers = parser.add_subparsers(dest='mode')
    local = subparsers.add_parser(
        'local', help='test schemes locally in mahimahi emulated networks')
    remote = subparsers.add_parser(
        'remote',
        help='test schemes between local and remote in '
        'real-life networks')
    remote.add_argument(
        'remote_path',
        metavar='HOST:PANTHEON-DIR',
        help='HOST ([user@]IP) and PANTHEON-DIR (remote pantheon directory)')

    # options shared by both modes first, then the mode-specific ones
    parse_test_shared(local, remote)
    parse_test_local(local)
    parse_test_remote(remote)

    args = parser.parse_args()

    if args.schemes is not None:
        verify_schemes(args.schemes)
    verify_test_args(args)
    make_sure_path_exists(args.data_dir)

    return args
Example #4
0
    def __init__(self, cluster, server, task_index, env, dagger):
        """Set up a distributed-TF worker: hyperparameters, graph, and session.

        Args:
            cluster: TF cluster spec describing the ps/worker jobs.
            server: TF server this worker task runs on.
            task_index: index of this worker; index 0 is the chief, which
                also owns the TensorBoard summary writer.
            env: environment exposing state_dim/action_cnt and a run loop.
            dagger: truthy to use the DAgger hyperparameters, otherwise the
                longer policy-gradient schedule below.
        """
        # distributed tensorflow related
        self.cluster = cluster
        self.server = server
        self.task_index = task_index
        self.env = env
        self.dagger = dagger
        # log of per-decision latency (roughly 2 ms per action)
        self.time_file = open('/tmp/sample_action_time', 'w')

        self.is_chief = (task_index == 0)  # TODO: confirm task 0 also hosts the ps
        self.worker_device = '/job:worker/task:%d' % task_index

        # buffers required to train
        self.action_buf = []
        self.state_buf = []

        # step counters
        self.local_step = 0

        # training schedule differs between the two algorithms
        if self.dagger:
            self.max_global_step = 2000
            self.check_point = 1500
            self.learn_rate = 1e-3
        else:
            self.max_global_step = 10000  # TODO: should be longer according to the graph
            self.check_point = 10
            self.learn_rate = 2 * 1e-5

        # dimension of state and action spaces
        self.state_dim = env.state_dim
        self.action_cnt = env.action_cnt

        # must call env.set_sample_action() before env.run()
        env.set_sample_action(self.sample_action)

        # build tensorflow computation graph
        self.build_tf_graph()

        # summary related: only the chief writes TensorBoard summaries
        if self.is_chief:
            date_time = datetime.datetime.now().strftime('%Y-%m-%d--%H-%M-%S')
            self.logdir = path.join(project_root.DIR, 'a3c', 'logs', date_time)
            make_sure_path_exists(self.logdir)
            self.summary_writer = tf.summary.FileWriter(self.logdir)

        # create session; soft placement lets ops fall back to an available
        # device. (fix: removed a dead `config = tf.ConfigProto()` that was
        # immediately overwritten by the configured one below)
        config = tf.ConfigProto(allow_soft_placement=True,
                                log_device_placement=False)
        self.session = tf.Session(self.server.target,
                                  config=config)
        self.session.run(tf.global_variables_initializer())
Example #5
0
def parse_test():
    """Parse test arguments; a YAML config file may supply the defaults.

    Command line options override settings from the config file. Returns
    the validated argparse.Namespace and ensures args.data_dir exists.
    """
    # Load configuration file before parsing other command line options
    # Command line options will override options in config file
    config_parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
        add_help=False)
    config_parser.add_argument('-c',
                               '--config_file',
                               metavar='CONFIG',
                               help='path to configuration file. '
                               'command line arguments will override options '
                               'in config file. ')
    # only -c/--config_file is consumed here; the rest is deferred to the
    # full parser below via remaining_argv
    config_args, remaining_argv = config_parser.parse_known_args()

    # full parser inherits -c/--config_file through parents=
    parser = argparse.ArgumentParser(
        description='perform congestion control tests',
        parents=[config_parser])

    subparsers = parser.add_subparsers(dest='mode')
    local = subparsers.add_parser(
        'local', help='test schemes locally in mahimahi emulated networks')
    remote = subparsers.add_parser(
        'remote',
        help='test schemes between local and remote in '
        'real-life networks')
    remote.add_argument(
        'remote_path',
        metavar='HOST:PANTHEON-DIR',
        help='HOST ([user@]IP) and PANTHEON-DIR (remote pantheon directory)')

    # shared options first, then mode-specific ones
    parse_test_shared(local, remote, config_args)
    parse_test_local(local)
    parse_test_remote(remote)

    # Make settings in config file the defaults
    test_config = None
    if config_args.config_file is not None:
        with open(config_args.config_file) as f:
            test_config = yaml.safe_load(f)
        parse_test_config(test_config, local, remote)

    args = parser.parse_args(remaining_argv)
    if args.schemes is not None:
        # explicit --schemes wins; drop any config-file flow description
        verify_schemes(args.schemes)
        args.test_config = None
    else:
        # no --schemes given: the config file must describe the flows.
        # NOTE(review): args.test_config is not assigned in this branch --
        # presumably parse_test_config/parse_test_shared set it; confirm.
        assert (test_config is not None)
        schemes = ' '.join([flow['scheme'] for flow in test_config['flows']])
        verify_schemes(schemes)
    verify_test_args(args)
    make_sure_path_exists(args.data_dir)
    return args
Example #6
0
    def save_model(self, check_point=None):
        """Sync global parameters into the local network and save on worker-0.

        check_point: if given, save under 'checkpoint-<N>'; otherwise 'model'.
        """
        if check_point is None:
            leaf = 'model'
        else:
            leaf = 'checkpoint-%d' % check_point
        model_path = path.join(self.logdir, leaf)

        make_sure_path_exists(model_path)

        # copy global parameters to local
        self.session.run(self.sync_op)

        # save local parameters to worker-0
        tf.train.Saver(self.local_network.trainable_vars).save(
            self.session, model_path)
        sys.stderr.write('\nModel saved to worker-0:%s\n' % model_path)
Example #7
0
    def build_tf_graph(self):
        """Assemble the policy network, plus the loss and debug summaries."""
        self.build_policy()

        if self.train:
            self.build_loss()

        if not self.debug:
            return

        # debug mode: dump the graph and loss scalars for TensorBoard
        summary_path = 'reinforce_summary'
        make_sure_path_exists(summary_path)
        self.summary_writer = tf.summary.FileWriter(
            summary_path, graph=self.session.graph)

        tf.summary.scalar('reg_loss', self.reg_loss)
        tf.summary.scalar('policy_loss', self.policy_loss)
        self.summary_op = tf.summary.merge_all()
Example #8
0
    def __init__(self, cluster, server, task_index, env, dagger):
        """Initialize a distributed-TF worker.

        Args:
            cluster: TF cluster spec describing the ps/worker jobs.
            server: TF server this worker task runs on.
            task_index: index of this worker; index 0 is the chief, which
                also owns the TensorBoard summary writer.
            env: environment exposing state_dim/action_cnt and a run loop.
            dagger: truthy to use the DAgger hyperparameters, otherwise
                the short policy-gradient schedule below.
        """
        # distributed tensorflow related
        self.cluster = cluster
        self.server = server
        self.task_index = task_index
        self.env = env
        self.dagger = dagger
        # records per-decision timing; file handle stays open for the
        # worker's lifetime
        self.time_file = open('/tmp/sample_action_time', 'w')

        self.is_chief = (task_index == 0)
        self.worker_device = '/job:worker/task:%d' % task_index

        # buffers required to train
        self.action_buf = []
        self.state_buf = []

        # step counters
        self.local_step = 0

        # training schedule differs between the two algorithms
        if self.dagger:
            self.max_global_step = 2000
            self.check_point = 1500
            self.learn_rate = 1e-3
        else:
            self.max_global_step = 30
            self.check_point = 10
            self.learn_rate = 2 * 1e-5

        # dimension of state and action spaces
        self.state_dim = env.state_dim
        self.action_cnt = env.action_cnt

        # must call env.set_sample_action() before env.run()
        env.set_sample_action(self.sample_action)

        # build tensorflow computation graph
        self.build_tf_graph()

        # summary related: only the chief writes TensorBoard summaries,
        # to a fresh timestamped log directory
        if self.is_chief:
            date_time = datetime.datetime.now().strftime('%Y-%m-%d--%H-%M-%S')
            self.logdir = path.join(project_root.DIR, 'a3c', 'logs', date_time)
            make_sure_path_exists(self.logdir)
            self.summary_writer = tf.summary.FileWriter(self.logdir)

        # create session attached to this task's server, then initialize
        # all graph variables
        self.session = tf.Session(self.server.target)
        self.session.run(tf.global_variables_initializer())
def main():
    """Smoke-test the helper module h: subprocess wrappers and utilities."""
    h.call(['echo', '1'])
    h.check_call('echo 2', shell=True)

    out = h.check_output(['echo', '3']).strip()
    print(out)
    assert out == '3'

    echo_proc = h.Popen(['echo', '4'], stdout=h.PIPE)
    out = echo_proc.communicate()[0].strip()
    print(out)
    assert out == '4'

    print(h.get_open_port())
    h.make_sure_path_exists(h.TMPDIR)
    print(h.parse_config())
Example #10
0
def main():
    """Drive the sender with a trained model (DAgger or REINFORCE)."""
    parser = argparse.ArgumentParser()
    parser.add_argument('port', type=int)
    parser.add_argument('--algorithm', choices=['dagger', 'reinforce'],
                        required=True)
    args = parser.parse_args()

    sender = Sender(args.port)

    # trained models live next to this file under saved_models/<algorithm>
    base_dir = os.path.dirname(os.path.abspath(__file__))
    models_dir = os.path.join(base_dir, 'saved_models')
    make_sure_path_exists(models_dir)

    if args.algorithm == 'dagger':
        policer = Dagger(
            state_dim=sender.state_dim,
            action_cnt=sender.action_cnt,
            train=False,
            restore_vars=os.path.join(models_dir, 'dagger'))
    elif args.algorithm == 'reinforce':
        policer = Reinforce(
            state_dim=sender.state_dim,
            action_cnt=sender.action_cnt,
            train=False,
            restore_vars=os.path.join(models_dir, 'reinforce'))

    sender.set_sample_action(policer.sample_action)

    try:
        sender.handshake()
        sender.run()
    except KeyboardInterrupt:
        pass
    finally:
        # always release the sender's resources, even on interrupt
        sender.cleanup()
Example #11
0
    def setup_tf_ops(self, server):
        """Sets up the training ops and TensorBoard tools.

        Builds a regularized cross-entropy loss over the global network,
        an Adam train op, merged summaries, and a FileWriter whose log
        directory is tagged with the current time and git commit.
        """
        # action indices fed in at train time
        self.actions = tf.placeholder(tf.int32, [None, None])

        # L2 regularization over trainable vars, skipping the step counter
        reg_loss = 0.0
        for x in self.global_network.trainable_vars:
            if x.name == 'global/cnt:0':
                continue
            reg_loss += tf.nn.l2_loss(x)
        reg_loss *= self.regularization_lambda

        cross_entropy_loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=self.actions,  # TODO: Q-target and Q-evaluation
                logits=self.global_network.action_scores))

        self.total_loss = cross_entropy_loss + reg_loss

        optimizer = tf.train.AdamOptimizer(self.learn_rate)
        self.train_op = optimizer.minimize(self.total_loss)

        tf.summary.scalar('reduced_ce_loss', cross_entropy_loss)
        tf.summary.scalar('reg_loss', reg_loss)
        tf.summary.scalar('total_loss', self.total_loss)
        self.summary_op = tf.summary.merge_all()

        # fix: run git directly in the project directory instead of a
        # string-interpolated 'cd %s && ...' with shell=True
        git_commit = check_output(['git', 'rev-parse', '@'],
                                  cwd=project_root.DIR)
        date_time = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
        log_name = date_time + '-%s' % git_commit.strip()
        self.logdir = path.join(project_root.DIR, 'dagger', 'logs', log_name)
        make_sure_path_exists(self.logdir)
        self.summary_writer = tf.summary.FileWriter(self.logdir)
Example #12
0
def parse_test():
    """Build the test-driver CLI, parse it, and validate the result."""
    parser = argparse.ArgumentParser(
        description='perform congestion control tests')
    subparsers = parser.add_subparsers(dest='mode')

    local = subparsers.add_parser(
        'local', help='test schemes locally in mahimahi emulated networks')

    remote = subparsers.add_parser(
        'remote', help='test schemes between local and remote in '
        'real-life networks')
    remote.add_argument(
        'remote_path', metavar='HOST:PANTHEON-DIR',
        help='HOST ([user@]IP) and PANTHEON-DIR (remote pantheon directory)')

    # register shared options, then per-mode options
    for register in (lambda: parse_test_shared(local, remote),
                     lambda: parse_test_local(local),
                     lambda: parse_test_remote(remote)):
        register()

    args = parser.parse_args()

    if args.schemes is not None:
        verify_schemes(args.schemes)
    verify_test_args(args)
    make_sure_path_exists(args.data_dir)
    return args
Example #13
0
    def setup_tf_ops(self, server):
        """Sets up the training ops and TensorBoard tools.

        Builds a regularized MSE loss against the expert actions, an Adam
        train op, merged summaries, and a FileWriter whose log directory
        is tagged with the current time and git commit.
        """
        # expert action targets fed in at train time
        self.expert_actions = tf.placeholder(tf.float32, [None, None])

        # L2 regularization over trainable vars, skipping the step counter
        reg_loss = 0.0
        for x in self.global_network.trainable_vars:
            if x.name == 'global/cnt:0':
                continue
            reg_loss += tf.nn.l2_loss(x)
        reg_loss *= self.regularization_lambda

        MSE = tf.losses.mean_squared_error(
            labels=self.expert_actions,
            predictions=self.global_network.actions)

        self.total_loss = MSE + reg_loss

        optimizer = tf.train.AdamOptimizer(self.learn_rate)
        self.train_op = optimizer.minimize(self.total_loss)

        tf.summary.scalar('MSE', MSE)
        tf.summary.scalar('reg_loss', reg_loss)
        tf.summary.scalar('total_loss', self.total_loss)
        self.summary_op = tf.summary.merge_all()

        # fix: run git directly in the project directory instead of a
        # string-interpolated 'cd %s && ...' with shell=True
        git_commit = check_output(['git', 'rev-parse', '@'],
                                  cwd=project_root.DIR)
        date_time = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
        log_name = date_time + '-%s' % git_commit.strip()
        self.logdir = path.join(project_root.DIR, 'dagger', 'logs', log_name)
        make_sure_path_exists(self.logdir)
        self.summary_writer = tf.summary.FileWriter(self.logdir)
Example #14
0
def main():
    """Run a local emulation test for the chosen schemes, then analyze it."""
    parser = argparse.ArgumentParser()

    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument('--all',
                       action='store_true',
                       help='test all the schemes specified in src/config.yml')
    group.add_argument('--schemes',
                       metavar='"SCHEME1 SCHEME2..."',
                       help='test a space-separated list of schemes')

    args = parser.parse_args()

    # exactly one of --all / --schemes is given (required exclusive group)
    if args.all:
        schemes = parse_config()['schemes'].keys()
    elif args.schemes is not None:
        schemes = args.schemes.split()

    # recreate a clean data directory beside this script
    curr_dir = path.abspath(path.dirname(__file__))
    data_dir = path.join(curr_dir, 'data')
    shutil.rmtree(data_dir, ignore_errors=True)
    make_sure_path_exists(data_dir)

    test_py = path.join(project_root.DIR, 'test', 'test.py')
    analyze_py = path.join(project_root.DIR, 'analysis', 'analyze.py')

    test_cmd = ['python', test_py, 'local']
    test_cmd += ['--schemes', ' '.join(schemes)]
    test_cmd += ['-t', '10']
    test_cmd += ['--data-dir', data_dir]
    test_cmd += ['--pkill-cleanup']
    test_cmd += ['--prepend-mm-cmds', 'mm-delay 20']
    test_cmd += ['--extra-mm-link-args',
                 '--uplink-queue=droptail --uplink-queue-args=packets=200']
    check_call(test_cmd)

    check_call(['python', analyze_py, '--data-dir', data_dir])