Example #1
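Build a SAC model and wrap it in an Agent that either trains it or tests a previously saved model, depending on the train flag.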
    # We first choose a model
    model = SAC(obs_dim=obs_dim,
                n_actions=n_actions,
                act_lim=act_lim,
                save_dir=model_dir,
                discount=gamma,
                lr=lr,
                seed=seed,
                polyak_coef=polyak_coef,
                temperature=temperature,
                hidden_layers=hidden_layers,
                n_hidden_units=n_hidden_units,
                env=train_env)

    # Create an Agent to train or test the model
    agent = Agent(model=model,
                  replay_buffer=replay_buffer,
                  train_env=train_env,
                  test_env=test_env,
                  replay_start_size=replay_start_size,
                  n_episodes=n_episodes,
                  batch_size=batch_size,
                  n_actions=n_actions)

    if train:
        # Train the model with the agent
        agent.train()
    else:
        # Test an existing model loaded from model_path
        agent.test(model_path)
Example #2
File: run.py Project: yyht/RLs
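Entry point that parses command-line options with docopt, merges them with the defaults from config.yaml, fills in gym- or Unity-specific environment arguments, and then either evaluates a trained model (inference) or launches one or more training processes.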
def run():
    if sys.platform.startswith('win'):
        import win32api
        import win32con
        import _thread

        def _win_handler(event, hook_sigint=_thread.interrupt_main):
            if event == 0:
                hook_sigint()
                return 1
            return 0
        # Add _win_handler to the Windows console's handler function list
        win32api.SetConsoleCtrlHandler(_win_handler, 1)

    options = docopt(__doc__)
    options = get_options(dict(options))
    print(options)

    default_config = load_yaml('config.yaml')
    # gym > unity > unity_env
    model_args = Config(**default_config['model'])
    train_args = Config(**default_config['train'])
    env_args = Config()
    buffer_args = Config(**default_config['buffer'])

    model_args.algo = options.algo
    model_args.use_rnn = options.use_rnn
    model_args.algo_config = options.algo_config
    model_args.seed = options.seed
    model_args.load = options.load

    env_args.env_num = options.n_copys
    if options.gym:
        train_args.add_dict(default_config['gym']['train'])
        train_args.update({'render_episode': options.render_episode})
        env_args.add_dict(default_config['gym']['env'])
        env_args.type = 'gym'
        env_args.env_name = options.gym_env
        env_args.env_seed = options.gym_env_seed
    else:
        train_args.add_dict(default_config['unity']['train'])
        env_args.add_dict(default_config['unity']['env'])
        env_args.type = 'unity'
        env_args.port = options.port
        env_args.sampler_path = options.sampler
        env_args.env_seed = options.unity_env_seed
        if options.unity:
            env_args.file_path = None
            env_args.env_name = 'unity'
        else:
            env_args.update({'file_path': options.env})
            if os.path.exists(env_args.file_path):
                env_args.env_name = options.unity_env or os.path.join(
                    *os.path.split(env_args.file_path)[0].replace('\\', '/').replace(r'//', r'/').split('/')[-2:]
                )
                if 'visual' in env_args.env_name.lower():
                    # if training with visual input but not rendering the environment, all-zero observations will be passed.
                    options.graphic = True
            else:
                raise Exception('cannot find this file.')
        if options.inference:
            env_args.train_mode = False
            env_args.render = True
        else:
            env_args.train_mode = True
            env_args.render = options.graphic

    train_args.index = 0
    train_args.name = NAME
    train_args.use_wandb = options.use_wandb
    train_args.inference = options.inference
    train_args.prefill_choose = options.prefill_choose
    train_args.base_dir = os.path.join(options.store_dir or BASE_DIR, env_args.env_name, model_args.algo)
    train_args.update({
        'name': options.name,
        'max_step_per_episode': options.max_step_per_episode,
        'max_train_step': options.max_train_step,
        'max_train_frame': options.max_train_frame,
        'max_train_episode': options.max_train_episode,
        'save_frequency': options.save_frequency,
        'pre_fill_steps': options.prefill_steps,
        'info': options.info
    })

    if options.inference:
        Agent(env_args, model_args, buffer_args, train_args).evaluate()
        return

    trials = options.models
    if trials == 1:
        agent_run(env_args, model_args, buffer_args, train_args)
    elif trials > 1:
        processes = []
        # Launch one training process per trial, offsetting the seed (and the Unity port) for each copy.
        for i in range(trials):
            _env_args = deepcopy(env_args)
            _model_args = deepcopy(model_args)
            _model_args.seed += i * 10
            _buffer_args = deepcopy(buffer_args)
            _train_args = deepcopy(train_args)
            _train_args.index = i
            if _env_args.type == 'unity':
                _env_args.port = env_args.port + i
            p = Process(target=agent_run, args=(_env_args, _model_args, _buffer_args, _train_args))
            p.start()
            time.sleep(10)
            processes.append(p)
        for p in processes:
            p.join()
    else:
        raise Exception('trials must be greater than 0.')
Example #3
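Trains a network, then runs a tabular Q-learning agent over several episodes to search for pruning actions; the learned Q-table is saved and replayed greedily to produce the final list of actions to prune.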
def main():
    logging.basicConfig(level=logging.DEBUG,
                        format="[%(asctime)s %(filename)s] %(message)s")
    with open('params.json') as json_file:
        config = json.load(json_file)

    args = __create_args()

    config = __adjust_config(args, config)

    # loading the dataset
    train_loader, valid_loader = __load_data(config)

    # Creating the model
    model = NN(config['model']['architecture'], is_maskable=True)
    model = model.to(config['device'])
    initial_mask = model.masks

    # Getting the criterion, optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = get_optimizer(config, model)

    #########################
    # Agent and Environment #
    #########################
    ACTIONS = create_environment(model.masks, config['environment_protocol'])
    random.seed(42)
    shuffle(ACTIONS)
    N_STATES = len(ACTIONS)
    N_EPISODES = config['mdp']['N_EPISODES']
    MAX_STEPS_PER_EPISODE = config['mdp']['MAX_STEPS_PER_EPISODES']
    MIN_ALPHA = config['mdp']['MIN_ALPHA']
    GAMMA = config['mdp']['GAMMA']
    alphas = np.linspace(1.0, MIN_ALPHA, N_EPISODES)

    q_table = dict()
    start_state = State(model.masks, ACTIONS)

    ##########################
    # Create sub_working_dir #
    ##########################
    sub_working_dir = '{}/results/{}/{}/{}/{}'.format(
        config['working_dir'], config['model']['name'],
        '_' + config['mdp']['Q_COMPUTATION'],
        '{}_{}_{}/{}_{}'.format(time.strftime("%d", time.localtime()),
                                time.strftime("%m", time.localtime()),
                                time.strftime("%Y", time.localtime()),
                                time.strftime("%H", time.localtime()),
                                time.strftime("%M", time.localtime())),
        'ALPHA_SEARCH__MIN_ALPHA-{}__GAMMA-{}__PRUNE_TYPE-{}__PRUNE_PERCENT-{}__EPSILON-{}__REWARD_TYPE-{}'
        .format(MIN_ALPHA,
                GAMMA if config['mdp']['Q_COMPUTATION'] != 'QL_M' else 'None',
                config['environment_protocol'],
                config['agent']['prune_percentage'],
                config['agent']['epsilon'], config['agent']['reward_type']))

    if not os.path.exists(sub_working_dir):
        os.makedirs(sub_working_dir)
    config["sub_working_dir"] = sub_working_dir
    logging.info("sub working dir: %s" % sub_working_dir)

    ###############
    # Begin Train #
    ###############
    train(model, train_loader, valid_loader, criterion, optimizer,
          config['train']['epochs'], config['train']['print_every'],
          config['device'])
    loss, accuracy = validation(model, valid_loader, criterion)
    logging.info('Validation loss: {}\tValidation accuracy: {}'.format(
        loss, accuracy))

    if config['agent']['reward_type'] == 'ACCURACY':
        start_state.last_reward = -(1. - accuracy)
    elif config['agent']['reward_type'] == 'LOSS':
        start_state.last_reward = -loss
    elif config['agent']['reward_type'] == 'ACC_COMPRESSION':
        start_state.last_reward = -(1. - accuracy)
    elif config['agent']['reward_type'] == 'MY_RCRA':
        start_state.last_reward = -(1. - accuracy)

    #########
    # Prune #
    #########
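    # Run one Q-learning episode per alpha in the schedule, each starting from a fresh copy of start_state.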
    for e in range(N_EPISODES):

        state = deepcopy(start_state)
        total_reward = .0
        ALPHA = alphas[e]
        agent = Agent(config, ACTIONS, model, valid_loader, criterion)

        for i in range(MAX_STEPS_PER_EPISODE):
            action = agent.choose_action(q_table, state)

            next_state, reward, done = agent.act(state, action)
            total_reward += reward

            if config['mdp']['Q_COMPUTATION'] == 'QL_M':
                # Q-Learning from Ghallab, Nau and Traverso
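                # Undiscounted update: Q(s,a) <- Q(s,a) + alpha * (r + max_a' Q(s',a') - Q(s,a))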
                q_value(q_table, state)[action] = q_value(q_table, state, action) + \
                    ALPHA * (reward + np.max(q_value(q_table, next_state)) - q_value(q_table, state, action))

            elif config['mdp']['Q_COMPUTATION'] == 'QL_WIKI':
                # Q-Learning from Wikipedia
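                # Q(s,a) <- (1 - alpha) * Q(s,a) + alpha * (r + gamma * max_a' Q(s',a'))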
                q_value(q_table, state)[action] = (1. - ALPHA) * q_value(q_table, state, action) + \
                    ALPHA * (reward + GAMMA * np.max(q_value(q_table, next_state)))

            del state
            state = next_state
            if done:
                break

        logging.info("Episode {}: reward type {}: total reward -> {}".format(
            e + 1, config['agent']['reward_type'], total_reward))

    #####################
    # Save the solution #
    #####################
    q_table_saver(q_table, config['sub_working_dir'], '/q_table.tsv')

    # Greedily follow the learned Q-table from the start state and record each pruning action with its reward.
    agent = Agent(config, ACTIONS, model, valid_loader, criterion)
    my_state = start_state
    result = []
    done = False
    while not done:
        sa = q_value(q_table, my_state)
        my_action = np.argmax(sa)
        action = my_state.environment[my_action]
        my_state, reward, done = agent.act(my_state, my_action)
        result.append([action, reward])

    final = pd.DataFrame(result, columns=['Action', 'Reward'])
    final.to_csv(config['sub_working_dir'] + '/actions_to_prune.tsv',
                 sep='\t',
                 index=False)
Example #4
File: run.py Project: yyht/RLs
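The helper used as the multiprocessing target in Examples #2 and #5: it builds an Agent from the forwarded arguments and calls it to start a run.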
def agent_run(*args):
    Agent(*args)()
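
For context, this is the function Examples #2 and #5 hand to multiprocessing. A minimal sketch of that call pattern, assuming env_args, model_args, buffer_args and train_args have been built as in run():

from multiprocessing import Process

if __name__ == '__main__':
    # Run one training process and wait for it to finish
    # (Example #2 repeats this per trial with adjusted seeds and Unity ports).
    p = Process(target=agent_run, args=(env_args, model_args, buffer_args, train_args))
    p.start()
    p.join()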
Example #5
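A variant of Example #2's run(): the same option parsing and environment setup, but working directly on the raw docopt dictionary and plain dicts instead of the options/Config wrappers.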
def run():
    if sys.platform.startswith('win'):
        import win32api
        import win32con
        import _thread

        def _win_handler(event, hook_sigint=_thread.interrupt_main):
            if event == 0:
                hook_sigint()
                return 1
            return 0

        # Add _win_handler to the Windows console's handler function list
        win32api.SetConsoleCtrlHandler(_win_handler, 1)

    options = docopt(__doc__)
    print(options)

    default_config = load_yaml('config.yaml')
    # gym > unity > unity_env
    env_args, model_args, train_args = {}, {}, {}
    unity_args, gym_args, buffer_args = default_config[
        'unity'], default_config['gym'], default_config['buffer']

    model_args['algo'] = str(options['--algorithm'])
    model_args['algo_config'] = None if options[
        '--config-file'] == 'None' else str(options['--config-file'])
    model_args['seed'] = int(options['--seed'])
    model_args['load'] = None if options['--load'] == 'None' else str(
        options['--load'])
    model_args['logger2file'] = default_config['logger2file']

    train_args['index'] = 0
    train_args['all_learner_print'] = default_config['all_learner_print']
    train_args['name'] = NAME if options['--name'] == 'None' else str(
        options['--name'])
    train_args['max_step'] = default_config['max_step'] if options[
        '--max-step'] == 'None' else int(options['--max-step'])
    train_args['max_episode'] = default_config['max_episode'] if options[
        '--max-episode'] == 'None' else int(options['--max-episode'])
    train_args['save_frequency'] = default_config['save_frequency'] if options[
        '--save-frequency'] == 'None' else int(options['--save-frequency'])
    train_args['inference'] = bool(options['--inference'])
    train_args['fill_in'] = bool(options['--fill-in'])
    train_args['no_op_choose'] = bool(options['--noop-choose'])
    train_args['info'] = default_config['info'] if options[
        '--info'] == 'None' else str(options['--info'])

    if options['--gym']:
        env_args['type'] = 'gym'
        env_args['env_name'] = str(options['--gym-env'])
        env_args['env_num'] = int(options['--gym-agents'])
        env_args['env_seed'] = int(options['--gym-env-seed'])
        env_args['render_mode'] = gym_args['render_mode']
        env_args['action_skip'] = gym_args['action_skip']
        env_args['skip'] = gym_args['skip']
        env_args['obs_stack'] = gym_args['obs_stack']
        env_args['stack'] = gym_args['stack']
        env_args['obs_grayscale'] = gym_args['obs_grayscale']
        env_args['obs_resize'] = gym_args['obs_resize']
        env_args['resize'] = gym_args['resize']
        env_args['obs_scale'] = gym_args['obs_scale']

        train_args['render_episode'] = gym_args['render_episode'] if options[
            '--render-episode'] == 'None' else int(options['--render-episode'])
        train_args['no_op_steps'] = gym_args['random_steps']
        train_args['render'] = gym_args['render']
        train_args['eval_while_train'] = gym_args['eval_while_train']
        train_args['max_eval_episode'] = gym_args['max_eval_episode']
    else:
        env_args['type'] = 'unity'
        if options['--unity']:
            env_args['file_path'] = None
            env_args['env_name'] = 'unity'
        else:
            env_args['file_path'] = unity_args['exe_file'] if options[
                '--env'] == 'None' else str(options['--env'])
            if os.path.exists(env_args['file_path']):
                env_args['env_name'] = os.path.join(
                    *os.path.split(env_args['file_path'])[0].replace(
                        '\\', '/').replace(r'//', r'/').split('/')[-2:])
            else:
                raise Exception('cannot find this file.')
        if bool(options['--inference']):
            env_args['train_mode'] = False
        else:
            env_args['train_mode'] = True

        env_args['port'] = int(options['--port'])
        env_args['render'] = bool(options['--graphic'])
        env_args['sampler_path'] = None if options[
            '--sampler'] == 'None' else str(options['--sampler'])
        env_args['reset_config'] = unity_args['reset_config']

        train_args['no_op_steps'] = unity_args['no_op_steps']

    train_args['base_dir'] = os.path.join(
        BASE_DIR if options['--store-dir'] == 'None' else str(
            options['--store-dir']), env_args['env_name'], model_args['algo'])

    if bool(options['--inference']):
        Agent(env_args, model_args, buffer_args, train_args).evaluate()
        return

    trials = int(options['--modes'])
    if trials == 1:
        agent_run(env_args, model_args, buffer_args, train_args)
    elif trials > 1:
        processes = []
        for i in range(trials):
            _env_args = deepcopy(env_args)
            _model_args = deepcopy(model_args)
            _model_args['seed'] += i * 10
            _buffer_args = deepcopy(buffer_args)
            _train_args = deepcopy(train_args)
            _train_args['index'] = i
            if _env_args['type'] == 'unity':
                _env_args['port'] = env_args['port'] + i
            p = Process(target=agent_run,
                        args=(_env_args, _model_args, _buffer_args,
                              _train_args))
            p.start()
            time.sleep(10)
            processes.append(p)
        for p in processes:
            p.join()
    else:
        raise Exception('trials must be greater than 0.')