Example #1
    def save_replay_buffer(self, dir_path=None):
        if dir_path is None:
            dir_path = os.path.join(self.parent_level_manager.parent_graph_manager.task_parameters.experiment_path,
                                    'replay_buffer')
        if not os.path.exists(dir_path):
            os.mkdir(dir_path)

        path = os.path.join(dir_path, 'RB_{}.joblib.bz2'.format(type(self).__name__))
        joblib.dump(self.memory.get_all_complete_episodes(), path, compress=('bz2', 1))

        screen.log('Saved replay buffer to: \"{}\" - Number of transitions: {}'.format(path,
                                                                                       self.memory.num_transitions()))
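A buffer saved this way can be restored with joblib.load, which transparently detects the bz2 compression used above. A minimal loading sketch (not from the source: the helper name and the memory-side store_episode call are assumptions):

import os
import joblib

def load_replay_buffer(memory, dir_path, class_name):
    """Sketch: restore episodes dumped by save_replay_buffer above (hypothetical helper)."""
    # Mirror of the path built in save_replay_buffer
    path = os.path.join(dir_path, 'RB_{}.joblib.bz2'.format(class_name))
    episodes = joblib.load(path)  # joblib decompresses the .bz2 archive automatically
    for episode in episodes:
        memory.store_episode(episode)  # assumed replay-buffer method; adapt to the actual memory class
    return len(episodes)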
Example #2
def _sanitize_device_list(devices: List[mx.Context]) -> List[mx.Context]:
    """
    Returns the intersection of the requested devices with the devices that are
    actually available. If the intersection is empty, falls back to mx.cpu().
    :param devices: list of requested devices
    :return: list of devices that are actually available
    """
    actual_device = [mx.cpu()] + [mx.gpu(i) for i in mx.test_utils.list_gpus()]
    intersection = [dev for dev in devices if dev in actual_device]
    if len(intersection) == 0:
        intersection = [mx.cpu()]
        screen.log('Requested devices {} not available. Defaulting to CPU context.'.format(devices))
    elif len(intersection) < len(devices):
        screen.log('{} not available, using {}.'.format(
            [dev for dev in devices if dev not in intersection], intersection))
    return intersection
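A brief usage sketch, assuming MXNet is installed; the requested GPU context is dropped on a machine without GPUs and the function falls back to the CPU context:

import mxnet as mx

# Hypothetical request; what survives depends on the machine running this
requested = [mx.gpu(0), mx.cpu()]
devices = _sanitize_device_list(requested)
print(devices)  # [cpu(0)] when no GPU is detected, [gpu(0), cpu(0)] otherwise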
Example #3
def init_environment_dependent_modules(self):
    super().init_environment_dependent_modules()
    self.env = self.parent_level_manager._real_environment
    screen.log_title("Human Control Mode")
    available_keys = self.env.get_available_keys()
    if available_keys:
        screen.log("Use keyboard keys to move. Press escape to quit. Available keys:")
        screen.log("")
        for action, key in available_keys:
            screen.log("\t- {}: {}".format(action, key))
        screen.separator()
Example #4
def perform_reward_based_tests(args, preset_validation_params, preset_name):
    win_size = 10

    test_name = '__test_reward'
    test_path = os.path.join('./experiments', test_name)
    if path.exists(test_path):
        shutil.rmtree(test_path)

    # run the experiment in a separate thread
    screen.log_title("Running test {}".format(preset_name))
    log_file_name = 'test_log_{preset_name}.txt'.format(
        preset_name=preset_name)
    cmd = ('python3 rl_coach/coach.py '
           '-p {preset_name} '
           '-e {test_name} '
           '-n {num_workers} '
           '--seed 0 '
           '-c '
           '{level} '
           '&> {log_file_name} ').format(
               preset_name=preset_name,
               test_name=test_name,
               num_workers=preset_validation_params.num_workers,
               log_file_name=log_file_name,
               level='-lvl ' + preset_validation_params.reward_test_level
               if preset_validation_params.reward_test_level else '')

    p = subprocess.Popen(cmd,
                         shell=True,
                         executable="/bin/bash",
                         preexec_fn=os.setsid)

    start_time = time.time()

    reward_str = 'Evaluation Reward'
    if preset_validation_params.num_workers > 1:
        filename_pattern = 'worker_0*.csv'
    else:
        filename_pattern = '*.csv'

    test_passed = False

    # get the csv with the results
    csv_paths = read_csv_paths(test_path, filename_pattern)

    if csv_paths:
        csv_path = csv_paths[0]

        # verify results
        csv = None
        time.sleep(1)
        averaged_rewards = [0]

        last_num_episodes = 0

        if not args.no_progress_bar:
            print_progress(averaged_rewards, last_num_episodes,
                           preset_validation_params, start_time, args)

        while csv is None or (
                csv['Episode #'].values[-1] <
                preset_validation_params.max_episodes_to_achieve_reward
                and time.time() - start_time < args.time_limit):
            try:
                csv = pd.read_csv(csv_path)
            except:
                # sometimes the csv is being written at the same time we are
                # trying to read it. no problem -> try again
                continue

            if reward_str not in csv.keys():
                continue

            rewards = csv[reward_str].values
            rewards = rewards[~np.isnan(rewards)]

            if len(rewards) >= 1:
                averaged_rewards = np.convolve(
                    rewards,
                    np.ones(min(len(rewards), win_size)) / win_size,
                    mode='valid')
            else:
                time.sleep(1)
                continue

            if not args.no_progress_bar:
                print_progress(averaged_rewards, last_num_episodes,
                               preset_validation_params, start_time, args)

            if csv['Episode #'].shape[0] - last_num_episodes <= 0:
                continue

            last_num_episodes = csv['Episode #'].values[-1]

            # check if reward is enough
            if np.any(averaged_rewards >=
                      preset_validation_params.min_reward_threshold):
                test_passed = True
                break
            time.sleep(1)

    # kill test and print result
    os.killpg(os.getpgid(p.pid), signal.SIGTERM)
    screen.log('')
    if test_passed:
        screen.success("Passed successfully")
    else:
        if time.time() - start_time > args.time_limit:
            screen.error("Failed due to exceeding time limit", crash=False)
            if args.verbose:
                screen.error("command exitcode: {}".format(p.returncode),
                             crash=False)
                screen.error(open(log_file_name).read(), crash=False)
        elif csv_paths:
            screen.error("Failed due to insufficient reward", crash=False)
            if args.verbose:
                screen.error("command exitcode: {}".format(p.returncode),
                             crash=False)
                screen.error(open(log_file_name).read(), crash=False)
            screen.error(
                "preset_validation_params.max_episodes_to_achieve_reward: {}".
                format(
                    preset_validation_params.max_episodes_to_achieve_reward),
                crash=False)
            screen.error(
                "preset_validation_params.min_reward_threshold: {}".format(
                    preset_validation_params.min_reward_threshold),
                crash=False)
            screen.error("averaged_rewards: {}".format(averaged_rewards),
                         crash=False)
            screen.error("episode number: {}".format(
                csv['Episode #'].values[-1]),
                         crash=False)
        else:
            screen.error("csv file never found", crash=False)
            if args.verbose:
                screen.error("command exitcode: {}".format(p.returncode),
                             crash=False)
                screen.error(open(log_file_name).read(), crash=False)

    shutil.rmtree(test_path)
    os.remove(log_file_name)
    return test_passed
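The pass/fail decision above smooths the raw evaluation rewards with a short moving average before comparing them against min_reward_threshold. A standalone illustration of that np.convolve idiom, with arbitrary toy values:

import numpy as np

rewards = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
win_size = 3  # the test above caps the kernel length at len(rewards) when fewer rewards exist
averaged = np.convolve(rewards, np.ones(win_size) / win_size, mode='valid')
print(averaged)  # [2. 3. 4.] - each entry is the mean of a sliding window of 3 rewards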
Example #5
def perform_trace_based_tests(args, preset_name, num_env_steps, level=None):
    test_name = '__test_trace'
    test_path = os.path.join('./experiments', test_name)
    if path.exists(test_path):
        shutil.rmtree(test_path)

    # run the experiment in a separate thread
    screen.log_title("Running test {}{}".format(preset_name, ' - ' +
                                                level if level else ''))
    log_file_name = 'test_log_{preset_name}.txt'.format(
        preset_name=preset_name)

    cmd = ('python3 rl_coach/coach.py '
           '-p {preset_name} '
           '-e {test_name} '
           '--seed 42 '
           '-c '
           '--no_summary '
           '-cp {custom_param} '
           '{level} '
           '&> {log_file_name} ').format(
               preset_name=preset_name,
               test_name=test_name,
               log_file_name=log_file_name,
               level='-lvl ' + level if level else '',
               custom_param='\"improve_steps=EnvironmentSteps({n});'
               'steps_between_evaluation_periods=EnvironmentSteps({n});'
               'evaluation_steps=EnvironmentSteps(1);'
               'heatup_steps=EnvironmentSteps(1024)\"'.format(n=num_env_steps))

    p = subprocess.Popen(cmd,
                         shell=True,
                         executable="/bin/bash",
                         preexec_fn=os.setsid)
    p.wait()

    filename_pattern = '*.csv'

    # get the csv with the results
    csv_paths = read_csv_paths(test_path, filename_pattern)

    test_passed = False
    if not csv_paths:
        screen.error("csv file never found", crash=False)
        if args.verbose:
            screen.error("command exitcode: {}".format(p.returncode),
                         crash=False)
            screen.error(open(log_file_name).read(), crash=False)
    else:
        trace_path = os.path.join(
            './rl_coach', 'traces', preset_name + '_' +
            level.replace(':', '_') if level else preset_name, '')
        if not os.path.exists(trace_path):
            screen.log(
                'No trace found, creating new trace in: {}'.format(trace_path))
            os.makedirs(os.path.dirname(trace_path))
            df = pd.read_csv(csv_paths[0])
            df = clean_df(df)
            df.to_csv(os.path.join(trace_path, 'trace.csv'), index=False)
            screen.success("Successfully created new trace.")
            test_passed = True
        else:
            test_df = pd.read_csv(csv_paths[0])
            test_df = clean_df(test_df)
            new_trace_csv_path = os.path.join(trace_path, 'trace_new.csv')
            test_df.to_csv(new_trace_csv_path, index=False)
            test_df = pd.read_csv(new_trace_csv_path)
            trace_csv_path = glob.glob(path.join(trace_path, 'trace.csv'))
            trace_csv_path = trace_csv_path[0]
            trace_df = pd.read_csv(trace_csv_path)
            test_passed = test_df.equals(trace_df)
            if test_passed:
                screen.success("Passed successfully.")
                os.remove(new_trace_csv_path)
                test_passed = True
            else:
                screen.error("Trace test failed.", crash=False)
                if args.overwrite:
                    os.remove(trace_csv_path)
                    os.rename(new_trace_csv_path, trace_csv_path)
                    screen.error("Overwriting old trace.", crash=False)
                else:
                    screen.error("bcompare {} {}".format(
                        trace_csv_path, new_trace_csv_path),
                                 crash=False)

    shutil.rmtree(test_path)
    os.remove(log_file_name)
    return test_passed
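The trace comparison above relies on pandas DataFrame.equals, which only returns True when values, dtypes, and column layout all match; writing the new results to trace_new.csv and reading them back before the comparison presumably pushes both frames through the same CSV round-trip so formatting and dtypes line up. A toy illustration of the comparison semantics (frames invented for the example):

import pandas as pd

a = pd.DataFrame({'Episode #': [1, 2], 'Training Reward': [0.5, 0.7]})
b = pd.DataFrame({'Episode #': [1, 2], 'Training Reward': [0.5, 0.7]})
c = pd.DataFrame({'Episode #': [1, 2], 'Training Reward': [0.5, 0.8]})

print(a.equals(b))  # True  - identical values and dtypes
print(a.equals(c))  # False - a single differing cell fails the trace check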
Example #6
def test_preset_reward(preset_name,
                       no_progress_bar=True,
                       time_limit=60 * 60,
                       verbose=True):
    preset_validation_params = validation_params(preset_name)

    win_size = 10

    test_name = '__test_reward_{}'.format(preset_name)
    test_path = os.path.join('./experiments', test_name)
    if path.exists(test_path):
        shutil.rmtree(test_path)

    # run the experiment in a separate thread
    screen.log_title("Running test {}".format(preset_name))
    log_file_name = 'test_log_{preset_name}.txt'.format(
        preset_name=preset_name)
    cmd = [
        'python3', 'rl_coach/coach.py', '-p',
        '{preset_name}'.format(preset_name=preset_name), '-e',
        '{test_name}'.format(test_name=test_name), '-n',
        '{num_workers}'.format(
            num_workers=preset_validation_params.num_workers), '--seed', '0',
        '-c'
    ]
    if preset_validation_params.reward_test_level:
        cmd += [
            '-lvl',
            '{level}'.format(level=preset_validation_params.reward_test_level)
        ]

    stdout = open(log_file_name, 'w')

    p = subprocess.Popen(cmd, stdout=stdout, stderr=stdout)

    start_time = time.time()

    reward_str = 'Evaluation Reward'
    if preset_validation_params.num_workers > 1:
        filename_pattern = 'worker_0*.csv'
    else:
        filename_pattern = '*.csv'

    test_passed = False

    # get the csv with the results
    csv_paths = read_csv_paths(test_path, filename_pattern)

    if csv_paths:
        csv_path = csv_paths[0]

        # verify results
        csv = None
        time.sleep(1)
        averaged_rewards = [0]

        last_num_episodes = 0

        if not no_progress_bar:
            print_progress(averaged_rewards, last_num_episodes,
                           preset_validation_params, start_time, time_limit)

        while csv is None or (
                csv['Episode #'].values[-1] <
                preset_validation_params.max_episodes_to_achieve_reward
                and time.time() - start_time < time_limit):
            try:
                csv = pd.read_csv(csv_path)
            except:
                # sometimes the csv is being written at the same time we are
                # trying to read it. no problem -> try again
                continue

            if reward_str not in csv.keys():
                continue

            rewards = csv[reward_str].values
            rewards = rewards[~np.isnan(rewards)]

            if len(rewards) >= 1:
                averaged_rewards = np.convolve(
                    rewards,
                    np.ones(min(len(rewards), win_size)) / win_size,
                    mode='valid')
            else:
                time.sleep(1)
                continue

            if not no_progress_bar:
                print_progress(averaged_rewards, last_num_episodes,
                               preset_validation_params, start_time,
                               time_limit)

            if csv['Episode #'].shape[0] - last_num_episodes <= 0:
                continue

            last_num_episodes = csv['Episode #'].values[-1]

            # check if reward is enough
            if np.any(averaged_rewards >=
                      preset_validation_params.min_reward_threshold):
                test_passed = True
                break
            time.sleep(1)

    # kill test and print result
    # os.killpg(os.getpgid(p.pid), signal.SIGKILL)
    p.kill()
    screen.log('')
    if test_passed:
        screen.success("Passed successfully")
    else:
        if time.time() - start_time > time_limit:
            screen.error("Failed due to exceeding time limit", crash=False)
            if verbose:
                screen.error("command exitcode: {}".format(p.returncode),
                             crash=False)
                screen.error(open(log_file_name).read(), crash=False)
        elif csv_paths:
            screen.error("Failed due to insufficient reward", crash=False)
            if verbose:
                screen.error("command exitcode: {}".format(p.returncode),
                             crash=False)
                screen.error(open(log_file_name).read(), crash=False)
            screen.error(
                "preset_validation_params.max_episodes_to_achieve_reward: {}".
                format(
                    preset_validation_params.max_episodes_to_achieve_reward),
                crash=False)
            screen.error(
                "preset_validation_params.min_reward_threshold: {}".format(
                    preset_validation_params.min_reward_threshold),
                crash=False)
            screen.error("averaged_rewards: {}".format(averaged_rewards),
                         crash=False)
            screen.error("episode number: {}".format(
                csv['Episode #'].values[-1]),
                         crash=False)
        else:
            screen.error("csv file never found", crash=False)
            if verbose:
                screen.error("command exitcode: {}".format(p.returncode),
                             crash=False)
                screen.error(open(log_file_name).read(), crash=False)

    shutil.rmtree(test_path)
    os.remove(log_file_name)
    if not test_passed:
        raise ValueError('golden test failed')
Example #7
def wait_and_check(args, processes, force=False):
    if not force and len(processes) < args.max_threads:
        return None

    test_path = processes[0][0]
    test_name = test_path.split('/')[-1]
    log_file_name = processes[0][1]
    p = processes[0][2]
    p.wait()

    filename_pattern = '*.csv'

    # get the csv with the results
    csv_paths = read_csv_paths(test_path, filename_pattern)

    test_passed = False
    screen.log('Results for {}: '.format(test_name[13:]))
    if not csv_paths:
        screen.error("csv file never found", crash=False)
        if args.verbose:
            screen.error("command exitcode: {}".format(p.returncode), crash=False)
            screen.error(open(log_file_name).read(), crash=False)
    else:
        trace_path = os.path.join('./rl_coach', 'traces', test_name[13:])
        if not os.path.exists(trace_path):
            screen.log('No trace found, creating new trace in: {}'.format(trace_path))
            os.makedirs(trace_path)
            df = pd.read_csv(csv_paths[0])
            df = clean_df(df)
            try:
                df.to_csv(os.path.join(trace_path, 'trace.csv'), index=False)
            except:
                pass
            screen.success("Successfully created new trace.")
            test_passed = True
        else:
            test_df = pd.read_csv(csv_paths[0])
            test_df = clean_df(test_df)
            new_trace_csv_path = os.path.join(trace_path, 'trace_new.csv')
            test_df.to_csv(new_trace_csv_path, index=False)
            test_df = pd.read_csv(new_trace_csv_path)
            trace_csv_path = glob.glob(path.join(trace_path, 'trace.csv'))
            trace_csv_path = trace_csv_path[0]
            trace_df = pd.read_csv(trace_csv_path)
            test_passed = test_df.equals(trace_df)
            if test_passed:
                screen.success("Passed successfully.")
                os.remove(new_trace_csv_path)
                test_passed = True
            else:
                screen.error("Trace test failed.", crash=False)
                if args.overwrite:
                    os.remove(trace_csv_path)
                    os.rename(new_trace_csv_path, trace_csv_path)
                    screen.error("Overwriting old trace.", crash=False)
                else:
                    screen.error("bcompare {} {}".format(trace_csv_path, new_trace_csv_path), crash=False)

    shutil.rmtree(test_path)
    os.remove(log_file_name)
    processes.pop(0)
    return test_passed