def run_experiment(args):
    """Configure an ExperimentGrid from parsed CLI args and launch training.

    Adds every hyperparameter to the grid, optionally loads FlexibilityEnv
    input settings for env_version >= 3, then runs PPO (with its extra
    pi-iteration setting) or VPG depending on args.algo.
    """
    grid = ExperimentGrid(name=args.exp_name)

    # Auto-generate seeds 0, 10, 20, ... unless seeds were given explicitly.
    seeds = [10 * i for i in range(args.num_runs)] if args.seed is None else args.seed

    # (key, value[, shorthand]) tuples, kept in the original insertion order
    # since ExperimentGrid derives run names from its parameters.
    settings = [
        ('seed', seeds),
        ('epochs', args.epochs),
        ('steps_per_epoch', args.steps_per_epoch),
        ('save_freq', args.save_freq),
        ('max_ep_len', 200),
        # NOTE(review): eval() on a CLI string — assumes args.act is a trusted
        # expression (e.g. an activation class path); do not feed it
        # untrusted input.
        ('ac_kwargs:activation', eval(args.act), ''),
        ('custom_h', args.custom_h),
        ('do_checkpoint_eval', args.do_checkpoint_eval),
        ('eval_episodes', args.eval_episodes),
        ('train_v_iters', args.train_v_iters),
        ('eval_temp', args.eval_temp),
        ('train_starting_temp', args.train_starting_temp),
        ('gamma', args.gamma),
        ('env_version', args.env_version),
        ('env_name', args.env_name),
        ('env_subtract_full_flex', args.env_subtract_full_flex),
        ('meta_learning', args.meta_learning),
        ('finetune', args.finetune),
        ('finetune_model_path', args.finetune_model_path),
        ('lam', args.lam),
        ('early_stop_epochs', args.early_stop_epochs),
        ('save_all_eval', args.save_all_eval),
    ]
    for setting in settings:
        grid.add(*setting)

    if args.episodes_per_epoch is not None:
        grid.add('episodes_per_epoch', args.episodes_per_epoch)

    if args.env_version >= 3:
        # Resolve the env-input file relative to the repo root on this machine.
        prefix = os.getcwd().split('RL_flex_design')[0]
        args.file_path = prefix + "RL_flex_design/spinup/FlexibilityEnv_input/{}".format(args.env_input)

        m, n, mean_c, mean_d, sd_d, profit_mat, target_arcs, fixed_costs, flex_0 = load_FlexibilityEnv_input(args.file_path)

        grid.add('env_input', args.file_path)
        grid.add('env_n_sample', args.env_n_sample)

        # An explicit --target_arcs overrides the value from the input file.
        if args.target_arcs is None:
            grid.add('target_arcs', target_arcs)
        else:
            grid.add('target_arcs', args.target_arcs)

    if args.algo == "ppo":
        grid.add('train_pi_iters', args.train_pi_iters)
        grid.run(ppo)
    elif args.algo == "vpg":
        grid.run(vpg)
# Beispiel #2 ("Beispiel" = German for "Example"; scrape artifact separating snippets)
# 0  (vote count from the scraped source)
def get_custom_env_fn(env_name,
                      env_version=None,
                      target_arcs=None,
                      env_input=None,
                      env_n_sample=None,
                      subtract_full_flexibility_performance=False,
                      meta_leraning=False):
    """Build a zero-argument FlexibilityEnv factory class.

    For env_version 1 or 2 the environment settings (n_plant, n_product,
    target_arcs, n_sample) are parsed from ``env_name``; for every other
    version they are loaded from the ``env_input`` file, with ``target_arcs``
    and ``env_n_sample`` taken from the arguments instead.

    Args:
        env_name: environment name string; for versions 1/2 it encodes the
            FlexibilityEnv settings, otherwise it is used only for logging.
        env_version: integer environment version selecting the branch above.
        target_arcs: target number of arcs (used by the env_version >= 3
            branch).
        env_input: path to the FlexibilityEnv input file (env_version >= 3).
        env_n_sample: number of demand samples (env_version >= 3).
        subtract_full_flexibility_performance: forwarded to FlexibilityEnv
            (env_version >= 3 branch only).
        meta_leraning: forwarded to FlexibilityEnv as ``meta_learning``.
            NOTE(review): misspelling of "meta_learning"; left unchanged
            because renaming would break keyword-argument callers.

    Returns:
        A FlexibilityEnv subclass whose no-argument constructor creates the
        fully configured environment (suitable as an env_fn factory).
    """
    if env_version in (1, 2):
        # parse FlexibilityEnv settings from env_name
        n_plant, n_product, target_arcs, n_sample = _parse_attributes(env_name)

        class CustomFlexibilityEnv(FlexibilityEnv):
            # Zero-argument wrapper binding the parsed settings via closure.
            def __init__(self):
                super().__init__(n_plant=n_plant,
                                 n_product=n_product,
                                 target_arcs=target_arcs,
                                 n_sample=n_sample,
                                 env_version=env_version)
                print(
                    'using custom env: {} | n_plant: {} | n_product: {} | target_arcs: {} | n_sample: {} | env_version: {}'
                    .format(env_name, n_plant, n_product, target_arcs,
                            n_sample, env_version))

    else:  # env_version in (3, 4, >40, 5):
        # load FlexibilityEnv settings from env_input
        n_plant, n_product, mean_c, mean_d, sd_d, profit_mat, _, fixed_costs, flex_0 = load_FlexibilityEnv_input(
            env_input)

        # The input file may store these as plain lists; convert to float32
        # arrays (FlexibilityEnv presumably does array arithmetic — TODO
        # confirm), passing anything else through unchanged.
        def to_numpy_array(obj):
            if isinstance(obj, list):
                obj_array = np.asarray(obj, dtype=np.float32)
                return obj_array
            else:
                return obj

        mean_c = to_numpy_array(mean_c)
        mean_d = to_numpy_array(mean_d)
        sd_d = to_numpy_array(sd_d)

        class CustomFlexibilityEnv(FlexibilityEnv):
            # Zero-argument wrapper binding the loaded settings via closure.
            def __init__(self):
                super().__init__(
                    n_plant=n_plant,
                    n_product=n_product,
                    target_arcs=target_arcs,
                    # for env_version=3, target_arcs is passed into the function call
                    n_sample=env_n_sample,
                    capacity_mean=mean_c,
                    env_version=env_version,
                    demand_mean=mean_d,
                    demand_std=sd_d,
                    profit_matrix=profit_mat,
                    fixed_costs=fixed_costs,
                    starting_structure=flex_0,
                    subtract_full_flexibility_performance=
                    subtract_full_flexibility_performance,
                    meta_learning=meta_leraning)
                print(
                    'using env: {} | n_plant: {} | n_product: {} | target_arcs: {} | n_sample: {} | env_version: {} '
                    '| subtract_full_flex: {} | meta_leraning: {}'.format(
                        env_name, n_plant, n_product, target_arcs,
                        env_n_sample, env_version,
                        subtract_full_flexibility_performance, meta_leraning))

    return CustomFlexibilityEnv
def generate_scripts_for_multiple_target_arcs(experiment,
                                              env_input,
                                              env_version_list,
                                              epoch_episodes,
                                              num_tars_per_script,
                                              num_batches,
                                              num_runs,
                                              gamma=None,
                                              lam=None,
                                              variance_reduction=False,
                                              env_n_sample=50,
                                              custom_h=None,
                                              cpu=2,
                                              tar_list=None,
                                              early_stop=None,
                                              epoch=800,
                                              save_freq=10,
                                              save_all_eval=None,
                                              included_tars=None):
    """Generate bash scripts that launch training over groups of target arcs.

    For every (batch, env_version) pair this writes:
      * an entrypoint script ``run_<exp>_ENV<v>_batch<b>_entrypoint.sh`` that
        starts every group script in the background, and
      * one group script ``run_<exp>_ENV<v>_batch<b>_<i>.sh`` per entry of
        ``tar_list``, each invoking ``python -m spinup.run_flexibility`` with
        the flags assembled below.

    Args:
        experiment: experiment tag used in file and experiment names.
        env_input: FlexibilityEnv input file name; also forwarded to the runs.
        env_version_list: environment versions to generate scripts for.
        epoch_episodes: multiplier sizing ``--steps_per_epoch`` (scaled by
            mean(target arcs of the group) minus the number of existing arcs).
        num_tars_per_script: group size when splitting target arcs into
            scripts (only used when ``tar_list`` is not supplied).
        num_batches: number of batches; batch ``b`` uses seeds starting at
            ``100 * b``.
        num_runs: number of seeds per command (expanded by ``_get_seed_str``).
        gamma, lam: optional ``--gamma`` / ``--lam`` values to forward.
        variance_reduction: if True, adds ``--env_subtract_full_flex``.
        env_n_sample: forwarded via ``--env_n_sample`` when not the default 50.
        custom_h: hidden-layer spec; used in experiment names and forwarded.
        cpu: value for ``--cpu``.
        tar_list: explicit list of target-arc groups; when None or empty it is
            derived from the input file's target arcs.
        early_stop: optional ``--early_stop`` value.
        epoch: value for ``--epochs``.
        save_freq: value for ``--save_freq``.
        save_all_eval: when not None, adds the ``--save_all_eval`` flag.
        included_tars: optional whitelist filtering the input file's targets.
    """
    m, n, mean_c, mean_d, sd_d, profit_mat, target_arcs, fixed_costs, flex_0 = load_FlexibilityEnv_input(
        _get_full_path(env_input))
    # flex_0 is the starting structure; its sum is the number of existing arcs.
    print("number of existing arcs {}".format(flex_0.sum()))

    if included_tars is not None:
        # Keep only the target-arc values that appear in included_tars.
        target_arcs = [tar for tar in target_arcs if tar in included_tars]

    if tar_list is None or (tar_list is not None and len(tar_list) == 0):
        tar_list = get_tars_list(num_tars_per_script, target_arcs)

    print("target arcs to be run: {}".format(tar_list))

    for batch in range(num_batches):
        # Each batch gets a disjoint seed range so runs do not collide.
        starting_seed = 100 * batch
        for env_version in env_version_list:
            # Entrypoint script: launches every per-group script in parallel.
            path = 'run_{}_ENV{}_batch{}_entrypoint.sh'.format(
                experiment, env_version, batch)
            # The '$' + '{' + 'i' + '}' concatenation emits a literal bash
            # ${i} without interfering with str.format's brace syntax.
            python_string = 'for((i=0;i < {};i++)); do bash run_{}_ENV{}_batch{}_'.format(len(tar_list),
                                                                                          experiment,
                                                                                          env_version,
                                                                                          batch) \
                            + '$' + '{' + 'i' + '}' + '.sh & done'
            with open(path, 'w') as f:
                f.write('#!/bin/bash\n\n')
                f.write(python_string)
            make_executable(path)

            print(python_string)

            # Per-group scripts, launched in parallel by the entrypoint.
            # NOTE: target_arcs is rebound here to the current group (a list),
            # shadowing the list loaded from the input file above.
            for idx, target_arcs in enumerate(tar_list):

                assert len(target_arcs) >= 1
                if len(target_arcs) == 1:
                    # add 'tar' to exp_name explicitly
                    exp_name = 'F{}_CH{}_ENV{}_tar{}'.format(
                        experiment,
                        '1024-128' if custom_h is None else custom_h,
                        env_version, target_arcs[0])
                else:
                    exp_name = 'F{}_CH{}_ENV{}'.format(
                        experiment,
                        '1024-128' if custom_h is None else custom_h,
                        env_version)

                # --steps_per_epoch is proportional to the number of arcs
                # still to add: (mean(target arcs of group) - existing arcs).
                python_string = "python -m spinup.run_flexibility   \\\n   \
                                --algo ppo    \\\n   \
                                --env_name F{}-v{}   \\\n   \
                                --exp_name {}    \\\n   \
                                --cpu {}   \\\n   \
                                --epochs {}    \\\n   \
                                --custom_h 1024-128   \\\n   \
                                --env_version {}   \\\n   \
                                --env_input {}   \\\n   \
                                --target_arcs  {}   \\\n   \
                                --seed {}   \\\n   \
                                --save_freq {}    \\\n   \
                                --steps_per_epoch {}   \\\n   \
                                --do_checkpoint_eval  \\\n".format(
                    experiment, env_version, exp_name, cpu, epoch, env_version,
                    env_input, _get_target_arcs_string(target_arcs),
                    _get_seed_str(starting_seed, num_runs), save_freq,
                    int(
                        np.ceil(
                            (int(statistics.mean(target_arcs)) - flex_0.sum())
                            * epoch_episodes)))

                if variance_reduction:
                    python_string += '                                   --env_subtract_full_flex  \\\n'

                if env_n_sample != 50:
                    python_string += '                                   --env_n_sample {}  \\\n'.format(
                        env_n_sample)

                if custom_h is not None:
                    python_string += '                                   --custom_h {}   \\\n'.format(
                        custom_h)

                if early_stop is not None:
                    python_string += '                                   --early_stop {}  \\\n'.format(
                        early_stop)

                # NOTE(review): the flag is added whenever save_all_eval is
                # not None — even save_all_eval=False would enable it; confirm
                # callers only ever pass None or True.
                if save_all_eval is not None:
                    python_string += '                                   --save_all_eval  \\\n'

                if gamma is not None:
                    python_string += '                                   --gamma {}   \\\n'.format(
                        gamma)

                # The final line terminates the shell command with ';'.
                if lam is None:
                    python_string += '                                   ;'
                else:
                    python_string += '                                   --lam {};'.format(
                        lam)

                path = 'run_{}_ENV{}_batch{}_{}.sh'.format(
                    experiment, env_version, batch, idx)
                with open(path, 'w') as f:
                    f.write('#!/bin/bash\n\n')
                    f.write(python_string)

                make_executable(path)

                print(python_string)
# Beispiel #4 ("Beispiel" = German for "Example"; scrape artifact separating snippets)
# 0  (vote count from the scraped source)
    else:
        exp = experiment
    return exp


from spinup.FlexibilityEnv_input.FlexibilityEnv_input_files import INPUTS

if __name__ == "__main__":
    experiment = '10x10a-lspe'
    envs = ['ENV5']
    print()
    input_path = get_input_path(INPUTS[get_input_key(experiment)])

    exclude = ['abcdef']

    m, n, mean_c, mean_d, sd_d, profit_mat, target_arcs, fixed_costs, flex_0 = load_FlexibilityEnv_input(input_path)

    perf_dicts = []
    files_dicts = []
    for env in envs:
        print("==== processing files for {}".format(env))
        files = collect_best_structures(experiment, env, exclude)

        dict_tar_perf = {}
        dict_tar_file = {}

        files = sorted(files)

        for file in files:
            tar = file.split('T')[1].split('_SP')[0]
            with open(file, 'rb') as f:
def generate_scripts_for_one_target_arcs(experiment,
                                         env_input,
                                         env_version_list,
                                         epoch_episodes,
                                         target_arcs,
                                         num_runs,
                                         starting_seed,
                                         gamma=None,
                                         lam=None,
                                         variance_reduction=False,
                                         env_n_sample=50,
                                         early_stop=None,
                                         cpu=8,
                                         epoch=800,
                                         save_freq=10,
                                         save_all_eval=None,
                                         custom_h=None,
                                         meta_learning=False,
                                         finetune=False,
                                         finetune_path=None,
                                         finetune_meta_trained_epoch=None):
    """Generate bash scripts for one target-arc value (one script per seed).

    For each env_version this writes an entrypoint script
    ``run_<exp>_ENV<v>_tar<t>_entrypoint.sh`` that launches ``num_runs``
    per-seed scripts in the background, plus the per-seed scripts
    ``run_<exp>_ENV<v>_tar<t>_<i>.sh``, each invoking
    ``python -m spinup.run_flexibility`` with the flags assembled below.

    Args:
        experiment: experiment tag used in file and experiment names.
        env_input: FlexibilityEnv input file name; forwarded to the runs.
        env_version_list: environment versions to generate scripts for.
        epoch_episodes: multiplier sizing ``--steps_per_epoch``.
        target_arcs: target number of arcs for every run; when
            ``meta_learning`` is True it is instead alternated between 13 and
            22 across runs.
        num_runs: number of per-seed scripts; run ``i`` uses seed
            ``starting_seed + 10 * i``.
        starting_seed: base seed for the per-run seeds.
        gamma, lam: optional ``--gamma`` / ``--lam`` values to forward.
        variance_reduction: if True, adds ``--env_subtract_full_flex``.
        env_n_sample: forwarded via ``--env_n_sample`` when not the default 50.
        early_stop: optional ``--early_stop`` value.
        cpu: value for ``--cpu``.
        epoch: value for ``--epochs``.
        save_freq: value for ``--save_freq``.
        save_all_eval: when not None, adds the ``--save_all_eval`` flag.
        custom_h: hidden-layer spec; used in experiment names and forwarded.
        meta_learning: if True, adds ``--meta_learning``, alternates target
            arcs (13/22) per run, uses ``tar0`` in file names, and sizes
            steps_per_epoch as ``20 * epoch_episodes``.
        finetune: if True, adds ``--finetune`` plus a ``--finetune_model_path``
            derived from ``finetune_path`` and the run's seed.
        finetune_path: base path of the pretrained model (a per-seed
            ``_s<seed>`` suffix is appended).
        finetune_meta_trained_epoch: when given, the finetune path points at
            that epoch's ``simple_save<epoch>`` checkpoint directory.
    """
    m, n, mean_c, mean_d, sd_d, profit_mat, _, fixed_costs, flex_0 = load_FlexibilityEnv_input(
        _get_full_path(env_input))
    # flex_0 is the starting structure; its sum is the number of existing arcs.
    print("number of existing arcs {}".format(flex_0.sum()))

    # Meta-learning runs vary target arcs per seed, so file names use tar0.
    target_arcs_in_names = 0 if meta_learning else target_arcs

    for env_version in env_version_list:
        # Entrypoint script: launches every per-seed script in parallel.
        path = 'run_{}_ENV{}_tar{}_entrypoint.sh'.format(
            experiment, env_version, target_arcs_in_names)
        # The '$' + '{' + 'i' + '}' concatenation emits a literal bash ${i}
        # without interfering with str.format's brace syntax.
        python_string = 'for((i=0;i < {};i++)); do bash run_{}_ENV{}_tar{}_'.format(num_runs,
                                                                                    experiment,
                                                                                    env_version,
                                                                                    target_arcs_in_names) \
                        + '$' + '{' + 'i' + '}' + '.sh & done'
        with open(path, 'w') as f:
            f.write('#!/bin/bash\n\n')
            f.write(python_string)
        make_executable(path)

        print(python_string)

        # Per-seed scripts, launched in parallel by the entrypoint.
        for idx in range(num_runs):

            # Meta-learning alternates target arcs across runs: odd-indexed
            # runs get 13, even-indexed runs get 22. (NOTE(review): the same
            # expression is recomputed inline in the format call below —
            # redundant but consistent.)
            target_arcs = target_arcs if meta_learning is False else (
                13 if idx % 2 == 1 else 22)

            # --steps_per_epoch is proportional to the arcs still to add
            # (target_arcs - existing arcs) for normal runs, or a flat
            # 20 * epoch_episodes for meta-learning runs.
            python_string = "python -m spinup.run_flexibility  \\\n \
                            --algo ppo  \\\n \
                            --env_name F{}-v{}  \\\n \
                            --exp_name F{}_CH{}_ENV{}_tar{}  \\\n \
                            --cpu {} \\\n \
                            --epochs {}  \\\n \
                            --custom_h 1024-128  \\\n \
                            --env_version {}  \\\n \
                            --env_input {}  \\\n \
                            --target_arcs  {}  \\\n \
                            --seed {}  \\\n \
                            --save_freq {}   \\\n \
                            --steps_per_epoch {}  \\\n \
                            --do_checkpoint_eval \\\n".format(
                experiment, env_version, experiment,
                '1024-128' if custom_h is None else custom_h, env_version,
                target_arcs_in_names, cpu, epoch, env_version, env_input,
                target_arcs if meta_learning is False else
                (13 if idx % 2 == 1 else 22), starting_seed + 10 * idx,
                save_freq,
                int(np.ceil(
                    (target_arcs - flex_0.sum()) *
                    epoch_episodes)) if meta_learning is False else 20 *
                epoch_episodes)

            if variance_reduction:
                python_string += '                             --env_subtract_full_flex  \\\n'

            if env_n_sample != 50:
                python_string += '                             --env_n_sample {}  \\\n'.format(
                    env_n_sample)

            if early_stop is not None:
                python_string += '                             --early_stop {}  \\\n'.format(
                    early_stop)

            # NOTE(review): the flag is added whenever save_all_eval is not
            # None — even save_all_eval=False would enable it; confirm callers
            # only ever pass None or True.
            if save_all_eval is not None:
                python_string += '                             --save_all_eval  \\\n'

            if gamma is not None:
                python_string += '                             --gamma {}  \\\n'.format(
                    gamma)

            if custom_h is not None:
                python_string += '                             --custom_h {}  \\\n'.format(
                    custom_h)

            if meta_learning:
                python_string += '                             --meta_learning  \\\n'

            if finetune:
                python_string += '                             --finetune  \\\n'
                # The pretrained model is seed-matched: run i finetunes from
                # the model saved under suffix _s<starting_seed + 10*i>.
                if finetune_meta_trained_epoch is None:
                    python_string += '                             --finetune_model_path {}  \\\n'.format(
                        '{}_s{}'.format(finetune_path,
                                        starting_seed + 10 * idx))
                else:
                    # Point at a specific epoch's simple_save checkpoint.
                    python_string += '                             --finetune_model_path {}  \\\n'.format(
                        '{}_s{}/simple_save{}'.format(
                            finetune_path, starting_seed + 10 * idx,
                            finetune_meta_trained_epoch))

            # The final line terminates the shell command with ';'.
            if lam is None:
                python_string += '                             ;'
            else:
                python_string += '                             --lam {};'.format(
                    lam)

            path = 'run_{}_ENV{}_tar{}_{}.sh'.format(experiment, env_version,
                                                     target_arcs_in_names, idx)
            with open(path, 'w') as f:
                f.write('#!/bin/bash\n\n')
                f.write(python_string)

            make_executable(path)

            print(python_string)