def get_states(args,
               true_environment,
               length_constraint=50000,
               raws=None,
               dumps=None):
    dataset_path = args.record_rollouts
    changepoint_path = args.changepoint_dir
    option_chain = OptionChain(true_environment, args.changepoint_dir,
                               args.train_edge, args)
    environments = option_chain.initialize(args)
    print(environments)
    proxy_environment = environments.pop(-1)
    head, tail = get_edge(args.train_edge)
    if len(environments) > 1:  # there is a difference in the properties of a proxy environment and the true environment
        num_actions = len(environments[-1].reward_fns)
    else:
        num_actions = environments[-1].num_actions
    state_class = GetState(head, state_forms=list(zip(args.state_names, args.state_forms)))
    use_raw = 'raw' in args.state_forms
    state_class.minmax = compute_minmax(state_class, dataset_path)
    states, resps, raws, dumps = load_states(
        state_class.get_state,
        dataset_path,
        length_constraint=length_constraint,
        use_raw=use_raw,
        raws=raws,
        dumps=dumps)
    return states, resps, num_actions, state_class, environments, raws, dumps
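get_states fills state_class.minmax by scanning the recorded rollouts with compute_minmax. As a self-contained sketch of that per-dimension min/max idea (not the project's compute_minmax, which reads states from dataset_path):

import numpy as np

def minmax_over_states(states):
    # states: an iterable of equal-length 1-D state vectors
    arr = np.asarray(list(states), dtype=np.float64)
    # (2, D) layout: row 0 = per-dimension minima, row 1 = per-dimension maxima,
    # matching how the snippets here assign state_class.minmax
    return np.stack((arr.min(axis=0), arr.max(axis=0)))

demo = [np.array([0.0, 10.0]), np.array([-3.0, 7.5]), np.array([1.5, 12.0])]
print(minmax_over_states(demo))  # minima [-3.0, 7.5], maxima [1.5, 12.0]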
Example #2
    # NOTE: the snippet is truncated above; the guarding condition is not shown in
    # the original, so a load-existing-models flag (args.load_weights) is assumed here.
    if args.load_weights:
        train_models = proxy_environment.models
    else:
        train_models = MultiOption(len(reward_paths), models[args.model_form])
    proxy_chain = environments
    if len(environments) > 1:  # there is a difference in the properties of a proxy environment and the true environment
        num_actions = len(environments[-1].reward_fns)
    else:
        num_actions = environments[-1].num_actions
    print(args.state_names, args.state_forms)
    state_class = GetState(head, state_forms=list(zip(args.state_names, args.state_forms)))
    state_class.minmax = compute_minmax(state_class,
                                        dataset_path,
                                        filename=args.focus_dumps_name)
    if args.normalize:
        minv = []
        maxv = []
        for f in args.state_forms:
            if f == 'prox':
                minv += [-84, -84]
                maxv += [84, 84]
            elif f == 'bounds':
                minv += [0, 0]
                maxv += [84, 84]
        state_class.minmax = np.stack((np.array(minv), np.array(maxv)))
        print(state_class.minmax)

    behavior_policy = behavior_policies[args.behavior_policy]()
    train_models = MultiOption(len(reward_classes), models[args.model_form])
    environments = option_chain.initialize(args)
    proxy_environment = environments.pop(-1)
    proxy_chain = environments
    if len(environments) > 1:  # there is a difference in the properties of a proxy environment and the true environment
        num_actions = len(environments[-1].reward_fns)
    else:
        num_actions = environments[-1].num_actions
    print(args.state_names, args.state_forms)
    state_class = GetState(head, state_forms=list(zip(args.state_names, args.state_forms)))
    # softcomputed minmax (buggy)
    state_class.minmax = compute_minmax(state_class, dataset_path)
    # HARDCODED MINMAX AT 84,84!!!!
    minv = []
    maxv = []
    for f in args.state_forms:
        if f == 'prox':
            minv += [-84, -84]
            maxv += [84, 84]
        elif f == 'bounds':
            minv += [0, 0]
            maxv += [84, 84]
    state_class.minmax = np.stack((np.array(minv), np.array(maxv)))
    print("state class minmax", state_class.minmax)

    for reward_class in reward_classes:
        reward_class.traj_dim = state_class.shape
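The hardcoded bounds above assume 84x84 frames: the 'prox' dimensions span [-84, 84] and the 'bounds' dimensions span [0, 84]. Under that same assumption, a small sketch of how the stacked (2, D) minmax array can normalize a raw state vector into [0, 1]:

import numpy as np

# prox (x, y) then bounds (x, y), with the 84x84 frame bounds used above
minv = np.array([-84.0, -84.0, 0.0, 0.0])
maxv = np.array([84.0, 84.0, 84.0, 84.0])
minmax = np.stack((minv, maxv))

def normalize(state, minmax):
    lo, hi = minmax
    return (state - lo) / (hi - lo)  # elementwise rescale into [0, 1]

print(normalize(np.array([0.0, -42.0, 21.0, 84.0]), minmax))  # [0.5  0.25 0.25 1.]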
Example #4
# NOTE: this snippet is truncated at the top in the original; a wrapper such as the
# following (function name assumed) is needed for the indented body to parse.
def train_option(args, true_environment):
    dataset_path = args.record_rollouts
    changepoint_path = args.changepoint_dir
    option_chain = OptionChain(true_environment, args.changepoint_dir, args.train_edge, args)
    reward_paths = glob.glob(os.path.join(option_chain.save_dir, "*rwd.pkl"))
    print(reward_paths)
    reward_paths.sort(key=lambda x: int(x.split("__")[2]))

    head, tail = get_edge(args.train_edge)

    reward_classes = [load_from_pickle(pth) for pth in reward_paths]
    # train_models = MultiOption(1, BasicModel)
    train_models = MultiOption(len(reward_paths), models[args.model_form])
    # learning_algorithm = DQN_optimizer()
    learning_algorithm = learning_algorithms[args.optimizer_form]()
    # learning_algorithm = DDPG_optimizer()
    environments = option_chain.initialize(args)
    print(environments)
    proxy_environment = environments.pop(-1)
    proxy_chain = environments
    if len(environments) > 1: # there is a difference in the properties of a proxy environment and the true environment
        num_actions = len(environments[-1].reward_fns)
    else:
        num_actions = environments[-1].num_actions
    print(args.state_names, args.state_forms)
    state_class = GetState(head, state_forms=list(zip(args.state_names, args.state_forms)))
    state_class.minmax = compute_minmax(state_class, dataset_path)
    behavior_policy = behavior_policies[args.behavior_policy]()
    # behavior_policy = EpsilonGreedyProbs()
    trainRL(args, option_chain.save_dir, true_environment, train_models, learning_algorithm, proxy_environment,
            proxy_chain, reward_classes, state_class, behavior_policy=behavior_policy)
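The reward pickles in this last snippet are discovered with glob and ordered numerically by the third double-underscore field of each path. A quick illustration of that sort key on made-up filenames (the actual naming scheme depends on how the project writes its *rwd.pkl files):

paths = [
    "save__edge__10__rwd.pkl",
    "save__edge__2__rwd.pkl",
    "save__edge__1__rwd.pkl",
]
# int(...) gives numeric order (1, 2, 10) rather than lexicographic ('1', '10', '2')
paths.sort(key=lambda x: int(x.split("__")[2]))
print(paths)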