Code Example #1
def register_history_bets(s3_bucket_name, filename, user_id, tz_string,
                          commit):
    """ Notice that it can register total bets both for open events and for events with results. In case
    of events with results, you can have existing closed bet events or new bet events will be created """
    # s3_bucket_name = 'zakanda-static-01'
    # filename = 'past_bets.xlsx'
    # user_id = 29
    # tz_string = 'Europe/Athens'
    # known user ids: 10:c, 26:p, 23:t2, 27:p1, 25:bob, 28:liono, 29:yahdim
    user = get_user(user_id=user_id)
    try:
        tz = pytz.timezone(tz_string)
    except Exception as e:
        logger.error(e)
        tz = None
    if not s3_bucket_name or not filename or not user or not tz:
        logger.error('arguments error!')
        return

    s3_obj = utils.get_s3_obj(bucket=s3_bucket_name, filename=filename)
    # s3_obj = join(dirname(dirname(abspath(__file__))), 'fixtures', filename)  # read file from local disk

    work_book, data = utils.read_data(s3_obj)
    utils.remove_empty_rows(data)
    # datemode: Which date system was in force when this file was last saved.<br />
    #    0 => 1900 system (the Excel for Windows default).<br />
    #    1 => 1904 system (the Excel for Macintosh default).<br />
    datemode = work_book.datemode
    read_tbs = create_read_objects(user, data, datemode, tz=tz)
    if not commit:
        utils.check_validity(read_tbs)
    else:
        trees_created = utils.create_zakanda_bet_trees(read_tbs)
        if trees_created:
            user.profile.settle_total_bets(call_api=True)
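
A minimal usage sketch for register_history_bets, reusing the example argument values from the function's own inline comments. The call is illustrative and assumes the module's dependencies (utils, get_user, pytz, logger) are importable as in the original project.

# Hypothetical invocation using the example values commented in the function above.
register_history_bets(
    s3_bucket_name='zakanda-static-01',  # example bucket from the comments above
    filename='past_bets.xlsx',           # example spreadsheet from the comments above
    user_id=29,                          # example user id from the comments above
    tz_string='Europe/Athens',
    commit=False,                        # dry run: spreadsheet is read and validated, no bet trees are created
)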
Code Example #2
File: evaluate.py  Project: ngng0274/CGVAE-CON
    qedscorer, qed_score_per_molecule, diff, real, pred = utils.check_qed(
        dataset)
    with open('diff.csv', 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(diff)
        writer.writerow(real)
        writer.writerow(pred)
    #novelty=utils.novelty_metric(dataset)
    total, nonplanar = utils.check_planar(dataset)
    total, atom_counter, atom_per_molecule = utils.count_atoms(dataset)
    total, edge_type_counter, edge_type_per_molecule = utils.count_edge_type(
        dataset)
    total, shape_count, shape_count_per_molecule = utils.shape_count(dataset)
    total, tree_count = utils.check_cyclic(dataset)
    sascorer, sa_score_per_molecule = utils.check_sascorer(dataset)
    total, validity = utils.check_validity(dataset)

    print("------------------------------------------")
    print("Metrics")
    print("------------------------------------------")
    print("total molecule")
    print(total)
    print("------------------------------------------")
    print("percentage of nonplanar:")
    print(nonplanar / total)
    print("------------------------------------------")
    print("avg atom:")
    for atom_type, c in atom_counter.items():
        print(dataset_info(dataset)['atom_types'][atom_type])
        print(c / total)
    print("standard deviation")
Code Example #3
def design_agent_and_env(FLAGS):
    """
    1. DESIGN AGENT

    The key hyperparameters for agent construction are

        a. Number of levels in agent hierarchy
        b. Max sequence length in which each policy will specialize
        c. Max number of atomic actions allowed in an episode
        d. Environment timesteps per atomic action

    See Section 3 of this file for other agent hyperparameters that can be configured.
    """

    FLAGS.agents = 1  # Enter number of levels in agent hierarchy

    FLAGS.time_scale = 1000  # Enter max sequence length in which each policy will specialize

    # Enter max number of atomic actions.  This will typically be FLAGS.time_scale**(FLAGS.layers).
    max_actions = 1000

    timesteps_per_action = 10  # Provide the number of time steps per atomic action.
    """
    2. DESIGN ENVIRONMENT

        a. Designer must provide the original UMDP (S,A,T,G,R).
            - The S,A,T components can be fulfilled by providing the Mujoco model.
            - The user must separately specify the initial state space.
            - G can be provided by specifying the end goal space.
            - R, which by default uses a shortest path {-1,0} reward function, can be implemented by specifying two components: (i) a function that maps the state space to the end goal space and (ii) the end goal achievement thresholds for each dimension of the end goal.

        b.  In order to convert the original UMDP into a hierarchy of k UMDPs, the designer must also provide
            - The subgoal action space, A_i, for all higher-level UMDPs i > 0
            - R_i for levels 0 <= i < k-1 (i.e., all levels that try to achieve goals in the subgoal space).  As in the original UMDP, R_i can be implemented by providing two components: (i) a function that maps the state space to the subgoal space and (ii) the subgoal achievement thresholds.

        c.  Designer should also provide subgoal and end goal visualization functions in order to show video of training.  These can be updated in "display_subgoal" and "display_end_goal" methods in the "environment.py" file.

    """

    # Provide file name of the Mujoco model (e.g., "pendulum.xml").  Make sure the file is stored in the "mujoco_files" folder
    model_name = "pendulum.xml"

    # Provide initial state space consisting of the ranges for all joint angles and velocities.  In the inverted pendulum task, we randomly sample from the below initial joint position and joint velocity ranges.  These values are then converted to the actual state space, which is [cos(pendulum angle), sin(pendulum angle), pendulum velocity].

    initial_state_space = np.array([[np.pi / 4, 7 * np.pi / 4], [-0.05, 0.05]])

    # Provide end goal space.  The code supports separate train and test end goal spaces, in case the user would like to train on a larger end goal space than is tested.  If the user needs to make additional customizations to the end goals, the "get_next_goal" method in "environment.py" can be updated.

    # In the inverted pendulum environment, the end goal will be the desired joint angle and joint velocity for the pendulum.
    goal_space_train = [[np.deg2rad(-16), np.deg2rad(16)], [-0.6, 0.6]]
    goal_space_test = [[0, 0], [0, 0]]

    # Provide a function that maps from the state space to the end goal space.  This is used to determine whether the agent should be given the sparse reward.  It is also used for Hindsight Experience Replay to determine which end goal was achieved after a sequence of actions.

    # Supplemental function that bounds an angle to [-pi, pi]
    def bound_angle(angle):
        bounded_angle = angle % (2 * np.pi)

        if np.absolute(bounded_angle) > np.pi:
            bounded_angle = -(np.pi - bounded_angle % np.pi)

        return bounded_angle

    project_state_to_end_goal = lambda sim, state: np.array([
        bound_angle(sim.data.qpos[0]), 15 if state[2] > 15 else -15
        if state[2] < -15 else state[2]
    ])

    # Set end goal achievement thresholds.  If the agent is within the threshold for each dimension, the end goal has been achieved and the reward of 0 is granted.
    end_goal_thresholds = np.array([np.deg2rad(9.5), 0.6])

    # Provide range for each dimension of subgoal space in order to configure subgoal actor networks.  Subgoal space can be the same as the state space or some other projection out of the state space.  In our implementation of the inverted pendulum task, the subgoal space is the concatenation of the pendulum position and velocity.  This is slightly different from the state space, which is [cos(pendulum angle), sin(pendulum angle), pendulum velocity].

    subgoal_bounds = np.array([[-np.pi, np.pi], [-15, 15]])

    # Provide state to subgoal projection function.
    project_state_to_subgoal = lambda sim, state: np.array([
        bound_angle(sim.data.qpos[0]), 15 if state[2] > 15 else -15
        if state[2] < -15 else state[2]
    ])

    # Set subgoal achievement thresholds
    subgoal_thresholds = np.array([np.deg2rad(9.5), 0.6])

    # To properly visualize goals, update "display_end_goal" and "display_subgoals" methods in "environment.py"
    """
    3. SET MISCELLANEOUS HYPERPARAMETERS

    Below are some other agent hyperparameters that can affect results, including
        a. Subgoal testing percentage
        b. Subgoal penalty
        c. Exploration noise
        d. Replay buffer size
    """

    agent_params = {}

    # Define the percentage of actions for which a subgoal level (i.e., level i > 0) will test its subgoal actions
    agent_params["subgoal_test_perc"] = 0.3

    # Define subgoal penalty for missing a subgoal.  Please note that by default the Q-value target for missed subgoals does not include the Q-value of the next state (i.e., discount rate = 0).  As a result, the Q-value target for a missed subgoal just equals the penalty.  For instance, in this pendulum implementation, if a level proposes a subgoal and misses it, the Q target value for that action would equal the penalty of -FLAGS.time_scale.  To incorporate the next state in the penalty, go to the "penalize_subgoal" method in the "layer.py" file.
    agent_params["subgoal_penalty"] = -FLAGS.time_scale

    # Define exploration noise that is added to both subgoal actions and atomic actions.  Noise added is Gaussian N(0, noise_percentage * action_dim_range)
    agent_params["atomic_noise"] = [0.1 for i in range(1)]
    agent_params["subgoal_noise"] = [0.1 for i in range(2)]

    # Define number of episodes of transitions to be stored by each level of the hierarchy
    agent_params["episodes_to_store"] = 200

    # Provide training schedule for agent.  Training by default will alternate between exploration and testing.  The hyperparameter below indicates the number of exploration episodes.  Testing occurs for 100 episodes.  To change the number of testing episodes, go to "run_HAC.py".
    agent_params["num_exploration_episodes"] = 50

    # For other relevant agent hyperparameters, please refer to the "agent.py" and "layer.py" files

    # Ensure environment customizations have been properly entered
    check_validity(model_name, goal_space_train, goal_space_test,
                   end_goal_thresholds, initial_state_space, subgoal_bounds,
                   subgoal_thresholds, max_actions, timesteps_per_action)

    # Instantiate and return agent and environment
    env = Environment(model_name, goal_space_train, goal_space_test,
                      project_state_to_end_goal, end_goal_thresholds,
                      initial_state_space, subgoal_bounds,
                      project_state_to_subgoal, subgoal_thresholds,
                      max_actions, timesteps_per_action, FLAGS.show)

    agent = Agent(FLAGS, env, agent_params)

    return agent, env
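
The comments above describe a shortest-path {-1, 0} reward: the end goal counts as achieved, and a reward of 0 is granted, only when every dimension of the projected state is within its threshold of the goal. Below is a minimal sketch of that check using the pendulum thresholds defined above; the function name is illustrative, and the exact comparison operator used in "environment.py" may differ.

import numpy as np

def sparse_reward(projected_state, end_goal, thresholds=np.array([np.deg2rad(9.5), 0.6])):
    """Return 0 if every goal dimension is within its threshold, else -1 (shortest-path reward)."""
    achieved = np.all(np.absolute(projected_state - end_goal) < thresholds)
    return 0 if achieved else -1

# Example: pendulum state projected to (bounded angle, clipped velocity) vs. the test goal [0, 0].
print(sparse_reward(np.array([np.deg2rad(5), 0.3]), np.array([0.0, 0.0])))   # 0 (achieved)
print(sparse_reward(np.array([np.deg2rad(30), 0.3]), np.array([0.0, 0.0])))  # -1 (missed)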
Code Example #4
def design_agent_and_env(FLAGS):
    """
    1. DESIGN AGENT

    The key hyperparameters for agent construction are

        a. Number of levels in agent hierarchy
        b. Max sequence length in which each policy will specialize
        c. Max number of atomic actions allowed in an episode
        d. Environment timesteps per atomic action

    See Section 3 of this file for other agent hyperparameters that can be configured.
    """

    FLAGS.layers = 3  # Enter number of levels in agent hierarchy

    FLAGS.time_scale = 10  # Enter max sequence length in which each policy will specialize

    # Enter max number of atomic actions.  This will typically be FLAGS.time_scale**(FLAGS.layers).  However, in the Ant Reacher task, we use a shorter episode length.
    max_actions = 500
    # max_actions = 15

    timesteps_per_action = 15  # Provide the number of time steps per atomic action.
    """
    2. DESIGN ENVIRONMENT

        a. Designer must provide the original UMDP (S,A,T,G,R).
            - The S,A,T components can be fulfilled by providing the Mujoco model.
            - The user must separately specify the initial state space.
            - G can be provided by specifying the end goal space.
            - R, which by default uses a shortest path {-1,0} reward function, can be implemented by specifying two components: (i) a function that maps the state space to the end goal space and (ii) the end goal achievement thresholds for each dimension of the end goal.

        b.  In order to convert the original UMDP into a hierarchy of k UMDPs, the designer must also provide
            - The subgoal action space, A_i, for all higher-level UMDPs i > 0
            - R_i for levels 0 <= i < k-1 (i.e., all levels that try to achieve goals in the subgoal space).  As in the original UMDP, R_i can be implemented by providing two components: (i) a function that maps the state space to the subgoal space and (ii) the subgoal achievement thresholds.

        c.  Designer should also provide subgoal and end goal visualization functions in order to show video of training.  These can be updated in "display_subgoal" and "display_end_goal" methods in the "environment.py" file.

    """

    # Provide file name of the Mujoco model (e.g., "pendulum.xml").  Make sure the file is stored in the "mujoco_files" folder
    model_name = "ant_reacher.xml"

    # Provide initial state space consisting of the ranges for all joint angles and velocities.  In the Ant Reacher task, we use a random initial torso position and use fixed values for the remainder.

    initial_joint_pos = np.array([
        0, 0, 0.55, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, -1.0, 0.0, -1.0, 0.0,
        1.0
    ])
    initial_joint_pos = np.reshape(initial_joint_pos,
                                   (len(initial_joint_pos), 1))
    initial_joint_ranges = np.concatenate(
        (initial_joint_pos, initial_joint_pos), 1)
    initial_joint_ranges[0] = np.array([-9.5, 9.5])
    initial_joint_ranges[1] = np.array([-9.5, 9.5])

    # Concatenate velocity ranges
    initial_state_space = np.concatenate(
        (initial_joint_ranges, np.zeros(
            (len(initial_joint_ranges) - 1, 2))), 0)

    # Provide end goal space.  The code supports separate train and test end goal spaces, in case the user would like to train on a larger end goal space than is tested.  If the user needs to make additional customizations to the end goals, the "get_next_goal" method in "environment.py" can be updated.

    # In the Ant Reacher environment, the end goal will be the desired position of the torso.
    max_range = 9.5
    goal_space_train = [[-max_range, max_range], [-max_range, max_range],
                        [0.45, 0.55]]
    goal_space_test = [[-max_range, max_range], [-max_range, max_range],
                       [0.45, 0.55]]

    # Provide a function that maps from the state space to the end goal space.  This is used to (i) determine whether the agent should be given the sparse reward and (ii) for Hindsight Experience Replay to determine which end goal was achieved after a sequence of actions.
    project_state_to_end_goal = lambda sim, state: state[:3]

    # Set end goal achievement thresholds.  If the agent is within the threshold for each dimension, the end goal has been achieved and the reward of 0 is granted.

    # For the Ant Reacher task, the end goal will be the desired (x,y) position of the torso
    len_threshold = 0.5
    height_threshold = 0.2
    end_goal_thresholds = np.array(
        [len_threshold, len_threshold, height_threshold])

    # Provide range for each dimension of subgoal space in order to configure subgoal actor networks.  Subgoal space can be the same as the state space or some other projection out of the state space.

    # The subgoal space in the Ant Reacher task is the desired (x,y,z) position and (x,y) translational velocity of the torso
    cage_max_dim = 11.75
    max_height = 1
    max_velo = 3
    subgoal_bounds = np.array([[-cage_max_dim, cage_max_dim],
                               [-cage_max_dim, cage_max_dim], [0, max_height],
                               [-max_velo, max_velo], [-max_velo, max_velo]])

    # Provide state to subgoal projection function.
    # a = np.concatenate((sim.data.qpos[:2], np.array([4 if sim.data.qvel[i] > 4 else -4 if sim.data.qvel[i] < -4 else sim.data.qvel[i] for i in range(3)])))
    project_state_to_subgoal = lambda sim, state: np.concatenate(
        (sim.data.qpos[:2],
         np.array([1 if sim.data.qpos[2] > 1 else sim.data.qpos[2]]),
         np.array([
             3 if sim.data.qvel[i] > 3 else -3
             if sim.data.qvel[i] < -3 else sim.data.qvel[i] for i in range(2)
         ])))

    # Set subgoal achievement thresholds
    velo_threshold = 0.5
    quat_threshold = 0.5
    # subgoal_thresholds = np.array([len_threshold, len_threshold, height_threshold, quat_threshold, quat_threshold, quat_threshold, quat_threshold, velo_threshold, velo_threshold, velo_threshold])
    subgoal_thresholds = np.array([
        len_threshold, len_threshold, height_threshold, velo_threshold,
        velo_threshold
    ])

    # To properly visualize goals, update "display_end_goal" and "display_subgoals" methods in "environment.py"
    """
    3. SET MISCELLANEOUS HYPERPARAMETERS

    Below are some other agent hyperparameters that can affect results, including
        a. Subgoal testing percentage
        b. Subgoal penalty
        c. Exploration noise
        d. Replay buffer size
    """

    agent_params = {}

    # Define the percentage of actions for which a subgoal level (i.e., level i > 0) will test its subgoal actions
    agent_params["subgoal_test_perc"] = 0.3

    # Define subgoal penalty for missing a subgoal.  Please note that by default the Q-value target for missed subgoals does not include the Q-value of the next state (i.e., discount rate = 0).  As a result, the Q-value target for a missed subgoal just equals the penalty.  For instance, in this 3-level Ant Reacher implementation, if a level proposes a subgoal and misses it, the Q target value for that action would be -10.  To incorporate the next state in the penalty, go to the "penalize_subgoal" method in the "layer.py" file.
    agent_params["subgoal_penalty"] = -FLAGS.time_scale

    # Set exploration hyperparameters

    # Set the percentage of non-subgoal-testing time during which agents will take random actions (sampled uniformly from the action space)
    agent_params["random_action_perc"] = 0.3

    # Define exploration noise that is added to both subgoal actions and atomic actions.  Noise added is Gaussian N(0, noise_percentage * action_dim_range)
    agent_params["atomic_noise"] = [0.2 for i in range(8)]
    agent_params["subgoal_noise"] = [
        0.2 for i in range(len(subgoal_thresholds))
    ]
    """
    Set number of pre-learning episodes (i.e., number of initial episodes before any updates are made to actor and critic functions).

    We noticed that this buffer period was helpful for ensuring the policy chose actions that could be achieved within $H$ actions.
    This is likely because the agent is able to gather some subgoal penalty transitions.
    """
    agent_params["num_pre_training_episodes"] = 25

    # Define number of episodes of transitions to be stored by each level of the hierarchy
    agent_params["episodes_to_store"] = 500

    # Provide training schedule for agent.  Training by default will alternate between exploration and testing.  The hyperparameter below indicates the number of exploration episodes.  Testing occurs for 100 episodes.  To change the number of testing episodes, go to "run_HAC.py".
    agent_params["num_exploration_episodes"] = 100

    # For other relevant agent hyperparameters, please refer to the "agent.py" and "layer.py" files

    # Ensure environment customizations have been properly entered
    check_validity(model_name, goal_space_train, goal_space_test,
                   end_goal_thresholds, initial_state_space, subgoal_bounds,
                   subgoal_thresholds, max_actions, timesteps_per_action)

    # Instantiate and return agent and environment
    env = Environment(model_name, goal_space_train, goal_space_test,
                      project_state_to_end_goal, end_goal_thresholds,
                      initial_state_space, subgoal_bounds,
                      project_state_to_subgoal, subgoal_thresholds,
                      max_actions, timesteps_per_action, FLAGS.show)

    agent = Agent(FLAGS, env, agent_params)

    return agent, env
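
The exploration comments above say the noise added to subgoal and atomic actions is Gaussian N(0, noise_percentage * action_dim_range). A small illustrative sketch of that formula under the subgoal bounds defined above; the helper name, the clipping back into bounds, and the use of the full bound width as "range" are assumptions, not the repository's agent.py code.

import numpy as np

def add_exploration_noise(action, noise_percentages, bounds):
    """Add N(0, noise_pct * dim_range) noise per dimension, then clip back into bounds (assumed behavior)."""
    dim_ranges = bounds[:, 1] - bounds[:, 0]
    noise = np.random.normal(0.0, np.array(noise_percentages) * dim_ranges)
    return np.clip(action + noise, bounds[:, 0], bounds[:, 1])

# Example with the Ant Reacher subgoal bounds above and subgoal_noise = 0.2 per dimension.
subgoal_bounds = np.array([[-11.75, 11.75], [-11.75, 11.75], [0, 1], [-3, 3], [-3, 3]])
proposed_subgoal = np.zeros(5)
print(add_exploration_noise(proposed_subgoal, [0.2] * 5, subgoal_bounds))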
Code Example #5
def design_agent_and_env(FLAGS):
    """
    1. DESIGN AGENT

    The key hyperparameters for agent construction are

        a. Number of levels in agent hierarchy
        b. Max sequence length in which each policy will specialize
        c. Max number of atomic actions allowed in an episode
        d. Environment timesteps per atomic action

    See Section 3 of this file for other agent hyperparameters that can be configured.
    """

    FLAGS.layers = 2  # Enter number of levels in agent hierarchy

    FLAGS.time_scale = 10  # Enter max sequence length in which each policy will specialize

    # Enter max number of atomic actions.  This will typically be FLAGS.time_scale**(FLAGS.layers).
    # However, in this task, we use a shorter episode length.
    max_actions = FLAGS.time_scale**(FLAGS.layers - 1) * 6
    #max_actions = 5000 / 15

    timesteps_per_action = 15  # Provide the number of time steps per atomic action.
    """
    2. DESIGN ENVIRONMENT

        a. Designer must provide the original UMDP (S,A,T,G,R).  
            - The S,A,T components can be fulfilled by providing the Mujoco model.  
            - The user must separately specify the initial state space.
            - G can be provided by specifying the end goal space.  
            - R, which by default uses a shortest path {-1,0} reward function, can be implemented by specifying two components:
                (i) a function that maps the state space to the end goal space and 
                (ii) the end goal achievement thresholds for each dimension of the end goal.

        b.  In order to convert the original UMDP into a hierarchy of k UMDPs, the designer must also provide 
            - The subgoal action space, A_i, for all higher-level UMDPs i > 0
            - R_i for levels 0 <= i < k-1 (i.e., all levels that try to achieve goals in the subgoal space).
              As in the original UMDP, R_i can be implemented by providing two components:
                (i) a function that maps the state space to the subgoal space and 
                (ii) the subgoal achievement thresholds. 

        c.  Designer should also provide subgoal and end goal visualization functions in order to show video of training.
            These can be updated in "display_subgoal" and "display_end_goal" methods in the "environment.py" file.

    """

    # Provide file name of the Mujoco model (e.g., "pendulum.xml").  Make sure the file is stored in the "mujoco_files" folder
    model_name = "assets/fetch/build_tower.xml"

    # Provide initial state space consisting of the ranges for all joint angles and velocities.
    # In the UR5 Reacher task, we use a random initial shoulder position and use fixed values for the remainder.
    #  Initial joint velocities are set to 0.

    initial_joint_pos = np.array(
        [5.96625837e-03, 3.22757851e-03, -1.27944547e-01])

    initial_joint_pos = np.reshape(initial_joint_pos,
                                   (len(initial_joint_pos), 1))

    initial_joint_ranges = np.concatenate(
        (initial_joint_pos, initial_joint_pos), 1)

    initial_joint_ranges[0] = np.array([-np.pi / 8, np.pi / 8])
    # initial_joint_ranges[1] = np.array([-np.pi/4,0])

    initial_state_space = np.concatenate(
        (initial_joint_ranges, np.zeros((len(initial_joint_ranges), 2))), 0)

    # Provide end goal space.
    # The code supports separate train and test end goal spaces, in case the user would like to train on a larger end goal space than is tested.
    # If user needs to make additional customizations to the end goals,
    # the "get_next_goal" method in "environment.py" can be updated.

    # In the UR5 reacher environment, the end goal will be the desired joint positions for the 3 main joints.
    goal_space_train = [[-np.pi, np.pi], [-np.pi / 4, 0],
                        [-np.pi / 4, np.pi / 4]]
    goal_space_test = [[-np.pi, np.pi], [-np.pi / 4, 0],
                       [-np.pi / 4, np.pi / 4]]

    # Provide a function that maps from the state space to the end goal space.
    # This is used to determine whether the agent should be given the sparse reward.
    # It is also used for Hindsight Experience Replay to determine which end goal was achieved after a sequence of actions.

    # Supplementary function that will ensure all angles are between [-2*np.pi,2*np.pi]
    def bound_angle(angle):
        bounded_angle = np.absolute(angle) % (2 * np.pi)
        if angle < 0:
            bounded_angle = -bounded_angle

        return bounded_angle

    #project_state_to_end_goal = lambda sim, state: np.array(
    #    [bound_angle(sim.data.qpos[i]) for i in range(len(sim.data.qpos))])
    # TODO: I do not limit any angles atm, let's see if this works
    project_state_to_end_goal = lambda sim, state: sim.data.qpos

    # Set end goal achievement thresholds.
    # If the agent is within the threshold for each dimension, the end goal has been achieved and the reward of 0 is granted.
    angle_threshold = np.deg2rad(10)
    end_goal_thresholds = np.array(
        [angle_threshold, angle_threshold, angle_threshold])

    # Provide range for each dimension of subgoal space in order to configure subgoal actor networks.
    # Subgoal space can be the same as the state space or some other projection out of the state space.
    # In our implementation of the UR5 reacher task, the subgoal space is the state space,
    # which is the concatenation of all joint positions and joint velocities.

    subgoal_bounds = np.array([[-2 * np.pi,
                                2 * np.pi], [-2 * np.pi, 2 * np.pi],
                               [-2 * np.pi, 2 * np.pi], [-4, 4], [-4, 4],
                               [-4, 4]])

    # Provide state to subgoal projection function.
    #project_state_to_subgoal = lambda sim, state: np.concatenate((np.array(
    #    [bound_angle(sim.data.qpos[i]) for i in range(len(sim.data.qpos))]), np.array(
    #    [4 if sim.data.qvel[i] > 4 else -4 if sim.data.qvel[i] < -4 else sim.data.qvel[i] for i in
    #     range(len(sim.data.qvel))])))
    project_state_to_subgoal = lambda sim, state: sim.data.qpos

    # Set subgoal achievement thresholds
    velo_threshold = 2
    subgoal_thresholds = np.concatenate(
        (np.array([angle_threshold for i in range(3)]),
         np.array([velo_threshold for i in range(3)])))

    # To properly visualize goals, update "display_end_goal" and "display_subgoals" methods in "environment.py"
    """
    3. SET MISCELLANEOUS HYPERPARAMETERS

    Below are some other agent hyperparameters that can affect results, including
        a. Subgoal testing percentage
        b. Subgoal penalty
        c. Exploration noise
        d. Replay buffer size
    """

    agent_params = {}

    # Define the percentage of actions for which a subgoal level (i.e., level i > 0) will test its subgoal actions
    agent_params["subgoal_test_perc"] = 0.3

    # Define subgoal penalty for missing a subgoal.
    # Please note that by default the Q-value target for missed subgoals does not include the Q-value of the next state
    #  (i.e., discount rate = 0).  As a result, the Q-value target for a missed subgoal just equals the penalty.
    #  For instance, in this 2-level implementation, if a level proposes a subgoal and misses it,
    #  the Q target value for that action would be -10.
    #  To incorporate the next state in the penalty, go to the "penalize_subgoal" method in the "layer.py" file.
    agent_params["subgoal_penalty"] = -FLAGS.time_scale

    # Define exploration noise that is added to both subgoal actions and atomic actions.
    # Noise added is Gaussian N(0, noise_percentage * action_dim_range)
    agent_params["atomic_noise"] = [0.1 for i in range(4)]
    agent_params["subgoal_noise"] = [0.03 for i in range(3)]

    # Define number of episodes of transitions to be stored by each level of the hierarchy
    agent_params["episodes_to_store"] = 500

    # Provide training schedule for agent.  Training by default will alternate between exploration and testing.
    # The hyperparameter below indicates the number of exploration episodes.
    # Testing occurs for 100 episodes.  To change the number of testing episodes, go to "run_HAC.py".
    agent_params["num_exploration_episodes"] = 50

    # For other relevant agent hyperparameters, please refer to the "agent.py" and "layer.py" files

    # Ensure environment customizations have been properly entered
    check_validity(model_name, goal_space_train, goal_space_test,
                   end_goal_thresholds, initial_state_space, subgoal_bounds,
                   subgoal_thresholds, max_actions, timesteps_per_action)

    # Instantiate and return agent and environment
    env = Environment(model_name, goal_space_train, goal_space_test,
                      project_state_to_end_goal, end_goal_thresholds,
                      initial_state_space, subgoal_bounds,
                      project_state_to_subgoal, subgoal_thresholds,
                      max_actions, timesteps_per_action, FLAGS.show)

    agent_params = env.get_agent_params(agent_params)

    agent = Agent(FLAGS, env, agent_params)

    return agent, env
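
The subgoal-penalty comments in the examples above state that a missed subgoal is penalized with a Q-value target equal to the penalty itself, because the discount rate for that transition is set to 0. Below is a toy sketch of that target computation; the function and its arguments are illustrative and are not the repository's "penalize_subgoal" method in layer.py.

def q_target(reward, next_q, gamma, missed_subgoal, subgoal_penalty):
    """Illustrative Q-value target: a missed subgoal uses the penalty with an effective gamma of 0."""
    if missed_subgoal:
        return subgoal_penalty          # e.g., -FLAGS.time_scale = -10 in the examples above
    return reward + gamma * next_q      # ordinary bootstrapped target

print(q_target(reward=-1, next_q=-4.2, gamma=0.98, missed_subgoal=False, subgoal_penalty=-10))
print(q_target(reward=-1, next_q=-4.2, gamma=0.98, missed_subgoal=True, subgoal_penalty=-10))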