Example 1
import os
from argparse import Namespace

import torch

# get_env, get_feature_extractor, and get_algorithm are project-local helpers.


def run(args: Namespace) -> None:
    # Each sub-namespace of args is unpacked into the matching factory.
    env = get_env(**args.env.__dict__)
    featurizer = get_feature_extractor(env, **args.features.__dict__)
    algorithm = get_algorithm(env, featurizer, args.gamma,
                              **args.algorithm.__dict__)
    results = algorithm.run(**args.train.__dict__)
    filename = os.path.join(args.out_dir, "results.pkl")
    torch.save(results, filename)
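
A minimal sketch of how this run function might be driven, assuming the nested Namespace layout implied by the attribute accesses above; every field name below (name, kind, lr, episodes, ...) is illustrative, not taken from the source:

from argparse import Namespace

# Hypothetical driver: the sub-namespaces mirror the args.env / args.features /
# args.algorithm / args.train accesses in run(); all field names are made up.
args = Namespace(
    gamma=0.99,
    out_dir="out",
    env=Namespace(name="random_walk"),        # forwarded to get_env
    features=Namespace(kind="tile_coding"),   # forwarded to get_feature_extractor
    algorithm=Namespace(name="td0", lr=0.1),  # forwarded to get_algorithm
    train=Namespace(episodes=1000),           # forwarded to algorithm.run
)
run(args)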
Example 2
import os
from argparse import Namespace

import numpy as np
import torch


def run(args: Namespace) -> None:
    """Works just with random walk for now."""

    env = get_env(**args.env.__dict__)
    featurizer = get_feature_extractor(env, **args.features.__dict__)

    # For discrete environments, compute exact value targets and the
    # stationary state distribution so the learned values can be evaluated.

    if isinstance(env, DiscreteEnvironment):
        policy = np.zeros((env.states_no, env.actions_no))
        policy[:, :] = 1. / env.actions_no
        inits, dynamics, rewards = env.get_mdp()

        values = policy_evaluation_1(policy,
                                     dynamics,
                                     rewards,
                                     gamma=args.gamma)
        targets = values[:env.nonterminal_states_no]  # Drop terminal states

        state_dist = stationary_state_distribution(policy, inits, dynamics)
        state_dist = state_dist[:env.nonterminal_states_no]
        state_dist /= np.sum(state_dist)
    else:
        targets, state_dist = None, None

    policy = RandomWalk(env)

    algorithm = get_algorithm(env, featurizer, args.gamma,
                              **args.algorithm.__dict__)
    kwargs = {"targets": targets, "state_dist": state_dist, "policy": policy}
    kwargs.update(**args.train.__dict__)

    results, visits = algorithm.run(**kwargs)

    if isinstance(env, DiscreteEnvironment):
        # Evaluate the learned predictor on every non-terminal state.
        values = algorithm._predict(
            np.array([featurizer(s)
                      for s in range(env.nonterminal_states_no)]))
    else:
        values = None
    filename = os.path.join(args.out_dir, "results.pkl")
    torch.save({
        "results": results,
        "visits": visits,
        "values": values
    }, filename)
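
policy_evaluation_1 and stationary_state_distribution are project-local and not shown; a plausible sketch of both under standard tabular-MDP assumptions (dynamics[s, a, s'] = P(s' | s, a), rewards[s, a]; the shapes, the convergence assumption, and both function bodies are guesses, not the project's actual code):

import numpy as np


def policy_evaluation_sketch(policy, dynamics, rewards, gamma):
    """Exact policy evaluation: solve (I - gamma * P_pi) v = r_pi."""
    p_pi = np.einsum("sa,sat->st", policy, dynamics)  # state-to-state kernel
    r_pi = np.einsum("sa,sa->s", policy, rewards)     # expected reward per state
    return np.linalg.solve(np.eye(len(r_pi)) - gamma * p_pi, r_pi)


def stationary_distribution_sketch(policy, inits, dynamics, iters=10_000):
    """Power iteration on the policy-induced chain; assumes it converges."""
    p_pi = np.einsum("sa,sat->st", policy, dynamics)
    dist = np.asarray(inits, dtype=float)
    for _ in range(iters):
        nxt = dist @ p_pi
        if np.allclose(nxt, dist):
            break
        dist = nxt
    return dist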
Example 3
    def __init__(self, agent_path: Path, config: ConfigManager):
        """Creates an actor critic agent that uses FFNNs to represent both.

        Args:
            agent_path: The output folder for the model files
            config: The configurations for this agent
        """
        self.env: Environment = get_env(config.agent_config.env)()
        self.agent_path = agent_path
        self.config = config
        self.models_path = Path(agent_path, "models")
        self.periodic_test = False

        critic_constructor = critic_feed_forward_model_constructor(
            self.env.state_space_n)
        self.critic = critic_constructor(self.config.critic_config)

        actor_constructor = feed_forward_discrete_policy_constructor(
            self.env.state_space_n, self.env.action_space_n)
        self.actor = actor_constructor(self.config.actor_config)

        self.ckpts_manager = CheckpointsManager(self.models_path, self.actor,
                                                self.critic)
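
Only the constructor is shown; assuming the enclosing class is named something like ActorCriticAgent and that ConfigManager can be loaded from a file (both the class name and the from_file loader below are hypothetical), construction might look like:

from pathlib import Path

# Hypothetical usage; ActorCriticAgent and ConfigManager.from_file are assumptions.
config = ConfigManager.from_file("agent.yaml")
agent = ActorCriticAgent(Path("runs/cartpole"), config)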
Example 4
from pulumi_azure import core
from environments import get_env

def get_resource_group():
    env = get_env()
    return core.ResourceGroup('resource_group_{env}'.format(env=env))
Example 5
"""An Azure Python Pulumi program"""

import pulumi
from pulumi_azure import core, storage
from environments import get_env
from data_factory import provision_data_factory

def get_resource_group():
    env = get_env()
    return core.ResourceGroup('resource_group_{env}'.format(env=env))

env = get_env()
resource_group = get_resource_group()
account = storage.Account('storage{env}'.format(env=env),
                          # The location for the storage account will be derived automatically from the resource group.
                          resource_group_name=resource_group.name,
                          account_tier='Standard',
                          account_replication_type='LRS')

factory = provision_data_factory(resource_group)
pulumi.export('connection_string', account.primary_connection_string)
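
get_env comes from the local environments module and is not shown; one minimal sketch is to derive the environment suffix from the current Pulumi stack (purely an assumption about the project's convention; pulumi.get_stack() itself is a real Pulumi SDK call):

# environments.py (hypothetical sketch): use the Pulumi stack name
# ("dev", "prod", ...) as the environment suffix for resource names.
import pulumi


def get_env() -> str:
    return pulumi.get_stack()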
Example 6
        torch.backends.cudnn.benchmark = True

    # Set up tracking variables, the output directory, and the logger
    all_logging_dict = {}
    num_episodes = 0
    best_avg_reward = 0
    rewards = []
    dt_string = datetime.now().strftime("%d%m%Y%H%M%S")
    dir_name = "runs/eval_env_{}_agent_{}_memory_{}_{}".format(
        args.env_name, args.agent_type, args.memory_type, dt_string)
    logger = SummaryWriter(log_dir=dir_name)
    with open(os.path.join(dir_name, 'command_line_args.txt'), 'w') as f:
        json.dump(args.__dict__, f, indent=2)

    # Prepare environment
    env = environments.get_env(args.env_name)

    # Prepare memory module
    memory_module = memory.get_module(args.memory_type, args)

    # Prepare agent
    agent = agents.get_agent(args.agent_type, env, memory_module, dir_name,
                             device, args)

    # Load saved model
    agent.load_model(args.model_path)

    # Iterate through episodes
    for episode in range(args.num_episodes):
        # Run episode and get reward
        rewards.append(run_episode(env, agent))
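
run_episode is defined elsewhere in the script; a minimal sketch under the classic Gym step API (the agent.act method and the four-tuple return of env.step are assumptions):

def run_episode_sketch(env, agent):
    """Roll out one evaluation episode and return its total reward."""
    obs = env.reset()
    done, total_reward = False, 0.0
    while not done:
        action = agent.act(obs)                      # assumed agent interface
        obs, reward, done, _info = env.step(action)  # classic 4-tuple Gym API
        total_reward += reward
    return total_reward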
Example 7
from pulumi_azure import datafactory
from environments import get_env


def provision_data_factory(resource_group):
    env = get_env()
    # Return the Factory so callers (see Example 5) can use its outputs.
    return datafactory.Factory('factory{env}'.format(env=env),
                               location=resource_group.location,
                               resource_group_name=resource_group.name)