import os
from argparse import Namespace

import torch

# get_env, get_feature_extractor and get_algorithm are project-local
# factories that build each component from its config sub-namespace.


def run(args: Namespace) -> None:
    env = get_env(**args.env.__dict__)
    featurizer = get_feature_extractor(env, **args.features.__dict__)
    algorithm = get_algorithm(env, featurizer, args.gamma,
                              **args.algorithm.__dict__)
    results = algorithm.run(**args.train.__dict__)
    # Serialize the training results to <out_dir>/results.pkl.
    filename = os.path.join(args.out_dir, "results.pkl")
    torch.save(results, filename)
def run(args: Namespace) -> None: """Works just with random walk for now""" env = get_env(**args.env.__dict__) featurizer = get_feature_extractor(env, **args.features.__dict__) # -------------------- if isinstance(env, DiscreteEnvironment): policy = np.zeros((env.states_no, env.actions_no)) policy[:, :] = 1. / env.actions_no inits, dynamics, rewards = env.get_mdp() values = policy_evaluation_1(policy, dynamics, rewards, gamma=args.gamma) targets = values[:env.nonterminal_states_no] # Drop terminal states state_dist = stationary_state_distribution(policy, inits, dynamics) state_dist = state_dist[:env.nonterminal_states_no] state_dist /= np.sum(state_dist) else: targets, state_dist = None, None policy = RandomWalk(env) algorithm = get_algorithm(env, featurizer, args.gamma, **args.algorithm.__dict__) kwargs = {"targets": targets, "state_dist": state_dist, "policy": policy} kwargs.update(**args.train.__dict__) results, visits = algorithm.run(**kwargs) if isinstance(env, DiscreteEnvironment): values = algorithm._predict( np.array([featurizer(s) for s in range(env.nonterminal_states_no)])) else: values = None filename = os.path.join(args.out_dir, "results.pkl") torch.save({ "results": results, "visits": visits, "values": values }, filename)
def __init__(self, agent_path: Path, config: ConfigManager):
    """Creates an actor-critic agent that uses FFNNs to represent both
    the actor and the critic.

    Args:
        agent_path: The output folder for the model files
        config: The configurations for this agent
    """
    self.env: Environment = get_env(config.agent_config.env)()
    self.agent_path = agent_path
    self.config = config
    self.models_path = Path(agent_path, "models")
    self.periodic_test = False
    critic_constructor = critic_feed_forward_model_constructor(
        self.env.state_space_n)
    self.critic = critic_constructor(self.config.critic_config)
    actor_constructor = feed_forward_discrete_policy_constructor(
        self.env.state_space_n, self.env.action_space_n)
    self.actor = actor_constructor(self.config.actor_config)
    self.ckpts_manager = CheckpointsManager(self.models_path, self.actor,
                                            self.critic)
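
# Usage sketch (not from the source): the enclosing class name is not
# shown above, so ActorCriticAgent, the config path, and how ConfigManager
# is constructed are all hypothetical.
config = ConfigManager("configs/cartpole.yaml")
agent = ActorCriticAgent(Path("artifacts/cartpole"), config)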
"""An Azure Python Pulumi program""" import pulumi from pulumi_azure import core, storage from environments import get_env from data_factory import provission_data_factory def get_resource_group(): env = get_env() return core.ResourceGroup('resource_group_{env}'.format(env=env)) env = get_env() resource_group = get_resource_group() account = storage.Account('storage{env}'.format(env=env), # The location for the storage account will be derived automatically from the resource group. resource_group_name=resource_group.name, account_tier='Standard', account_replication_type='LRS') factory = provission_data_factory(resource_group) pulumi.export('connection_string', account.primary_connection_string)
import json
import os
from datetime import datetime

import torch
from torch.utils.tensorboard import SummaryWriter  # assumed SummaryWriter source

# `args` (parsed command-line arguments) and `device` (a torch.device) are
# assumed to be defined earlier in the script.

torch.backends.cudnn.benchmark = True

# Set up tracking variables, the output directory, and the logger
all_logging_dict = {}
num_episodes = 0
best_avg_reward = 0
rewards = []
dt_string = datetime.now().strftime("%d%m%Y%H%M%S")
dir_name = "runs/eval_env_{}_agent_{}_memory_{}_{}".format(
    args.env_name, args.agent_type, args.memory_type, dt_string)
logger = SummaryWriter(log_dir=dir_name)
with open(os.path.join(dir_name, 'command_line_args.txt'), 'w') as f:
    json.dump(args.__dict__, f, indent=2)

# Prepare environment
env = environments.get_env(args.env_name)

# Prepare memory module
memory_module = memory.get_module(args.memory_type, args)

# Prepare agent
agent = agents.get_agent(args.agent_type, env, memory_module, dir_name,
                         device, args)

# Load saved model
agent.load_model(args.model_path)

# Iterate through episodes
for episode in range(args.num_episodes):
    # Run an episode and record its total reward
    rewards.append(run_episode(env, agent))
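
# run_episode is referenced above but not defined; a minimal sketch,
# assuming a Gym-style reset()/step() API and an agent exposing act()
# (the real agent interface may differ):
def run_episode(env, agent):
    state = env.reset()
    total_reward, done = 0.0, False
    while not done:
        action = agent.act(state)
        state, reward, done, _ = env.step(action)
        total_reward += float(reward)
    return total_reward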
from pulumi_azure import datafactory

from environments import get_env


def provission_data_factory(resource_group):
    env = get_env()
    # Return the resource so callers (see the main program above) can keep
    # a reference to it.
    return datafactory.Factory(
        'factory{env}'.format(env=env),
        location=resource_group.location,
        resource_group_name=resource_group.name)
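
# Usage sketch: with the factory returned, the main program can export one
# of its outputs (the export name here is hypothetical).
factory = provission_data_factory(resource_group)
pulumi.export('data_factory_id', factory.id)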