# --- Example 1 (score: 0) ---
    def _run_inference(self) -> "MultiObserver | None":
        """Run one inference pass configured from the UI text widgets.

        Reads n_inputs / n_experts / n_rollouts from the widget text fields,
        rebuilds Params from the stored sacred config (overriding n_experts
        and task_size), loads the agent checkpoint for the current epoch and
        runs inference into a fresh observer.

        Returns:
            The populated MultiObserver, or None when a widget value cannot
            be parsed as an int (the ValueError is printed, not re-raised).
        """
        try:
            n_inputs = int(self.widget_text_n_inputs.value)
            n_experts = int(self.widget_text_n_experts.value)
            n_rollouts = int(self.widget_text_n_rollouts.value)

            observer = MultiObserver()
            # Start from the experiment's saved config, then apply UI overrides.
            params = Params(**self.state.sacred_reader.config)
            params.n_experts = n_experts
            params.task_size = n_inputs
            # Keep a serializable record of exactly what was used for this run.
            self.state.inference_config = dataclasses.asdict(params)
            agent = create_agent(params)
            # Restore the checkpoint matching the epoch currently selected.
            self.state.sacred_reader.load_model(agent, 'agent',
                                                self.state.epoch)

            rollout_size = n_rollouts  # 15 + params.rollout_size
            run_inference(params, agent, observer, rollout_size)
            return observer

        except ValueError as e:
            # Bad widget input (non-integer text): report and return None.
            print(f'ValueError: {e}')
# --- Example 2 (score: 0) ---
def load_agent(p: Params, file: str):
    """Build an agent from *p* and restore its weights from *file*.

    Returns the agent with the state dict loaded from the given path.
    """
    restored = create_agent(p)
    restored.load_state_dict(torch.load(file))
    return restored
# --- Example 3 (score: 0) ---
def run_inference(p: Params, agent: SearchAgent, observer: MultiObserver, rollout_size: int) -> float:
    """Run a gradient-free rollout of *agent* on a freshly created task.

    Initializes the agent's rollout state, trains for ``rollout_size`` fixed
    steps with zero exp-decay, and returns the resulting error as a float.
    """
    with torch.no_grad():
        loop = LearningLoop()
        eval_task = create_task(p)
        agent.init_rollout(p.batch_size, p.n_experts, SearchAgentInitRolloutParams(True))
        error = loop.train_fixed_steps(agent, eval_task, rollout_size, 0.0,
                                       p.learning_rollout_steps_clip, observer)
        return error.item()
    
def load_agent(p: Params, file: str):
    """Create an agent for *p* and load a saved state dict from *file*."""
    loaded = create_agent(p)
    checkpoint = torch.load(file)
    loaded.load_state_dict(checkpoint)
    return loaded

# Fresh random seed per run; NOTE(review): randint's upper bound is
# inclusive, so the seed range is [0, 2**32] — confirm that is intended.
p = Params(random.randint(0, 2 ** 32))

agent = create_agent(p)
task = create_task(p)
inner_loop = LearningLoop()

# Per-epoch loss history, appended to by the training loop below.
loss = []    


# %%
# for epoch in tqdm(range(1, p.epochs + 1)):
for epoch in range(1, p.epochs + 1):
    observer = None
    agent.optim.zero_grad()
    agent.init_rollout(p.batch_size, p.n_experts, SearchAgentInitRolloutParams(True))
    rollout_size = p.rollout_size
    err = inner_loop.train_fixed_steps(agent, task, rollout_size, p.learning_exp_decay,
                                       p.learning_rollout_steps_clip, None)