class ValueEestimator(ValueEstimator):
    """State-value estimator: a small MLP critic trained with a shared Adam optimizer.

    NOTE(review): the class name keeps the original (misspelled) public
    identifier so existing callers are unaffected.
    """

    class Model(nn.Module):
        """Single-hidden-layer (128 units) network mapping a state to a scalar value."""

        def __init__(self, observation_space_size):
            super().__init__()
            self.affine = nn.Linear(observation_space_size, 128)
            # Dropout on the hidden layer; p=0.6 mirrors the classic
            # PyTorch actor-critic example this appears to be based on.
            self.dropout = nn.Dropout(p=0.6)
            self.value_head = nn.Linear(128, 1)

        def forward(self, state):
            # Accepts a raw numpy state array and converts it on the fly.
            state = torch.from_numpy(state).float()
            state = F.relu(self.dropout(self.affine(state)))
            return self.value_head(state)

    def __init__(self, observation_space_size):
        self.model = ValueEestimator.Model(observation_space_size)
        self.optimizer = SharedAdam(self.model.parameters(), lr=1e-3)

    def predict(self, state):
        """Return the estimated value of `state` (a numpy array) as a 1-element tensor.

        Fix: call the module itself instead of `.forward()` directly so that
        `nn.Module.__call__` machinery (registered hooks) is honored.
        """
        return self.model(state)

    def update(self, *args):
        """Backpropagate `args[0]` (a scalar loss tensor) and take one optimizer step."""
        loss = args[0]
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
class GlobalPolicyEsitmator:
    """Policy estimator wrapping a policy network trained via SharedAdam."""

    def __init__(self, observation_space_size, action_space_size):
        self.model = Model(observation_space_size, action_space_size)
        self.optimizer = SharedAdam(self.model.parameters(), lr=1e-3)

    def update(self, *args):
        """Run one optimization step for the scalar loss passed as args[0].

        Extra positional arguments beyond the loss are accepted and ignored.
        """
        loss = next(iter(args))
        optimizer = self.optimizer
        optimizer.zero_grad()   # clear stale gradients
        loss.backward()         # populate .grad on model parameters
        optimizer.step()        # apply the shared-Adam update
class GlobalValueEestimator(ValueEstimator):
    """Process-shared value estimator for A3C-style training.

    Both the model parameters and the SharedAdam optimizer state are moved
    into shared memory so worker processes can read and update them.

    NOTE(review): the original source carried duplicated `__init__` fragments
    at an invalid indentation level after this class (a hard IndentationError);
    they repeated bodies already defined here and in `ValueEestimator` and have
    been removed.
    """

    def __init__(self, observation_space_size):
        self.model = ValueModel(observation_space_size)
        self.model.share_memory()  # expose parameters to child processes
        self.optimizer = SharedAdam(self.model.parameters(), lr=1e-3)
        self.optimizer.share_memory()

    @staticmethod
    def ensure_shared_grads(model, shared_model):
        """Copy a worker model's gradients onto the shared model's parameters.

        Fix: the original definition had no `self` parameter, so calling it on
        an instance bound `self` to `model`. Declared as a @staticmethod so
        both instance access and class access pass (model, shared_model)
        through correctly.

        If the shared model already has gradients (another worker got there
        first), they are left untouched.
        """
        for param, shared_param in zip(model.parameters(),
                                       shared_model.parameters()):
            if shared_param.grad is not None:
                return
            shared_param._grad = param.grad
class GlobalPolicyEsitmator:
    """Globally shared policy estimator.

    The policy network and the SharedAdam optimizer state both live in
    shared memory so multiple worker processes can update them.
    """

    def __init__(self, observation_space_size, action_space_size):
        policy_net = PolicyModel(observation_space_size, action_space_size)
        policy_net.share_memory()  # parameters visible across processes
        shared_opt = SharedAdam(policy_net.parameters(), lr=1e-3)
        shared_opt.share_memory()  # optimizer state visible across processes
        self.model = policy_net
        self.optimizer = shared_opt