Ejemplo n.º 1
0
class ModelCell(nn.Module):
    """One-step recurrent cell combining an LSTM controller with a memory.

    Each call to :meth:`forward` feeds the current input together with the
    previously read memory vector to the controller, derives addressing
    parameters from the controller output, reads from the memory, writes the
    input to the memory and finally produces an output from the controller
    output and the freshly read vector.

    The recurrent quantities (controller state, read weights ``w`` and read
    vector ``r``) live in ``self.state`` and are mutated by every call.
    """

    def __init__(self, params):
        """Create the memory, controller, state holder and projection layers.

        Args:
            params: Configuration object. The attributes read here are
                ``memory_n``, ``memory_m``, ``sequence_width``,
                ``controller_size`` and ``controller_layers``.
        """
        super(ModelCell, self).__init__()

        # Use the GPU when one is visible; the module is moved there at the
        # end of construction, once every sub-module has been registered.
        self.device = torch.device(
            "cuda" if torch.cuda.is_available() else "cpu")

        # set params
        self.params = params

        # create memory
        self.memory = ROM(params.memory_n, params.memory_m)

        # create controller
        # Its input is the step input concatenated with the last read vector
        # (see forward), hence sequence_width + 1 + M input features.
        self.controller = LSTMController(
            params.sequence_width + 1 + self.memory.M, params.controller_size,
            params.controller_layers)

        # create state
        self.state = State(self.memory, self.controller)

        # create FC layer for addressing using controller output
        # Column widths of the five addressing parameters that forward splits
        # the fc1 output into (k, beta, g, s, gamma).
        # NOTE(review): presumably key, key strength, gate, shift and
        # sharpening as in NTM addressing -- confirm against ROM.address.
        self.addressing_params_sizes = [self.memory.M, 1, 1, 3, 1]
        self.fc1 = nn.Sequential(
            nn.Linear(params.controller_size, sum(
                self.addressing_params_sizes)))  # no sigmoid needed here.

        # create FC layer to make output from controller output and read value
        self.fc2 = nn.Sequential(
            nn.Linear(params.controller_size + self.memory.M,
                      params.sequence_width + 1), nn.Sigmoid())

        self.to(self.device)

    def reset_parameters(self, stdv=1e-1):
        """Re-draw every parameter (weights and biases) from N(0, stdv).

        Args:
            stdv: Standard deviation of the normal distribution.
        """
        # In-place initialisation under no_grad instead of going through
        # ``.data``: the sampled values are the same, but autograd keeps
        # tracking the tensor's version, so a reset between a forward and a
        # backward pass is detected rather than silently ignored.
        with torch.no_grad():
            for weight in self.parameters():
                weight.normal_(0, stdv)

    def forward(self, X):
        """Run one step of the cell and update the recurrent state.

        Args:
            X: Input for the current step. It is concatenated with the
                previous read vector along dim 1.
                NOTE(review): presumably shaped
                (batch, sequence_width + 1) -- confirm against the caller.

        Returns:
            The output of ``fc2`` (sigmoid-activated, ``sequence_width + 1``
            output features) computed from the controller output and the
            read vector of this step.
        """
        # Controller sees the input plus what was read on the previous step.
        cout, self.state.controllerstate.state = self.controller(
            torch.cat([X, self.state.readstate.r], dim=1),
            self.state.controllerstate.state)

        # Project the controller output and split it into the five
        # addressing parameters.
        address_params = self.fc1(cout)
        k, beta, g, s, gamma = _split_cols(address_params,
                                           self.addressing_params_sizes)

        # New read weights depend on the previous ones, which are passed in.
        self.state.readstate.w = self.memory.address(k, beta, g, s, gamma,
                                                     self.state.readstate.w)
        self.state.readstate.r = self.memory.read(self.state.readstate.w)

        # The read above is performed before the current input is written.
        self.memory.write(X)
        outp = self.fc2(torch.cat([cout, self.state.readstate.r], dim=1))

        return outp
Ejemplo n.º 2
0
class ModelCell(nn.Module):
    """One-step recurrent cell driving a variational model from a memory.

    On every step the controller consumes the stored latent state, its
    output is projected to addressing parameters, a vector is read from the
    memory and handed to the variational model, and the latent state returned
    by that model is written back to the memory.

    The recurrent quantities (controller state, latent state, read weights
    ``w`` and read vector ``r``) live in ``self.state`` and are mutated by
    :meth:`forward` and :meth:`generate`.
    """

    def __init__(self, params):
        """Create the memory, controller, state holder and variational model.

        Args:
            params: Configuration object. The attributes read here are
                ``memory_n``, ``memory_m``, ``controller_size``,
                ``controller_layers``, ``sequence_width``,
                ``variational_hidden_size`` and ``variationalmodel`` (a
                callable that builds the variational model).
        """
        super(ModelCell, self).__init__()

        # Use the GPU when one is visible; the module is moved there at the
        # end of construction, once every sub-module has been registered.
        self.device = torch.device(
            "cuda" if torch.cuda.is_available() else "cpu")

        # set params
        self.params = params

        # create memory
        self.memory = ROM(params.memory_n, params.memory_m)

        # create controller
        # Its input is the latent state alone (see _address_and_read), so the
        # input size is the memory width M.
        self.controller = LSTMController(self.memory.M, params.controller_size,
                                         params.controller_layers)

        # create state
        self.state = State(self.memory, self.controller)

        # create variational model
        self.vmodel = params.variationalmodel(params.sequence_width,
                                              params.variational_hidden_size,
                                              params.memory_m, params.memory_m)

        # create FC layer for addressing using controller output
        # Column widths of the five addressing parameters that the fc1 output
        # is split into (k, beta, g, s, gamma).
        # NOTE(review): presumably key, key strength, gate, shift and
        # sharpening as in NTM addressing -- confirm against ROM.address.
        self.addressing_params_sizes = [self.memory.M, 1, 1, 3, 1]
        self.fc1 = nn.Linear(params.controller_size,
                             sum(self.addressing_params_sizes))

        self.to(self.device)

    def reset_parameters(self, stdv=1e-1):
        """Re-draw every parameter (weights and biases) from N(0, stdv).

        Args:
            stdv: Standard deviation of the normal distribution.
        """
        # In-place initialisation under no_grad instead of going through
        # ``.data``: the sampled values are the same, but autograd keeps
        # tracking the tensor's version, so a reset between a forward and a
        # backward pass is detected rather than silently ignored.
        with torch.no_grad():
            for weight in self.parameters():
                weight.normal_(0, stdv)

    def _address_and_read(self):
        """Step the controller, then refresh the read weights and read vector.

        Shared first half of forward and generate. Updates
        ``state.controllerstate.state``, ``state.readstate.w`` and
        ``state.readstate.r`` in place and returns nothing.
        """
        cout, self.state.controllerstate.state = self.controller(
            self.state.latentstate.state, self.state.controllerstate.state)

        # Project the controller output and split it into the five
        # addressing parameters.
        address_params = self.fc1(cout)
        k, beta, g, s, gamma = _split_cols(address_params,
                                           self.addressing_params_sizes)

        # New read weights depend on the previous ones, which are passed in.
        self.state.readstate.w = self.memory.address(k, beta, g, s, gamma,
                                                     self.state.readstate.w)
        self.state.readstate.r = self.memory.read(self.state.readstate.w)

    def forward(self, X, batch_size):
        """Run one step on an observed input and update the recurrent state.

        Args:
            X: Observed input for the current step; passed to the variational
                model together with the read vector.
            batch_size: Forwarded unchanged to the variational model.

        Returns:
            A tuple ``(_elbo, X_gen_mean)`` taken from the variational
            model's return value.
            NOTE(review): names suggest an ELBO term and the mean of the
            generated X -- confirm against the variational model.
        """
        self._address_and_read()
        self.state.latentstate.state, X_gen_mean, _elbo = self.vmodel(
            self.state.readstate.r, X, batch_size)
        # The new latent state is stored after it has been computed from the
        # vector read earlier in this step.
        self.memory.write(self.state.latentstate.state)

        return _elbo, X_gen_mean

    def generate(self, batch_size):
        """Run one step without an observed input and update the state.

        Same sequence as forward, except that the latent state and output
        come from ``vmodel.sample_x_mean`` instead of calling the model on X.

        Args:
            batch_size: Forwarded unchanged to ``vmodel.sample_x_mean``.

        Returns:
            ``X_gen_mean`` as returned by ``vmodel.sample_x_mean``.
        """
        self._address_and_read()
        self.state.latentstate.state, X_gen_mean = self.vmodel.sample_x_mean(
            self.state.readstate.r, batch_size)
        self.memory.write(self.state.latentstate.state)

        return X_gen_mean