Example #1
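These snippets omit their imports; a plausible set covering all three examples, assuming Theano and Lasagne, would be:

import numpy as np
import theano
import theano.tensor as T
import lasagne
from lasagne.layers import InputLayer, GRULayer
from lasagne.updates import adam
from theano.tensor.shared_randomstreams import RandomStreams

The commented-out layers would additionally need DenseLayer, ReshapeLayer, and the rectify nonlinearity from lasagne.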
class PRAE:
    def __init__(self, num_batch, max_len, n_features, hidden=[200, 200], **kwargs):
        self.num_batch = num_batch
        self.n_features = n_features
        self.max_len = max_len
        self.hidden = hidden
        rng = np.random.RandomState(123)
        self.drng = rng
        self.rng = RandomStreams(rng.randint(2 ** 30))

        # params
        initial_W = np.asarray(
            rng.uniform(
                    low=1e-5,
                    high=1,
                    size=(self.hidden[1], self.n_features)
            ),
            dtype=theano.config.floatX
        )

        self.W_y_theta = theano.shared(value=initial_W, name='W_y_theta', borrow=True)
        # # self.W_y_kappa = theano.shared(value=initial_W, name='W_y_kappa', borrow=True)
        self.b_y_theta = theano.shared(
                value=np.zeros(
                    self.n_features,
                    dtype=theano.config.floatX
                ),
                borrow=True
            )
        # self.b_y_kappa = theano.shared(
        #         value=np.zeros(
        #             self.n_features,
        #             dtype=theano.config.floatX
        #         ),
        #         name='b',
        #         borrow=True
        #     )


        # I could directly create the model here since it is fixed
        self.l_in = InputLayer(shape=(self.num_batch, self.max_len, self.n_features))
        self.mask_input = InputLayer(shape=(self.num_batch, self.max_len))
        first_hidden = GRULayer(self.l_in, mask_input=self.mask_input, num_units=hidden[0])
        self.model = GRULayer(first_hidden, num_units=hidden[1])
        # need some reshape voodoo
        # l_shp = ReshapeLayer(second_hidden, (-1, hidden[1]))
        # after the reshape I have batch*max_len X features
        # self.model = DenseLayer(l_shp, num_units=self.n_features, nonlinearity=rectify)
        # if I add a dense layer here it will collect the outputs over all time steps, which is what I want;
        # I will probably have to fix the dimensions later
        # For every gaussian in the sum I need 3 values plus a value for the total scale
        # the output of this layer will be (num_batch, num_units, max_len) TODO check size



    def get_output_shape_for(self):
        # the final GRU layer outputs (num_batch, max_len, hidden[1])
        return lasagne.layers.get_output_shape(self.model)

    def get_output_y(self, output):
        # (batch, time, hidden) X (hidden, features) + (features,) => (batch, time, features)
        theta_out = T.nnet.relu(T.dot(output, self.W_y_theta) + self.b_y_theta)
        #kappa_out = T.nnet.relu(T.dot(output, self.W_y_kappa) + self.b_y_kappa)
        return theta_out

    def get_log_x(self, x, theta_out):
        # DIM = (batch, time, features)
        # everything is elementwise
        log_x = T.log(theta_out + 1e-8) - theta_out * x
        log_x = log_x.sum(axis=2, dtype=theano.config.floatX)  # sum over features since I assume they are independent
        return log_x

    def build_model(self, train_x, train_mask_x, train_mask_out, train_target,
                    test_x, test_mask_x, test_mask_out, test_target):
        self.train_x = train_x
        self.train_mask_x = train_mask_x
        self.train_mask_out = train_mask_out
        self.train_target = train_target
        self.test_x = test_x
        self.test_mask_x = test_mask_x
        self.test_mask_out = test_mask_out
        self.test_target = test_target
        self.index = T.iscalar('index')
        self.num_batch_test = T.iscalar('num_batch_test')
        self.b_slice = slice(self.index * self.num_batch, (self.index + 1) * self.num_batch)

        sym_x = T.dtensor3()
        sym_mask_x = T.dmatrix()
        sym_target = T.dtensor3()
        # sym_mask_out = T.dtensor3() should not be needed since the output is still zero
        # TODO: check whether this is actually true

        output = lasagne.layers.get_output(self.model, inputs={self.l_in: sym_x, self.mask_input: sym_mask_x})
        theta = self.get_output_y(output)
        log_px = self.get_log_x(sym_target, theta)
        log_px_sum_time = log_px.sum(axis=1, dtype=theano.config.floatX)  # sum over time
        loss = - T.sum(log_px_sum_time) / self.num_batch # average over batch
        ##
        log_px_test = self.get_log_x(sym_target, theta)
        log_px_sum_time_test = log_px_test.sum(axis=1, dtype=theano.config.floatX) # sum over time
        loss_test = - T.sum(log_px_sum_time_test) / self.num_batch_test  # average over batch
        # loss = T.mean(lasagne.objectives.squared_error(mu, sym_target))
        all_params = [self.W_y_theta] + [self.b_y_theta] + lasagne.layers.get_all_params(self.model)
        all_grads_target = [T.clip(g, -3, 3) for g in T.grad(loss, all_params)]
        all_grads_target = lasagne.updates.total_norm_constraint(all_grads_target, 3)
        updates_target = adam(all_grads_target, all_params)

        train_model = theano.function([self.index],
                                      [loss, theta, log_px],
                                      givens={sym_x: self.train_x[self.b_slice],
                                              sym_mask_x: self.train_mask_x[self.b_slice],
                                              sym_target: self.train_target[self.b_slice]},
                                      updates=updates_target)
        test_model = theano.function([self.num_batch_test],
                                     [loss_test, theta],
                                     givens={sym_x: self.test_x,
                                             sym_mask_x: self.test_mask_x,
                                             sym_target: self.test_target})

        return train_model, test_model
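
A minimal usage sketch for the class above (the data shapes, array values, and training loop are assumptions, not part of the original snippet); the arrays are wrapped in theano.shared, using float64 to match the T.dtensor3/T.dmatrix symbolic variables, so the givens slicing in build_model works:

def share(a):
    # hypothetical helper: wrap an array as a Theano shared variable (float64 to match dtensor3/dmatrix)
    return theano.shared(np.asarray(a, dtype='float64'), borrow=True)

num_batch, max_len, n_features = 32, 50, 10   # assumed sizes
train_x = share(np.random.rand(320, max_len, n_features))
train_mask = share(np.ones((320, max_len)))
train_target = share(np.random.rand(320, max_len, n_features))
test_x = share(np.random.rand(64, max_len, n_features))
test_mask = share(np.ones((64, max_len)))
test_target = share(np.random.rand(64, max_len, n_features))

prae = PRAE(num_batch, max_len, n_features)
# the mask_out arguments are stored but never used, so None is passed here
train_fn, test_fn = prae.build_model(train_x, train_mask, None, train_target,
                                     test_x, test_mask, None, test_target)

for epoch in range(5):
    for i in range(320 // num_batch):       # i picks the mini-batch through b_slice
        loss, theta, log_px = train_fn(i)
    test_loss, test_theta = test_fn(64)     # the test-set size is used only for averaging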
Example #2
class PRAE:
    def __init__(self,
                 num_batch,
                 max_len,
                 n_features,
                 hidden=[200, 200],
                 **kwargs):
        self.num_batch = num_batch
        self.n_features = n_features
        self.max_len = max_len
        self.hidden = hidden
        rng = np.random.RandomState(123)
        self.drng = rng
        self.rng = RandomStreams(rng.randint(2**30))

        # params
        initial_W = np.asarray(rng.uniform(low=1e-5,
                                           high=1,
                                           size=(self.hidden[1],
                                                 self.n_features)),
                               dtype=theano.config.floatX)

        self.W_y_theta = theano.shared(value=initial_W,
                                       name='W_y_theta',
                                       borrow=True)
        # # self.W_y_kappa = theano.shared(value=initial_W, name='W_y_kappa', borrow=True)
        self.b_y_theta = theano.shared(value=np.zeros(
            self.n_features, dtype=theano.config.floatX),
                                       borrow=True)
        # self.b_y_kappa = theano.shared(
        #         value=np.zeros(
        #             self.n_features,
        #             dtype=theano.config.floatX
        #         ),
        #         name='b',
        #         borrow=True
        #     )

        # I could directly create the model here since it is fixed
        self.l_in = InputLayer(shape=(self.num_batch, self.max_len,
                                      self.n_features))
        self.mask_input = InputLayer(shape=(self.num_batch, self.max_len))
        first_hidden = GRULayer(self.l_in,
                                mask_input=self.mask_input,
                                num_units=hidden[0])
        self.model = GRULayer(first_hidden, num_units=hidden[1])
        # need some reshape voodoo
        # l_shp = ReshapeLayer(second_hidden, (-1, hidden[1]))
        # after the reshape I have batch*max_len X features
        # self.model = DenseLayer(l_shp, num_units=self.n_features, nonlinearity=rectify)
        # if I add a dense layer here it will collect the outputs over all time steps, which is what I want;
        # I will probably have to fix the dimensions later
        # For every gaussian in the sum I need 3 values plus a value for the total scale
        # the output of this layer will be (num_batch, num_units, max_len) TODO check size

    def get_output_shape_for(self):
        # the final GRU layer outputs (num_batch, max_len, hidden[1])
        return lasagne.layers.get_output_shape(self.model)

    def get_output_y(self, output):
        # (batch, time, hidden) X (hidden, features) + (features,) => (batch, time, features)
        theta_out = T.nnet.relu(T.dot(output, self.W_y_theta) + self.b_y_theta)
        #kappa_out = T.nnet.relu(T.dot(output, self.W_y_kappa) + self.b_y_kappa)
        return theta_out

    def get_log_x(self, x, theta_out):
        # DIM = (batch, time, features)
        # everything is elementwise
        log_x = T.log(theta_out + 1e-8) - theta_out * x
        log_x = log_x.sum(axis=2, dtype=theano.config.floatX
                          )  # sum over features since I assume they are independent
        return log_x

    def build_model(self, train_x, train_mask_x, train_mask_out, train_target,
                    test_x, test_mask_x, test_mask_out, test_target):
        self.train_x = train_x
        self.train_mask_x = train_mask_x
        self.train_mask_out = train_mask_out
        self.train_target = train_target
        self.test_x = test_x
        self.test_mask_x = test_mask_x
        self.test_mask_out = test_mask_out
        self.test_target = test_target
        self.index = T.iscalar('index')
        self.num_batch_test = T.iscalar('num_batch_test')
        self.b_slice = slice(self.index * self.num_batch,
                             (self.index + 1) * self.num_batch)

        sym_x = T.dtensor3()
        sym_mask_x = T.dmatrix()
        sym_target = T.dtensor3()
        # sym_mask_out = T.dtensor3() should not be needed since the output is still zero
        # TODO: check whether this is actually true

        output = lasagne.layers.get_output(self.model,
                                           inputs={
                                               self.l_in: sym_x,
                                               self.mask_input: sym_mask_x
                                           })
        theta = self.get_output_y(output)
        log_px = self.get_log_x(sym_target, theta)
        log_px_sum_time = log_px.sum(axis=1,
                                     dtype=theano.config.floatX)  # sum over time
        loss = -T.sum(log_px_sum_time) / self.num_batch  # average over batch
        ##
        log_px_test = self.get_log_x(sym_target, theta)
        log_px_sum_time_test = log_px_test.sum(
            axis=1, dtype=theano.config.floatX)  # sum over time
        loss_test = -T.sum(
            log_px_sum_time_test) / self.num_batch_test  # average over batch
        # loss = T.mean(lasagne.objectives.squared_error(mu, sym_target))
        all_params = [self.W_y_theta] + [
            self.b_y_theta
        ] + lasagne.layers.get_all_params(self.model)
        all_grads_target = [T.clip(g, -3, 3) for g in T.grad(loss, all_params)]
        all_grads_target = lasagne.updates.total_norm_constraint(
            all_grads_target, 3)
        updates_target = adam(all_grads_target, all_params)

        train_model = theano.function(
            [self.index], [loss, theta, log_px],
            givens={
                sym_x: self.train_x[self.b_slice],
                sym_mask_x: self.train_mask_x[self.b_slice],
                sym_target: self.train_target[self.b_slice]
            },
            updates=updates_target)
        test_model = theano.function(
            [self.num_batch_test], [loss_test, theta],
            givens={
                sym_x: self.test_x,
                sym_mask_x: self.test_mask_x,
                sym_target: self.test_target
            })

        return train_model, test_model
Example #3
class PRAE:
    def __init__(self, num_batch, max_len, n_features, hidden=[200, 200], **kwargs):
        self.num_batch = num_batch
        self.n_features = n_features
        self.max_len = max_len
        self.hidden = hidden
        rng = np.random.RandomState(123)
        self.drng = rng
        self.rng = RandomStreams(rng.randint(2 ** 30))

        # params
        initial_W = np.asarray(
            rng.uniform(
                    low=-4 * np.sqrt(6. / (self.hidden[1] + self.n_features)),
                    high=4 * np.sqrt(6. / (self.hidden[1] + self.n_features)),
                    size=(self.hidden[1], self.n_features)
            ),
            dtype=theano.config.floatX
        )

        self.W = theano.shared(value=initial_W, name='W', borrow=True)
        # # self.W_y_kappa = theano.shared(value=initial_W, name='W_y_kappa', borrow=True)
        self.b = theano.shared(
                value=np.zeros(
                    self.n_features,
                    dtype=theano.config.floatX
                ),
                borrow=True
            )
        # self.b_y_kappa = theano.shared(
        #         value=np.zeros(
        #             self.n_features,
        #             dtype=theano.config.floatX
        #         ),
        #         name='b',
        #         borrow=True
        #     )


        # I could directly create the model here since it is fixed
        self.l_in = InputLayer(shape=(None, self.max_len, self.n_features))
        self.mask_input = InputLayer(shape=(None, self.max_len))
        first_hidden = GRULayer(self.l_in, mask_input=self.mask_input, num_units=hidden[0])
        # l_shp = ReshapeLayer(first_hidden, (-1, hidden[0]))
        # l_dense = DenseLayer(l_shp, num_units=self.hidden[0], nonlinearity=rectify)
        # l_drop = DropoutLayer(l_dense, p=0.5)
        # l_shp = ReshapeLayer(l_drop, (-1, self.max_len, self.hidden[0]))
        self.model = GRULayer(first_hidden, num_units=hidden[1])
        # self.model = ConcatLayer([first_hidden, second_hidden], axis=2)
        # l_shp = ReshapeLayer(second_hidden, (-1, hidden[1]))
        # l_dense = DenseLayer(l_shp, num_units=self.n_features, nonlinearity=rectify)
        # To reshape back to our original shape, we can use the symbolic shape
        # variables we retrieved above.
        #self.model = ReshapeLayer(l_dense, (-1, self.max_len, self.n_features))
        # if I add a dense layer here it will collect the outputs over all time steps, which is what I want;
        # I will probably have to fix the dimensions later
        # For every gaussian in the sum I need 3 values plus a value for the total scale
        # the output of this layer will be (num_batch, num_units, max_len) TODO check size

    def get_output_shape_for(self):
        # the final GRU layer outputs (None, max_len, hidden[1])
        return lasagne.layers.get_output_shape(self.model)

    def get_output_y(self, x):
        return T.nnet.relu(T.dot(x, self.W) + self.b)


    def build_model(self, train_x, train_mask_x, train_mask_out, train_target,
                    test_x, test_mask_x, test_mask_out, test_target):
        self.train_x = train_x
        self.train_mask_x = train_mask_x
        self.train_mask_out = train_mask_out
        self.train_target = train_target
        self.test_x = test_x
        self.test_mask_x = test_mask_x
        self.test_mask_out = test_mask_out
        self.test_target = test_target
        self.index = T.iscalar('index')
        self.num_batch_test = T.iscalar('num_batch_test')
        self.b_slice = slice(self.index * self.num_batch, (self.index + 1) * self.num_batch)

        sym_x = T.dtensor3()
        sym_mask_x = T.dmatrix()
        sym_target = T.dtensor3()
        sym_mask_out = T.dtensor3()
        # sym_mask_out should not be needed since the output is still zero
        # TODO: check whether this is actually true

        out = lasagne.layers.get_output(self.model, inputs={self.l_in: sym_x, self.mask_input: sym_mask_x})
        out_out = self.get_output_y(out)
        loss = T.mean(lasagne.objectives.squared_error(out_out, sym_target)) / self.num_batch

        out_test = lasagne.layers.get_output(self.model, inputs={self.l_in: sym_x, self.mask_input: sym_mask_x})
        out_out_test = self.get_output_y(out_test)
        loss_test = T.mean(lasagne.objectives.squared_error(out_out_test, sym_target)) / self.num_batch_test

        all_params = [self.W, self.b] + lasagne.layers.get_all_params(self.model)
        all_grads_target = [T.clip(g, -3, 3) for g in T.grad(loss, all_params)]
        all_grads_target = lasagne.updates.total_norm_constraint(all_grads_target, 3)
        updates_target = adam(all_grads_target, all_params)

        train_model = theano.function([self.index],
                                      [loss, out_out],
                                      givens={sym_x: self.train_x[self.b_slice],
                                              sym_mask_x: self.train_mask_x[self.b_slice],
                                              sym_target: self.train_target[self.b_slice],
                                              },
                                      updates=updates_target)
        test_model = theano.function([self.num_batch_test],
                                     [loss_test, out_out_test],
                                     givens={sym_x: self.test_x,
                                             sym_mask_x: self.test_mask_x,
                                             sym_target: self.test_target,
                                             })

        return train_model, test_model