Ejemplo n.º 1
0
class TestBidirectional(unittest.TestCase):
    """Check that Bidirectional(SimpleRecurrent) agrees with a plain
    SimpleRecurrent: the first half of its output should match a forward
    run, the second half a run over the reversed sequence."""

    def setUp(self):
        # Bidirectional wrapper: two SimpleRecurrent children cloned from
        # the prototype (dim=3, tanh activation).
        self.bidir = Bidirectional(weights_init=Orthogonal(),
                                   prototype=SimpleRecurrent(
                                       dim=3, activation=Tanh()))
        # Reference network that produces the expected hidden states.
        self.simple = SimpleRecurrent(dim=3, weights_init=Orthogonal(),
                                      activation=Tanh(), seed=1)
        self.bidir.allocate()
        self.simple.initialize()
        # Copy the reference transition weights into both directions so
        # outputs are directly comparable.
        # NOTE(review): uses the old Blocks attribute `params`; newer
        # Blocks releases expose `parameters` instead -- confirm the
        # pinned Blocks version.
        self.bidir.children[0].params[0].set_value(
            self.simple.params[0].get_value())
        self.bidir.children[1].params[0].set_value(
            self.simple.params[0].get_value())
        # 24 sequences (all permutations of 0..3), length 4, broadcast
        # across a feature dimension of 3.
        self.x_val = 0.1 * numpy.asarray(
            list(itertools.permutations(range(4))),
            dtype=floatX)
        self.x_val = (numpy.ones((24, 4, 3), dtype=floatX) *
                      self.x_val[..., None])
        # Mask out the final step for the second half of the batch.
        self.mask_val = numpy.ones((24, 4), dtype=floatX)
        self.mask_val[12:24, 3] = 0

    def test(self):
        x = tensor.tensor3('x')
        mask = tensor.matrix('mask')
        calc_bidir = theano.function([x, mask],
                                     [self.bidir.apply(x, mask=mask)])
        calc_simple = theano.function([x, mask],
                                      [self.simple.apply(x, mask=mask)])
        h_bidir = calc_bidir(self.x_val, self.mask_val)[0]
        h_simple = calc_simple(self.x_val, self.mask_val)[0]
        h_simple_rev = calc_simple(self.x_val[::-1], self.mask_val[::-1])[0]

        # Forward half equals the plain run; backward half equals the
        # plain run on the time-reversed input (re-reversed for comparison).
        assert_allclose(h_simple, h_bidir[..., :3], rtol=1e-04)
        assert_allclose(h_simple_rev, h_bidir[::-1, ...,  3:], rtol=1e-04)
Ejemplo n.º 2
0
def example():
    """Simple recurrent example.

    Taken from: https://github.com/mdda/pycon.sg-2015_deep-learning/blob/master/ipynb/blocks-recurrent-docs.ipynb
    """
    x = tensor.tensor3('x')

    # Identity-initialized transition: the hidden state accumulates inputs.
    rnn = SimpleRecurrent(dim=3, activation=Identity(),
                          weights_init=initialization.Identity())
    rnn.initialize()

    ones_seq = np.ones((3, 1, 3), dtype=theano.config.floatX)

    # Plain application of the RNN to a sequence of ones.
    print(theano.function([x], rnn.apply(x))(ones_seq))

    # Same RNN, but the input first passes through a layer that doubles it.
    doubler = Linear(input_dim=3, output_dim=3,
                     weights_init=initialization.Identity(2),
                     biases_init=initialization.Constant(0))
    doubler.initialize()
    print(theano.function([x], rnn.apply(doubler.apply(x)))(ones_seq))

    # Supply an explicit initial state instead of the default zeros.
    h0 = tensor.matrix('h0')
    with_state = theano.function([x, h0], rnn.apply(inputs=x, states=h0))
    print(with_state(ones_seq,
                     np.ones((1, 3), dtype=theano.config.floatX)))
Ejemplo n.º 3
0
def example():
    """Simple recurrent example.

    Taken from: https://github.com/mdda/pycon.sg-2015_deep-learning/blob/master/ipynb/blocks-recurrent-docs.ipynb
    """
    x = tensor.tensor3('x')
    floatX = theano.config.floatX

    rnn = SimpleRecurrent(dim=3,
                          activation=Identity(),
                          weights_init=initialization.Identity())
    rnn.initialize()

    def compile_and_show(inputs, output, *values):
        """Compile `output` as a function of `inputs`, evaluate on
        `values` and print the result."""
        print(theano.function(inputs, output)(*values))

    x_ones = np.ones((3, 1, 3), dtype=floatX)

    # The RNN applied directly to the input sequence.
    compile_and_show([x], rnn.apply(x), x_ones)

    # The same RNN fed through a linear "doubler" layer first.
    doubler = Linear(input_dim=3,
                     output_dim=3,
                     weights_init=initialization.Identity(2),
                     biases_init=initialization.Constant(0))
    doubler.initialize()
    compile_and_show([x], rnn.apply(doubler.apply(x)), x_ones)

    # Explicit (non-zero) initial state.
    h0 = tensor.matrix('h0')
    compile_and_show([x, h0], rnn.apply(inputs=x, states=h0),
                     x_ones, np.ones((1, 3), dtype=floatX))
Ejemplo n.º 4
0
def example5():
    """Bidirectional + SimpleRecurrent. Adapted from a Blocks unit test."""
    floatX = theano.config.floatX

    bidir = Bidirectional(
        weights_init=Orthogonal(),
        prototype=SimpleRecurrent(dim=3, activation=Tanh()))
    simple = SimpleRecurrent(dim=3, weights_init=Orthogonal(),
                             activation=Tanh(), seed=1)

    bidir.allocate()
    simple.initialize()

    # Share the plain RNN's transition weights with both directions so
    # the two networks compute comparable hidden states.
    shared_w = simple.parameters[0].get_value()
    bidir.children[0].parameters[0].set_value(shared_w)
    bidir.children[1].parameters[0].set_value(shared_w)

    # Symbolic inputs and compiled functions.
    x = tensor.tensor3('x')
    mask = tensor.matrix('mask')
    calc_bidir = theano.function([x, mask], [bidir.apply(x, mask=mask)])
    calc_simple = theano.function([x, mask], [simple.apply(x, mask=mask)])

    # Test data: all 24 permutations of 0..3 as length-4 sequences,
    # broadcast over 3 features; half the batch masks its last step.
    x_val = 0.1 * np.asarray(list(itertools.permutations(range(4))),
                             dtype=floatX)
    x_val = np.ones((24, 4, 3), dtype=floatX) * x_val[..., None]
    mask_val = np.ones((24, 4), dtype=floatX)
    mask_val[12:24, 3] = 0

    h_bidir = calc_bidir(x_val, mask_val)[0]
    h_simple = calc_simple(x_val, mask_val)[0]
    h_simple_rev = calc_simple(x_val[::-1], mask_val[::-1])[0]

    print(h_bidir)
    print(h_simple)
    print(h_simple_rev)
Ejemplo n.º 5
0
class TestSimpleRecurrent(unittest.TestCase):
    """Unit tests comparing SimpleRecurrent against a hand-written NumPy
    reference: one-step application and full iterated application."""

    def setUp(self):
        # dim=3 RNN with every transition weight equal to 2, so the
        # expected values below can use a constant 3x3 weight matrix.
        self.simple = SimpleRecurrent(dim=3,
                                      weights_init=Constant(2),
                                      activation=Tanh())
        self.simple.initialize()

    def test_one_step(self):
        """A single masked step (iterate=False) equals tanh(h0.W + x),
        with masked rows keeping their previous state."""
        h0 = tensor.matrix('h0')
        x = tensor.matrix('x')
        mask = tensor.vector('mask')
        h1 = self.simple.apply(x, h0, mask=mask, iterate=False)
        next_h = theano.function(inputs=[h0, x, mask], outputs=[h1])

        h0_val = 0.1 * numpy.array([[1, 1, 0], [0, 1, 1]],
                                   dtype=theano.config.floatX)
        x_val = 0.1 * numpy.array([[1, 2, 3], [4, 5, 6]],
                                  dtype=theano.config.floatX)
        mask_val = numpy.array([1, 0]).astype(theano.config.floatX)
        # Reference step, then blend with h0 where the mask is 0.
        h1_val = numpy.tanh(h0_val.dot(2 * numpy.ones((3, 3))) + x_val)
        h1_val = mask_val[:, None] * h1_val + (1 - mask_val[:, None]) * h0_val
        assert_allclose(h1_val, next_h(h0_val, x_val, mask_val)[0])

    def test_many_steps(self):
        """Iterated application over 24 steps matches a step-by-step
        NumPy recurrence; also verifies the initial state is a shared
        parameter tagged with the INITIAL_STATE role."""
        x = tensor.tensor3('x')
        mask = tensor.matrix('mask')
        h = self.simple.apply(x, mask=mask, iterate=True)
        calc_h = theano.function(inputs=[x, mask], outputs=[h])

        # 24 sequences (permutations of 0..3), broadcast over 3 features;
        # second half of the batch masks out its last step.
        x_val = 0.1 * numpy.asarray(list(itertools.permutations(range(4))),
                                    dtype=theano.config.floatX)
        x_val = numpy.ones(
            (24, 4, 3), dtype=theano.config.floatX) * x_val[..., None]
        mask_val = numpy.ones((24, 4), dtype=theano.config.floatX)
        mask_val[12:24, 3] = 0
        # Reference recurrence; h_val[0] is the (zero) initial state.
        h_val = numpy.zeros((25, 4, 3), dtype=theano.config.floatX)
        for i in range(1, 25):
            h_val[i] = numpy.tanh(h_val[i - 1].dot(2 * numpy.ones((3, 3))) +
                                  x_val[i - 1])
            h_val[i] = (mask_val[i - 1, :, None] * h_val[i] +
                        (1 - mask_val[i - 1, :, None]) * h_val[i - 1])
        h_val = h_val[1:]
        assert_allclose(h_val, calc_h(x_val, mask_val)[0], rtol=1e-04)

        # Also test that initial state is a parameter
        initial_state, = VariableFilter(roles=[INITIAL_STATE])(
            ComputationGraph(h))
        assert is_shared_variable(initial_state)
        assert initial_state.name == 'initial_state'
Ejemplo n.º 6
0
class TestSimpleRecurrent(unittest.TestCase):
    """Tests SimpleRecurrent against a NumPy reference implementation,
    for both a single step and an iterated (scan) application."""

    def setUp(self):
        # All transition weights are the constant 2, so expected values
        # can be computed with 2 * ones((3, 3)) below.
        self.simple = SimpleRecurrent(dim=3, weights_init=Constant(2),
                                      activation=Tanh())
        self.simple.initialize()

    def test_one_step(self):
        """One masked step equals tanh(h0.W + x); where the mask is 0
        the previous state h0 is kept unchanged."""
        h0 = tensor.matrix('h0')
        x = tensor.matrix('x')
        mask = tensor.vector('mask')
        h1 = self.simple.apply(x, h0, mask=mask, iterate=False)
        next_h = theano.function(inputs=[h0, x, mask], outputs=[h1])

        h0_val = 0.1 * numpy.array([[1, 1, 0], [0, 1, 1]],
                                   dtype=theano.config.floatX)
        x_val = 0.1 * numpy.array([[1, 2, 3], [4, 5, 6]],
                                  dtype=theano.config.floatX)
        mask_val = numpy.array([1, 0]).astype(theano.config.floatX)
        # Reference step followed by mask blending.
        h1_val = numpy.tanh(h0_val.dot(2 * numpy.ones((3, 3))) + x_val)
        h1_val = mask_val[:, None] * h1_val + (1 - mask_val[:, None]) * h0_val
        assert_allclose(h1_val, next_h(h0_val, x_val, mask_val)[0])

    def test_many_steps(self):
        """Iterated application matches a step-by-step NumPy recurrence;
        the initial state must be a named shared parameter."""
        x = tensor.tensor3('x')
        mask = tensor.matrix('mask')
        h = self.simple.apply(x, mask=mask, iterate=True)
        calc_h = theano.function(inputs=[x, mask], outputs=[h])

        # 24 sequences of length 4 (permutations of 0..3), broadcast over
        # 3 features; the second half of the batch masks its last step.
        x_val = 0.1 * numpy.asarray(list(itertools.permutations(range(4))),
                                    dtype=theano.config.floatX)
        x_val = numpy.ones((24, 4, 3),
                           dtype=theano.config.floatX) * x_val[..., None]
        mask_val = numpy.ones((24, 4), dtype=theano.config.floatX)
        mask_val[12:24, 3] = 0
        # Reference recurrence; index 0 holds the zero initial state.
        h_val = numpy.zeros((25, 4, 3), dtype=theano.config.floatX)
        for i in range(1, 25):
            h_val[i] = numpy.tanh(h_val[i - 1].dot(
                2 * numpy.ones((3, 3))) + x_val[i - 1])
            h_val[i] = (mask_val[i - 1, :, None] * h_val[i] +
                        (1 - mask_val[i - 1, :, None]) * h_val[i - 1])
        h_val = h_val[1:]
        assert_allclose(h_val, calc_h(x_val, mask_val)[0], rtol=1e-04)

        # Also test that initial state is a parameter
        initial_state, = VariableFilter(roles=[INITIAL_STATE])(
            ComputationGraph(h))
        assert is_shared_variable(initial_state)
        assert initial_state.name == 'initial_state'
Ejemplo n.º 7
0
def example5():
    """Bidirectional + SimpleRecurrent, adapted from a Blocks unit test."""
    floatX = theano.config.floatX

    bidir = Bidirectional(weights_init=Orthogonal(),
                          prototype=SimpleRecurrent(dim=3, activation=Tanh()))
    simple = SimpleRecurrent(dim=3,
                             weights_init=Orthogonal(),
                             activation=Tanh(),
                             seed=1)

    bidir.allocate()
    simple.initialize()

    # Give both directions the same transition matrix as the plain RNN.
    for child in (bidir.children[0], bidir.children[1]):
        child.parameters[0].set_value(simple.parameters[0].get_value())

    # Symbolic inputs and compiled evaluation functions.
    x = tensor.tensor3('x')
    mask = tensor.matrix('mask')

    run_bidir = theano.function([x, mask], [bidir.apply(x, mask=mask)])
    run_simple = theano.function([x, mask], [simple.apply(x, mask=mask)])

    # All 24 permutations of 0..3 as length-4 sequences, replicated over
    # 3 features; the second half of the batch masks out its last step.
    perms = list(itertools.permutations(range(4)))
    x_val = 0.1 * np.asarray(perms, dtype=floatX)
    x_val = np.ones((24, 4, 3), dtype=floatX) * x_val[..., None]
    mask_val = np.ones((24, 4), dtype=floatX)
    mask_val[12:24, 3] = 0

    # Bidirectional output, forward reference, and backward reference.
    for result in (run_bidir(x_val, mask_val)[0],
                   run_simple(x_val, mask_val)[0],
                   run_simple(x_val[::-1], mask_val[::-1])[0]):
        print(result)
Ejemplo n.º 8
0
class TextRNN(object):
    """Three-layer text network: Linear -> SimpleRecurrent -> Linear.

    All bricks are created and initialized eagerly in the constructor;
    `run` chains their `apply` methods over a symbolic input.
    """

    def __init__(self, dim_in, dim_hidden, dim_out, **kwargs):
        # Layer dimensions: input features, recurrent state, output.
        self.dim_in = dim_in
        self.dim_hidden = dim_hidden
        self.dim_out = dim_out

        # Input projection into the hidden dimension.
        self.input_layer = Linear(input_dim=self.dim_in, output_dim=self.dim_hidden,
                                weights_init=initialization.IsotropicGaussian(),
                                biases_init=initialization.Constant(0))
        self.input_layer.initialize()

        # Sparse initialization: only `num_init` weights per unit are
        # non-zero, drawn from an isotropic Gaussian.
        sparse_init = initialization.Sparse(num_init=15, weights_init=initialization.IsotropicGaussian())
        self.recurrent_layer = SimpleRecurrent(
                                dim=self.dim_hidden, activation=Tanh(), name="first_recurrent_layer",
                                weights_init=sparse_init,
                                biases_init=initialization.Constant(0.01))
        '''
        self.recurrent_layer = LSTM(dim=self.dim_hidden, activation=Tanh(),
                                    weights_init=initialization.IsotropicGaussian(std=0.001),
                                    biases_init=initialization.Constant(0.01))
        '''
        self.recurrent_layer.initialize()

        # Output projection from the hidden dimension.
        self.output_layer = Linear(input_dim=self.dim_hidden, output_dim=self.dim_out,
                                weights_init=initialization.Uniform(width=0.01),
                                biases_init=initialization.Constant(0.01))
        self.output_layer.initialize()

        self.children = [self.input_layer, self.recurrent_layer, self.output_layer]

    '''
    @recurrent(sequences=['inputs'], 
            states=['states'],
            contexts=[],
            outputs=['states', 'output'])
    '''

    def run(self, inputs):
        """Apply input, recurrent and output layers in sequence and
        return the symbolic output."""
        output = self.output_layer.apply( self.recurrent_layer.apply(self.input_layer.apply(inputs)) )
        return output
Ejemplo n.º 9
0
def main(num_epochs=100):
    """Train a word-level SimpleRecurrent language model and print samples.

    Pipeline: lookup embedding -> SimpleRecurrent -> linear scores ->
    softmax, trained with Adam on `inspirational.txt`.  After each epoch
    the model is sampled with the Gumbel-max trick, seeded from the
    empirical distribution of sentence-initial words.

    :param num_epochs: number of training epochs before stopping.
    """
    x = tensor.matrix('features')
    m = tensor.matrix('features_mask')

    # Time-major integer token sequences (transpose of batch-major input).
    x_int = x.astype(dtype='int32').T
    train_dataset = TextFile('inspirational.txt')
    # Sort sentences by length so padded batches waste less computation.
    train_dataset.indexables[0] = numpy.array(sorted(
        train_dataset.indexables[0], key=len
    ))

    n_voc = len(train_dataset.dict.keys())

    # Empirical distribution of sentence-initial words, used to seed the
    # sampler.  Two fixes over the original: `xrange` is Python-2-only,
    # and `sum(filter(lambda idx: idx == w, firsts))` summed the matching
    # indices (yielding w * count(w)) instead of counting occurrences.
    first_words = [s[0] for s in train_dataset.indexables[
        train_dataset.sources.index('features')]]
    init_probs = numpy.array(
        [sum(1 for idx in first_words if idx == w) for w in range(n_voc)],
        dtype=theano.config.floatX
    )
    init_probs = init_probs / init_probs.sum()

    # Model bricks.  (A dead `lstm_biases` setup left over from an LSTM
    # variant was removed: it was never used with SimpleRecurrent.)
    n_h = 100
    linear_embedding = LookupTable(
        length=n_voc,
        dim=n_h,
        weights_init=Uniform(std=0.01),
        biases_init=Constant(0.)
    )
    linear_embedding.initialize()
    rnn = SimpleRecurrent(
        dim=n_h,
        activation=Tanh(),
        weights_init=Uniform(std=0.01),
        biases_init=Constant(0.)
    )
    rnn.initialize()
    score_layer = Linear(
        input_dim=n_h,
        output_dim=n_voc,
        weights_init=Uniform(std=0.01),
        biases_init=Constant(0.)
    )
    score_layer.initialize()

    # Embed all tokens but the last; zero embeddings at padded positions.
    embedding = (linear_embedding.apply(x_int[:-1])
                 * tensor.shape_padright(m.T[1:]))
    rnn_out = rnn.apply(inputs=embedding, mask=m.T[1:])
    probs = softmax(
        sequence_map(score_layer.apply, rnn_out, mask=m.T[1:])[0]
    )
    # Next-token loss restricted to unmasked (real) positions.
    idx_mask = m.T[1:].nonzero()
    cost = CategoricalCrossEntropy().apply(
        x_int[1:][idx_mask[0], idx_mask[1]],
        probs[idx_mask[0], idx_mask[1]]
    )
    cost.name = 'cost'
    misclassification = MisclassificationRate().apply(
        x_int[1:][idx_mask[0], idx_mask[1]],
        probs[idx_mask[0], idx_mask[1]]
    )
    misclassification.name = 'misclassification'

    cg = ComputationGraph([cost])
    params = cg.parameters

    algorithm = GradientDescent(
        cost=cost,
        params=params,
        step_rule=Adam()
    )

    train_data_stream = Padding(
        data_stream=DataStream(
            dataset=train_dataset,
            iteration_scheme=BatchwiseShuffledScheme(
                examples=train_dataset.num_examples,
                batch_size=10,
            )
        ),
        mask_sources=('features',)
    )

    model = Model(cost)

    extensions = []
    extensions.append(Timing())
    extensions.append(FinishAfter(after_n_epochs=num_epochs))
    extensions.append(TrainingDataMonitoring(
        [cost, misclassification],
        prefix='train',
        after_epoch=True))

    # Sampling graph: draw `length`-step sequences for a batch of
    # `batch_size` via the Gumbel-max trick (argmax of log-probs plus
    # Gumbel noise at every step).
    batch_size = 10
    length = 30
    trng = MRG_RandomStreams(18032015)
    u = trng.uniform(size=(length, batch_size, n_voc))
    gumbel_noise = -tensor.log(-tensor.log(u))
    init_samples = (tensor.log(init_probs).dimshuffle(('x', 0))
                    + gumbel_noise[0]).argmax(axis=-1)
    init_states = rnn.initial_state('states', batch_size)

    def sampling_step(g_noise, states, samples_step):
        """One scan step: feed the previous sample back through the
        embedding and RNN, then draw the next sample with noise."""
        embedding_step = linear_embedding.apply(samples_step)
        next_states = rnn.apply(inputs=embedding_step,
                                states=states,
                                iterate=False)
        probs_step = softmax(score_layer.apply(next_states))
        next_samples = (tensor.log(probs_step)
                        + g_noise).argmax(axis=-1)

        return next_states, next_samples

    [_, samples], _ = theano.scan(
        fn=sampling_step,
        sequences=[gumbel_noise[1:]],
        outputs_info=[init_states, init_samples]
    )

    sampling = theano.function([], samples.owner.inputs[0].T)

    plotters = []
    plotters.append(Plotter(
        channels=[['train_cost', 'train_misclassification']],
        titles=['Costs']))

    extensions.append(PlotManager('Language modelling example',
                                  plotters=plotters,
                                  after_epoch=True,
                                  after_training=True))
    extensions.append(Printing())
    extensions.append(PrintSamples(sampler=sampling,
                                   voc=train_dataset.inv_dict))

    main_loop = MainLoop(model=model,
                         data_stream=train_data_stream,
                         algorithm=algorithm,
                         extensions=extensions)

    main_loop.run()
Ejemplo n.º 10
0
                           weights_init=initialization.Uniform(width=0.01),
                           biases_init=Constant(0))
lookup_input.initialize()

# Projects the embedded input into the recurrent hidden dimension.
linear_input = Linear(name='linear_input',
                      input_dim=hidden_layer_dim,
                      output_dim=hidden_layer_dim,
                      weights_init=initialization.Uniform(width=0.01),
                      biases_init=Constant(0))
linear_input.initialize()

# Recurrent hidden layer.
rnn = SimpleRecurrent(name='hidden',
                      dim=hidden_layer_dim,
                      activation=Tanh(),
                      weights_init=initialization.Uniform(width=0.01))
rnn.initialize()

# Maps hidden states to scores over the durations vocabulary.
linear_output = Linear(name='linear_output',
                       input_dim=hidden_layer_dim,
                       output_dim=train_dataset.durations_vocab_size(),
                       weights_init=initialization.Uniform(width=0.01),
                       biases_init=Constant(0))
linear_output.initialize()

# Softmax that works on >2-dimensional inputs (extra_ndim below).
softmax = NDimensionalSoftmax(name='ndim_softmax')

# Wire the graph: lookup -> linear -> RNN -> linear -> softmax.
activation_input = lookup_input.apply(x)
hidden = rnn.apply(linear_input.apply(activation_input))
activation_output = linear_output.apply(hidden)
y_est = softmax.apply(activation_output, extra_ndim=1)
Ejemplo n.º 11
0
lookup_input.initialize()

# Projects the embedded input into the recurrent hidden dimension.
linear_input = Linear(
    name='linear_input',
    input_dim=hidden_layer_dim,
    output_dim=hidden_layer_dim,
    weights_init=initialization.Uniform(width=0.01),
    biases_init=Constant(0))
linear_input.initialize()

# Recurrent hidden layer.
rnn = SimpleRecurrent(
    name='hidden',
    dim=hidden_layer_dim,
    activation=Tanh(),
    weights_init=initialization.Uniform(width=0.01))
rnn.initialize()

# Maps hidden states to per-character scores.
linear_output = Linear(
    name='linear_output',
    input_dim=hidden_layer_dim,
    output_dim=charset_size,
    weights_init=initialization.Uniform(width=0.01),
    biases_init=Constant(0))
linear_output.initialize()

# Softmax that works on >2-dimensional inputs (extra_ndim below).
softmax = NDimensionalSoftmax(name='ndim_softmax')

# Wire the graph: lookup -> linear -> RNN -> linear -> softmax.
activation_input = lookup_input.apply(x)
hidden = rnn.apply(linear_input.apply(activation_input))
activation_output = linear_output.apply(hidden)
y_est = softmax.apply(activation_output, extra_ndim=1)
Ejemplo n.º 12
0
class ETHM(EUTHM):
    '''Model with only textual-hashtag information'''
    def __init__(self, config, dataset, *args, **kwargs):
        super(ETHM, self).__init__(config, dataset)

    def _build_model(self, *args, **kwargs):
        """Build the full symbolic graph: inputs, bricks, text encoding
        via the multi-layer LSTM, and the final cost."""
        # Define inputs
        self._define_inputs()
        self._build_bricks()
        self._set_OV_value()
        # Transpose text
        # Time-major layout (time, batch) for the recurrent encoder.
        self.text = self.text.dimshuffle(1, 0)
        self.text_mask = self.text_mask.dimshuffle(1, 0)
        self.sparse_word = self.sparse_word.dimshuffle(1, 0)
        self.sparse_word_mask = self.sparse_word_mask.dimshuffle(1, 0)
        # Turn word, and hashtag into vector representation
        text_vec = self.word_embed.apply(self.text)
        # Apply word and hashtag word and url
        text_vec = self._apply_hashtag_word(text_vec)
        text_vec = self._apply_sparse_word(text_vec)
        # Encode text
        mlstm_hidden, mlstm_cell = self.mlstm.apply(
            inputs=self.mlstm_ins.apply(text_vec),
            mask=self.text_mask.astype(theano.config.floatX))
        # Final hidden state summarizes the whole text.
        text_encodes = mlstm_hidden[-1]
        input_vec = text_encodes
        self._get_cost(input_vec, None, None)

    def _define_inputs(self, *args, **kwargs):
        """Declare all symbolic input variables (indices plus float
        masks) used by the model."""
        self.hashtag = tensor.ivector('hashtag')
        self.text = tensor.imatrix('text')
        self.text_mask = tensor.matrix('text_mask', dtype=theano.config.floatX)
        self.hashtag_word = tensor.ivector('hashtag_word')
        self.hashtag_sparse_mask = tensor.vector('hashtag_word_sparse_mask',
                                                 dtype=theano.config.floatX)
        # Left/right index pairs locate hashtag words within the text.
        self.hashtag_word_left_idx = tensor.ivector(
            'hashtag_word_idx_left_idx')
        self.hashtag_word_right_idx = tensor.ivector(
            'hashtag_word_idx_right_idx')
        self.sparse_word = tensor.imatrix('sparse_word')
        self.sparse_word_sparse_mask = tensor.vector(
            'sparse_word_sparse_mask', dtype=theano.config.floatX)
        self.sparse_word_mask = tensor.matrix('sparse_word_mask',
                                              dtype=theano.config.floatX)
        self.sparse_word_left_idx = tensor.ivector('sparse_word_idx_left_idx')
        self.sparse_word_right_idx = tensor.ivector(
            'sparse_word_idx_right_idx')

    def _build_bricks(self, *args, **kwargs):
        """Create and initialize all bricks: embeddings, the MLSTM text
        encoder, hashtag-to-word MLP and the sparse-word character RNN."""
        # Build lookup tables
        self.word_embed = self._embed(len(self.dataset.word2index),
                                      self.config.word_embed_dim,
                                      name='word_embed')

        self.hashtag_embed = self._embed(len(self.dataset.hashtag2index),
                                         self.config.lstm_dim,
                                         name='hashtag_embed')
        # Build text encoder
        # Input projection: 4 * lstm_dim matches the four LSTM gates.
        self.mlstm_ins = Linear(input_dim=self.config.word_embed_dim,
                                output_dim=4 * self.config.lstm_dim,
                                name='mlstm_in')
        self.mlstm_ins.weights_init = IsotropicGaussian(
            std=numpy.sqrt(2) /
            numpy.sqrt(self.config.word_embed_dim + self.config.lstm_dim))
        self.mlstm_ins.biases_init = Constant(0)
        self.mlstm_ins.initialize()
        self.mlstm = MLSTM(self.config.lstm_time,
                           self.config.lstm_dim,
                           shared=False)
        self.mlstm.weights_init = IsotropicGaussian(
            std=numpy.sqrt(2) /
            numpy.sqrt(self.config.word_embed_dim + self.config.lstm_dim))
        self.mlstm.biases_init = Constant(0)
        self.mlstm.initialize()
        # MLP mapping hashtag embeddings into word-embedding space.
        self.hashtag2word = MLP(
            activations=[Tanh('hashtag2word_tanh')],
            dims=[self.config.lstm_dim, self.config.word_embed_dim],
            name='hashtag2word_mlp')
        self.hashtag2word.weights_init = IsotropicGaussian(
            std=1 / numpy.sqrt(self.config.word_embed_dim))
        self.hashtag2word.biases_init = Constant(0)
        self.hashtag2word.initialize()
        self.hashtag2word_bias = Bias(dim=1, name='hashtag2word_bias')
        self.hashtag2word_bias.biases_init = Constant(0)
        self.hashtag2word_bias.initialize()
        #Build character embedding
        self.char_embed = self._embed(len(self.dataset.char2index),
                                      self.config.char_embed_dim,
                                      name='char_embed')
        # Build sparse word encoder
        self.rnn_ins = Linear(input_dim=self.config.char_embed_dim,
                              output_dim=self.config.word_embed_dim,
                              name='rnn_in')
        self.rnn_ins.weights_init = IsotropicGaussian(
            std=numpy.sqrt(2) / numpy.sqrt(self.config.char_embed_dim +
                                           self.config.word_embed_dim))
        self.rnn_ins.biases_init = Constant(0)
        self.rnn_ins.initialize()
        self.rnn = SimpleRecurrent(dim=self.config.word_embed_dim,
                                   activation=Tanh())
        self.rnn.weights_init = IsotropicGaussian(
            std=1 / numpy.sqrt(self.config.word_embed_dim))
        self.rnn.initialize()

    def _apply_dropout(self, outputs, *args, **kwargs):
        """Apply dropout to the word and hashtag embedding weights and
        return the resulting graph outputs."""
        variables = [self.word_embed.W, self.hashtag_embed.W]
        cgs = ComputationGraph(outputs)
        cg_dropouts = apply_dropout(cgs,
                                    variables,
                                    drop_prob=self.config.dropout_prob,
                                    seed=123).outputs
        return cg_dropouts

    def _apply_reg(self, cost, params=None, *args, **kwargs):
        """Add squared-L2 regularization of the embedding matrices to
        `cost` when `config.l2_norm` is positive; best-effort (any
        failure leaves the cost unchanged)."""
        try:
            if self.config.l2_norm > 0:
                cost = cost + self.config.l2_norm * theano_expressions.l2_norm(
                    tensors=[self.hashtag_embed.W, self.word_embed.W])**2

            else:
                pass
        except Exception:
            pass
        return cost
Ejemplo n.º 13
0
class EUTHM(UTHM):
    '''
    UTH model with extend information
    '''
    def __init__(self, config, dataset, *args, **kwargs):
        super(EUTHM, self).__init__(config, dataset)

    def _define_inputs(self, *args, **kwargs):
        """Extend the base inputs with user-word, hashtag-word and
        sparse-word indices plus their float masks."""
        super(EUTHM, self)._define_inputs()
        self.user_word = tensor.ivector('user_word')
        self.user_word_sparse_mask = tensor.vector('user_word_sparse_mask',
                                                   dtype=theano.config.floatX)
        # Left/right index pairs locate the special tokens in the text.
        self.user_word_left_idx = tensor.ivector('user_word_idx_left_idx')
        self.user_word_right_idx = tensor.ivector('user_word_idx_right_idx')
        self.hashtag_word = tensor.ivector('hashtag_word')
        self.hashtag_sparse_mask = tensor.vector('hashtag_word_sparse_mask',
                                                 dtype=theano.config.floatX)
        self.hashtag_word_left_idx = tensor.ivector(
            'hashtag_word_idx_left_idx')
        self.hashtag_word_right_idx = tensor.ivector(
            'hashtag_word_idx_right_idx')
        self.sparse_word = tensor.imatrix('sparse_word')
        self.sparse_word_sparse_mask = tensor.vector(
            'sparse_word_sparse_mask', dtype=theano.config.floatX)
        self.sparse_word_mask = tensor.matrix('sparse_word_mask',
                                              dtype=theano.config.floatX)
        self.sparse_word_left_idx = tensor.ivector('sparse_word_idx_left_idx')
        self.sparse_word_right_idx = tensor.ivector(
            'sparse_word_idx_right_idx')

    def _build_bricks(self, *args, **kwargs):
        """Extend the base bricks with user/hashtag-to-word MLPs, their
        biases, a character embedding and the sparse-word RNN encoder."""
        # Build lookup tables
        super(EUTHM, self)._build_bricks()
        # MLP mapping user embeddings into word-embedding space.
        self.user2word = MLP(
            activations=[Tanh('user2word_tanh')],
            dims=[self.config.user_embed_dim, self.config.word_embed_dim],
            name='user2word_mlp')
        self.user2word.weights_init = IsotropicGaussian(
            std=1 / numpy.sqrt(self.config.word_embed_dim))
        self.user2word.biases_init = Constant(0)
        self.user2word.initialize()
        # MLP mapping hashtag embeddings into word-embedding space.
        self.hashtag2word = MLP(
            activations=[Tanh('hashtag2word_tanh')],
            dims=[
                self.config.user_embed_dim + self.config.word_embed_dim,
                self.config.word_embed_dim
            ],
            name='hashtag2word_mlp')
        self.hashtag2word.weights_init = IsotropicGaussian(
            std=1 / numpy.sqrt(self.config.word_embed_dim))
        self.hashtag2word.biases_init = Constant(0)
        self.hashtag2word.initialize()
        self.user2word_bias = Bias(dim=1, name='user2word_bias')
        self.user2word_bias.biases_init = Constant(0)
        self.user2word_bias.initialize()
        self.hashtag2word_bias = Bias(dim=1, name='hashtag2word_bias')
        self.hashtag2word_bias.biases_init = Constant(0)
        self.hashtag2word_bias.initialize()
        #Build character embedding
        self.char_embed = self._embed(len(self.dataset.char2index),
                                      self.config.char_embed_dim,
                                      name='char_embed')
        # Build sparse word encoder
        self.rnn_ins = Linear(input_dim=self.config.char_embed_dim,
                              output_dim=self.config.word_embed_dim,
                              name='rnn_in')
        self.rnn_ins.weights_init = IsotropicGaussian(
            std=numpy.sqrt(2) / numpy.sqrt(self.config.char_embed_dim +
                                           self.config.word_embed_dim))
        self.rnn_ins.biases_init = Constant(0)
        self.rnn_ins.initialize()
        self.rnn = SimpleRecurrent(dim=self.config.word_embed_dim,
                                   activation=Tanh())
        self.rnn.weights_init = IsotropicGaussian(
            std=1 / numpy.sqrt(self.config.word_embed_dim))
        self.rnn.initialize()

    def _set_OV_value(self, *args, **kwargs):
        '''Train a <unk> representation'''
        # NOTE(review): tensor.set_subtensor builds a new symbolic
        # expression whose result is discarded here -- it does not modify
        # char_embed.W in place; confirm this has the intended effect.
        tensor.set_subtensor(
            self.char_embed.W[self.dataset.char2index['<unk>']],
            numpy.zeros(self.config.char_embed_dim,
                        dtype=theano.config.floatX))

    def _get_text_vec(self, *args, **kwargs):
        """Embed the text (time-major) and splice in user-word,
        hashtag-word and sparse-word representations."""
        # Transpose text
        self.text = self.text.dimshuffle(1, 0)
        self.text_mask = self.text_mask.dimshuffle(1, 0)
        self.sparse_word = self.sparse_word.dimshuffle(1, 0)
        self.sparse_word_mask = self.sparse_word_mask.dimshuffle(1, 0)
        # Turn word, user and hashtag into vector representation
        text_vec = self.word_embed.apply(self.text)
        # Apply user word, hashtag word and url
        text_vec = self._apply_user_word(text_vec)
        text_vec = self._apply_hashtag_word(text_vec)
        text_vec = self._apply_sparse_word(text_vec)
        return text_vec

    @abstractmethod
    def _apply_user_word(self, text_vec, *args, **kwargs):
        '''
        Replace @a with transformed author vector
        :param text_vec:
        :param args:
        :param kwargs:
        :return:
        '''
        # NOTE(review): decorated @abstractmethod yet has a concrete
        # body; subclasses appear to inherit this implementation.
        user_word_vec = self.user2word.apply(self.user_embed.apply(self.user_word)) + \
                        self.user2word_bias.parameters[0][0]
        # Blend the transformed user vector in at the indexed positions,
        # weighted by the sparse mask.
        text_vec = tensor.set_subtensor(
            text_vec[self.user_word_right_idx, self.user_word_left_idx],
            text_vec[self.user_word_right_idx, self.user_word_left_idx] *
            (1 - self.user_word_sparse_mask[:, None]) +
            user_word_vec * self.user_word_sparse_mask[:, None])
        return text_vec

    @abstractmethod
    def _apply_hashtag_word(self, text_vec, *args, **kwargs):
        '''
        Replace #h with transformed hashtag vector
        :param text_vec:
        :param args:
        :param kwargs:
        :return:
        '''
        hashtag_word_vec = self.hashtag2word.apply(self.hashtag_embed.apply(self.hashtag_word)) +\
                           self.hashtag2word_bias.parameters[0][0]
        # Blend the transformed hashtag vector in at the indexed
        # positions, weighted by the sparse mask.
        text_vec = tensor.set_subtensor(
            text_vec[self.hashtag_word_right_idx, self.hashtag_word_left_idx],
            text_vec[self.hashtag_word_right_idx, self.hashtag_word_left_idx] *
            (1 - self.hashtag_sparse_mask[:, None]) +
            hashtag_word_vec * self.hashtag_sparse_mask[:, None])
        return text_vec

    @abstractmethod
    def _apply_sparse_word(self, text_vec, *args, **kwargs):
        '''
        Replace sparse word encoding with character embedding. (maybe lstm)
        :param text_vec:
        :param args:
        :param kwargs:
        :return:
        '''
        # Encode each sparse word character-by-character with the RNN and
        # take the final hidden state as its word vector.
        sparse_word_vec = self.char_embed.apply(self.sparse_word)
        sparse_word_hiddens = self.rnn.apply(
            inputs=self.rnn_ins.apply(sparse_word_vec),
            mask=self.sparse_word_mask)
        tmp = sparse_word_hiddens[-1]
        # Blend the encoded sparse word in at the indexed positions.
        text_vec = tensor.set_subtensor(
            text_vec[self.sparse_word_right_idx, self.sparse_word_left_idx],
            text_vec[self.sparse_word_right_idx, self.sparse_word_left_idx] *
            (1 - self.sparse_word_sparse_mask[:, None]) +
            tmp * self.sparse_word_sparse_mask[:, None])
        return text_vec
Ejemplo n.º 14
0
# Recurrent regression head: encode -> recurrent core -> linear decode.
# NOTE(review): this fragment used `lstm` without defining it; the definition
# below is restored from the identical script later in this file.
lstm = SimpleRecurrent(dim=h_dim, activation=Tanh())
#lstm = GatedRecurrent(dim=h_dim,
#                      activation=Tanh())

# Linear readout from the final hidden state to a scalar prediction.
decode = Linear(name='decode',
                input_dim=h_dim,
                output_dim=1)

# Feed-forward bricks: small Gaussian weights, zero biases.
for brick in (encode, gates, decode):
    brick.weights_init = IsotropicGaussian(0.01)
    brick.biases_init = Constant(0.)
    brick.initialize()

lstm.weights_init = IsotropicGaussian(0.01)
#lstm.weights_init = Orthogonal()
lstm.biases_init = Constant(0.)
lstm.initialize()

# Build the graph: encode inputs, run the recurrence over time, decode the
# last hidden state into a single value per example.
encoded = encode.apply(x)
#hiddens = lstm.apply(encoded, gates.apply(x))
hiddens = lstm.apply(encoded)
y_hat = decode.apply(hiddens[-1])

cost = SquaredError().apply(y, y_hat)
cost.name = 'cost'
# Ejemplo n.º 15
# 0
    def __init__(self, rnn_dims, num_actions, data_X_np=None, data_y_np=None, width=32, height=32):
        '''
        Build a conv-net + RNN that predicts expected reward and suggests
        actions, and compile the theano functions to query it.

        :param rnn_dims: size of the RNN state vectors stored per example
            (NOTE(review): layers below use the hard-coded RNN_DIMS=100, not
            this argument — only state_shape uses rnn_dims; confirm intended)
        :param num_actions: size of the softmax action output
        :param data_X_np: optional features array; random data is generated
            when either data array is missing
        :param data_y_np: optional targets array
        :param width: input image width
        :param height: input image height
        '''
        ###############################################################
        #
        #       Network and data setup
        #
        ##############################################################
        RNN_DIMS = 100
        NUM_ACTIONS = num_actions

        # NOTE(review): tensor5 is defined but never used below.
        tensor5 = T.TensorType('float32', [False, True, True, True, True])
        self.x = T.tensor4('features')
        # Symbolic reward target; assigned here but not used in this method.
        self.reward = T.tensor3('targets', dtype='float32')
        # Previous RNN hidden state, fed in by the caller at each step.
        self.state = T.matrix('states', dtype='float32')

        self.hidden_states = [] # holds hidden states in np array form

        
        #data_X & data_Y supplied in init function now...

        # Fall back to random data when the caller supplied none (Python 2
        # print statement — this module targets Python 2).
        if data_X_np is None or data_y_np is None:
            print 'you did not supply data at init'
            data_X_np = np.float32(np.random.normal(size=(1280, 1,1, width, height)))
            data_y_np = np.float32(np.random.normal(size=(1280, 1,1,1)))
        #data_states_np = np.float32(np.ones((1280, 1, 100)))
        # One zero-initialized state row per training example.
        state_shape = (data_X_np.shape[0],rnn_dims)
        self.data_states_np = np.float32(np.zeros(state_shape))


        # Train and test streams are built from the same arrays here.
        self.datastream = IterableDataset(dict(features=data_X_np,
                                            targets=data_y_np,
                                            states=self.data_states_np)).get_example_stream()
        self.datastream_test = IterableDataset(dict(features=data_X_np,
                                            targets=data_y_np,
                                            states=self.data_states_np)).get_example_stream()
        data_X = self.datastream


        # 2 conv inputs
        # we want to take our sequence of input images and convert them to convolutional
        # representations
        conv_layers = [ConvolutionalLayer(Rectifier().apply, (3, 3), 16, (2, 2), name='l1'),
                       ConvolutionalLayer(Rectifier().apply, (3, 3), 32, (2, 2), name='l2'),
                       ConvolutionalLayer(Rectifier().apply, (3, 3), 64, (2, 2), name='l3'),
                       ConvolutionalLayer(Rectifier().apply, (3, 3), 128, (2, 2), name='l4'),
                       ConvolutionalLayer(Rectifier().apply, (3, 3), 128, (2, 2), name='l5'),
                       ConvolutionalLayer(Rectifier().apply, (3, 3), 128, (2, 2), name='l6')]
        convnet = ConvolutionalSequence(conv_layers, num_channels=4,
                                        image_size=(width, height),
                                        weights_init=init.Uniform(0, 0.01),
                                        biases_init=init.Constant(0.0),
                                        tied_biases=False,
                                        border_mode='full')
        convnet.initialize()
        # Flattened size of the conv stack's output, used for the projection.
        output_dim = np.prod(convnet.get_dim('output'))

        conv_out = convnet.apply(self.x)

        # Flatten each example's feature maps into one vector.
        reshape_dims = (conv_out.shape[0], conv_out.shape[1]*conv_out.shape[2]*conv_out.shape[3])
        hidden_repr = conv_out.reshape(reshape_dims)
        conv2rnn = Linear(input_dim=output_dim, output_dim=RNN_DIMS, 
                            weights_init=init.Uniform(width=0.01),
                            biases_init=init.Constant(0.))
        conv2rnn.initialize()
        conv2rnn_output = conv2rnn.apply(hidden_repr)

        # RNN hidden layer
        # then we want to feed those conv representations into an RNN
        # iterate=False: one recurrent step per call, with the previous state
        # passed in explicitly via self.state.
        rnn = SimpleRecurrent(dim=RNN_DIMS, activation=Rectifier(), weights_init=init.Uniform(width=0.01))
        rnn.initialize()
        self.learned_state = rnn.apply(inputs=conv2rnn_output, states=self.state, iterate=False)


        # linear output from hidden layer
        # the RNN has two outputs, but only this one has a target. That is, this is "expected return"
        # which the network attempts to minimize difference between expected return and actual return
        lin_output = Linear(input_dim=RNN_DIMS, output_dim=1, 
                            weights_init=init.Uniform(width=0.01),
                            biases_init=init.Constant(0.))
        lin_output.initialize()
        self.exp_reward = lin_output.apply(self.learned_state)
        self.get_exp_reward = theano.function([self.x, self.state], self.exp_reward)

        # softmax output from hidden layer
        # this provides a softmax of action recommendations
        # the hypothesis is that adjusting the other outputs magically influences this set of outputs
        # to suggest smarter (or more realistic?) moves
        action_output = Linear(input_dim=RNN_DIMS, output_dim=NUM_ACTIONS, 
                            weights_init=init.Constant(.001), 
                            biases_init=init.Constant(0.))
        action_output.initialize()

        self.suggested_actions = Softmax().apply(action_output.apply(self.learned_state[-1]))

        ######################
        # use this to get suggested actions... it requires the state of the hidden units from the previous
        # timestep
        #####################
        self.get_suggested_actions = theano.function([self.x, self.state], [self.suggested_actions, self.learned_state])
# Ejemplo n.º 16
# 0
def main(save_to, num_epochs):
    """Train a recurrent next-frame predictor on MNIST sequences.

    Builds an MLP input projection, a SimpleRecurrent core and an MLP
    decoder, trains with Adam + gradient clipping, and saves the best
    model (by validation cost) to ``'best_' + save_to + '.pkl'``.

    :param save_to: suffix for the saved model filename
    :param num_epochs: number of training epochs before stopping
    """
    batch = 128
    hid_dim = 100
    n_steps = 20

    # Bricks: input projection, recurrent core, output decoder.
    in_proj = MLP([Identity()], [784, hid_dim],
                  biases_init=Constant(0.),
                  weights_init=IsotropicGaussian(.001))
    out_mlp = MLP([Rectifier(), Logistic()], [hid_dim, hid_dim, 784],
                  biases_init=Constant(0.),
                  weights_init=IsotropicGaussian(.001))
    core = SimpleRecurrent(dim=hid_dim, activation=Tanh(),
                           weights_init=Orthogonal())
    for brick in (in_proj, core, out_mlp):
        brick.initialize()

    # Predict frame t-1 from frames 1..t (next-step prediction graph).
    x = tensor.tensor3('features')
    inputs = x[1:, :, :]
    targets = x[:-1, :, :]

    hidden = core.apply(in_proj.apply(inputs))
    x_hat = out_mlp.apply(hidden)
    cost = tensor.nnet.binary_crossentropy(x_hat, targets).mean()
    cost.name = 'final_cost'

    cg = ComputationGraph([cost, ])

    # 50k/10k train/valid split from the training set, plus the test set.
    mnist_train = MNIST("train", subset=slice(0, 50000), sources=('features', ))
    mnist_valid = MNIST("train", subset=slice(50000, 60000), sources=('features',))
    mnist_test = MNIST("test")

    def build_stream(dataset, num_examples):
        # Flatten images, batch sequentially, reshape into n_steps chunks.
        raw = DataStream(dataset,
                         iteration_scheme=SequentialScheme(num_examples, batch))
        return Mapping(Flatten(raw), _meanize(n_steps))

    trainstream = build_stream(mnist_train, 50000)
    validstream = build_stream(mnist_valid, 10000)
    teststream = build_stream(mnist_test, 10000)

    algorithm = GradientDescent(
        cost=cost, params=cg.parameters,
        step_rule=CompositeRule([Adam(), StepClipping(100)]))
    extensions = [
        Timing(),
        FinishAfter(after_n_epochs=num_epochs),
        # Track validation cost and checkpoint the best model.
        DataStreamMonitoringAndSaving(
            [cost, ],
            validstream,
            [in_proj, out_mlp, core],
            'best_' + save_to + '.pkl',
            cost_name=cost.name,
            after_epoch=True,
            prefix='valid'),
        TrainingDataMonitoring(
            [cost,
             aggregation.mean(algorithm.total_gradient_norm)],
            prefix="train",
            after_epoch=True),
        Printing(),
    ]
    MainLoop(algorithm, trainstream, extensions=extensions).run()
# Ejemplo n.º 17
# 0
# Recurrent regression head: encode -> recurrent core -> linear decode.
# NOTE(review): the name `lstm` is historical — this is a SimpleRecurrent;
# the GatedRecurrent alternative is kept commented below.
lstm = SimpleRecurrent(dim=h_dim, activation=Tanh())

#lstm = GatedRecurrent(dim=h_dim,
#                      activation=Tanh())

# Linear readout from the final hidden state to a scalar prediction.
decode = Linear(name='decode', input_dim=h_dim, output_dim=1)

# Feed-forward bricks (encode/gates defined earlier in the file):
# small Gaussian weights, zero biases.
for brick in (encode, gates, decode):
    brick.weights_init = IsotropicGaussian(0.01)
    brick.biases_init = Constant(0.)
    brick.initialize()

lstm.weights_init = IsotropicGaussian(0.01)
#lstm.weights_init = Orthogonal()
lstm.biases_init = Constant(0.)
lstm.initialize()

#ComputationGraph(encode.apply(x)).get_theano_function()(features_test)[0].shape
#ComputationGraph(lstm.apply(encoded)).get_theano_function()(features_test)
#ComputationGraph(decode.apply(hiddens[-1])).get_theano_function()(features_test)[0].shape

#ComputationGraph(SquaredError().apply(y, y_hat.flatten())).get_theano_function()(features_test, targets_test)[0].shape

# Build the graph: encode inputs, run the recurrence over time, decode the
# last hidden state into a single value per example.
encoded = encode.apply(x)
#hiddens = lstm.apply(encoded, gates.apply(x))
hiddens = lstm.apply(encoded)
y_hat = decode.apply(hiddens[-1])

# Mean squared error against the target y (defined earlier in the file).
cost = SquaredError().apply(y, y_hat)
cost.name = 'cost'