Example #1
def jointModelOutput(num_sub_activities, num_affordances,
                     num_sub_activities_anticipation,
                     num_affordances_anticipation, inputJointFeatures,
                     inputHumanFeatures, inputObjectFeatures):

    shared_input_layer = TemporalInputFeatures(inputJointFeatures)
    shared_hidden_layer = LSTM('tanh', 'sigmoid', 'orthogonal', 4, 128)
    #shared_hidden_layer = simpleRNN('tanh','orthogonal',4,128)
    shared_layers = [shared_input_layer, shared_hidden_layer]
    human_layers = [
        ConcatenateFeatures(inputHumanFeatures),
        LSTM('tanh', 'sigmoid', 'orthogonal', 4, 256)
    ]
    object_layers = [
        ConcatenateFeatures(inputObjectFeatures),
        LSTM('tanh', 'sigmoid', 'orthogonal', 4, 256)
    ]

    human_anticipation = [softmax(num_sub_activities_anticipation)]
    human_detection = [softmax(num_sub_activities)]

    object_anticipation = [softmax(num_affordances_anticipation)]
    object_detection = [softmax(num_affordances)]

    trY_1_detection = T.lmatrix()
    trY_2_detection = T.lmatrix()
    trY_1_anticipation = T.lmatrix()
    trY_2_anticipation = T.lmatrix()
    sharedrnn = SharedRNNOutput(shared_layers, human_layers, object_layers,
                                human_detection, human_anticipation,
                                object_detection, object_anticipation,
                                softmax_loss, trY_1_detection, trY_2_detection,
                                trY_1_anticipation, trY_2_anticipation, 1e-3)
    return sharedrnn
Example #2
def jointModelVectors(num_sub_activities, num_affordances, inputJointFeatures,
                      inputHumanFeatures, inputObjectFeatures):
    shared_input_layer = TemporalInputFeatures(inputJointFeatures)
    shared_hidden_layer = LSTM('tanh', 'sigmoid', 'orthogonal', 4, 128)
    shared_layers = [shared_input_layer, shared_hidden_layer]

    human_layers = [
        TemporalInputFeatures(inputHumanFeatures),
        LSTM('tanh', 'sigmoid', 'orthogonal', 4, 256)
    ]
    human_activity_classification = [
        ConcatenateVectors(),
        softmax(num_sub_activities)
    ]

    object_layers = [
        TemporalInputFeatures(inputObjectFeatures),
        LSTM('tanh', 'sigmoid', 'orthogonal', 4, 256)
    ]
    object_affordance_classification = [
        ConcatenateVectors(), softmax(num_affordances)
    ]

    trY_1 = T.lmatrix()
    trY_2 = T.lmatrix()
    sharedrnn = SharedRNNVectors(shared_layers, human_layers, object_layers,
                                 human_activity_classification,
                                 object_affordance_classification,
                                 softmax_loss, trY_1, trY_2, 1e-3)
    return sharedrnn
Example #3
def DRAmodelnoedge(nodeList,edgeList,edgeListComplete,edgeFeatures,nodeFeatures,nodeToEdgeConnections,clipnorm=25.0,train_for='joint'):
	edgeRNNs = {}
	edgeTypes = edgeList
	lstm_init = 'orthogonal'
	softmax_init = 'uniform'
	
	rng = np.random.RandomState(1234567890)

	for et in edgeTypes:
		inputJointFeatures = edgeFeatures[et]
		print(inputJointFeatures)
		edgeRNNs[et] = [TemporalInputFeatures(inputJointFeatures)] #128

	nodeRNNs = {}
	nodeTypes = nodeList.keys()
	nodeLabels = {}
	outputLayer = {}
	for nt in nodeTypes:
		num_classes = nodeList[nt]
		#nodeRNNs[nt] = [LSTM('tanh','sigmoid',lstm_init,truncate_gradient=4,size=256,rng=rng),softmax(num_classes,softmax_init,rng=rng)] #256
		nodeRNNs[nt] = [LSTM('tanh','sigmoid',lstm_init,truncate_gradient=4,size=args.nodeRNN_size,rng=rng)] #256
		if train_for=='joint':
			nodeLabels[nt] = {}
			nodeLabels[nt]['detection'] = T.lmatrix()
			nodeLabels[nt]['anticipation'] = T.lmatrix()
			outputLayer[nt] = [softmax(num_classes,softmax_init,rng=rng),softmax(num_classes+1,softmax_init,rng=rng)]
		else:
			nodeLabels[nt] = T.lmatrix()
			outputLayer[nt] = [softmax(num_classes,softmax_init,rng=rng)]
		et = nt+'_input'
		edgeRNNs[et] = [TemporalInputFeatures(nodeFeatures[nt])]
	learning_rate = T.fscalar()
	dra = DRAanticipation(edgeRNNs,nodeRNNs,outputLayer,nodeToEdgeConnections,edgeListComplete,softmax_loss,nodeLabels,learning_rate,clipnorm,train_for=train_for)
	return dra
Example #4
def test_maxpool_layer_forward_pass():
    W_emb = [[0, 0, 0, 0, 1],
             [0, 0, 0, 1, 0],
             [0, 0, 1, 0, 0],
             [0, 1, 0, 0, 0]]
    W_emb = np.array(W_emb)

    W_dense = [[0, 0, 0, 0, 1, 0, 0, 0, 0, 1],
               [0, 0, 0, 1, 0, 0, 0, 0,-0.5, 0],
               [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
               [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
               [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]
    W_dense = np.array(W_dense, dtype=float).T

    bounds = T.lmatrix('bounds')
    X = T.lmatrix('X')

    l_in1 = InputLayer((None, 2), input_var=bounds)
    l_in2 = InputLayer((None, 2), input_var=X)
        
    h1 = lasagne.layers.EmbeddingLayer(l_in2, input_size=4, output_size=5, W=W_emb)
    h2 = lasagne.layers.FlattenLayer(h1)

    h3 = lasagne.layers.DenseLayer(h2, num_units=5, nonlinearity=rectify, W=W_dense)

    l_pool = MaxpoolLayer([l_in1, h3])

    predictions = get_output(l_pool)

    pred_func = theano.function([bounds, X], predictions, allow_input_downcast=True, on_unused_input='warn')

    test_bounds = np.array([[0, 4]])
    test_X = np.array([[0, 1], [0, 0], [1, 1], [3, 3]])

    print(pred_func(test_bounds, test_X))
Example #5
def multMatVect(v, A, m1, B, m2):
    # TODO : need description for parameter and return
    """
    Multiply the first half of v by A with a modulo of m1 and the second half
    by B with a modulo of m2.

    Notes
    -----
    The parameters of dot_modulo are passed implicitly because passing them
    explicitly takes more time than running the function's C-code.

    """
    if multMatVect.dot_modulo is None:
        A_sym = tensor.lmatrix('A')
        s_sym = tensor.ivector('s')
        m_sym = tensor.iscalar('m')
        A2_sym = tensor.lmatrix('A2')
        s2_sym = tensor.ivector('s2')
        m2_sym = tensor.iscalar('m2')
        o = DotModulo()(A_sym, s_sym, m_sym, A2_sym, s2_sym, m2_sym)
        multMatVect.dot_modulo = function(
            [A_sym, s_sym, m_sym, A2_sym, s2_sym, m2_sym], o, profile=False)

    # This way of calling the Theano fct is done to bypass Theano overhead.
    f = multMatVect.dot_modulo
    f.input_storage[0].storage[0] = A
    f.input_storage[1].storage[0] = v[:3]
    f.input_storage[2].storage[0] = m1
    f.input_storage[3].storage[0] = B
    f.input_storage[4].storage[0] = v[3:]
    f.input_storage[5].storage[0] = m2
    f.fn()
    r = f.output_storage[0].storage[0]

    return r
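A minimal driver for the helper above, sketched under assumptions: the Theano-era imports used in these examples (DotModulo, function) are in scope, `multMatVect.dot_modulo` is set to None right after the definition (as Theano's rng_mrg module does), and the moduli values are only illustrative.

import numpy as np

multMatVect.dot_modulo = None      # lazily-compiled graph cache, assumed initialized here

A = np.random.randint(0, 2 ** 31 - 1, (3, 3)).astype('int64')
B = np.random.randint(0, 2 ** 31 - 1, (3, 3)).astype('int64')
v = np.random.randint(0, 2 ** 31 - 1, 6).astype('int32')
m1 = np.int32(2147483647)          # illustrative modulus
m2 = np.int32(2147462579)          # illustrative modulus

r = multMatVect(v, A, m1, B, m2)   # r[:3] ~ A.dot(v[:3]) mod m1, r[3:] ~ B.dot(v[3:]) mod m2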
Example #6
def multMatVect(v, A, m1, B, m2):
    """
    multiply the first half of v by A with a modulo of m1
    and the second half by B with a modulo of m2

    Note: The parameters of dot_modulo are passed implicitly because passing
    them explicitly takes more time then running the function's C-code.
    """
    if multMatVect.dot_modulo is None:
        A_sym = tensor.lmatrix("A")
        s_sym = tensor.ivector("s")
        m_sym = tensor.iscalar("m")
        A2_sym = tensor.lmatrix("A2")
        s2_sym = tensor.ivector("s2")
        m2_sym = tensor.iscalar("m2")
        o = DotModulo()(A_sym, s_sym, m_sym, A2_sym, s2_sym, m2_sym)
        multMatVect.dot_modulo = function([A_sym, s_sym, m_sym, A2_sym, s2_sym, m2_sym], o)

    # This way of calling the Theano fct is done to bypass Theano overhead.
    f = multMatVect.dot_modulo
    f.input_storage[0].storage[0] = A
    f.input_storage[1].storage[0] = v[:3]
    f.input_storage[2].storage[0] = m1
    f.input_storage[3].storage[0] = B
    f.input_storage[4].storage[0] = v[3:]
    f.input_storage[5].storage[0] = m2
    f.fn()
    r = f.output_storage[0].storage[0]

    return r
Example #7
def test_multMatVect():
    A1 = tensor.lmatrix('A1')
    s1 = tensor.ivector('s1')
    m1 = tensor.iscalar('m1')
    A2 = tensor.lmatrix('A2')
    s2 = tensor.ivector('s2')
    m2 = tensor.iscalar('m2')

    g0 = rng_mrg.DotModulo()(A1, s1, m1, A2, s2, m2)
    f0 = theano.function([A1, s1, m1, A2, s2, m2], g0)

    i32max = numpy.iinfo(numpy.int32).max

    A1 = numpy.random.randint(0, i32max, (3, 3)).astype('int64')
    s1 = numpy.random.randint(0, i32max, 3).astype('int32')
    m1 = numpy.asarray(numpy.random.randint(i32max), dtype="int32")
    A2 = numpy.random.randint(0, i32max, (3, 3)).astype('int64')
    s2 = numpy.random.randint(0, i32max, 3).astype('int32')
    m2 = numpy.asarray(numpy.random.randint(i32max), dtype="int32")

    f0.input_storage[0].storage[0] = A1
    f0.input_storage[1].storage[0] = s1
    f0.input_storage[2].storage[0] = m1
    f0.input_storage[3].storage[0] = A2
    f0.input_storage[4].storage[0] = s2
    f0.input_storage[5].storage[0] = m2

    r_a1 = rng_mrg.matVecModM(A1, s1, m1)
    r_a2 = rng_mrg.matVecModM(A2, s2, m2)
    f0.fn()
    r_b = f0.output_storage[0].value

    assert numpy.allclose(r_a1, r_b[:3])
    assert numpy.allclose(r_a2, r_b[3:])
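For contrast with the input_storage fast path used above, a sketch of the ordinary call through the compiled function's Python entry point, reusing the arrays from the test (slower, since inputs are checked and converted):

r_plain = f0(A1, s1, m1, A2, s2, m2)
assert numpy.allclose(r_plain[:3], rng_mrg.matVecModM(A1, s1, m1))
assert numpy.allclose(r_plain[3:], rng_mrg.matVecModM(A2, s2, m2))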
Example #8
def jointModelOutput(num_sub_activities, num_affordances, num_sub_activities_anticipation,
		num_affordances_anticipation, inputJointFeatures, inputHumanFeatures, inputObjectFeatures):

	shared_input_layer = TemporalInputFeatures(inputJointFeatures)
	shared_hidden_layer = LSTM('tanh','sigmoid','orthogonal',4,128)
	#shared_hidden_layer = simpleRNN('tanh','orthogonal',4,128)
	shared_layers = [shared_input_layer,shared_hidden_layer]
	human_layers = [ConcatenateFeatures(inputHumanFeatures),LSTM('tanh','sigmoid','orthogonal',4,256)]
	object_layers = [ConcatenateFeatures(inputObjectFeatures),LSTM('tanh','sigmoid','orthogonal',4,256)]

	human_anticipation = [softmax(num_sub_activities_anticipation)]
	human_detection = [softmax(num_sub_activities)]

	object_anticipation = [softmax(num_affordances_anticipation)]
	object_detection = [softmax(num_affordances)]

	trY_1_detection = T.lmatrix()
	trY_2_detection = T.lmatrix()
	trY_1_anticipation = T.lmatrix()
	trY_2_anticipation = T.lmatrix()
	sharedrnn = SharedRNNOutput(
				shared_layers, human_layers, object_layers, 
				human_detection, human_anticipation, object_detection,
				object_anticipation, softmax_loss, trY_1_detection, 
				trY_2_detection,trY_1_anticipation,trY_2_anticipation,1e-3
				)
	return sharedrnn
Example #9
def test_blocksparse_grad_merge():
    b = tensor.fmatrix()
    h = tensor.ftensor3()
    iIdx = tensor.lmatrix()
    oIdx = tensor.lmatrix()

    W_val, h_val, iIdx_val, b_val, oIdx_val = blocksparse_data()
    W = float32_shared_constructor(W_val)

    o = sparse_block_gemv_ss(b.take(oIdx, axis=0), W, h, iIdx, oIdx)
    gW = theano.grad(o.sum(), W)

    lr = numpy.asarray(0.05, dtype='float32')

    upd = W - lr * gW

    f1 = theano.function([h, iIdx, b, oIdx], updates=[(W, upd)],
                         mode=mode_with_gpu)
    # not running with mode=gpu ensures that the elemwise is not merged in
    mode = None
    if theano.config.mode == 'FAST_COMPILE':
        mode = theano.compile.mode.get_mode('FAST_RUN')

    f2 = theano.function([h, iIdx, b, oIdx], updates=[(W, upd)], mode=mode)

    f2(h_val, iIdx_val, b_val, oIdx_val)
    W_ref = W.get_value()

    # reset the var
    W.set_value(W_val)
    f1(h_val, iIdx_val, b_val, oIdx_val)
    W_opt = W.get_value()

    utt.assert_allclose(W_ref, W_opt)
Example #10
def jointModel(num_sub_activities, num_affordances, inputJointFeatures,
               inputHumanFeatures, inputObjectFeatures):
    lstm_init = 'orthogonal'
    softmax_init = 'uniform'
    rng = np.random.RandomState(1234567890)

    shared_input_layer = TemporalInputFeatures(inputJointFeatures)
    shared_hidden_layer = LSTM('tanh', 'sigmoid', lstm_init, 4, 128, rng=rng)
    #shared_hidden_layer = simpleRNN('tanh','orthogonal',4,128)
    shared_layers = [shared_input_layer, shared_hidden_layer]
    human_layers = [
        ConcatenateFeatures(inputHumanFeatures),
        LSTM('tanh', 'sigmoid', lstm_init, 4, 256, rng=rng),
        softmax(num_sub_activities, softmax_init, rng=rng)
    ]
    object_layers = [
        ConcatenateFeatures(inputObjectFeatures),
        LSTM('tanh', 'sigmoid', lstm_init, 4, 256, rng=rng),
        softmax(num_affordances, softmax_init, rng=rng)
    ]

    trY_1 = T.lmatrix()
    trY_2 = T.lmatrix()
    sharedrnn = SharedRNN(shared_layers, human_layers, object_layers,
                          softmax_loss, trY_1, trY_2, 1e-3)
    return sharedrnn
Example #11
def test_multMatVect():
    A1 = tensor.lmatrix('A1')
    s1 = tensor.ivector('s1')
    m1 = tensor.iscalar('m1')
    A2 = tensor.lmatrix('A2')
    s2 = tensor.ivector('s2')
    m2 = tensor.iscalar('m2')

    g0 = rng_mrg.DotModulo()(A1, s1, m1, A2, s2, m2)
    f0 = theano.function([A1, s1, m1, A2, s2, m2], g0)

    i32max = np.iinfo(np.int32).max

    A1 = np.random.randint(0, i32max, (3, 3)).astype('int64')
    s1 = np.random.randint(0, i32max, 3).astype('int32')
    m1 = np.asarray(np.random.randint(i32max), dtype="int32")
    A2 = np.random.randint(0, i32max, (3, 3)).astype('int64')
    s2 = np.random.randint(0, i32max, 3).astype('int32')
    m2 = np.asarray(np.random.randint(i32max), dtype="int32")

    f0.input_storage[0].storage[0] = A1
    f0.input_storage[1].storage[0] = s1
    f0.input_storage[2].storage[0] = m1
    f0.input_storage[3].storage[0] = A2
    f0.input_storage[4].storage[0] = s2
    f0.input_storage[5].storage[0] = m2

    r_a1 = rng_mrg.matVecModM(A1, s1, m1)
    r_a2 = rng_mrg.matVecModM(A2, s2, m2)
    f0.fn()
    r_b = f0.output_storage[0].value

    assert np.allclose(r_a1, r_b[:3])
    assert np.allclose(r_a2, r_b[3:])
Example #12
    def train_minibatch_fn(self, evaluate=False):
        """
        Initialize this Theano function once
        """
        X = T.lmatrix('X_train')
        L_x = T.lvector('L_X_train')

        Y = T.lmatrix('Y_train')
        L_y = T.lvector('L_y_train')

        learning_rate = T.dscalar('learning_rate')
        momentum = T.dscalar('momentum')
        weight_decay = T.dscalar('weight_decay')

        loss, accuracy = self.loss(X, L_x, Y, L_y, weight_decay)
        updates = self.get_sgd_updates(loss, learning_rate, momentum)

        outputs = [loss, accuracy]

        if evaluate:
            precision, recall = self.evaluate(X, L_x, Y, L_y)
            outputs = outputs + [precision, recall]

        return theano.function(
            inputs=[X, L_x, Y, L_y, learning_rate, momentum, weight_decay],
            outputs=outputs,
            updates=updates
        )
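A hypothetical call pattern for the compiled function returned above; the `model` instance, the batch arrays, and the hyperparameter values are assumptions for illustration. X/Y are int64 index matrices and L_x/L_y int64 length vectors, matching the lmatrix/lvector declarations.

train_fn = model.train_minibatch_fn(evaluate=True)   # `model` is assumed here
loss, accuracy, precision, recall = train_fn(
    X_batch, L_x_batch, Y_batch, L_y_batch,
    0.01,    # learning_rate
    0.9,     # momentum
    1e-4)    # weight_decay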
Example #13
def multMatVect(v, A, m1, B, m2):
    # TODO : need description for parameter and return
    """
    Multiply the first half of v by A with a modulo of m1 and the second half
    by B with a modulo of m2.

    Notes
    -----
    The parameters of dot_modulo are passed implicitly because passing them
    explicitly takes more time than running the function's C-code.

    """
    if multMatVect.dot_modulo is None:
        A_sym = tensor.lmatrix("A")
        s_sym = tensor.ivector("s")
        m_sym = tensor.iscalar("m")
        A2_sym = tensor.lmatrix("A2")
        s2_sym = tensor.ivector("s2")
        m2_sym = tensor.iscalar("m2")
        o = DotModulo()(A_sym, s_sym, m_sym, A2_sym, s2_sym, m2_sym)
        multMatVect.dot_modulo = function(
            [A_sym, s_sym, m_sym, A2_sym, s2_sym, m2_sym], o, profile=False)

    # This way of calling the Theano fct is done to bypass Theano overhead.
    f = multMatVect.dot_modulo
    f.input_storage[0].storage[0] = A
    f.input_storage[1].storage[0] = v[:3]
    f.input_storage[2].storage[0] = m1
    f.input_storage[3].storage[0] = B
    f.input_storage[4].storage[0] = v[3:]
    f.input_storage[5].storage[0] = m2
    f.fn()
    r = f.output_storage[0].storage[0]

    return r
Example #14
    def arch_memnet_selfsup(self):
        '''
        memory net with self supervision.
        '''
        contexts = T.ltensor3('contexts')
        querys = T.lmatrix('querys')
        yvs = T.lmatrix('yvs')

        params = []
        question_layer = Embed(self.vocab_size, self.hidden_dim)
        q = T.reshape(question_layer(querys.flatten()),
                      (self.batchsize, self.sen_maxlen, self.hidden_dim)
                      )
        if self.kwargs.get('position_encoding'):
            lmat = position_encoding(self.sen_maxlen, self.hidden_dim).dimshuffle('x', 0, 1)
            print('[memory network] use PE')
            q = q * lmat
        u = mean(q, axis=1)
        params.extend(question_layer.params)

        mem_layer = MemoryLayer(self.batchsize, self.mem_size, self.unit_size, self.vocab_size, self.hidden_dim,
                                **self.kwargs)
        probs = mem_layer.get_probs(contexts, u).dimshuffle(0, 2)

        inputs = {
            'contexts': contexts,
            'querys': querys,
            'yvs': yvs,
            'cvs': T.lmatrix('cvs')
        }
        return (probs, inputs, params)
Example #15
def get_sampling_model_and_input(exp_config):
    # Create Theano variables
    encoder = BidirectionalEncoder(
        exp_config['src_vocab_size'], exp_config['enc_embed'], exp_config['enc_nhids'])

    decoder = Decoder(
        exp_config['trg_vocab_size'], exp_config['dec_embed'], exp_config['dec_nhids'],
        exp_config['enc_nhids'] * 2,
        loss_function='min_risk'
    )

    # Create Theano variables
    logger.info('Creating theano variables')
    sampling_source_input = tensor.lmatrix('source')
    sampling_target_prefix_input = tensor.lmatrix('target')

    # Get beam search
    logger.info("Building sampling model")
    sampling_representation = encoder.apply(
        sampling_source_input, tensor.ones(sampling_source_input.shape))

    generated = decoder.generate(sampling_source_input, sampling_representation,
                                 target_prefix=sampling_target_prefix_input)

    # build the model that will let us get a theano function from the sampling graph
    logger.info("Creating Sampling Model...")
    sampling_model = Model(generated)

    # Set the parameters from a trained models
    logger.info("Loading parameters from model: {}".format(exp_config['saved_parameters']))
    # load the parameter values from an .npz file
    param_values = LoadNMT.load_parameter_values(exp_config['saved_parameters'], brick_delimiter='-')
    LoadNMT.set_model_parameters(sampling_model, param_values)

    return sampling_model, sampling_source_input, encoder, decoder
Example #16
def create_phones_encoder(config):
    encoder = BidirectionalPhonesEncoder(config['phones_vocab_size'],
                                         config['enc_embed'],
                                         config['enc_nhids'])
    encoder.weights_init = IsotropicGaussian(config['weight_scale'])
    encoder.biases_init = Constant(0)
    encoder.push_initialization_config()
    encoder.bidir.prototype.weights_init = Orthogonal()
    encoder.embedding.prototype.weights_init = Orthogonal()
    encoder.initialize()

    phones = tensor.lmatrix('phones')
    phones_mask = tensor.matrix('phones_mask')
    phones_words_ends = tensor.lmatrix('phones_words_ends')
    phones_words_ends_mask = tensor.matrix('phones_words_ends_mask')
    training_representation = encoder.apply(phones, phones_mask,
                                            phones_words_ends,
                                            phones_words_ends_mask)
    training_representation.name = "phones_representation"

    sampling_phones = tensor.lmatrix('sampling_phones')
    sampling_phones_mask = tensor.ones(
        (sampling_phones.shape[0], sampling_phones.shape[1]))
    sampling_phones_words_ends = tensor.lmatrix('sampling_phones_words_ends')
    sampling_phones_words_ends_mask = tensor.ones(
        (sampling_phones_words_ends.shape[0],
         sampling_phones_words_ends.shape[1]))
    sampling_representation = encoder.apply(sampling_phones,
                                            sampling_phones_mask,
                                            sampling_phones_words_ends,
                                            sampling_phones_words_ends_mask)

    return encoder, training_representation, sampling_representation
Example #17
def create_audio_encoder(config):
    encoder = BidirectionalAudioEncoder(config['audio_feat_size'],
                                        config['enc_embed'],
                                        config['enc_nhids'])
    encoder.weights_init = IsotropicGaussian(config['weight_scale'])
    encoder.biases_init = Constant(0)
    encoder.push_initialization_config()
    encoder.bidir.prototype.weights_init = Orthogonal()
    encoder.embedding.prototype.weights_init = Orthogonal()
    encoder.initialize()

    audio = tensor.ftensor3('audio')
    audio_mask = tensor.matrix('audio_mask')
    words_ends = tensor.lmatrix('words_ends')
    words_ends_mask = tensor.matrix('words_ends_mask')
    training_representation = encoder.apply(audio, audio_mask, words_ends,
                                            words_ends_mask)
    training_representation.name = "audio_representation"

    sampling_audio = tensor.ftensor3('sampling_audio')
    sampling_audio_mask = tensor.ones(
        (sampling_audio.shape[0], sampling_audio.shape[1]))
    sampling_words_ends = tensor.lmatrix('sampling_words_ends')
    sampling_words_ends_mask = tensor.ones(
        (sampling_words_ends.shape[0], sampling_words_ends.shape[1]))
    sampling_representation = encoder.apply(sampling_audio,
                                            sampling_audio_mask,
                                            sampling_words_ends,
                                            sampling_words_ends_mask)

    return encoder, training_representation, sampling_representation
Example #18
def DRAmodelnoedge(nodeList,
                   edgeList,
                   edgeListComplete,
                   edgeFeatures,
                   nodeFeatures,
                   nodeToEdgeConnections,
                   clipnorm=25.0,
                   train_for='joint'):
    edgeRNNs = {}
    edgeTypes = edgeList
    lstm_init = 'orthogonal'
    softmax_init = 'uniform'

    rng = np.random.RandomState(1234567890)

    for et in edgeTypes:
        inputJointFeatures = edgeFeatures[et]
        print(inputJointFeatures)
        edgeRNNs[et] = [TemporalInputFeatures(inputJointFeatures)]  #128

    nodeRNNs = {}
    nodeTypes = nodeList.keys()
    nodeLabels = {}
    outputLayer = {}
    for nt in nodeTypes:
        num_classes = nodeList[nt]
        #nodeRNNs[nt] = [LSTM('tanh','sigmoid',lstm_init,truncate_gradient=4,size=256,rng=rng),softmax(num_classes,softmax_init,rng=rng)] #256
        nodeRNNs[nt] = [
            LSTM('tanh',
                 'sigmoid',
                 lstm_init,
                 truncate_gradient=4,
                 size=args.nodeRNN_size,
                 rng=rng)
        ]  #256
        if train_for == 'joint':
            nodeLabels[nt] = {}
            nodeLabels[nt]['detection'] = T.lmatrix()
            nodeLabels[nt]['anticipation'] = T.lmatrix()
            outputLayer[nt] = [
                softmax(num_classes, softmax_init, rng=rng),
                softmax(num_classes + 1, softmax_init, rng=rng)
            ]
        else:
            nodeLabels[nt] = T.lmatrix()
            outputLayer[nt] = [softmax(num_classes, softmax_init, rng=rng)]
        et = nt + '_input'
        edgeRNNs[et] = [TemporalInputFeatures(nodeFeatures[nt])]
    learning_rate = T.fscalar()
    dra = DRAanticipation(edgeRNNs,
                          nodeRNNs,
                          outputLayer,
                          nodeToEdgeConnections,
                          edgeListComplete,
                          softmax_loss,
                          nodeLabels,
                          learning_rate,
                          clipnorm,
                          train_for=train_for)
    return dra
Example #19
def multMatVect(v, A, m1, B, m2):
    """
    multiply the first half of v by A with a modulo of m1
    and the second half by B with a modulo of m2

    Note: The parameters of dot_modulo are passed implicitly because passing
    them explicitly takes more time then running the function's C-code.
    """
    if multMatVect.dot_modulo is None:
        A_sym = tensor.lmatrix('A')
        s_sym = tensor.ivector('s')
        m_sym = tensor.iscalar('m')
        A2_sym = tensor.lmatrix('A2')
        s2_sym = tensor.ivector('s2')
        m2_sym = tensor.iscalar('m2')
        o = DotModulo()(A_sym, s_sym, m_sym, A2_sym, s2_sym, m2_sym)
        multMatVect.dot_modulo = function(
            [A_sym, s_sym, m_sym, A2_sym, s2_sym, m2_sym], o)

    # This way of calling the Theano fct is done to bypass Theano overhead.
    f = multMatVect.dot_modulo
    f.input_storage[0].storage[0] = A
    f.input_storage[1].storage[0] = v[:3]
    f.input_storage[2].storage[0] = m1
    f.input_storage[3].storage[0] = B
    f.input_storage[4].storage[0] = v[3:]
    f.input_storage[5].storage[0] = m2
    f.fn()
    r = f.output_storage[0].storage[0]

    return r
Example #20
def main(config, tr_stream):
    # Create Theano variables
    logger.info('Creating theano variables')
    source_char_seq = tensor.lmatrix('source_char_seq')
    source_sample_matrix = tensor.btensor3('source_sample_matrix')
    source_char_aux = tensor.bmatrix('source_char_aux')
    source_word_mask = tensor.bmatrix('source_word_mask')
    target_char_seq = tensor.lmatrix('target_char_seq')
    target_char_aux = tensor.bmatrix('target_char_aux')
    target_char_mask = tensor.bmatrix('target_char_mask')
    target_sample_matrix = tensor.btensor3('target_sample_matrix')
    target_word_mask = tensor.bmatrix('target_word_mask')
    target_resample_matrix = tensor.btensor3('target_resample_matrix')
    target_prev_char_seq = tensor.lmatrix('target_prev_char_seq')
    target_prev_char_aux = tensor.bmatrix('target_prev_char_aux')
    target_bos_idx = tr_stream.trg_bos
    target_space_idx = tr_stream.space_idx['target']
    src_vocab = pickle.load(open(config['src_vocab'], 'rb'))

    logger.info('Building RNN encoder-decoder')
    encoder = BidirectionalEncoder(config['src_vocab_size'], config['enc_embed'], config['src_dgru_nhids'],
                                   config['enc_nhids'], config['src_dgru_depth'], config['bidir_encoder_depth'])

    decoder = Decoder(config['trg_vocab_size'], config['dec_embed'], config['trg_dgru_nhids'], config['trg_igru_nhids'],
                      config['dec_nhids'], config['enc_nhids'] * 2, config['transition_depth'], config['trg_igru_depth'],
                      config['trg_dgru_depth'], target_space_idx, target_bos_idx)

    representation = encoder.apply(source_char_seq, source_sample_matrix, source_char_aux,
                                   source_word_mask)
    cost = decoder.cost(representation, source_word_mask, target_char_seq, target_sample_matrix,
                        target_resample_matrix, target_char_aux, target_char_mask,
                        target_word_mask, target_prev_char_seq, target_prev_char_aux)

    # Set up model
    logger.info("Building model")
    training_model = Model(cost)

    # Set extensions
    logger.info("Initializing extensions")
    # Reload model if necessary
    extensions = [LoadNMT(config['saveto'])]

    # Initialize main loop
    logger.info("Initializing main loop")
    main_loop = MainLoop(
        model=training_model,
        algorithm=None,
        data_stream=None,
        extensions=extensions
    )

    for extension in main_loop.extensions:
        extension.main_loop = main_loop
    main_loop._run_extensions('before_training')

    char_embedding = encoder.decimator.apply(source_char_seq.T, source_sample_matrix, source_char_aux.T)
    embedding(Model(char_embedding), src_vocab)
Example #21
    def _generate_train_model_item_function(self):
        u = T.lvector('u')
        i = T.lmatrix('i')
        j = T.lmatrix('j')
        n1 = T.lvector('n1')
        n2 = T.lvector('n2')
        di = T.dvector('di')
        dj = T.dvector('dj')

        self.W1 = bpr_item.W
        self.H1 = theano.shared(H_item.astype('float32'), name='H')
        self.B1 = theano.shared(B_item.astype('float32'), name='B')

        self.M1 = theano.shared(numpy.random.random(
            (self._rank, self._rank)).astype('float64'),
                                name='M1')
        self.M2 = theano.shared(numpy.random.random(
            (self._rank, self._rank)).astype('float64'),
                                name='M2')
        self.K = theano.shared(numpy.random.rand(), name='K')
        self.D = theano.shared(numpy.random.rand(), name='D')
        self.N = theano.shared(numpy.random.random(
            self._bundle_rank).astype('float32'),
                               name='N')

        x_ui = T.dot(
            T.dot(self.W1[u], self.M2),
            T.dot(self.M1, self.H1[i].sum(axis=1).T /
                  n1)).diagonal() + self.K * (self.B1[i].T / n1).T.sum(
                      axis=1) + self.N[n1] + self.D * di
        x_uj = T.dot(
            T.dot(self.W1[u], self.M2),
            T.dot(self.M1, self.H1[j].sum(axis=1).T /
                  n2)).diagonal() + self.K * (self.B1[j].T / n2).T.sum(
                      axis=1) + self.N[n2] + self.D * dj

        x_uij = T.nnet.sigmoid(x_ui - x_uj)
        obj = T.sum(T.log(x_uij) - self._lambda_u * (self.M1 ** 2).sum() - \
                    self._lambda_u * (self.M2 ** 2).sum()  - self._lambda_d * (self.K**2) - self._lambda_d * (self.D**2)\
                    -self._lambda_p * (self.N[n2]**2) - self._lambda_p * (self.N[n1]**2))
        cost = -obj

        g_cost_M1 = T.grad(cost=cost, wrt=self.M1)
        g_cost_M2 = T.grad(cost=cost, wrt=self.M2)
        g_cost_K = T.grad(cost=cost, wrt=self.K)
        g_cost_N = T.grad(cost=cost, wrt=self.N)
        g_cost_D = T.grad(cost=cost, wrt=self.D)

        updates = [(self.M1, self.M1 - self._learning_rate * .001 * g_cost_M1),
                   (self.M2, self.M2 - self._learning_rate * .001 * g_cost_M2),
                   (self.K, self.K - self._learning_rate * .001 * g_cost_K),
                   (self.N, self.N - self._learning_rate * g_cost_N),
                   (self.D, self.D - self._learning_rate * g_cost_D)]

        self.train_model_item = theano.function(
            inputs=[u, i, j, n1, n2, di, dj], outputs=cost, updates=updates)
Example #22
    def get_fns(self,
                input_dim=123,
                p_learning_rate=0.01,
                d_learning_rate=0.0001,
                p=0.23928176569346055):
        x = T.lmatrix('X')
        y = T.vector('y')
        m = T.lmatrix('mask_tr')
        primal_updates, loss_weighed, \
            reward, primal_var = self.primal_step(x,
                                                  y,
                                                  p_learning_rate,
                                                  input_dim, p, mask=m)
        [r, q] = primal_var
        dual_updates = self.dual_class.dual_updates(r=r, q=q)
        updates = primal_updates, dual_updates
        pu, du = updates

        primal_train_fn = theano.function([x, y, m], [r[0], self.alpha[0]],
                                          updates=primal_updates,
                                          name="Primal Train")
        dual_train_fn = theano.function([], [self.alpha[0], self.beta[0]],
                                        updates=dual_updates,
                                        name="Dual Train")

        def train_fn(x, y, mask):
            r0_d, r1_d = primal_train_fn(x, y, mask.transpose())
            alpha_d, beta_d = dual_train_fn()
            return alpha_d, beta_d

        # Calculate Validation in batch_mode for speedup
        x_mat = T.lmatrix('x_mat')
        y_mat = T.vector('y_mat')
        mask_mat = T.lmatrix('mask_te')
        pred_labels = self.calc_cost(self.model, x_mat, y_mat, mask_mat)

        valid_th_fns = theano.function([x_mat, mask_mat], pred_labels)

        def valid_fns(X_mat, Y_mat, mask_mat, flag=0):
            Y_mat = Y_mat.ravel()
            pred_labels = valid_th_fns(X_mat, mask_mat).ravel()
            # print pred_labels, Y_mat
            # print np.sum(pred_labels == 0), np.sum(pred_labels == 1),
            # print np.sum(Y_mat == 1)
            # TPR = np.sum((pred_labels > 0.5) * 1.0 *
            #             (Y_mat == 1)) / np.sum(Y_mat == 1)
            # TNR = np.sum((pred_labels <= 0.5) * 1.0 *
            #             (Y_mat == 0)) / np.sum(Y_mat == 0)
            # print "TPR, TNR below"
            #P = np.mean(pred_labels)
            #N = np.mean(1 - pred_labels)
            # print TPR, TNR, np.sum(pred_labels), P, N
            return self.dual_class.perf(pred_labels, Y_mat, flag), pred_labels

        return train_fn, valid_fns
Example #23
    def test_correct_solution(self):
        x = tensor.lmatrix()
        y = tensor.lmatrix()
        z = tensor.lscalar()
        b = theano.tensor.nlinalg.lstsq()(x, y, z)
        f = function([x, y, z], b)
        TestMatrix1 = np.asarray([[2, 1], [3, 4]])
        TestMatrix2 = np.asarray([[17, 20], [43, 50]])
        TestScalar = np.asarray(1)
        f = function([x, y, z], b)
        m = f(TestMatrix1, TestMatrix2, TestScalar)
        self.assertTrue(np.allclose(TestMatrix2, np.dot(TestMatrix1, m[0])))
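As an aside (not part of the test), the same least-squares problem checked with NumPy alone: the exact solution X of [[2, 1], [3, 4]] @ X = [[17, 20], [43, 50]] is [[5, 6], [7, 8]].

import numpy as np

A = np.array([[2, 1], [3, 4]], dtype=float)
B = np.array([[17, 20], [43, 50]], dtype=float)
X, residuals, rank, sv = np.linalg.lstsq(A, B, rcond=None)
assert np.allclose(A @ X, B)
assert np.allclose(X, [[5.0, 6.0], [7.0, 8.0]])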
Example #24
def test_blocksparse_gpu_gemv_opt():
    b = tensor.fmatrix()
    W = tensor.ftensor4()
    h = tensor.ftensor3()
    iIdx = tensor.lmatrix()
    oIdx = tensor.lmatrix()

    o = sparse_block_dot(W, h, iIdx, b, oIdx)

    f = theano.function([W, h, iIdx, b, oIdx], o, mode=mode_with_gpu)

    assert isinstance(f.maker.fgraph.toposort()[-2].op, GpuSparseBlockGemv)
Example #25
def test_blocksparse_gpu_gemv_opt():
    b = tensor.fmatrix()
    W = tensor.ftensor4()
    h = tensor.ftensor3()
    iIdx = tensor.lmatrix()
    oIdx = tensor.lmatrix()

    o = sparse_block_dot(W, h, iIdx, b, oIdx)

    f = theano.function([W, h, iIdx, b, oIdx], o, mode=mode_with_gpu)

    assert isinstance(f.maker.fgraph.toposort()[-2].op, GpuSparseBlockGemv)
Example #26
    def test_correct_solution(self):
        x = tensor.lmatrix()
        y = tensor.lmatrix()
        z = tensor.lscalar()
        b = theano.tensor.nlinalg.lstsq()(x, y, z)
        f = function([x, y, z], b)
        TestMatrix1 = np.asarray([[2, 1], [3, 4]])
        TestMatrix2 = np.asarray([[17, 20], [43, 50]])
        TestScalar = np.asarray(1)
        f = function([x, y, z], b)
        m = f(TestMatrix1, TestMatrix2, TestScalar)
        self.assertTrue(np.allclose(TestMatrix2, np.dot(TestMatrix1, m[0])))
Example #27
def test_blocksparse_gpu_gemv_opt():
    b = tensor.fmatrix()
    W = tensor.ftensor4()
    h = tensor.ftensor3()
    iIdx = tensor.lmatrix()
    oIdx = tensor.lmatrix()

    o = sparse_block_dot(W, h, iIdx, b, oIdx)

    f = theano.function([W, h, iIdx, b, oIdx], o, mode=mode_with_gpu)

    assert sum(1 for n in f.maker.fgraph.apply_nodes
               if isinstance(n.op, GpuSparseBlockGemv)) == 1
Example #28
def test_blocksparse_gpu_gemv_opt():
    b = tensor.fmatrix()
    W = tensor.ftensor4()
    h = tensor.ftensor3()
    iIdx = tensor.lmatrix()
    oIdx = tensor.lmatrix()

    o = sparse_block_dot(W, h, iIdx, b, oIdx)

    f = theano.function([W, h, iIdx, b, oIdx], o, mode=mode_with_gpu)

    assert sum(1 for n in f.maker.fgraph.apply_nodes
               if isinstance(n.op, GpuSparseBlockGemv)) == 1
Example #29
def test7():
    A = T.lmatrix("A")
    A_start = T.lvector("A_start")
    f = T.lmatrix("f")
    tgt = T.ivector("tgt")
    v = Viterbi(A , A_start , f , tgt)
    decode = v.decode()
    ff = theano.function([A , A_start , f , tgt] , outputs = v.apply())
    ff2 = theano.function([A , A_start , f , tgt] , decode)
    print(ff2([[1, 3, 1], [1, 2, 2], [2, 1, 3]],
              [1, 2, 1],
              [[1, 2, 3], [2, 2, 1], [3, 3, 2], [1, 1, 2]],
              [1, 2, 1, 2]))
Example #30
    def setup_backprop(self):
        eta = T.scalar('eta_for_backprop')
        x = T.lvector('x_for_backprop')
        y = T.lvector('y_for_backprop')
        y_in_x_inds = T.lmatrix('y_in_x_inds_for_backprop')
        y_in_src_inds = T.lmatrix('y_in_src_inds_for_backprop')
        y_in_domain = T.lmatrix('y_in_domain_for_backprop')
        l2_reg = T.scalar('l2_reg_for_backprop')

        # Normal operation
        dec_init_state, annotations = self._symb_encoder(x)
        nll, p_y_seq, objective, updates = self._setup_backprop_with(
            dec_init_state, annotations, y, y_in_x_inds, y_in_src_inds,
            y_in_domain, eta, l2_reg)
        self._get_nll = theano.function(
            inputs=[x, y, y_in_x_inds, y_in_src_inds, y_in_domain],
            outputs=nll,
            on_unused_input='warn')
        self._backprop = theano.function(inputs=[
            x, y, eta, y_in_x_inds, y_in_src_inds, y_in_domain, l2_reg
        ],
                                         outputs=[p_y_seq, objective],
                                         updates=updates,
                                         on_unused_input='warn')
        # Add distractors
        self._get_nll_distract = []
        self._backprop_distract = []
        if self.distract_num > 0:
            x_distracts = [
                T.lvector('x_distract_%d_for_backprop' % i)
                for i in range(self.distract_num)
            ]
            all_annotations = [annotations]
            for i in range(self.distract_num):
                _, annotations_distract = self._symb_encoder(x_distracts[i])
                all_annotations.append(annotations_distract)
            annotations_with_distract = T.concatenate(all_annotations, axis=0)
            nll_d, p_y_seq_d, objective_d, updates_d = self._setup_backprop_with(
                dec_init_state, annotations_with_distract, y, y_in_x_inds,
                y_in_src_inds, y_in_domain, eta, l2_reg)
            self._get_nll_distract = theano.function(
                inputs=[x, y, y_in_x_inds, y_in_src_inds, y_in_domain] +
                x_distracts,
                outputs=nll_d,
                on_unused_input='warn')
            self._backprop_distract = theano.function(
                inputs=[
                    x, y, eta, y_in_x_inds, y_in_src_inds, y_in_domain, l2_reg
                ] + x_distracts,
                outputs=[p_y_seq_d, objective_d],
                updates=updates_d)
Example #31
def test_blocksparse_gpu_outer_opt():
    b = tensor.fmatrix()
    W = tensor.ftensor4()
    h = tensor.ftensor3()
    iIdx = tensor.lmatrix()
    oIdx = tensor.lmatrix()

    o = sparse_block_dot(W, h, iIdx, b, oIdx)

    f = theano.function([W, h, iIdx, b, oIdx],
                        [o, tensor.grad(o.sum(), wrt=W)],
                        mode=mode_with_gpu)

    assert isinstance(f.maker.fgraph.toposort()[-2].op, GpuSparseBlockOuter)
Example #32
def test_blocksparse_gpu_outer_opt():
    b = tensor.fmatrix()
    W = tensor.ftensor4()
    h = tensor.ftensor3()
    iIdx = tensor.lmatrix()
    oIdx = tensor.lmatrix()

    o = sparse_block_dot(W, h, iIdx, b, oIdx)

    f = theano.function([W, h, iIdx, b, oIdx], [o, tensor.grad(o.sum(),
                                                               wrt=W)],
                        mode=mode_with_gpu)

    assert isinstance(f.maker.fgraph.toposort()[-2].op, GpuSparseBlockOuter)
Example #33
    def _generate_test_model_function(self):
        u = T.lvector('u')
        i = T.lmatrix('i')
        j = T.lmatrix('j')
        n1 = T.lvector('n1')
        n2 = T.lvector('n2')
        di = T.dvector('di')
        dj = T.dvector('dj')

        x_ui = (T.dot(T.dot(self.W1[u], self.M2),
                      T.dot(self.M1, self.H1[i].sum(axis=1).T / n1)).diagonal()
                + self.K * (self.B1[i].T / n1).T.sum(axis=1)
                + self.N[n1] + self.D * di)
        x_uj = (T.dot(T.dot(self.W1[u], self.M2),
                      T.dot(self.M1, self.H1[j].sum(axis=1).T / n2)).diagonal()
                + self.K * (self.B1[j].T / n2).T.sum(axis=1)
                + self.N[n2] + self.D * dj)

        x_uij = x_ui - x_uj
        self.test_model = theano.function(inputs=[u, i, j, n1, n2, di, dj],
                                          outputs=x_uij)
Example #34
def test_blocksparse_inplace_gemv_opt():
    b = tensor.fmatrix()
    W = tensor.ftensor4()
    h = tensor.ftensor3()
    iIdx = tensor.lmatrix()
    oIdx = tensor.lmatrix()

    o = sparse_block_dot(W, h, iIdx, b, oIdx)

    f = theano.function([W, h, iIdx, b, oIdx], o)

    if theano.config.mode == "FAST_COMPILE":
        assert not f.maker.fgraph.toposort()[-1].op.inplace
    else:
        assert f.maker.fgraph.toposort()[-1].op.inplace
Example #35
    def getAlignment(self):
        unk_idx = self.config['unk_id']
        source_sentence = tensor.lmatrix('source')
        target_sentence = tensor.lmatrix('target')

        ftrans = open('/Users/lqy/Documents/transout.txt','w',0)

        falign = gzip.open('/Users/lqy/Documents/alignmentout','w',0)

        sampling_representation = encoder.apply(source_sentence, tensor.ones(source_sentence.shape))

        for i, line in enumerate(self.data_stream.get_epoch_iterator()):
            seq = self._oov_to_unk(line[0], self.config['src_vocab_size'], unk_idx)
            input_ = numpy.tile(seq, (config['beam_size'], 1))
            print "input_: ",input_
Example #36
def test_lookup_table():
    lt = LookupTable(5, 3)
    lt.allocate()

    lt.W.set_value(numpy.arange(15).reshape(5, 3).astype(theano.config.floatX))

    x = tensor.lmatrix("x")
    y = lt.apply(x)
    f = theano.function([x], [y])

    x_val = [[1, 2], [0, 3]]
    desired = numpy.array([[[3, 4, 5], [6, 7, 8]], [[0, 1, 2], [9, 10, 11]]],
                          dtype=theano.config.floatX)
    assert_equal(f(x_val)[0], desired)

    # Test get_dim
    assert_equal(lt.get_dim(lt.apply.inputs[0]), 0)
    assert_equal(lt.get_dim(lt.apply.outputs[0]), lt.dim)
    assert_raises(ValueError, lt.get_dim, 'random_name')

    # Test feedforward interface
    assert lt.input_dim == 0
    assert lt.output_dim == 3
    lt.output_dim = 4
    assert lt.output_dim == 4

    def assign_input_dim():
        lt.input_dim = 11

    assert_raises(ValueError, assign_input_dim)
    lt.input_dim = 0
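The lookup itself is plain integer row indexing into W; a NumPy sketch of the values the compiled function above returns for x_val:

import numpy

W = numpy.arange(15).reshape(5, 3)       # same weights as lt.W above
x_val = numpy.array([[1, 2], [0, 3]])
out = W[x_val]                           # shape (2, 2, 3)
assert (out == numpy.array([[[3, 4, 5], [6, 7, 8]],
                            [[0, 1, 2], [9, 10, 11]]])).all()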
Example #37
def test_ctc_targets():
    LENGTH = 20
    BATCHES = 4
    CLASSES = 2
    y_hat = T.tensor3('features')
    input_mask = T.matrix('features_mask')
    y_hat_mask = input_mask
    y = T.lmatrix('phonemes')
    y_mask = T.matrix('phonemes_mask')
    ctc_target = ctc_cost.get_targets(y, T.log(y_hat), y_mask, y_hat_mask)
    Y_hat = np.zeros((LENGTH, BATCHES, CLASSES + 1), dtype=floatX)
    Y_hat[:, :, 0] = .7
    Y_hat[:, :, 1] = .2
    Y_hat[:, :, 2] = .1
    Y_hat[3, :, 0] = .3
    Y_hat[3, :, 1] = .4
    Y_hat[3, :, 2] = .3
    Y = np.zeros((2, BATCHES), dtype='int64')
    Y_hat_mask = np.ones((LENGTH, BATCHES), dtype=floatX)
    Y_hat_mask[-5:] = 0
    # default blank symbol is the highest class index (3 in this case)
    Y_mask = np.asarray(np.ones_like(Y), dtype=floatX)
    target = ctc_target.eval({
        y_hat: Y_hat,
        y: Y,
        y_hat_mask: Y_hat_mask,
        y_mask: Y_mask
    })
    # Note that this part is the same as the cross entropy gradient
    grad = -target / Y_hat
    test_grad = finite_diff(Y, Y_hat, Y_mask, Y_hat_mask, eps=1e-2, n_steps=5)
    testing.assert_almost_equal(grad.flatten()[:5],
                                test_grad.flatten()[:5],
                                decimal=3)
Example #38
def test10():
    src = T.ltensor3("src")
    tgt = T.lmatrix("tgt")
    mask = T.matrix("mask")
    prd = T.matrix("prd")
    n_hids, vocab_size = 3, 60
    hs = HierarchicalSoftmax(src, n_hids, vocab_size)
    #prd = hs.test()
    res = hs.cost(tgt, mask)
    x = [
            [[1,1,1],[2,2,2],[3,3,3],[4,4,4]],
            [[3,3,3],[4,4,4],[5,5,5],[6,6,6]]
        ]
    y = [
            [1,1,1,1],
            [1,1,1,1]
        ]
    m = [
            [1,1,0,0],
            [1,1,0,0]
        ]
    fn3 = theano.function(inputs=[src,tgt,mask], outputs=[res], on_unused_input='ignore')
    res = fn3(x,y,m)
    print(res, res[0].shape)
    x_a = np.array(x)
    print(x_a.shape, x_a[y])
Example #39
def test_ctc_log_path_probabs():
    LENGTH = 10
    BATCHES = 3
    CLASSES = 2
    N_LABELS = 3
    y_hat = T.tensor3('features')
    input_mask = T.matrix('features_mask')
    y_hat_mask = input_mask
    y = T.lmatrix('phonemes')
    y_mask = T.matrix('phonemes_mask')
    blanked_y, blanked_y_mask = ctc_cost._add_blanks(
        y=y,
        blank_symbol=1,
        y_mask=y_mask)
    p = ctc_cost._log_path_probabs(blanked_y, y_hat, blanked_y_mask, y_hat_mask, 1)
    Y_hat = np.zeros((LENGTH, BATCHES, CLASSES + 1), dtype=floatX)
    Y_hat[:, :, 0] = .7
    Y_hat[:, :, 1] = .2
    Y_hat[:, :, 2] = .1
    Y = np.zeros((N_LABELS, BATCHES), dtype='int64')
    Y_hat_mask = np.ones((LENGTH, BATCHES), dtype=floatX)
    Y_hat_mask[-2:, 0] = 0
    Y_mask = np.asarray(np.ones_like(Y), dtype=floatX)
    forward_probs = p.eval({y_hat: Y_hat, y: Y,
                            y_hat_mask: Y_hat_mask, y_mask: Y_mask})
    assert forward_probs[-2, 0, 0] == -np.inf
    Y_mask[-1] = 0
    forward_probs_y_mask = p.eval({y_hat: Y_hat, y: Y,
                                   y_hat_mask: Y_hat_mask, y_mask: Y_mask})
    assert forward_probs_y_mask[-1, 1, -2] == -np.inf
    assert not np.isnan(forward_probs).any()
Example #40
def test_lookup_table():
    lt = LookupTable(5, 3)
    lt.allocate()

    lt.W.set_value(numpy.arange(15).reshape(5, 3).astype(theano.config.floatX))

    x = tensor.lmatrix("x")
    y = lt.apply(x)
    f = theano.function([x], [y])

    x_val = [[1, 2], [0, 3]]
    desired = numpy.array([[[3, 4, 5], [6, 7, 8]], [[0, 1, 2], [9, 10, 11]]],
                          dtype=theano.config.floatX)
    assert_equal(f(x_val)[0], desired)

    # Test get_dim
    assert_equal(lt.get_dim(lt.apply.inputs[0]), 0)
    assert_equal(lt.get_dim(lt.apply.outputs[0]), lt.dim)
    assert_raises(ValueError, lt.get_dim, 'random_name')

    # Test feedforward interface
    assert lt.input_dim == 0
    assert lt.output_dim == 3
    lt.output_dim = 4
    assert lt.output_dim == 4

    def assign_input_dim():
        lt.input_dim = 11
    assert_raises(ValueError, assign_input_dim)
    lt.input_dim = 0
Example #41
    def __init__(self, R, k, E, U, EU, embedding_size):
        self.k = k # Slices count
        self.R = R
        self.embedding_size = embedding_size

        init_range = 0.07
        init_range_W = 0.001
        # Setup params
        #Tensor matrix
        W = np.random.uniform(low=-init_range_W, high=init_range_W, size=(self.embedding_size, self.embedding_size, k))
        #Neural matrix
        V = np.random.uniform(low=-init_range, high=init_range, size=(2*self.embedding_size, k))
        #Bias
        b = np.random.uniform(low=-init_range, high=init_range, size=(k,))
        #Concatenation
        u = np.random.uniform(low=-init_range, high=init_range, size=(k, ))

        self.embedding_size_t = theano.shared(self.embedding_size)
        self.W = theano.shared(np.asarray(W, dtype=theano.config.floatX), name="W")

        self.E, self.U, self.EU = E, U, EU # Shared among networks

        self.V, self.b, self.u = theano.shared(np.asarray(V, dtype=theano.config.floatX), name="V"+str(R)), \
                                 theano.shared(np.asarray(b, dtype=theano.config.floatX), name="b"+str(R)), \
                                 theano.shared(np.asarray(u, dtype=theano.config.floatX), name="u"+str(R))

        self.params = [self.W, self.U, self.V, self.b, self.u]


        self.input = T.lmatrix()

        self.inputs = [self.input] # For trainer
Example #42
    def __init__(self,
                 dim,
                 initializer=default_initializer,
                 normalize=True,
                 dropout=0,
                 activation="tanh",
                 verbose=True):
        super(NegativePhraseRAE, self).__init__(dim,
                                                initializer=initializer,
                                                normalize=normalize,
                                                dropout=dropout,
                                                activation=activation,
                                                verbose=verbose)
        self.neg_seq = T.lmatrix()
        self.neg_vectors = T.fmatrix()
        self.neg_scan_result, _ = theano.scan(
            self.encode,
            sequences=[self.neg_seq],
            outputs_info=[self.neg_vectors, None],
            name="neg_rae_build")
        # all Negative history vector in scan
        self.neg_history_output = self.neg_scan_result[0]
        self.neg_all_output = self.neg_history_output[-1]

        # Consider Negative Phrase Only One
        self.neg_output = ifelse(
            T.eq(self.neg_vectors.shape[0], 1),
            self.neg_vectors[0],  # True
            self.neg_all_output[-1])  # False
        self.neg_loss_rec = ifelse(
            T.eq(self.neg_vectors.shape[0], 1),
            0.0,  # True
            T.sum(self.neg_scan_result[1]))  # False
Example #43
def test_ctc_pseudo_cost():
    LENGTH = 500
    BATCHES = 40
    CLASSES = 2
    N_LABELS = 45
    y_hat = T.tensor3('features')
    input_mask = T.matrix('features_mask')
    y_hat_mask = input_mask
    y = T.lmatrix('phonemes')
    y_mask = T.matrix('phonemes_mask')
    pseudo_cost = ctc_cost.pseudo_cost(y, y_hat, y_mask, y_hat_mask)

    Y_hat = np.zeros((LENGTH, BATCHES, CLASSES + 1), dtype=floatX)
    Y_hat[:, :, 0] = .75
    Y_hat[:, :, 1] = .2
    Y_hat[:, :, 2] = .05
    Y_hat[3, 0, 0] = .3
    Y_hat[3, 0, 1] = .4
    Y_hat[3, 0, 2] = .3
    Y = np.zeros((N_LABELS, BATCHES), dtype='int64')
    Y[25:, :] = 1
    Y_hat_mask = np.ones((LENGTH, BATCHES), dtype=floatX)
    Y_hat_mask[-5:] = 0
    # default blank symbol is the highest class index (3 in this case)
    Y_mask = np.asarray(np.ones_like(Y), dtype=floatX)
    Y_mask[30:] = 0
    cost = pseudo_cost.eval({y_hat: Y_hat, y: Y,
                             y_hat_mask: Y_hat_mask, y_mask: Y_mask})
    pseudo_grad = T.grad(ctc_cost.pseudo_cost(y, y_hat,
                                              y_mask, y_hat_mask).sum(),
                         y_hat)
    #test_grad2 = pseudo_grad.eval({y_hat: Y_hat, y: Y,
    #                               y_hat_mask: Y_hat_mask, y_mask: Y_mask})
    # TODO: write some more meaningful asserts here
    assert cost.sum() > 0
Example #44
    def create_layers(self, X_dim, y_dim, random_state):
        initW = kitchen.init.GlorotUniform(random_state=random_state, gain='relu')
        initb = kitchen.init.Uniform(random_state=random_state)

        i0 = lasagne.layers.InputLayer(shape=(None, X_dim[0]), input_var=T.lmatrix('bounds'))
        i1 = lasagne.layers.InputLayer(shape=(None, X_dim[1]), input_var=T.lmatrix('X'))

        h1 = lasagne.layers.EmbeddingLayer(i1, input_size=X_dim[2], output_size=40, W=initW)

        h2 = lasagne.layers.DenseLayer(h1, num_units=40, nonlinearity=lasagne.nonlinearities.rectify, W=initW, b=initb)

        h3 = MaxpoolLayer([i0, h2])

        o1 = lasagne.layers.DenseLayer(h3, num_units=1, nonlinearity=lasagne.nonlinearities.sigmoid, W=initW, b=initb)

        return (i0, i1), o1
Example #45
def test_ctc_symmetry_logscale():
    LENGTH = 5000
    BATCHES = 3
    CLASSES = 4
    y_hat = T.tensor3('features')
    input_mask = T.matrix('features_mask')
    y_hat_mask = input_mask
    y = T.lmatrix('phonemes')
    y_mask = T.matrix('phonemes_mask')
    ctc_cost_t = ctc_cost.cost(y, y_hat, y_mask, y_hat_mask)

    Y_hat = np.zeros((LENGTH, BATCHES, CLASSES), dtype=floatX)
    Y_hat[:, :, 0] = .3
    Y_hat[:, :, 1] = .2
    Y_hat[:, :, 2] = .4
    Y_hat[:, :, 3] = .1
    Y_hat_mask = np.ones((LENGTH, BATCHES), dtype=floatX)
    # default blank symbol is the highest class index (3 in this case)
    Y = np.repeat(np.array([0, 1, 2, 1, 2, 0, 2, 2, 2]),
                  BATCHES).reshape((9, BATCHES))
    # the masks for this test should be all ones.
    Y_mask = np.asarray(np.ones_like(Y), dtype=floatX)
    forward_cost = ctc_cost_t.eval({y_hat: Y_hat, y: Y,
                                  y_hat_mask: Y_hat_mask, y_mask: Y_mask})
    backward_cost = ctc_cost_t.eval({y_hat: Y_hat, y: Y[::-1],
                                   y_hat_mask: Y_hat_mask, y_mask: Y_mask})
    testing.assert_almost_equal(forward_cost[0], backward_cost[0])
    assert not np.isnan(forward_cost[0])
    assert not np.isnan(backward_cost[0])
    assert not np.isinf(np.abs(forward_cost[0]))
    assert not np.isinf(np.abs(backward_cost[0]))
Example #46
def test_ctc_add_blanks():
    BATCHES = 3
    N_LABELS = 3
    y = T.lmatrix('phonemes')
    y_mask = T.matrix('phonemes_mask')
    blanked_y, blanked_y_mask = ctc_cost._add_blanks(
        y=y,
        blank_symbol=1,
        y_mask=y_mask)
    Y = np.zeros((N_LABELS, BATCHES), dtype='int64')
    Y_mask = np.asarray(np.ones_like(Y), dtype=floatX)
    Y_mask[-1, 0] = 0
    Blanked_y_mask = blanked_y_mask.eval({y_mask: Y_mask})
    Blanked_y = blanked_y.eval({y: Y})
    assert (Blanked_y == np.array([[1, 1, 1],
                                   [0, 0, 0],
                                   [1, 1, 1],
                                   [0, 0, 0],
                                   [1, 1, 1],
                                   [0, 0, 0],
                                   [1, 1, 1]], dtype='int32')).all()
    assert (Blanked_y_mask == np.array([[1., 1., 1.],
                                        [1., 1., 1.],
                                        [1., 1., 1.],
                                        [1., 1., 1.],
                                        [1., 1., 1.],
                                        [0., 1., 1.],
                                        [0., 1., 1.]], dtype=floatX)).all()
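A small NumPy reference for the interleaving the asserts above expect from _add_blanks: the blank symbol is placed before, between, and after the labels along the time axis. This is an illustration of the expected output, not the library implementation, and it ignores the mask handling.

import numpy as np

def add_blanks_reference(Y, blank_symbol):
    n_labels, batches = Y.shape
    blanked = np.full((2 * n_labels + 1, batches), blank_symbol, dtype=Y.dtype)
    blanked[1::2] = Y                    # odd time steps carry the original labels
    return blanked

Y = np.zeros((3, 3), dtype='int64')
print(add_blanks_reference(Y, blank_symbol=1))   # reproduces the Blanked_y pattern asserted above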
Example #47
def test_ctc_pseudo_cost_skip_softmax_stability():
    LENGTH = 500
    BATCHES = 40
    CLASSES = 2
    N_LABELS = 45
    y_hat = T.tensor3('features')
    input_mask = T.matrix('features_mask')
    y_hat_mask = input_mask
    y = T.lmatrix('phonemes')
    y_mask = T.matrix('phonemes_mask')
    pseudo_cost = ctc_cost.pseudo_cost(y, y_hat, y_mask, y_hat_mask,
                                       skip_softmax=True)

    Y_hat = np.asarray(np.random.normal(0, 1, (LENGTH, BATCHES, CLASSES + 1)),
                       dtype=floatX)
    Y = np.zeros((N_LABELS, BATCHES), dtype='int64')
    Y[25:, :] = 1
    Y_hat_mask = np.ones((LENGTH, BATCHES), dtype=floatX)
    Y_hat_mask[-5:] = 0
    # default blank symbol is the highest class index (3 in this case)
    Y_mask = np.asarray(np.ones_like(Y), dtype=floatX)
    Y_mask[30:] = 0
    pseudo_grad = T.grad(pseudo_cost.sum(), y_hat)
    test_grad = pseudo_grad.eval({y_hat: Y_hat, y: Y,
                                  y_hat_mask: Y_hat_mask, y_mask: Y_mask})
    y_hat_softmax = T.exp(y_hat) / T.exp(y_hat).sum(2)[:, :, None]
    pseudo_cost2 = ctc_cost.pseudo_cost(y, y_hat_softmax, y_mask, y_hat_mask,
                                        skip_softmax=False)
    pseudo_grad2 = T.grad(pseudo_cost2.sum(), y_hat)
    test_grad2 = pseudo_grad2.eval({y_hat: Y_hat, y: Y,
                                    y_hat_mask: Y_hat_mask, y_mask: Y_mask})
    testing.assert_almost_equal(test_grad, test_grad2, decimal=4)
Example #48
    def GRU_question(self, dimension_fact_embedding, num_hidden_units_questions, num_hidden_units_episodes, max_question_len, dimension_word_embeddings):

        self.question_idxs = T.lmatrix("question_indices") # as many columns as words in the context window and as many lines as words in the sentence
        self.question_mask = T.lvector("question_mask")
        q = self.emb[self.question_idxs].reshape((self.question_idxs.shape[0], dimension_word_embeddings)) # x basically represents the embeddings of the words IN the current sentence.  So it is shape

        def slice_w(x, n):
            return x[n*num_hidden_units_questions:(n+1)*num_hidden_units_questions]

        def question_gru_recursion(x_cur, h_prev, q_mask):

            W_in_stacked = T.concatenate([self.W_question_reset_gate_x, self.W_question_update_gate_x, self.W_question_hidden_gate_x], axis=1)
            W_hid_stacked = T.concatenate([self.W_question_reset_gate_h, self.W_question_update_gate_h, self.W_question_hidden_gate_h], axis=1)

            input_n = T.dot(x_cur, W_in_stacked)
            hid_input = T.dot(h_prev, W_hid_stacked)
            resetgate = slice_w(hid_input, 0) + slice_w(input_n, 0)
            updategate = slice_w(hid_input, 1) + slice_w(input_n, 1)
            resetgate = T.tanh(resetgate)
            updategate = T.tanh(updategate)

            hidden_update = slice_w(input_n, 2) + resetgate * slice_w(hid_input, 2)
            hidden_update = T.tanh(hidden_update)
            h_cur = (1 - updategate) * h_prev + updategate * hidden_update  # mix previous state with the candidate update

            h_cur = q_mask * h_cur + (1 - q_mask) * h_prev
            # h_cur = T.tanh(T.dot(self.W_fact_to_hidden, x_cur) + T.dot(self.W_hidden_to_hidden, h_prev))
            return h_cur

        state = self.h0_questions
        for jdx in range(max_question_len):
            state = question_gru_recursion(q[jdx], state, self.question_mask[jdx])

        return T.tanh(T.dot(state, self.W_question_to_vector) + self.b_question_to_vector)
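For reference, the unrolled Python loop above can also be expressed with theano.scan; the standalone sketch below, with made-up weight shapes and the conventional sigmoid gate nonlinearity, only illustrates that pattern and does not reuse the class's own parameters.

import numpy as np
import theano
import theano.tensor as T

floatX = theano.config.floatX

def scan_gru_over_question(n_in=50, n_hid=40):
    """Illustrative scan-based GRU over a (question_len, n_in) embedding matrix."""
    def shared(shape, name):
        return theano.shared(0.01 * np.random.randn(*shape).astype(floatX), name=name)

    # Stacked weights for the reset, update and candidate gates (assumed shapes).
    W_in = shared((n_in, 3 * n_hid), 'W_in')
    W_hid = shared((n_hid, 3 * n_hid), 'W_hid')
    h0 = T.zeros((n_hid,), dtype=floatX)

    q = T.matrix('q')            # (question_len, n_in) word embeddings
    q_mask = T.vector('q_mask')  # (question_len,) 1 for real words, 0 for padding

    def slice_w(x, n):
        return x[n * n_hid:(n + 1) * n_hid]

    def step(x_cur, m_cur, h_prev):
        input_n = T.dot(x_cur, W_in)
        hid_input = T.dot(h_prev, W_hid)
        resetgate = T.nnet.sigmoid(slice_w(input_n, 0) + slice_w(hid_input, 0))
        updategate = T.nnet.sigmoid(slice_w(input_n, 1) + slice_w(hid_input, 1))
        hidden_update = T.tanh(slice_w(input_n, 2) + resetgate * slice_w(hid_input, 2))
        h_cur = (1 - updategate) * h_prev + updategate * hidden_update
        # Carry the previous state through padded positions.
        return m_cur * h_cur + (1 - m_cur) * h_prev

    states, _ = theano.scan(step, sequences=[q, q_mask], outputs_info=h0)
    return theano.function([q, q_mask], states[-1])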
Example #49
0
  def get_char_emb_function(self):
    """
    Return embeddings, with OOVs replaced by context-estimation
    Used at test time
    """
    oov_char = tensor.lmatrix('oov_char_pred')
    rnn_mask = tensor.matrix('rnn_mask_pred', dtype=floatX)

    self.inputs = [None] * (self.num_lstm_layers + 1)
    self.inputs[0] = self.embedding_layer.connect(self.x)
    self.rev_mask = self.mask[::-1]

    emb_mat = self.embedding_layer.embeddings[0]
    char_states = self.gemb.char_rnn.connect(oov_char, rnn_mask) # (oov_num, 2*char_hidden_dim)
    char_preact = char_states.dimshuffle((0,'x',1))

    feat = self.gemb.mlp.connect(char_preact) # (oov_num, batch=1, num_words)
    probs = tensor.nnet.softmax(feat.reshape([feat.shape[0]*feat.shape[1], feat.shape[2]])) # (oov_num*batch, num_words)
    emb_reweight = probs.dimshuffle(0,1,'x') * emb_mat # (oov_num*batch, num_words, emb_dim)
    gembedding = emb_reweight.sum(axis=1).reshape([feat.shape[0], feat.shape[1], -1]) # back to (oov_num, batch, emb_dim)

    return theano.function([self.x0, self.mask0, oov_char, rnn_mask], [gembedding, self.inputs[0]],
                name='char_gemb_pred',
                on_unused_input='warn',
                givens=({self.is_train: numpy.cast['int8'](1)}))
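The heart of this function is the re-weighting step: the character RNN and MLP predict a distribution over the known vocabulary for each OOV token, and the token's embedding is taken as the expectation of the embedding table under that distribution. A small NumPy sketch of just that step, with made-up sizes, looks like this:

import numpy as np

# Illustrative sizes only: 3 OOV tokens, a 10-word vocabulary, 5-dim embeddings.
oov_num, num_words, emb_dim = 3, 10, 5
feat = np.random.randn(oov_num, num_words)     # scores from the char MLP
emb_mat = np.random.randn(num_words, emb_dim)  # embedding table

probs = np.exp(feat) / np.exp(feat).sum(axis=1, keepdims=True)  # softmax per token
weighted = probs[:, :, None] * emb_mat[None, :, :]  # (oov_num, num_words, emb_dim)
gembedding = weighted.sum(axis=1)                   # (oov_num, emb_dim)
assert gembedding.shape == (oov_num, emb_dim)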
Example #50
0
  def __theano_init__(self):

    # Theano tensors for I/O
    X = T.lmatrix('X')
    Y = T.lvector('Y')
    N = T.lvector('N')

    # network structure
    l_in = L.layers.InputLayer(shape=(self.batch_size, self.n_gram), input_var = X)
    l_we = L.layers.EmbeddingLayer(l_in, self.vocab_size, self.word_dim, W = self.D)
    l_f1 = L.layers.DenseLayer(l_we, self.hidden_dim1, W = self.C, b = self.Cb)
    l_f2 = L.layers.DenseLayer(l_f1, self.hidden_dim2, W = self.M, b = self.Mb)
    l_out = L.layers.DenseLayer(l_f2, self.vocab_size, W = self.E, b = self.Eb, nonlinearity=None)
    
    # lasagne.layers.get_output produces a variable for the output of the net
    O = L.layers.get_output(l_out) # (batch_size, vocab_size)

    lossfunc = NCE(self.batch_size, self.vocab_size, self.noise_dist, self.noise_sample_size)
    loss = lossfunc.evaluate(O, Y, N)
    # loss = T.nnet.categorical_crossentropy(O, Y).mean()

    # Retrieve all parameters from the network
    all_params = L.layers.get_all_params(l_out, trainable=True)

    # Compute Adadelta updates for training
    updates = L.updates.adadelta(loss, all_params)

    # Theano functions for training and computing cost
    self.train = theano.function([l_in.input_var, Y, N], loss, updates=updates, allow_input_downcast=True)
    self.compute_loss = theano.function([l_in.input_var, Y, N], loss, allow_input_downcast=True)
    self.weights = theano.function(inputs = [], outputs = [self.D, self.C, self.M, self.E, self.Cb, self.Mb, self.Eb])
Example #51
0
  def get_char_gemb_loss_function(self):
    oov_pos = tensor.lvector('oov_pos')
    oov_char = tensor.lmatrix('oov_char')
    rnn_mask = tensor.matrix('rnn_mask', dtype=floatX)

    oov_pos_x = oov_pos.flatten()
    oov_pos_y = tensor.arange(oov_pos_x.shape[0])

    emb_mat = self.embedding_layer.embeddings[0]

    char_states = self.gemb.char_rnn.connect(oov_char, rnn_mask) # (batch, 2*char_hidden_dim)
    char_preact = char_states

    feat = self.gemb.mlp.connect(char_preact) # (batch, num_words)

    probs = tensor.nnet.softmax(feat) # (oov_num*batch, num_words), with oov_num fixed to 1 here
    log_probs = tensor.log(probs)

    loss = CrossEntropyLoss().connect(inputs=log_probs, weights=None, labels=self.x[oov_pos_x,oov_pos_y,0].reshape([-1,1]))

    grads = gradient_clipping(tensor.grad(loss, self.gemb.params),
                  self.max_grad_norm)
    updates = adadelta(self.gemb.params, grads)

    return theano.function([self.x0, self.mask0, oov_pos, oov_char, rnn_mask], loss,
                 name='f_char_gemb_loss',
                 updates=updates,
                 on_unused_input='warn',
                 givens=({self.is_train: numpy.cast['int8'](1)}))
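gradient_clipping and adadelta above are helpers from the surrounding project; as one common way such a clipping helper is implemented (an assumption, not this project's code), gradients can be rescaled by their global L2 norm in plain Theano:

import theano.tensor as tensor

def clip_by_global_norm(grads, max_norm):
    """Rescale a list of gradient tensors so their joint L2 norm is at most max_norm."""
    global_norm = tensor.sqrt(sum((g ** 2).sum() for g in grads))
    scale = tensor.minimum(1.0, max_norm / (global_norm + 1e-7))
    return [g * scale for g in grads]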
Example #52
0
def get_sampling_model_and_input(exp_config):
    # Build the encoder and decoder
    encoder = BidirectionalEncoder(exp_config['src_vocab_size'],
                                   exp_config['enc_embed'],
                                   exp_config['enc_nhids'])

    decoder = Decoder(exp_config['trg_vocab_size'],
                      exp_config['dec_embed'],
                      exp_config['dec_nhids'],
                      exp_config['enc_nhids'] * 2,
                      loss_function='min_risk')

    # Create Theano variables
    logger.info('Creating theano variables')
    sampling_input = tensor.lmatrix('source')

    # Get beam search
    logger.info("Building sampling model")
    sampling_representation = encoder.apply(sampling_input,
                                            tensor.ones(sampling_input.shape))
    generated = decoder.generate(sampling_input, sampling_representation)

    # build the model that will let us get a theano function from the sampling graph
    logger.info("Creating Sampling Model...")
    sampling_model = Model(generated)

    return sampling_model, sampling_input, encoder, decoder
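A hedged usage sketch for the function above: the exp_config sizes are made up, the source sentence is a random placeholder, and compiling the graph relies on Model inheriting get_theano_function() from Blocks' ComputationGraph; the bricks would still need weights_init set and initialize() called before the outputs are meaningful.

import numpy

exp_config = {
    'src_vocab_size': 100, 'trg_vocab_size': 100,
    'enc_embed': 32, 'dec_embed': 32,
    'enc_nhids': 64, 'dec_nhids': 64,
}
sampling_model, sampling_input, encoder, decoder = \
    get_sampling_model_and_input(exp_config)

# Compile the generation graph into a callable (assumes Blocks' ComputationGraph API).
sampling_fn = sampling_model.get_theano_function()
source_sentence = numpy.array([[3, 7, 5, 1]], dtype='int64')  # axis order assumed (batch, length)
outputs = sampling_fn(source_sentence)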
Example #53
0
    def __init__(self, data, config, fast_predict=False):
        self.embedding_shapes = data.embedding_shapes
        self.lstm_type = config.lstm_cell
        self.lstm_hidden_size = int(config.lstm_hidden_size)
        self.num_lstm_layers = int(config.num_lstm_layers)
        self.max_grad_norm = float(config.max_grad_norm)

        self.vocab_size = data.word_dict.size()
        self.label_space_size = data.label_dict.size()
        self.unk_id = data.unk_id

        # Initialize layers and parameters
        self.embedding_layer = EmbeddingLayer(data.embedding_shapes,
                                              data.embeddings)
        self.params = [p for p in self.embedding_layer.params]

        self.rnn_layers = [None] * self.num_lstm_layers
        for l in range(self.num_lstm_layers):
            input_dim = self.embedding_layer.output_size if l == 0 else self.lstm_hidden_size
            input_dropout = config.input_dropout_prob if (
                config.per_layer_dropout or l == 0) else 0.0
            recurrent_dropout = config.recurrent_dropout_prob

            self.rnn_layers[l] = get_rnn_layer(self.lstm_type)(
                input_dim,
                self.lstm_hidden_size,
                input_dropout_prob=input_dropout,
                recurrent_dropout_prob=recurrent_dropout,
                fast_predict=fast_predict,
                prefix='lstm_{}'.format(l))
            self.params.extend(self.rnn_layers[l].params)

        self.softmax_layer = SoftmaxLayer(self.lstm_hidden_size,
                                          self.label_space_size)
        self.params.extend(self.softmax_layer.params)

        # Build model
        # Shape of x: [seq_len, batch_size, num_features]
        self.x0 = tensor.ltensor3('x')
        self.y0 = tensor.lmatrix('y')
        self.mask0 = tensor.matrix('mask', dtype=floatX)
        self.is_train = tensor.bscalar('is_train')

        self.x = self.x0.dimshuffle(1, 0, 2)
        self.y = self.y0.dimshuffle(1, 0)
        self.mask = self.mask0.dimshuffle(1, 0)

        self.inputs = [None] * (self.num_lstm_layers + 1)
        self.inputs[0] = self.embedding_layer.connect(self.x)
        self.rev_mask = self.mask[::-1]

        # Alternate read direction between stacked layers: even layers use the
        # forward mask, odd layers the reversed mask, and each layer's output is
        # flipped before it feeds the next layer.
        for l, rnn in enumerate(self.rnn_layers):
            outputs = rnn.connect(self.inputs[l],
                                  self.mask if l % 2 == 0 else self.rev_mask,
                                  self.is_train)
            self.inputs[l + 1] = outputs[::-1]

        self.scores, self.pred = self.softmax_layer.connect(self.inputs[-1])
        self.pred0 = self.pred.reshape([self.x.shape[0],
                                        self.x.shape[1]]).dimshuffle(1, 0)
Example #54
0
def testrun(params,datasets):
# Test generalization performance using the learned parameters.
    w1 = params[0]
    w2 = params[1]
    b1 = params[2]
    b2 = params[3]
    costs = params[4]
    test_set_x, test_set_t = datasets

    x = T.dmatrix('x')
    h1 = T.nnet.relu( T.dot(x,w1) + b1 )
    h2 = T.nnet.relu( T.dot(h1,w2) + b2 )
    y = T.nnet.softmax( h2 )
    t = T.lmatrix('t')
    f2 = theano.function(inputs = [x], outputs = y)
    print("------------------------")
    print("TEST MODEL IS READY!!")

    accuracy = calc_accuracy( test_set_t , f2(test_set_x) )
    print("*********")
    print("OUR ACCURACY :", accuracy*100, " PER CENTO !!" )
    print("*********")
# Plot the loss curve
    plt.plot( costs ,'-')
    plt.show()
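calc_accuracy is defined elsewhere in the original script; one plausible implementation, assuming the targets in test_set_t are one-hot rows and the predictions are softmax outputs, is:

import numpy as np

def calc_accuracy(targets, predictions):
    """Fraction of rows where the argmax of the prediction matches the
    argmax of the (assumed one-hot) target."""
    return np.mean(np.argmax(predictions, axis=1) == np.argmax(targets, axis=1))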
Example #55
0
def main(save_to, num_epochs):
    mlp = MLP([Tanh(), Softmax()], [784, 100, 10],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0))
    mlp.initialize()
    x = tensor.matrix('features')
    y = tensor.lmatrix('targets')
    probs = mlp.apply(tensor.flatten(x, outdim=2))
    cost = CategoricalCrossEntropy().apply(y.flatten(), probs)
    error_rate = MisclassificationRate().apply(y.flatten(), probs)

    cg = ComputationGraph([cost])
    W1, W2 = VariableFilter(roles=[WEIGHT])(cg.variables)
    cost = cost + .00005 * (W1**2).sum() + .00005 * (W2**2).sum()
    cost.name = 'final_cost'

    mnist_train = MNIST(("train", ))
    mnist_test = MNIST(("test", ))

    algorithm = GradientDescent(cost=cost,
                                parameters=cg.parameters,
                                step_rule=Scale(learning_rate=0.1))
    extensions = [
        Timing(),
        FinishAfter(after_n_epochs=num_epochs),
        DataStreamMonitoring([cost, error_rate],
                             Flatten(DataStream.default_stream(
                                 mnist_test,
                                 iteration_scheme=SequentialScheme(
                                     mnist_test.num_examples, 500)),
                                     which_sources=('features', )),
                             prefix="test"),
        TrainingDataMonitoring([
            cost, error_rate,
            aggregation.mean(algorithm.total_gradient_norm)
        ],
                               prefix="train",
                               after_epoch=True),
        Checkpoint(save_to),
        Printing()
    ]

    if BLOCKS_EXTRAS_AVAILABLE:
        extensions.append(
            Plot('MNIST example',
                 channels=[[
                     'test_final_cost',
                     'test_misclassificationrate_apply_error_rate'
                 ], ['train_total_gradient_norm']]))

    main_loop = MainLoop(algorithm,
                         Flatten(DataStream.default_stream(
                             mnist_train,
                             iteration_scheme=SequentialScheme(
                                 mnist_train.num_examples, 50)),
                                 which_sources=('features', )),
                         model=Model(cost),
                         extensions=extensions)

    main_loop.run()
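A hedged invocation of the entry point above; the checkpoint filename and the number of epochs are arbitrary placeholders, not values taken from the original script.

if __name__ == "__main__":
    main(save_to="mnist_mlp.tar", num_epochs=5)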
Example #56
0
def test_beam_search():
    """Test beam search using the model from the reverse_words demo.

    Ideally this test would use a trained model, but for now it runs on a
    randomly initialized one. It therefore does not test the ability to find
    the best output sequence, only the correctness of the returned costs.

    """
    rng = numpy.random.RandomState(1234)
    alphabet_size = 20
    beam_size = 10
    length = 15

    reverser = WordReverser(10, alphabet_size)
    reverser.weights_init = reverser.biases_init = IsotropicGaussian(0.5)
    reverser.initialize()

    inputs = tensor.lmatrix('inputs')
    samples, = VariableFilter(bricks=[reverser.generator], name="outputs")(
        ComputationGraph(reverser.generate(inputs)))

    input_vals = numpy.tile(rng.randint(alphabet_size, size=(length,)),
                            (beam_size, 1)).T

    search = BeamSearch(10, samples)
    results, mask, costs = search.search({inputs: input_vals},
                                         0, 3 * length)

    true_costs = reverser.cost(
        input_vals, numpy.ones((length, beam_size), dtype=floatX),
        results, mask).eval()
    true_costs = (true_costs * mask).sum(axis=0)
    assert_allclose(costs, true_costs, rtol=1e-5)
Example #57
0
def test_ctc_targets():
    LENGTH = 20
    BATCHES = 4
    CLASSES = 2
    y_hat = T.tensor3('features')
    input_mask = T.matrix('features_mask')
    y_hat_mask = input_mask
    y = T.lmatrix('phonemes')
    y_mask = T.matrix('phonemes_mask')
    ctc_target = ctc_cost.get_targets(y, T.log(y_hat), y_mask, y_hat_mask)
    Y_hat = np.zeros((LENGTH, BATCHES, CLASSES + 1), dtype=floatX)
    Y_hat[:, :, 0] = .7
    Y_hat[:, :, 1] = .2
    Y_hat[:, :, 2] = .1
    Y_hat[3, :, 0] = .3
    Y_hat[3, :, 1] = .4
    Y_hat[3, :, 2] = .3
    Y = np.zeros((2, BATCHES), dtype='int64')
    Y_hat_mask = np.ones((LENGTH, BATCHES), dtype=floatX)
    Y_hat_mask[-5:] = 0
    # default blank symbol is the highest class index (2 in this case, since there are CLASSES + 1 = 3 output classes)
    Y_mask = np.asarray(np.ones_like(Y), dtype=floatX)
    target = ctc_target.eval({y_hat: Y_hat, y: Y,
                              y_hat_mask: Y_hat_mask, y_mask: Y_mask})
    # Note that this part is the same as the cross entropy gradient
    grad = -target / Y_hat
    test_grad = finite_diff(Y, Y_hat, Y_mask, Y_hat_mask, eps=1e-2, n_steps=5)
    testing.assert_almost_equal(grad.flatten()[:5],
                                test_grad.flatten()[:5], decimal=3)
Example #58
0
    def test_minibatch_fn(self):
        """
        Returns a theano function that evaluates a dataset
        """
        X = T.lmatrix('X_test')
        L_x = T.lvector('L_X_test')

        Y = T.lmatrix('Y_test')
        L_y = T.lvector('L_y_test')

        precision, recall = self.evaluate(X, L_x, Y, L_y)

        return theano.function(
            inputs=[X, L_x, Y, L_y],
            outputs=[precision, recall]
        )