Example #1
def trainer_tester(mapping,train_data,test_data):
	data = theano.shared(train_data)
	test_data = theano.shared(test_data)
	init_weights = 0.1*np.random.randn(len(mapping),2,100)
	W = theano.shared(init_weights)

	matches = T.wmatrix('matches')
	weights = T.dtensor3('weights')
	t_matches = T.wmatrix('t_matches')
	delta   = theano.shared(np.zeros(init_weights.shape))

	cost, accuracy = cost_fn(matches,weights)
	log_loss_fn = log_loss(t_matches,weights)
	grad = T.grad(cost,wrt=weights)
	train = theano.function(
			inputs = [],
			outputs = cost,
			givens  = { matches: data, weights: W },
			updates = [
				(W, W - 0.1*( grad + 0.5 * delta )),   # gradient step with a momentum term
				(delta, 0.1*( grad + 0.5 * delta ))    # delta stores the previous update
			]
		)
	test = theano.function(
			inputs = [],
			outputs = [log_loss_fn],
			givens  = { t_matches: test_data, weights: W }
		)
	return train,test,W
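Note: T.wmatrix declares a symbolic 2-D variable of dtype int16 ('w' is the one-letter code for a 16-bit integer "word"), which is why the match data wired in through givens has to be stored as small integers. A minimal, self-contained sketch of the same pattern, independent of the helpers used above (cost_fn, log_loss, mapping):

import numpy as np
import theano
import theano.tensor as T

m = T.wmatrix('m')                    # symbolic int16 matrix
expr = 2 * m                          # build a symbolic expression from it
f = theano.function([m], expr)        # compile the graph into a callable

data = np.arange(6, dtype='int16').reshape(2, 3)
print(f(data))                        # inputs bound to m must be int16
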
Example #2
def test_matrixmul():
    """
    Tests matrix multiplication for a range of different
    dtypes. Checks both normal and transpose multiplication
    using randomly generated matrices.
    """
    rng = np.random.RandomState(222)
    dtypes = [
        'int16', 'int32', 'int64', 'float64', 'float32'
    ]
    tensor_x = [
        tensor.wmatrix(),
        tensor.imatrix(),
        tensor.lmatrix(),
        tensor.dmatrix(),
        tensor.fmatrix()
    ]
    np_W, np_x, np_x_T = [], [], []
    for dtype in dtypes:
        if 'int' in dtype:
            np_W.append(rng.randint(
                -10, 10, rng.random_integers(5, size=2)
            ).astype(dtype))
            np_x.append(rng.randint(
                -10, 10, (rng.random_integers(5),
                          np_W[-1].shape[0])
            ).astype(dtype))
            np_x_T.append(rng.randint(
                -10, 10, (rng.random_integers(5),
                          np_W[-1].shape[1])
            ).astype(dtype))
        elif 'float' in dtype:
            np_W.append(rng.uniform(
                -1, 1, rng.random_integers(5, size=2)
            ).astype(dtype))
            np_x.append(rng.uniform(
                -10, 10, (rng.random_integers(5),
                          np_W[-1].shape[0])
            ).astype(dtype))
            np_x_T.append(rng.uniform(
                -10, 10, (rng.random_integers(5),
                          np_W[-1].shape[1])
            ).astype(dtype))
        else:
            assert False

    def sharedW(value, dtype):
        return theano.shared(theano._asarray(value, dtype=dtype))
    tensor_W = [sharedW(W, dtype) for W in np_W]
    matrixmul = [MatrixMul(W) for W in tensor_W]
    assert all(mm.get_params()[0] == W for mm, W in zip(matrixmul, tensor_W))

    fn = [theano.function([x], mm.lmul(x))
          for x, mm in zip(tensor_x, matrixmul)]
    fn_T = [theano.function([x], mm.lmul_T(x))
            for x, mm in zip(tensor_x, matrixmul)]
    for W, x, x_T, f, f_T in zip(np_W, np_x, np_x_T, fn, fn_T):
        np.testing.assert_allclose(f(x), np.dot(x, W))
        np.testing.assert_allclose(f_T(x_T), np.dot(x_T, W.T))
Example #3
def test_matrixmul():
    """
    Tests for projection
    """
    rng = np.random.RandomState(222)
    dtypes = [
        'int16', 'int32', 'int64'
    ]
    tensor_x = [
        tensor.wmatrix(),
        tensor.imatrix(),
        tensor.lmatrix(),
        tensor.wvector(),
        tensor.ivector(),
        tensor.lvector()
    ]
    np_W, np_x = [], []
    for dtype in dtypes:
        np_W.append(rng.rand(10, np.random.randint(1, 10)))
        np_x.append(rng.randint(
            0, 10, (rng.random_integers(5),
                    rng.random_integers(5))
        ).astype(dtype))
    for dtype in dtypes:
        np_W.append(rng.rand(10, np.random.randint(1, 10)))
        np_x.append(
            rng.randint(0, 10, (rng.random_integers(5),)).astype(dtype)
        )

    tensor_W = [sharedX(W) for W in np_W]
    matrixmul = [MatrixMul(W) for W in tensor_W]
    assert all(mm.get_params()[0] == W for mm, W in zip(matrixmul, tensor_W))

    fn = [theano.function([x], mm.project(x))
          for x, mm in zip(tensor_x, matrixmul)]
    for W, x, f in zip(np_W, np_x, fn):
        W_x = W[x]
        if x.ndim == 2:
            W_x = W_x.reshape((W_x.shape[0], np.prod(W_x.shape[1:])))
        else:
            W_x = W_x.flatten()
        np.testing.assert_allclose(f(x), W_x)
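The projection tested above is integer indexing into the weight matrix followed by a reshape; a plain-NumPy sketch of the same computation (shapes chosen arbitrarily here):

import numpy as np

rng = np.random.RandomState(0)
W = rng.rand(10, 4)                      # 10 rows of size-4 embeddings
x = rng.randint(0, 10, (3, 2))           # 3 examples, 2 indices each
W_x = W[x]                               # gather rows: shape (3, 2, 4)
W_x = W_x.reshape((W_x.shape[0], np.prod(W_x.shape[1:])))   # (3, 8), as in the test
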
Example #4
def objective_train_model(params):
    # Initialise parameters
    start = timeit.default_timer()
    print(params)
    num_lstm_units = int(params['num_lstm_units'])
    num_lstm_layers = int(params['num_lstm_layers'])
    num_dense_layers = int(params['num_dense_layers'])
    num_dense_units = int(params['num_dense_units'])
    num_epochs = params['num_epochs']
    learn_rate = params['learn_rate']
    mb_size = params['mb_size']
    l2reg = params['l2reg']
    rng_seed = params['rng_seed']
    #%%
    # Load training data
    path = 'saved_data'
    brancharray = numpy.load(os.path.join(path, 'train/branch_arrays.npy'))
    num_features = numpy.shape(brancharray)[-1]
    train_mask = numpy.load(os.path.join(path,
                                         'train/mask.npy')).astype(numpy.int16)
    train_label = numpy.load(os.path.join(path, 'train/padlabel.npy'))
    train_rmdoublemask = numpy.load(
        os.path.join(path, 'train/rmdoublemask.npy')).astype(numpy.int16)
    train_rmdoublemask = train_rmdoublemask.flatten()
    #%%
    numpy.random.seed(rng_seed)
    rng_inst = numpy.random.RandomState(rng_seed)
    lasagne.random.set_rng(rng_inst)
    input_var = T.ftensor3('inputs')
    mask = T.wmatrix('mask')
    target_var = T.ivector('targets')
    rmdoublesmask = T.wvector('rmdoublemask')
    # Build network
    network = build_nn(input_var,
                       mask,
                       num_features,
                       num_lstm_layers=num_lstm_layers,
                       num_lstm_units=num_lstm_units,
                       num_dense_layers=num_dense_layers,
                       num_dense_units=num_dense_units)
    # This function returns the values of the parameters
    # of all layers below one or more given Layer instances,
    # including the layer(s) itself.

    # Create a loss expression for training, i.e., a scalar objective we want
    # to minimize (for our multi-class problem, it is the cross-entropy loss):

    prediction = lasagne.layers.get_output(network)

    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
    loss = loss * rmdoublesmask
    loss = lasagne.objectives.aggregate(loss, mask.flatten())
    # regularisation

    l2_penalty = l2reg * lasagne.regularization.regularize_network_params(
        network, lasagne.regularization.l2)
    loss = loss + l2_penalty

    # We could add some weight decay as well here, see lasagne.regularization.
    # Create update expressions for training, i.e., how to modify the
    # parameters at each training step. Here, we'll use Adadelta
    parameters = lasagne.layers.get_all_params(network, trainable=True)
    my_updates = lasagne.updates.adam(loss,
                                      parameters,
                                      learning_rate=learn_rate)
    # Create a loss expression for validation/testing. The crucial difference
    # here is that we do a deterministic forward pass through the network,
    # disabling dropout layers.
    test_prediction = lasagne.layers.get_output(network, deterministic=True)

    test_loss = lasagne.objectives.categorical_crossentropy(
        test_prediction, target_var)
    test_loss = test_loss * rmdoublesmask
    test_loss = lasagne.objectives.aggregate(test_loss, mask.flatten())

    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    train_fn = theano.function(
        inputs=[input_var, mask, rmdoublesmask, target_var],
        outputs=loss,
        updates=my_updates,
        on_unused_input='warn')

    # Compile a second function computing the validation loss and accuracy:
    val_fn = theano.function([input_var, mask, rmdoublesmask, target_var],
                             [test_loss, test_prediction],
                             on_unused_input='warn')
    #%%
    # We iterate over epochs:
    for epoch in range(num_epochs):
        # print("Epoch {} ".format(epoch))
        train_err = 0
        # In each epoch, we do a full pass over the training data:
        for batch in iterate_minibatches(brancharray,
                                         train_mask,
                                         train_rmdoublemask,
                                         train_label,
                                         mb_size,
                                         shuffle=False):
            inputs, mask, rmdmask, targets = batch
            train_err += train_fn(inputs, mask, rmdmask, targets)

#%%
# Load development data
    dev_brancharray = numpy.load(os.path.join(path, 'dev/branch_arrays.npy'))
    dev_mask = numpy.load(os.path.join(path,
                                       'dev/mask.npy')).astype(numpy.int16)
    dev_label = numpy.load(os.path.join(path, 'dev/padlabel.npy'))

    dev_rmdoublemask = numpy.load(os.path.join(
        path, 'dev/rmdoublemask.npy')).astype(numpy.int16).flatten()

    with open(os.path.join(path, 'dev/ids.pkl'), 'rb') as handle:
        dev_ids_padarray = pickle.load(handle)

#%%
# get predictions for development set
    err, val_ypred = val_fn(dev_brancharray, dev_mask, dev_rmdoublemask,
                            dev_label.flatten())
    val_ypred = numpy.argmax(val_ypred, axis=1).astype(numpy.int32)

    acv_label = dev_label.flatten()
    acv_prediction = numpy.asarray(val_ypred)
    acv_mask = dev_mask.flatten()
    clip_dev_label = [o for o, m in zip(acv_label, acv_mask) if m == 1]
    clip_dev_ids = [o for o, m in zip(dev_ids_padarray, acv_mask) if m == 1]
    clip_dev_prediction = [
        o for o, m in zip(acv_prediction, acv_mask) if m == 1
    ]
    # remove repeating instances
    uniqtwid, uindices2 = numpy.unique(clip_dev_ids, return_index=True)
    uniq_dev_label = [clip_dev_label[i] for i in uindices2]
    uniq_dev_prediction = [clip_dev_prediction[i] for i in uindices2]
    uniq_dev_id = [clip_dev_ids[i] for i in uindices2]
    dev_accuracy = accuracy_score(uniq_dev_label, uniq_dev_prediction)
    mactest_P, mactest_R, mactest_F, _ = precision_recall_fscore_support(
        uniq_dev_label, uniq_dev_prediction, average='macro')
    mictest_P, mictest_R, mictest_F, _ = precision_recall_fscore_support(
        uniq_dev_label, uniq_dev_prediction, average='micro')
    test_P, test_R, test_F, _ = precision_recall_fscore_support(
        uniq_dev_label, uniq_dev_prediction)
    # to change scoring objective you need to change 'loss'
    output = {
        'loss': 1 - dev_accuracy,
        'status': STATUS_OK,
        'Params': params,
        'Macro': {
            'Macro_Precision': mactest_P,
            'Macro_Recall': mactest_R,
            'macro_F_score': mactest_F
        },
        'Micro': {
            'Micro_Precision': mictest_P,
            'Micro_Recall': mictest_R,
            'micro_F_score': mictest_F
        },
        'Support': {
            'Support_Precision': test_P[0],
            'Support_Recall': test_R[0],
            'Support_F_score': test_F[0]
        },
        'Comment': {
            'Comment_Precision': test_P[1],
            'Comment_Recall': test_R[1],
            'Comment_F_score': test_F[1]
        },
        'Deny': {
            'Deny_Precision': test_P[2],
            'Deny_Recall': test_R[2],
            'Deny_F_score': test_F[2]
        },
        'Appeal': {
            'Appeal_Precision': test_P[3],
            'Appeal_Recall': test_R[3],
            'Appeal_F_score': test_F[3]
        },
        'attachments': {
            'Labels': pickle.dumps(uniq_dev_label),
            'Predictions': pickle.dumps(uniq_dev_prediction),
            'ID': pickle.dumps(uniq_dev_id)
        }
    }

    print("1-accuracy loss = ", output['loss'])

    stop = timeit.default_timer()
    print("Time: ", stop - start)
    return output
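objective_train_model returns a dictionary in the format hyperopt expects ('loss', 'status' set to STATUS_OK, and 'attachments'), so it is presumably meant to be driven by hyperopt's fmin. A sketch of such a driver; the search-space ranges below are illustrative only, not taken from the original experiments:

from hyperopt import Trials, fmin, hp, tpe

search_space = {                       # illustrative ranges, not the original ones
    'num_lstm_units': hp.quniform('num_lstm_units', 100, 500, 1),
    'num_lstm_layers': hp.quniform('num_lstm_layers', 1, 2, 1),
    'num_dense_layers': hp.quniform('num_dense_layers', 1, 3, 1),
    'num_dense_units': hp.quniform('num_dense_units', 100, 500, 1),
    'num_epochs': 30,
    'learn_rate': hp.loguniform('learn_rate', -5, -1),
    'mb_size': 100,
    'l2reg': hp.loguniform('l2reg', -5, -1),
    'rng_seed': 364,
}
trials = Trials()
best = fmin(objective_train_model, search_space,
            algo=tpe.suggest, max_evals=50, trials=trials)
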
Example #5
def run():
    configs = [0]
    for config in configs:
        bs = 48
        feature_dim = 4000

        from uniform_dataset import UniformDataset
        data_test = UniformDataset(bs=bs,
                                   filename='/ssd2/hmdb/hmdb-tdd-1.hdf5',
                                   which_sets=['test'],
                                   sources=['features', 'time_mask', 'labels'])

        test_stream = DataStream.default_stream(
            data_test,
            iteration_scheme=SequentialScheme(data_test.num_examples, bs))

        x = T.tensor3('features')
        time_mask = T.wmatrix('time_mask')
        y = T.imatrix('labels')

        classes = eval(sys.argv[1])
        outputs = []
        for clas in classes:
            print 'Loading', clas
            model = cPickle.load(open('models/learned_' + str(clas), 'rb'))
            prob, loss, (tp, tn, fp, fn) = model.run(x, time_mask, y)
            prob.name = 'prob_' + str(clas)

            outputs += [prob]
        # prob is Nx1
        # outputs is 51xNx1
        # stack and take max along 51-class index
        outputs = T.stacklists(outputs)
        preds = T.argmax(outputs, axis=0)

        # preds now holds the predicted class for each example;
        # keep it as an (N, 1) column so it can be compared against y
        preds = preds.reshape((preds.shape[0], 1))

        num_err = T.neq(preds, y).sum()
        acc = 1 - (num_err / y.shape[0])

        test_func = theano.function([x, time_mask, y],
                                    outputs,
                                    on_unused_input='warn')

        data = test_stream.get_epoch_iterator(as_dict=True)
        total_acc = 0
        num = 0
        res = None
        labs = None
        for batch in data:
            o = test_func(batch['features'], batch['time_mask'],
                          batch['labels'])
            if res is None:
                res = o
                labs = batch['labels']
            else:
                # append on axis 1, to get 51xDs_size
                res = np.append(res, o, axis=1)
                labs = np.append(labs, batch['labels'], axis=0)
            continue

            # unreachable while the `continue` above is in place;
            # per-batch accuracy tracking is effectively disabled
            total_acc += acc
            num += 1
            print acc
        np.save('results' + sys.argv[2], res)
        np.save('labs' + sys.argv[2], labs)
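The per-class probabilities collected in the loop above are stacked into a (num_classes, N, 1) tensor, and the prediction is the index of the most confident one-vs-rest classifier. A small NumPy illustration of that step with dummy values:

import numpy as np

outputs = np.random.rand(51, 8, 1)    # 51 one-vs-rest probability columns for N = 8 clips
preds = np.argmax(outputs, axis=0)    # shape (8, 1): winning class index per clip
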
Example #6
	def __init__(
		self,
		rng,
		batchsize=100,
		activation=tanh
	):
		
		import load
		(num_sent, word_cnt, max_sen_len, k_wrd, x_wrd, y) \
		= load.read("tweets_clean.txt")


		dim_word = 100
		cl_word = 300
		k_wrd = 5
		vocab_size = word_cnt
		n_hidden = 300

		data_train,\
		data_test,\
		target_train,\
		target_test\
		= train_test_split(x_wrd, y, random_state=1234, test_size=0.1)

		x_train = theano.shared(np.asarray(data_train, dtype='int16'), borrow=True)
		y_train = theano.shared(np.asarray(target_train, dtype='int32'), borrow=True)
		x_test = theano.shared(np.asarray(data_test, dtype='int16'), borrow=True)
		y_test = theano.shared(np.asarray(target_test, dtype='int32'), borrow=True)

		self.n_train_batches = x_train.get_value(borrow=True).shape[0] / batchsize
		self.n_test_batches = x_test.get_value(borrow=True).shape[0] / batchsize


		
		"""symbol definition"""
		index = T.iscalar()
		x = T.wmatrix('x')
		y = T.ivector('y')
		train = T.iscalar('train')


		layer_embed_input = x#.reshape((batchsize, max_sen_len))

		layer_embed = EmbedIDLayer(
			rng,
			layer_embed_input,
			n_input=vocab_size,
			n_output=dim_word,
		)

		layer1_input = layer_embed.output.reshape((batchsize, 1, max_sen_len, dim_word))

		layer1 = ConvolutionalLayer(
			rng,
			layer1_input,
			filter_shape=(cl_word, 1, k_wrd, dim_word),  # 1 is the number of input channels
			image_shape=(batchsize, 1, max_sen_len, dim_word),
			activation=activation
		)

		layer2 = MaxPoolingLayer(
			layer1.output,
			poolsize=(max_sen_len-k_wrd+1, 1)
		)

		layer3_input = layer2.output.reshape((batchsize, cl_word))

		layer3 = FullyConnectedLayer(
			rng,
			dropout(rng, layer3_input, train),
			n_input=cl_word,
			n_output=n_hidden,
			activation=activation
		)

		layer4 = FullyConnectedLayer(
			rng,
			dropout(rng, layer3.output, train),
			n_input=n_hidden,
			n_output=2,
			activation=None
		)

		result = Result(layer4.output, y)
		# loss = result.negative_log_likelihood()
		loss = result.cross_entropy()
		accuracy = result.accuracy()
		params = layer4.params + layer3.params + layer1.params + layer_embed.params
		# updates = AdaDelta(params=params).updates(loss)
		updates = RMSprop(learning_rate=0.001, params=params).updates(loss)
		

		self.train_model = theano.function(
			inputs=[index],
			outputs=[loss, accuracy],
			updates=updates,
			givens={
				x: x_train[index*batchsize: (index+1)*batchsize],
				y: y_train[index*batchsize: (index+1)*batchsize],
				train: np.cast['int32'](1)
			}
		)

		self.test_model = theano.function(
			inputs=[index],
			outputs=[loss, accuracy],
			givens={
				x: x_test[index*batchsize: (index+1)*batchsize],
				y: y_test[index*batchsize: (index+1)*batchsize],
				train: np.cast['int32'](0)
			}
		)
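The compiled train_model/test_model functions take only a minibatch index; the data, labels and train flag are wired in through givens. A possible driver loop, shown here as a sketch that assumes an instance of the (unnamed) class above is passed in as model:

import numpy as np

def run_epochs(model, n_epochs=10):
    # `model` exposes n_train_batches, n_test_batches, train_model and test_model
    for epoch in range(n_epochs):
        train_losses = [model.train_model(i)[0]
                        for i in range(int(model.n_train_batches))]
        test_acc = [model.test_model(i)[1]
                    for i in range(int(model.n_test_batches))]
        print(epoch, np.mean(train_losses), np.mean(test_acc))
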
Example #7
def eval_train_model(params):
    print("Retrain model on train+dev set and evaluate on testing set")
    # Initialise parameters
    num_lstm_units = int(params['num_lstm_units'])
    num_lstm_layers = int(params['num_lstm_layers'])
    num_dense_layers = int(params['num_dense_layers'])
    num_dense_units = int(params['num_dense_units'])
    num_epochs = params['num_epochs']
    learn_rate = params['learn_rate']
    mb_size = params['mb_size']
    l2reg = params['l2reg']
    rng_seed = params['rng_seed']
    #%%
    # Load data
    path = 'saved_data'
    brancharray = numpy.load(os.path.join(path, 'train/branch_arrays.npy'))
    num_features = numpy.shape(brancharray)[-1]
    train_mask = numpy.load(os.path.join(path,
                                         'train/mask.npy')).astype(numpy.int16)
    train_label = numpy.load(os.path.join(path, 'train/padlabel.npy'))

    train_rmdoublemask = numpy.load(
        os.path.join(path, 'train/rmdoublemask.npy')).astype(numpy.int16)
    train_rmdoublemask = train_rmdoublemask.flatten()
    #%%
    numpy.random.seed(rng_seed)
    rng_inst = numpy.random.RandomState(rng_seed)
    lasagne.random.set_rng(rng_inst)
    input_var = T.ftensor3('inputs')
    mask = T.wmatrix('mask')
    target_var = T.ivector('targets')
    rmdoublesmask = T.wvector('rmdoublemask')
    # Build network
    network = build_nn(input_var,
                       mask,
                       num_features,
                       num_lstm_layers=num_lstm_layers,
                       num_lstm_units=num_lstm_units,
                       num_dense_layers=num_dense_layers,
                       num_dense_units=num_dense_units)
    # This function returns the values of the parameters of all
    # layers below one or more given Layer instances,
    # including the layer(s) itself.

    # Create a loss expression for training, i.e., a scalar objective we want
    # to minimize (for our multi-class problem, it is the cross-entropy loss):
    prediction = lasagne.layers.get_output(network)
    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
    loss = loss * rmdoublesmask
    loss = lasagne.objectives.aggregate(loss, mask.flatten())
    # regularisation
    l2_penalty = l2reg * lasagne.regularization.regularize_network_params(
        network, lasagne.regularization.l2)
    loss = loss + l2_penalty

    # We could add some weight decay as well here, see lasagne.regularization.
    # Create update expressions for training, i.e., how to modify the
    # parameters at each training step.
    parameters = lasagne.layers.get_all_params(network, trainable=True)
    my_updates = lasagne.updates.adam(loss,
                                      parameters,
                                      learning_rate=learn_rate)
    # Create a loss expression for validation/testing. The crucial difference
    # here is that we do a deterministic forward pass through the network,
    # disabling dropout layers.
    test_prediction = lasagne.layers.get_output(network, deterministic=True)

    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    train_fn = theano.function(
        inputs=[input_var, mask, rmdoublesmask, target_var],
        outputs=loss,
        updates=my_updates,
        on_unused_input='warn')
    # Compile a second function computing the validation loss and accuracy:
    val_fn = theano.function([input_var, mask],
                             test_prediction,
                             on_unused_input='warn')
    #%%
    # READ THE DATA
    dev_brancharray = numpy.load(os.path.join(path, 'dev/branch_arrays.npy'))
    dev_mask = numpy.load(os.path.join(path,
                                       'dev/mask.npy')).astype(numpy.int16)
    dev_label = numpy.load(os.path.join(path, 'dev/padlabel.npy'))

    dev_rmdoublemask = numpy.load(os.path.join(
        path, 'dev/rmdoublemask.npy')).astype(numpy.int16).flatten()

    with open(os.path.join(path, 'dev/ids.pkl'), 'rb') as handle:
        dev_ids_padarray = pickle.load(handle)

    test_brancharray = numpy.load(os.path.join(path, 'test/branch_arrays.npy'))
    test_mask = numpy.load(os.path.join(path,
                                        'test/mask.npy')).astype(numpy.int16)

    test_rmdoublemask = numpy.load(os.path.join(
        path, 'test/rmdoublemask.npy')).astype(numpy.int16).flatten()

    with open(os.path.join(path, 'test/ids.pkl'), 'rb') as handle:
        test_ids_padarray = pickle.load(handle)

#%%
#start training loop
# We iterate over epochs:
    for epoch in range(num_epochs):
        #print("Epoch {} ".format(epoch))
        train_err = 0
        # In each epoch, we do a full pass over the training data:
        for batch in iterate_minibatches(brancharray,
                                         train_mask,
                                         train_rmdoublemask,
                                         train_label,
                                         mb_size,
                                         max_seq_len=25,
                                         shuffle=False):
            inputs, mask, rmdmask, targets = batch
            train_err += train_fn(inputs, mask, rmdmask, targets)
        for batch in iterate_minibatches(dev_brancharray,
                                         dev_mask,
                                         dev_rmdoublemask,
                                         dev_label,
                                         mb_size,
                                         max_seq_len=20,
                                         shuffle=False):
            inputs, mask, rmdmask, targets = batch
            train_err += train_fn(inputs, mask, rmdmask, targets)
    # And a full pass over the test data:
    test_ypred = val_fn(test_brancharray, test_mask)
    # get class label instead of probabilities
    new_test_ypred = numpy.argmax(test_ypred, axis=1).astype(numpy.int32)

    #Take mask into account
    acv_prediction = numpy.asarray(new_test_ypred)
    acv_mask = test_mask.flatten()
    clip_dev_ids = [o for o, m in zip(test_ids_padarray, acv_mask) if m == 1]
    clip_dev_prediction = [
        o for o, m in zip(acv_prediction, acv_mask) if m == 1
    ]
    # remove repeating instances
    uniqtwid, uindices2 = numpy.unique(clip_dev_ids, return_index=True)
    uniq_dev_prediction = [clip_dev_prediction[i] for i in uindices2]
    uniq_dev_id = [clip_dev_ids[i] for i in uindices2]
    output = {
        'status': STATUS_OK,
        'Params': params,
        'attachments': {
            'Predictions': pickle.dumps(uniq_dev_prediction),
            'ID': pickle.dumps(uniq_dev_id)
        }
    }

    return output
Example #8
    def __init__(self, rng, batchsize=100, activation=tanh):

        import load
        (num_sent, word_cnt, max_sen_len, k_wrd, x_wrd, y) \
        = load.read("tweets_clean.txt")

        dim_word = 100
        cl_word = 300
        k_wrd = 5
        vocab_size = word_cnt
        n_hidden = 300

        data_train,\
        data_test,\
        target_train,\
        target_test\
        = train_test_split(x_wrd, y, random_state=1234, test_size=0.1)

        x_train = theano.shared(np.asarray(data_train, dtype='int16'),
                                borrow=True)
        y_train = theano.shared(np.asarray(target_train, dtype='int32'),
                                borrow=True)
        x_test = theano.shared(np.asarray(data_test, dtype='int16'),
                               borrow=True)
        y_test = theano.shared(np.asarray(target_test, dtype='int32'),
                               borrow=True)

        self.n_train_batches = x_train.get_value(
            borrow=True).shape[0] / batchsize
        self.n_test_batches = x_test.get_value(
            borrow=True).shape[0] / batchsize
        """symbol definition"""
        index = T.iscalar()
        x = T.wmatrix('x')
        y = T.ivector('y')
        train = T.iscalar('train')

        layer_embed_input = x  #.reshape((batchsize, max_sen_len))

        layer_embed = EmbedIDLayer(
            rng,
            layer_embed_input,
            n_input=vocab_size,
            n_output=dim_word,
        )

        layer1_input = layer_embed.output.reshape(
            (batchsize, 1, max_sen_len, dim_word))

        layer1 = ConvolutionalLayer(
            rng,
            layer1_input,
            filter_shape=(cl_word, 1, k_wrd, dim_word),  # 1 is the number of input channels
            image_shape=(batchsize, 1, max_sen_len, dim_word),
            activation=activation)

        layer2 = MaxPoolingLayer(layer1.output,
                                 poolsize=(max_sen_len - k_wrd + 1, 1))

        layer3_input = layer2.output.reshape((batchsize, cl_word))

        layer3 = FullyConnectedLayer(rng,
                                     dropout(rng, layer3_input, train),
                                     n_input=cl_word,
                                     n_output=n_hidden,
                                     activation=activation)

        layer4 = FullyConnectedLayer(rng,
                                     dropout(rng, layer3.output, train),
                                     n_input=n_hidden,
                                     n_output=2,
                                     activation=None)

        result = Result(layer4.output, y)
        # loss = result.negative_log_likelihood()
        loss = result.cross_entropy()
        accuracy = result.accuracy()
        params = layer4.params + layer3.params + layer1.params + layer_embed.params
        # updates = AdaDelta(params=params).updates(loss)
        updates = RMSprop(learning_rate=0.001, params=params).updates(loss)

        self.train_model = theano.function(
            inputs=[index],
            outputs=[loss, accuracy],
            updates=updates,
            givens={
                x: x_train[index * batchsize:(index + 1) * batchsize],
                y: y_train[index * batchsize:(index + 1) * batchsize],
                train: np.cast['int32'](1)
            })

        self.test_model = theano.function(
            inputs=[index],
            outputs=[loss, accuracy],
            givens={
                x: x_test[index * batchsize:(index + 1) * batchsize],
                y: y_test[index * batchsize:(index + 1) * batchsize],
                train: np.cast['int32'](0)
            })
Example #9
    def __init__(
            self,
            rng,
            batchsize=100,
            activation=relu
    ):

        import char_load
        (num_sent, char_cnt, word_cnt, max_word_len, max_sen_len, \
         k_chr, k_wrd, x_chr, x_wrd, y) = char_load.read("tweets_clean.txt")

        dim_word = 30
        dim_char = 5
        cl_word = 300
        cl_char = 50
        k_word = k_wrd
        k_char = k_chr

        data_train_word, \
        data_test_word, \
        data_train_char, \
        data_test_char, \
        target_train, \
        target_test \
            = train_test_split(x_wrd, x_chr, y, random_state=1234, test_size=0.1)

        x_train_word = theano.shared(np.asarray(data_train_word, dtype='int16'), borrow=True)
        x_train_char = theano.shared(np.asarray(data_train_char, dtype='int16'), borrow=True)
        y_train = theano.shared(np.asarray(target_train, dtype='int8'), borrow=True)
        x_test_word = theano.shared(np.asarray(data_test_word, dtype='int16'), borrow=True)
        x_test_char = theano.shared(np.asarray(data_test_char, dtype='int16'), borrow=True)
        y_test = theano.shared(np.asarray(target_test, dtype='int8'), borrow=True)

        self.n_train_batches = x_train_word.get_value(borrow=True).shape[0] / batchsize
        self.n_test_batches = x_test_word.get_value(borrow=True).shape[0] / batchsize

        """symbol definition"""
        index = T.iscalar()
        x_wrd = T.wmatrix('x_wrd')
        x_chr = T.wtensor3('x_chr')
        y = T.bvector('y')
        train = T.iscalar('train')

        """network definition"""
        layer_char_embed_input = x_chr  # .reshape((batchsize, max_sen_len, max_word_len))

        layer_char_embed = EmbedIDLayer(
            rng,
            layer_char_embed_input,
            n_input=char_cnt,
            n_output=dim_char
        )

        layer1_input = layer_char_embed.output.reshape(
            (batchsize * max_sen_len, 1, max_word_len, dim_char)
        )

        layer1 = ConvolutionalLayer(
            rng,
            layer1_input,
            filter_shape=(cl_char, 1, k_char, dim_char),  # cl_char is the number of filters
            image_shape=(batchsize * max_sen_len, 1, max_word_len, dim_char)
        )

        layer2 = MaxPoolingLayer(
            layer1.output,
            poolsize=(max_word_len - k_char + 1, 1)
        )

        layer_word_embed_input = x_wrd  # .reshape((batchsize, max_sen_len))

        layer_word_embed = EmbedIDLayer(
            rng,
            layer_word_embed_input,
            n_input=word_cnt,
            n_output=dim_word
        )

        layer3_word_input = layer_word_embed.output.reshape((batchsize, 1, max_sen_len, dim_word))
        layer3_char_input = layer2.output.reshape((batchsize, 1, max_sen_len, cl_char))

        layer3_input = T.concatenate(
            [layer3_word_input,
             layer3_char_input],
            axis=3
        )  # .reshape((batchsize, 1, max_sen_len, dim_word+cl_char))

        layer3 = ConvolutionalLayer(
            rng,
            layer3_input,
            filter_shape=(cl_word, 1, k_word, dim_word + cl_char),  # 1 is the number of input channels
            image_shape=(batchsize, 1, max_sen_len, dim_word + cl_char),
            activation=activation
        )

        layer4 = MaxPoolingLayer(
            layer3.output,
            poolsize=(max_sen_len - k_word + 1, 1)
        )

        layer5_input = layer4.output.reshape((batchsize, cl_word))

        layer5 = FullyConnectedLayer(
            rng,
            dropout(rng, layer5_input, train),
            n_input=cl_word,
            n_output=50,
            activation=activation
        )

        layer6_input = layer5.output

        layer6 = FullyConnectedLayer(
            rng,
            dropout(rng, layer6_input, train, p=0.1),
            n_input=50,
            n_output=2,
            activation=None
        )

        result = Result(layer6.output, y)
        loss = result.negative_log_likelihood()
        accuracy = result.accuracy()
        params = layer6.params \
                 + layer5.params \
                 + layer3.params \
                 + layer_word_embed.params \
                 + layer1.params \
                 + layer_char_embed.params
        updates = RMSprop(learning_rate=0.001, params=params).updates(loss)

        self.train_model = theano.function(
            inputs=[index],
            outputs=[loss, accuracy],
            updates=updates,
            givens={
                x_wrd: x_train_word[index * batchsize: (index + 1) * batchsize],
                x_chr: x_train_char[index * batchsize: (index + 1) * batchsize],
                y: y_train[index * batchsize: (index + 1) * batchsize],
                train: np.cast['int32'](1)
            }
        )

        self.test_model = theano.function(
            inputs=[index],
            outputs=[loss, accuracy],
            givens={
                x_wrd: x_test_word[index * batchsize: (index + 1) * batchsize],
                x_chr: x_test_char[index * batchsize: (index + 1) * batchsize],
                y: y_test[index * batchsize: (index + 1) * batchsize],
                train: np.cast['int32'](0)
            }
        )
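The word-level embeddings and the character-level CNN features are concatenated along the last axis before the word-level convolution. A quick NumPy shape check of that step (dim_word and cl_char are the constants defined above; batchsize and max_sen_len are illustrative):

import numpy as np

batchsize, max_sen_len, dim_word, cl_char = 100, 20, 30, 50
word_part = np.zeros((batchsize, 1, max_sen_len, dim_word))
char_part = np.zeros((batchsize, 1, max_sen_len, cl_char))
joined = np.concatenate([word_part, char_part], axis=3)
print(joined.shape)   # (100, 1, 20, 80): (batchsize, 1, max_sen_len, dim_word + cl_char)
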
Example #10
def test_matrixmul():
    """
    Tests matrix multiplication for a range of different
    dtypes. Checks both normal and transpose multiplication
    using randomly generated matrices.
    """
    rng = np.random.RandomState(222)
    dtypes = ['int16', 'int32', 'int64', 'float64', 'float32']
    tensor_x = [
        tensor.wmatrix(),
        tensor.imatrix(),
        tensor.lmatrix(),
        tensor.dmatrix(),
        tensor.fmatrix()
    ]
    np_W, np_x, np_x_T = [], [], []
    for dtype in dtypes:
        if 'int' in dtype:
            np_W.append(
                rng.randint(-10, 10,
                            rng.random_integers(5, size=2)).astype(dtype))
            np_x.append(
                rng.randint(
                    -10, 10,
                    (rng.random_integers(5), np_W[-1].shape[0])).astype(dtype))
            np_x_T.append(
                rng.randint(
                    -10, 10,
                    (rng.random_integers(5), np_W[-1].shape[1])).astype(dtype))
        elif 'float' in dtype:
            np_W.append(
                rng.uniform(-1, 1, rng.random_integers(5,
                                                       size=2)).astype(dtype))
            np_x.append(
                rng.uniform(
                    -10, 10,
                    (rng.random_integers(5), np_W[-1].shape[0])).astype(dtype))
            np_x_T.append(
                rng.uniform(
                    -10, 10,
                    (rng.random_integers(5), np_W[-1].shape[1])).astype(dtype))
        else:
            assert False

    def sharedW(value, dtype):
        return theano.shared(theano._asarray(value, dtype=dtype))

    tensor_W = [sharedW(W, dtype) for W in np_W]
    matrixmul = [MatrixMul(W) for W in tensor_W]
    assert all(mm.get_params()[0] == W for mm, W in zip(matrixmul, tensor_W))

    fn = [
        theano.function([x], mm.lmul(x)) for x, mm in zip(tensor_x, matrixmul)
    ]
    fn_T = [
        theano.function([x], mm.lmul_T(x))
        for x, mm in zip(tensor_x, matrixmul)
    ]
    for W, x, x_T, f, f_T in zip(np_W, np_x, np_x_T, fn, fn_T):
        np.testing.assert_allclose(f(x), np.dot(x, W))
        np.testing.assert_allclose(f_T(x_T), np.dot(x_T, W.T))
Example #11
	def __init__(
		self,
		rng,
		batchsize=100,
		activation=relu
	):
		
		import char_load
		(num_sent, char_cnt, word_cnt, max_word_len, max_sen_len,\
	    k_chr, k_wrd, x_chr, x_wrd, y) = char_load.read("tweets_clean.txt")

		dim_word = 30
		dim_char = 5
		cl_word = 300
		cl_char = 50
		k_word = k_wrd
		k_char = k_chr

		data_train_word,\
		data_test_word,\
		data_train_char,\
		data_test_char,\
		target_train,\
		target_test\
		= train_test_split(x_wrd, x_chr, y, random_state=1234, test_size=0.1)

		x_train_word = theano.shared(np.asarray(data_train_word, dtype='int16'), borrow=True)
		x_train_char = theano.shared(np.asarray(data_train_char, dtype='int16'), borrow=True)
		y_train = theano.shared(np.asarray(target_train, dtype='int8'), borrow=True)
		x_test_word = theano.shared(np.asarray(data_test_word, dtype='int16'), borrow=True)
		x_test_char = theano.shared(np.asarray(data_test_char, dtype='int16'), borrow=True)
		y_test = theano.shared(np.asarray(target_test, dtype='int8'), borrow=True)


		self.n_train_batches = x_train_word.get_value(borrow=True).shape[0] / batchsize
		self.n_test_batches = x_test_word.get_value(borrow=True).shape[0] / batchsize


		
		"""symbol definition"""
		index = T.iscalar()
		x_wrd = T.wmatrix('x_wrd')
		x_chr = T.wtensor3('x_chr')
		y = T.bvector('y')
		train = T.iscalar('train')

		"""network definition"""
		layer_char_embed_input = x_chr#.reshape((batchsize, max_sen_len, max_word_len))

		layer_char_embed = EmbedIDLayer(
			rng,
			layer_char_embed_input,
			n_input=char_cnt,
			n_output=dim_char
		)

		layer1_input = layer_char_embed.output.reshape(
			(batchsize*max_sen_len, 1, max_word_len, dim_char)
		)

		layer1 = ConvolutionalLayer(
			rng,
			layer1_input,
			filter_shape=(cl_char, 1, k_char, dim_char),  # cl_char is the number of filters
			image_shape=(batchsize*max_sen_len, 1, max_word_len, dim_char)
		)

		layer2 = MaxPoolingLayer(
			layer1.output,
			poolsize=(max_word_len-k_char+1, 1)
		)

		layer_word_embed_input = x_wrd #.reshape((batchsize, max_sen_len))

		layer_word_embed = EmbedIDLayer(
			rng,
			layer_word_embed_input,
			n_input=word_cnt,
			n_output=dim_word
		)

		layer3_word_input = layer_word_embed.output.reshape((batchsize, 1, max_sen_len, dim_word))
		layer3_char_input = layer2.output.reshape((batchsize, 1, max_sen_len, cl_char))


		layer3_input = T.concatenate(
			[layer3_word_input,
			 layer3_char_input],
			axis=3
		)#.reshape((batchsize, 1, max_sen_len, dim_word+cl_char))


		layer3 = ConvolutionalLayer(
			rng,
			layer3_input,
			filter_shape=(cl_word, 1, k_word, dim_word + cl_char),  # 1 is the number of input channels
			image_shape=(batchsize, 1, max_sen_len, dim_word + cl_char),
			activation=activation
		)

		layer4 = MaxPoolingLayer(
			layer3.output,
			poolsize=(max_sen_len-k_word+1, 1)
		)

		layer5_input = layer4.output.reshape((batchsize, cl_word))

		layer5 = FullyConnectedLayer(
			rng,
			dropout(rng, layer5_input, train),
			n_input=cl_word,
			n_output=50,
			activation=activation
		)

		layer6_input = layer5.output

		layer6 = FullyConnectedLayer(
			rng,
			dropout(rng, layer6_input, train, p=0.1),
			n_input=50,
			n_output=2,
			activation=None
		)

		result = Result(layer6.output, y)
		loss = result.negative_log_likelihood()
		accuracy = result.accuracy()
		params = layer6.params\
				+layer5.params\
				+layer3.params\
				+layer_word_embed.params\
				+layer1.params\
				+layer_char_embed.params
		updates = RMSprop(learning_rate=0.001, params=params).updates(loss)

		self.train_model = theano.function(
			inputs=[index],
			outputs=[loss, accuracy],
			updates=updates,
			givens={
				x_wrd: x_train_word[index*batchsize: (index+1)*batchsize],
				x_chr: x_train_char[index*batchsize: (index+1)*batchsize],
				y: y_train[index*batchsize: (index+1)*batchsize],
				train: np.cast['int32'](1)
			}
		)

		self.test_model = theano.function(
			inputs=[index],
			outputs=[loss, accuracy],
			givens={
				x_wrd: x_test_word[index*batchsize: (index+1)*batchsize],
				x_chr: x_test_char[index*batchsize: (index+1)*batchsize],
				y: y_test[index*batchsize: (index+1)*batchsize],
				train: np.cast['int32'](0)
			}
		)
Example #12
def run():
    report = file('report-hmdb-tdd.txt', 'w')
    max_time = 200
    configs = []
    cc = create_config
    for d in ['1', '2', '3']:
        configs.append(
            cc('tdd-max-pool-h-4000 ' + d, max_time, 4000, 'hmdb-tdd-1.hdf5', {
                'method': 'max',
                'hidden_size': 4000
            }, 'hidden_2_layer_model', 0.0001))
        configs.append(
            cc('tdd-mean-pool-h-4000 ' + d, max_time, 4000, 'hmdb-tdd.hdf5', {
                'method': 'mean',
                'hidden_size': 4000
            }, 'hidden_2_layer_model', 0.0001))
        configs.append(
            cc('tdd-sum-pool-h-4000 ' + d, max_time, 4000, 'hmdb-tdd.hdf5', {
                'method': 'sum',
                'hidden_size': 4000
            }, 'hidden_2_layer_model', 0.0005))

        configs.append(
            cc('tdd-spyramid-1-h-1000', max_time, 4000, 'hmdb-tdd.hdf5', {
                'levels': 1,
                'hidden_size': 1000
            }, 'temporal_pyramid_model'))
        configs.append(
            cc('tdd-spyramid-4-h-4000 ' + d, max_time, 4000,
               '/ssd2/hmdb/hmdb-tdd.hdf5', {
                   'levels': 4,
                   'hidden_size': 4000
               }, 'temporal_pyramid_model', 0.0001))

    for d in ['1', '2', '3']:
        for model in ['temporal_learned_model']:
            s = ' split=' + d
            for num_f in [3]:
                configs.append(
                    cc('tdd-pyramid-1-N-' + str(num_f) + '-h-1000' + s,
                       max_time, 4000, 'hmdb-tdd.hdf5', {
                           'levels': 1,
                           'hidden_size': 1000,
                           'N': num_f
                       }, model, 0.05))

    for config in configs:
        name = config['name']
        epochs = 250
        subdir = name + "-" + time.strftime("%Y%m%d-%H%M%S")
        if not os.path.isdir(subdir):
            os.mkdir(subdir)

        bs = 100  #int(sys.argv[1])
        max_time = config['max_time']  #int(sys.argv[2])
        feature_dim = config['feature_dim']  #int(sys.argv[3])

        from uniform_dataset import UniformDataset
        data_train = UniformDataset(
            bs=bs,
            filename=config['filename'],
            which_sets=['train'],
            sources=['features', 'time_mask', 'labels'])
        data_test = UniformDataset(bs=bs,
                                   filename=config['filename'],
                                   which_sets=['test'],
                                   sources=['features', 'time_mask', 'labels'])

        train_stream = DataStream.default_stream(
            data_train,
            iteration_scheme=SequentialScheme(data_train.num_examples, bs))
        test_stream = DataStream.default_stream(
            data_test,
            iteration_scheme=SequentialScheme(data_test.num_examples, bs))

        x = T.tensor3('features')
        time_mask = T.wmatrix('time_mask')
        y = T.imatrix('labels')

        mod = importlib.import_module(config['model'])
        classes = 51
        model = mod.TemporalModel([x, time_mask, y], bs, max_time, classes,
                                  feature_dim, **config['model_kwargs'])

        prob, pred, loss, error, acc = model.run(x, time_mask, y)
        prob.name = 'prob'
        acc.name = 'acc'
        pred.name = 'pred'
        loss.name = 'loss'
        error.name = 'error'

        model._outputs = [prob, pred, loss, error, acc]

        params = model.params

        #        from solvers.sgd import SGD as solver
        from solvers.RMSProp import RMSProp as solver
        updates = solver(loss, params, lr=config['lr'], clipnorm=10.0)
        for i, u in enumerate(updates):
            if u[0].name == 'g' or u[0].name == 'sigma' or u[0].name == 'd':
                updates[i] = (u[0], T.mean(u[1]).dimshuffle(['x']))

        model._updates = updates

        # ============= TRAIN =========
        plots = [['train_loss', 'test_loss'], ['train_acc', 'test_acc']]
        main_loop = MainLoop(
            model,
            train_stream,
            [
                FinishAfter(epochs),
                Track(variables=['loss', 'error', 'acc'], prefix='train'),
                DataStreamTrack(test_stream, ['loss', 'error', 'acc'],
                                prefix='test',
                                best_method=[min, min, max]),
                #SaveModel(subdir, name+'.model'),
                TimeProfile(),
                Report(os.path.join(subdir, 'report.txt'), name=name),
                Printing()
            ])
        main_loop.run()
        config['best_acc'] = main_loop.log.current_row['best_test_acc']
        print >> report, config['name'], 'best test acc', config['best_acc']
        report.flush()

    print ''.join(79 * '-')
    print 'FINAL REPORT'
    print ''.join(79 * '-')

    for config in configs:
        print config['name'], 'best test acc', config['best_acc']
Example #13
    def __init__(self, rng, batchsize=100, activation=relu):

        import loader
        (numsent, charcnt, wordcnt, maxwordlen, maxsenlen,\
        kchr, kwrd, xchr, xwrd, y) = loader.read("tweets_clean.txt")

        dimword = 30
        dimchar = 5
        clword = 300
        clchar = 50
        kword = kwrd
        kchar = kchr

        datatrainword,\
        datatestword,\
        datatrainchar,\
        datatestchar,\
        targettrain,\
        targettest\
        = train_test_split(xwrd, xchr, y, random_state=1234, test_size=0.1)

        xtrainword = theano.shared(np.asarray(datatrainword, dtype='int16'),
                                   borrow=True)
        xtrainchar = theano.shared(np.asarray(datatrainchar, dtype='int16'),
                                   borrow=True)
        ytrain = theano.shared(np.asarray(targettrain, dtype='int8'),
                               borrow=True)
        xtestword = theano.shared(np.asarray(datatestword, dtype='int16'),
                                  borrow=True)
        xtestchar = theano.shared(np.asarray(datatestchar, dtype='int16'),
                                  borrow=True)
        ytest = theano.shared(np.asarray(targettest, dtype='int8'),
                              borrow=True)

        self.ntrainbatches = xtrainword.get_value(
            borrow=True).shape[0] / batchsize
        self.ntestbatches = xtestword.get_value(
            borrow=True).shape[0] / batchsize

        index = T.iscalar()
        xwrd = T.wmatrix('xwrd')
        xchr = T.wtensor3('xchr')
        y = T.bvector('y')
        train = T.iscalar('train')

        layercharembedinput = xchr

        layercharembed = EmbedIDLayer(rng,
                                      layercharembedinput,
                                      ninput=charcnt,
                                      noutput=dimchar)

        layer1input = layercharembed.output.reshape(
            (batchsize * maxsenlen, 1, maxwordlen, dimchar))

        layer1 = ConvolutionalLayer(rng,
                                    layer1input,
                                    filter_shape=(clchar, 1, kchar, dimchar),
                                    image_shape=(batchsize * maxsenlen, 1,
                                                 maxwordlen, dimchar))

        layer2 = MaxPoolingLayer(layer1.output,
                                 poolsize=(maxwordlen - kchar + 1, 1))

        layerwordembedinput = xwrd

        layerwordembed = EmbedIDLayer(rng,
                                      layerwordembedinput,
                                      ninput=wordcnt,
                                      noutput=dimword)

        layer3wordinput = layerwordembed.output.reshape(
            (batchsize, 1, maxsenlen, dimword))
        layer3charinput = layer2.output.reshape(
            (batchsize, 1, maxsenlen, clchar))

        layer3input = T.concatenate([layer3wordinput, layer3charinput], axis=3)

        layer3 = ConvolutionalLayer(rng,
                                    layer3input,
                                    filter_shape=(clword, 1, kword,
                                                  dimword + clchar),
                                    image_shape=(batchsize, 1, maxsenlen,
                                                 dimword + clchar),
                                    activation=activation)

        layer4 = MaxPoolingLayer(layer3.output,
                                 poolsize=(maxsenlen - kword + 1, 1))

        layer5input = layer4.output.reshape((batchsize, clword))

        layer5 = FullyConnectedLayer(rng,
                                     dropout(rng, layer5input, train),
                                     ninput=clword,
                                     noutput=50,
                                     activation=activation)

        layer6input = layer5.output

        layer6 = FullyConnectedLayer(rng,
                                     dropout(rng, layer6input, train, p=0.1),
                                     ninput=50,
                                     noutput=2,
                                     activation=None)

        result = Result(layer6.output, y)
        loss = result.negativeloglikelihood()
        accuracy = result.accuracy()
        params = layer6.params\
                +layer5.params\
                +layer3.params\
                +layerwordembed.params\
                +layer1.params\
                +layercharembed.params
        updates = RMSprop(learningrate=0.001, params=params).updates(loss)

        self.trainmodel = theano.function(
            inputs=[index],
            outputs=[loss, accuracy],
            updates=updates,
            givens={
                xwrd: xtrainword[index * batchsize:(index + 1) * batchsize],
                xchr: xtrainchar[index * batchsize:(index + 1) * batchsize],
                y: ytrain[index * batchsize:(index + 1) * batchsize],
                train: np.cast['int32'](1)
            })

        self.testmodel = theano.function(
            inputs=[index],
            outputs=[loss, accuracy],
            givens={
                xwrd: xtestword[index * batchsize:(index + 1) * batchsize],
                xchr: xtestchar[index * batchsize:(index + 1) * batchsize],
                y: ytest[index * batchsize:(index + 1) * batchsize],
                train: np.cast['int32'](0)
            })
Example #14
def run():
    report = file('report-hmdb-tdd-binary.txt', 'w')
    max_time = 200
    configs = []
    cc = create_config
    for d in ['1', '2', '3']:
        configs.append(
            cc('tdd-max-pool-h-4000 ' + d, max_time, 4000, 'hmdb-tdd-1.hdf5', {
                'method': 'max',
                'hidden_size': 4000
            }, 'baseline_binary_model', 0.01))
        configs.append(
            cc('tdd-mean-pool-h-4000 ' + d, max_time, 4000, 'hmdb-tdd.hdf5', {
                'method': 'mean',
                'hidden_size': 4000
            }, 'baseline_binary_model', 0.0001))
        configs.append(
            cc('tdd-sum-pool-h-4000 ' + d, max_time, 4000, 'hmdb-tdd.hdf5', {
                'method': 'sum',
                'hidden_size': 4000
            }, 'baseline_binary_model', 0.0005))

        #configs.append(cc('tdd-spyramid-1-h-1000', max_time, 4000, 'hmdb-tdd.hdf5', {'levels':1, 'hidden_size':1000}, 'temporal_pyramid_model'))


#        configs.append(cc('tdd-spyramid-4-h-4000 '+d, max_time, 4000, 'hmdb-tdd.hdf5', {'levels':3, 'hidden_size':4000}, 'temporal_pyramid_binary_model', 0.01))
#

    for d in ['1', '2', '3']:
        for model in ['binary_learned_model']:  #, 'temporal_random_model']:
            s = ' split=' + d
            for num_f in [3]:
                configs.append(
                    cc('plot-attention-', max_time, 4000, 'hmdb-tdd.hdf5', {
                        'levels': 6,
                        'hidden_size': 4000,
                        'N': num_f
                    }, model, 0.005))

    for config in configs:
        name = config['name'] + sys.argv[1]
        epochs = 150
        subdir = name + "-" + time.strftime("%Y%m%d-%H%M%S")
        if not os.path.isdir(subdir):
            os.mkdir(subdir)

        bs = 64  #int(sys.argv[1])
        max_time = config['max_time']  #int(sys.argv[2])
        feature_dim = config['feature_dim']  #int(sys.argv[3])

        from uniform_dataset import UniformDataset
        data_train = UniformDataset(
            bs=bs,
            filename=config['filename'],
            which_sets=['train'],
            sources=['features', 'time_mask', 'labels'])
        data_test = UniformDataset(bs=bs,
                                   filename=config['filename'],
                                   which_sets=['test'],
                                   sources=['features', 'time_mask', 'labels'])

        train_stream = DataStream.default_stream(
            data_train,
            iteration_scheme=SequentialScheme(data_train.num_examples, bs))
        test_stream = DataStream.default_stream(
            data_test,
            iteration_scheme=SequentialScheme(data_test.num_examples, bs))

        x = T.tensor3('features')
        time_mask = T.wmatrix('time_mask')
        y = T.imatrix('labels')

        mod = importlib.import_module(config['model'])
        models = []
        b_model = None
        classes = eval(sys.argv[1])
        for clas in classes:
            model = mod.TemporalModel([x, time_mask, y], bs, max_time, clas,
                                      feature_dim, **config['model_kwargs'])
            models.append(model)
            if not b_model:
                b_model = model
                b_model._outputs = []
                b_model._updates = []

            prob, loss, (tp, tn, fp, fn) = model.run(x, time_mask, y)
            prob.name = 'prob_' + str(clas)
            loss.name = 'loss_' + str(clas)
            tp.name = 'tp_' + str(clas)
            tn.name = 'tn_' + str(clas)
            fp.name = 'fp_' + str(clas)
            fn.name = 'fn_' + str(clas)

            b_model._outputs += [prob, loss, tp, tn, fp, fn]

            #for filt in model.temporal_pyramid:
            #    print filt.g.name, filt.d.name, filt.sigma.name
            #    b_model._outputs += [filt.g, filt.d, filt.sigma]

            params = model.params

            #        from solvers.sgd import SGD as solver
            from solvers.RMSProp import RMSProp as solver
            updates = solver(loss, params, lr=config['lr'], clipnorm=10.0)
            for i, u in enumerate(updates):
                if u[0].name is None:
                    continue
                if 'g.' in u[0].name or 'sigma.' in u[0].name or 'd.' in u[
                        0].name:
                    updates[i] = (u[0], T.mean(u[1]).dimshuffle(['x']))

            b_model._updates += updates

        # ============= TRAIN =========
        tc = classes
        #plots = [['_plt_g.af-0','_plt_g.af-1','_plt_g.af-2'],['_plt_d.af-0','_plt_d.af-1','_plt_d.af-2'],['_plt_sigma.af-0','_plt_sigma.af-1','_plt_sigma.af-2']]
        #track_plot = [(x[5:],'last') for sl in plots for x in sl]
        var = [[
            'loss_' + str(i), ('tp_' + str(i), 'sum'), ('tn_' + str(i), 'sum'),
            ('fp_' + str(i), 'sum'), ('fn_' + str(i), 'sum'),
            ('recall_' + str(i), 'after', 'tp_' + str(i), 'fn_' + str(i),
             lambda x, y: x / (x + y)),
            ('prec_' + str(i), 'after', 'tp_' + str(i), 'fp_' + str(i),
             lambda x, y: x / (x + y))
        ] for i in tc]
        var = [item for sublist in var for item in sublist]

        bm = [[min, max, max, min, min, max, max] for i in tc]
        bm = [item for sublist in bm for item in sublist]

        main_loop = MainLoop(
            b_model,
            train_stream,
            [
                FinishAfter(epochs),
                Track(variables=var, prefix='train'),
                #Track(variables=track_plot, prefix='_plt'),
                DataStreamTrack(
                    test_stream, var, prefix='test', best_method=bm),
                TimeProfile(),
                SaveAfter(models),
                #PlotLocal(name, subdir, plots),
                Report(os.path.join(subdir, 'report.txt'), name=name),
                Printing()
            ])
        main_loop.run()
        config['best_prec'] = main_loop.log.current_row['best_test_prec']
        print >> report, config['name'], 'best test prec', config['best_prec']
        report.flush()

    print ''.join(79 * '-')
    print 'FINAL REPORT'
    print ''.join(79 * '-')

    for config in configs:
        print config['name'], 'best test prec', config['best_prec']
Example #15
            sequence_length, stride_length, buckets[bb], batch_size))
    if len(valid_data[bb]) >= batch_size:
        valid_gens.append(WordLMGenerator([valid_data[bb], valid_mask[bb]], glove, \
            sequence_length, stride_length, buckets[bb], batch_size))

#for i in range(len(train_gens)):
#    train_gen = train_gens[i]
#    for index in range(train_gen.max_index):
#        # run minibatch
#        for trainset in train_gen.get_minibatch(index):  # data, mask, label, reset
#            print(i, index)

#================Build graph================#

x = T.ftensor3('X')  # (batch_size, sequence_length, 300)
m = T.wmatrix('M')  # (batch_size, sequence_length)
r = T.wvector('r')  # (batch_size,)
x_ext = T.ftensor3('X_ext')
m_ext = T.wmatrix('M_ext')
y_ext = T.imatrix('Y_ext')
r_ext = T.wvector('r_ext')

encoder = SimpleGraph(experiment_name + '_enc', batch_size)
encoder.add_layer(LSTMRecurrentLayer(input_shape=(300, ),
                                     output_shape=(512, ),
                                     forget_bias_one=True,
                                     peephole=True,
                                     output_return_index=[-1],
                                     save_state_index=stride_length - 1,
                                     also_return_cell=True,
                                     precompute=False,
Example #16
            sequence_length, stride_length, buckets[bb], batch_size))
    if len(valid_data[bb]) >= batch_size:
        valid_gens.append(WordLMGenerator([valid_data[bb], valid_mask[bb]], glove, \
            sequence_length, stride_length, buckets[bb], batch_size))

#for i in range(len(train_gens)):
#    train_gen = train_gens[i]
#    for index in range(train_gen.max_index):
#        # run minibatch
#        for trainset in train_gen.get_minibatch(index):  # data, mask, label, reset
#            print(i, index)

#================Build graph================#

x = T.ftensor3('X')  # (batch_size, sequence_length, 300)
m = T.wmatrix('M')  # (batch_size, sequence_length)
y = T.imatrix('Y')  # (batch_size, sequence_length)
r = T.wvector('r')  # (batch_size,)

graph = SimpleGraph(experiment_name, batch_size)
graph.add_layer(LSTMRecurrentLayer(input_shape=(300,),
                                   output_shape=(1024,),
                                   forget_bias_one=True,
                                   peephole=True,
                                   output_return_index=None,
                                   save_state_index=stride_length-1,
                                   precompute=False,
                                   unroll=False,
                                   backward=False), is_start=True)
# graph.add_layer(TimeDistributedDenseLayer((1024,), (512,)))  # not much time difference, and less memory
graph.add_layer(DenseLayer((1024,), (512,)))