def trainer_tester(mapping, train_data, test_data):
    # Wrap the data and the parameters in shared variables.
    data = theano.shared(train_data)
    test_data = theano.shared(test_data)
    init_weights = 0.1 * np.random.randn(len(mapping), 2, 100)
    W = theano.shared(init_weights)

    matches = T.wmatrix('matches')
    weights = T.dtensor3('weights')
    t_matches = T.wmatrix('t_matches')
    delta = theano.shared(np.zeros(init_weights.shape))

    cost, accuracy = cost_fn(matches, weights)
    log_loss_fn = log_loss(t_matches, weights)
    grad = T.grad(cost, wrt=weights)

    # Gradient descent with a momentum-like term carried in `delta`.
    train = theano.function(
        inputs=[],
        outputs=cost,
        givens={matches: data, weights: W},
        updates=[
            (W, W - 0.1 * (grad + 0.5 * delta)),
            (delta, 0.1 * (grad + 0.5 * delta))
        ]
    )
    test = theano.function(
        inputs=[],
        outputs=[log_loss_fn],
        givens={t_matches: test_data, weights: W}
    )
    return train, test, W
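# A minimal usage sketch for trainer_tester. It assumes `mapping` and the
# helpers cost_fn/log_loss are defined elsewhere in this module; the match
# arrays below are illustrative int16 placeholders, not real data.
import numpy as np

train_matches = np.zeros((500, 2), dtype='int16')   # placeholder training pairs
test_matches = np.zeros((100, 2), dtype='int16')    # placeholder held-out pairs

train_fn, test_fn, W = trainer_tester(mapping, train_matches, test_matches)
for step in range(200):
    cost = train_fn()           # one full-batch gradient step
held_out_log_loss, = test_fn()  # log loss of the learned weights on the test set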
def test_matrixmul(): """ Tests matrix multiplication for a range of different dtypes. Checks both normal and transpose multiplication using randomly generated matrices. """ rng = np.random.RandomState(222) dtypes = [ 'int16', 'int32', 'int64', 'float64', 'float32' ] tensor_x = [ tensor.wmatrix(), tensor.imatrix(), tensor.lmatrix(), tensor.dmatrix(), tensor.fmatrix() ] np_W, np_x, np_x_T = [], [], [] for dtype in dtypes: if 'int' in dtype: np_W.append(rng.randint( -10, 10, rng.random_integers(5, size=2) ).astype(dtype)) np_x.append(rng.randint( -10, 10, (rng.random_integers(5), np_W[-1].shape[0]) ).astype(dtype)) np_x_T.append(rng.randint( -10, 10, (rng.random_integers(5), np_W[-1].shape[1]) ).astype(dtype)) elif 'float' in dtype: np_W.append(rng.uniform( -1, 1, rng.random_integers(5, size=2) ).astype(dtype)) np_x.append(rng.uniform( -10, 10, (rng.random_integers(5), np_W[-1].shape[0]) ).astype(dtype)) np_x.append(rng.uniform( -10, 10, (rng.random_integers(5), np_W[-1].shape[1]) ).astype(dtype)) else: assert False def sharedW(value, dtype): return theano.shared(theano._asarray(value, dtype=dtype)) tensor_W = [sharedW(W, dtype) for W in np_W] matrixmul = [MatrixMul(W) for W in tensor_W] assert all(mm.get_params()[0] == W for mm, W in zip(matrixmul, tensor_W)) fn = [theano.function([x], mm.lmul(x)) for x, mm in zip(tensor_x, matrixmul)] fn_T = [theano.function([x], mm.lmul_T(x)) for x, mm in zip(tensor_x, matrixmul)] for W, x, x_T, f, f_T in zip(np_W, np_x, np_x_T, fn, fn_T): np.testing.assert_allclose(f(x), np.dot(x, W)) np.testing.assert_allclose(f_T(x_T), np.dot(x_T, W.T))
def test_matrixmul(): """ Tests for projection """ rng = np.random.RandomState(222) dtypes = [ 'int16', 'int32', 'int64' ] tensor_x = [ tensor.wmatrix(), tensor.imatrix(), tensor.lmatrix(), tensor.wvector(), tensor.ivector(), tensor.lvector() ] np_W, np_x = [], [] for dtype in dtypes: np_W.append(rng.rand(10, np.random.randint(1, 10))) np_x.append(rng.randint( 0, 10, (rng.random_integers(5), rng.random_integers(5)) ).astype(dtype)) for dtype in dtypes: np_W.append(rng.rand(10, np.random.randint(1, 10))) np_x.append( rng.randint(0, 10, (rng.random_integers(5),)).astype(dtype) ) tensor_W = [sharedX(W) for W in np_W] matrixmul = [MatrixMul(W) for W in tensor_W] assert all(mm.get_params()[0] == W for mm, W in zip(matrixmul, tensor_W)) fn = [theano.function([x], mm.project(x)) for x, mm in zip(tensor_x, matrixmul)] for W, x, f in zip(np_W, np_x, fn): W_x = W[x] if x.ndim == 2: W_x = W_x.reshape((W_x.shape[0], np.prod(W_x.shape[1:]))) else: W_x = W_x.flatten() np.testing.assert_allclose(f(x), W_x)
def objective_train_model(params):
    # Initialise parameters
    start = timeit.default_timer()
    print(params)
    num_lstm_units = int(params['num_lstm_units'])
    num_lstm_layers = int(params['num_lstm_layers'])
    num_dense_layers = int(params['num_dense_layers'])
    num_dense_units = int(params['num_dense_units'])
    num_epochs = params['num_epochs']
    learn_rate = params['learn_rate']
    mb_size = params['mb_size']
    l2reg = params['l2reg']
    rng_seed = params['rng_seed']
    #%%
    # Load training data
    path = 'saved_data'
    brancharray = numpy.load(os.path.join(path, 'train/branch_arrays.npy'))
    num_features = numpy.shape(brancharray)[-1]
    train_mask = numpy.load(
        os.path.join(path, 'train/mask.npy')).astype(numpy.int16)
    train_label = numpy.load(os.path.join(path, 'train/padlabel.npy'))
    train_rmdoublemask = numpy.load(
        os.path.join(path, 'train/rmdoublemask.npy')).astype(numpy.int16)
    train_rmdoublemask = train_rmdoublemask.flatten()
    #%%
    numpy.random.seed(rng_seed)
    rng_inst = numpy.random.RandomState(rng_seed)
    lasagne.random.set_rng(rng_inst)

    input_var = T.ftensor3('inputs')
    mask = T.wmatrix('mask')
    target_var = T.ivector('targets')
    rmdoublesmask = T.wvector('rmdoublemask')

    # Build network
    network = build_nn(input_var, mask, num_features,
                       num_lstm_layers=num_lstm_layers,
                       num_lstm_units=num_lstm_units,
                       num_dense_layers=num_dense_layers,
                       num_dense_units=num_dense_units)
    # This function returns the values of the parameters of all layers
    # below one or more given Layer instances, including the layer(s) itself.

    # Create a loss expression for training, i.e., a scalar objective we want
    # to minimize (for our multi-class problem, it is the cross-entropy loss):
    prediction = lasagne.layers.get_output(network)
    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
    loss = loss * rmdoublesmask
    loss = lasagne.objectives.aggregate(loss, mask.flatten())
    # regularisation
    l2_penalty = l2reg * lasagne.regularization.regularize_network_params(
        network, lasagne.regularization.l2)
    loss = loss + l2_penalty
    # We could add some weight decay as well here, see lasagne.regularization.

    # Create update expressions for training, i.e., how to modify the
    # parameters at each training step. Here, we'll use Adam
    parameters = lasagne.layers.get_all_params(network, trainable=True)
    my_updates = lasagne.updates.adam(loss, parameters,
                                      learning_rate=learn_rate)

    # Create a loss expression for validation/testing. The crucial difference
    # here is that we do a deterministic forward pass through the network,
    # disabling dropout layers.
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_loss = lasagne.objectives.categorical_crossentropy(
        test_prediction, target_var)
    test_loss = test_loss * rmdoublesmask
    test_loss = lasagne.objectives.aggregate(test_loss, mask.flatten())

    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    train_fn = theano.function(
        inputs=[input_var, mask, rmdoublesmask, target_var],
        outputs=loss,
        updates=my_updates,
        on_unused_input='warn')
    # Compile a second function computing the validation loss and predictions:
    val_fn = theano.function([input_var, mask, rmdoublesmask, target_var],
                             [test_loss, test_prediction],
                             on_unused_input='warn')
    #%%
    # We iterate over epochs:
    for epoch in range(num_epochs):
        # print("Epoch {} ".format(epoch))
        train_err = 0
        # In each epoch, we do a full pass over the training data:
        for batch in iterate_minibatches(brancharray, train_mask,
                                         train_rmdoublemask, train_label,
                                         mb_size, shuffle=False):
            inputs, mask, rmdmask, targets = batch
            train_err += train_fn(inputs, mask, rmdmask, targets)
    #%%
    # Load development data
    dev_brancharray = numpy.load(os.path.join(path, 'dev/branch_arrays.npy'))
    dev_mask = numpy.load(
        os.path.join(path, 'dev/mask.npy')).astype(numpy.int16)
    dev_label = numpy.load(os.path.join(path, 'dev/padlabel.npy'))
    dev_rmdoublemask = numpy.load(os.path.join(
        path, 'dev/rmdoublemask.npy')).astype(numpy.int16).flatten()
    with open(os.path.join(path, 'dev/ids.pkl'), 'rb') as handle:
        dev_ids_padarray = pickle.load(handle)
    #%%
    # get predictions for development set
    err, val_ypred = val_fn(dev_brancharray, dev_mask,
                            dev_rmdoublemask, dev_label.flatten())
    val_ypred = numpy.argmax(val_ypred, axis=1).astype(numpy.int32)
    acv_label = dev_label.flatten()
    acv_prediction = numpy.asarray(val_ypred)
    acv_mask = dev_mask.flatten()
    clip_dev_label = [o for o, m in zip(acv_label, acv_mask) if m == 1]
    clip_dev_ids = [o for o, m in zip(dev_ids_padarray, acv_mask) if m == 1]
    clip_dev_prediction = [
        o for o, m in zip(acv_prediction, acv_mask) if m == 1
    ]
    # remove repeating instances
    uniqtwid, uindices2 = numpy.unique(clip_dev_ids, return_index=True)
    uniq_dev_label = [clip_dev_label[i] for i in uindices2]
    uniq_dev_prediction = [clip_dev_prediction[i] for i in uindices2]
    uniq_dev_id = [clip_dev_ids[i] for i in uindices2]

    dev_accuracy = accuracy_score(uniq_dev_label, uniq_dev_prediction)
    mactest_P, mactest_R, mactest_F, _ = precision_recall_fscore_support(
        uniq_dev_label, uniq_dev_prediction, average='macro')
    mictest_P, mictest_R, mictest_F, _ = precision_recall_fscore_support(
        uniq_dev_label, uniq_dev_prediction, average='micro')
    test_P, test_R, test_F, _ = precision_recall_fscore_support(
        uniq_dev_label, uniq_dev_prediction)

    # to change scoring objective you need to change 'loss'
    output = {
        'loss': 1 - dev_accuracy,
        'status': STATUS_OK,
        'Params': params,
        'Macro': {
            'Macro_Precision': mactest_P,
            'Macro_Recall': mactest_R,
            'macro_F_score': mactest_F
        },
        'Micro': {
            'Micro_Precision': mictest_P,
            'Micro_Recall': mictest_R,
            'micro_F_score': mictest_F
        },
        'Support': {
            'Support_Precision': test_P[0],
            'Support_Recall': test_R[0],
            'Support_F_score': test_F[0]
        },
        'Comment': {
            'Comment_Precision': test_P[1],
            'Comment_Recall': test_R[1],
            'Comment_F_score': test_F[1]
        },
        'Deny': {
            'Deny_Precision': test_P[2],
            'Deny_Recall': test_R[2],
            'Deny_F_score': test_F[2]
        },
        'Appeal': {
            'Appeal_Precision': test_P[3],
            'Appeal_Recall': test_R[3],
            'Appeal_F_score': test_F[3]
        },
        'attachments': {
            'Labels': pickle.dumps(uniq_dev_label),
            'Predictions': pickle.dumps(uniq_dev_prediction),
            'ID': pickle.dumps(uniq_dev_id)
        }
    }
    print("1-accuracy loss = ", output['loss'])
    stop = timeit.default_timer()
    print("Time: ", stop - start)
    return output
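# A minimal sketch of how objective_train_model could be driven by hyperopt,
# since it returns a dict with 'loss', 'status': STATUS_OK and 'attachments'.
# The search-space bounds below are illustrative placeholders, not values from
# the original experiments.
from hyperopt import fmin, tpe, hp, Trials

search_space = {
    'num_lstm_units': hp.choice('num_lstm_units', [100, 200, 300]),
    'num_lstm_layers': hp.choice('num_lstm_layers', [1, 2]),
    'num_dense_layers': hp.choice('num_dense_layers', [1, 2, 3]),
    'num_dense_units': hp.choice('num_dense_units', [100, 200, 300]),
    'num_epochs': hp.choice('num_epochs', [30, 50]),
    'learn_rate': hp.uniform('learn_rate', 1e-4, 1e-2),
    'mb_size': hp.choice('mb_size', [32, 64, 100]),
    'l2reg': hp.uniform('l2reg', 0.0, 1e-2),
    'rng_seed': 364
}
trials = Trials()
best = fmin(objective_train_model, search_space,
            algo=tpe.suggest, max_evals=50, trials=trials)
print(best)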
def run():
    configs = [0]
    for config in configs:
        bs = 48
        feature_dim = 4000
        from uniform_dataset import UniformDataset
        data_test = UniformDataset(bs=bs,
                                   filename='/ssd2/hmdb/hmdb-tdd-1.hdf5',
                                   which_sets=['test'],
                                   sources=['features', 'time_mask', 'labels'])
        test_stream = DataStream.default_stream(
            data_test,
            iteration_scheme=SequentialScheme(data_test.num_examples, bs))

        x = T.tensor3('features')
        time_mask = T.wmatrix('time_mask')
        y = T.imatrix('labels')

        classes = eval(sys.argv[1])
        outputs = []
        for clas in classes:
            print 'Loading', clas
            model = cPickle.load(open('models/learned_' + str(clas), 'rb'))
            prob, loss, (tp, tn, fp, fn) = model.run(x, time_mask, y)
            prob.name = 'prob_' + str(clas)
            outputs += [prob]

        # prob is Nx1
        # outputs is 51xNx1
        # stack and take max along 51-class index
        outputs = T.stacklists(outputs)
        preds = T.argmax(outputs, axis=0)
        # predicted class is now outputs
        # which is shape Nx1, reshape to vector of N
        preds = preds.reshape((preds.shape[0], 1))
        num_err = T.neq(preds, y).sum()
        acc = 1 - (num_err / y.shape[0])

        test_func = theano.function([x, time_mask, y], outputs,
                                    on_unused_input='warn')

        data = test_stream.get_epoch_iterator(as_dict=True)
        total_acc = 0
        num = 0
        res = None
        labs = None
        for batch in data:
            o = test_func(batch['features'], batch['time_mask'],
                          batch['labels'])
            if res is None:
                res = o
                labs = batch['labels']
            else:
                # append on axis 1, to get 51xDs_size
                res = np.append(res, o, axis=1)
                labs = np.append(labs, batch['labels'], axis=0)
            continue
            total_acc += acc
            num += 1
            print acc

        np.save('results' + sys.argv[2], res)
        np.save('labs' + sys.argv[2], labs)
def __init__(self, rng, batchsize=100, activation=tanh):
    import load
    (num_sent, word_cnt, max_sen_len, k_wrd, x_wrd, y) \
        = load.read("tweets_clean.txt")

    dim_word = 100
    cl_word = 300
    k_wrd = 5
    vocab_size = word_cnt
    n_hidden = 300

    data_train,\
    data_test,\
    target_train,\
    target_test\
        = train_test_split(x_wrd, y, random_state=1234, test_size=0.1)

    x_train = theano.shared(np.asarray(data_train, dtype='int16'), borrow=True)
    y_train = theano.shared(np.asarray(target_train, dtype='int32'), borrow=True)
    x_test = theano.shared(np.asarray(data_test, dtype='int16'), borrow=True)
    y_test = theano.shared(np.asarray(target_test, dtype='int32'), borrow=True)

    self.n_train_batches = x_train.get_value(borrow=True).shape[0] / batchsize
    self.n_test_batches = x_test.get_value(borrow=True).shape[0] / batchsize

    """symbol definition"""
    index = T.iscalar()
    x = T.wmatrix('x')
    y = T.ivector('y')
    train = T.iscalar('train')

    layer_embed_input = x  # .reshape((batchsize, max_sen_len))

    layer_embed = EmbedIDLayer(
        rng,
        layer_embed_input,
        n_input=vocab_size,
        n_output=dim_word,
    )

    layer1_input = layer_embed.output.reshape(
        (batchsize, 1, max_sen_len, dim_word))

    layer1 = ConvolutionalLayer(
        rng,
        layer1_input,
        filter_shape=(cl_word, 1, k_wrd, dim_word),  # 1 is the number of input channels
        image_shape=(batchsize, 1, max_sen_len, dim_word),
        activation=activation
    )

    layer2 = MaxPoolingLayer(
        layer1.output,
        poolsize=(max_sen_len - k_wrd + 1, 1)
    )

    layer3_input = layer2.output.reshape((batchsize, cl_word))

    layer3 = FullyConnectedLayer(
        rng,
        dropout(rng, layer3_input, train),
        n_input=cl_word,
        n_output=n_hidden,
        activation=activation
    )

    layer4 = FullyConnectedLayer(
        rng,
        dropout(rng, layer3.output, train),
        n_input=n_hidden,
        n_output=2,
        activation=None
    )

    result = Result(layer4.output, y)
    # loss = result.negative_log_likelihood()
    loss = result.cross_entropy()
    accuracy = result.accuracy()
    params = layer4.params + layer3.params + layer1.params + layer_embed.params
    # updates = AdaDelta(params=params).updates(loss)
    updates = RMSprop(learning_rate=0.001, params=params).updates(loss)

    self.train_model = theano.function(
        inputs=[index],
        outputs=[loss, accuracy],
        updates=updates,
        givens={
            x: x_train[index * batchsize: (index + 1) * batchsize],
            y: y_train[index * batchsize: (index + 1) * batchsize],
            train: np.cast['int32'](1)
        }
    )

    self.test_model = theano.function(
        inputs=[index],
        outputs=[loss, accuracy],
        givens={
            x: x_test[index * batchsize: (index + 1) * batchsize],
            y: y_test[index * batchsize: (index + 1) * batchsize],
            train: np.cast['int32'](0)
        }
    )
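# A minimal usage sketch for this word-level CNN sentence classifier. The
# class name `CNNClassifier` is hypothetical (the enclosing class is not shown
# in the snippet above); the training loop only relies on the compiled
# train_model/test_model functions and the batch counts it defines.
import numpy as np

rng = np.random.RandomState(1234)
model = CNNClassifier(rng, batchsize=100)
for epoch in range(10):
    train_losses = []
    for i in range(model.n_train_batches):
        loss, acc = model.train_model(i)     # one mini-batch update
        train_losses.append(loss)
    test_scores = [model.test_model(i) for i in range(model.n_test_batches)]
    print("epoch %d  train loss %.4f  test acc %.4f"
          % (epoch, np.mean(train_losses),
             np.mean([a for _, a in test_scores])))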
def eval_train_model(params):
    print("Retrain model on train+dev set and evaluate on testing set")
    # Initialise parameters
    num_lstm_units = int(params['num_lstm_units'])
    num_lstm_layers = int(params['num_lstm_layers'])
    num_dense_layers = int(params['num_dense_layers'])
    num_dense_units = int(params['num_dense_units'])
    num_epochs = params['num_epochs']
    learn_rate = params['learn_rate']
    mb_size = params['mb_size']
    l2reg = params['l2reg']
    rng_seed = params['rng_seed']
    #%%
    # Load data
    path = 'saved_data'
    brancharray = numpy.load(os.path.join(path, 'train/branch_arrays.npy'))
    num_features = numpy.shape(brancharray)[-1]
    train_mask = numpy.load(
        os.path.join(path, 'train/mask.npy')).astype(numpy.int16)
    train_label = numpy.load(os.path.join(path, 'train/padlabel.npy'))
    train_rmdoublemask = numpy.load(
        os.path.join(path, 'train/rmdoublemask.npy')).astype(numpy.int16)
    train_rmdoublemask = train_rmdoublemask.flatten()
    #%%
    numpy.random.seed(rng_seed)
    rng_inst = numpy.random.RandomState(rng_seed)
    lasagne.random.set_rng(rng_inst)

    input_var = T.ftensor3('inputs')
    mask = T.wmatrix('mask')
    target_var = T.ivector('targets')
    rmdoublesmask = T.wvector('rmdoublemask')

    # Build network
    network = build_nn(input_var, mask, num_features,
                       num_lstm_layers=num_lstm_layers,
                       num_lstm_units=num_lstm_units,
                       num_dense_layers=num_dense_layers,
                       num_dense_units=num_dense_units)
    # This function returns the values of the parameters of all layers
    # below one or more given Layer instances, including the layer(s) itself.

    # Create a loss expression for training, i.e., a scalar objective we want
    # to minimize (for our multi-class problem, it is the cross-entropy loss):
    prediction = lasagne.layers.get_output(network)
    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
    loss = loss * rmdoublesmask
    loss = lasagne.objectives.aggregate(loss, mask.flatten())
    # regularisation
    l2_penalty = l2reg * lasagne.regularization.regularize_network_params(
        network, lasagne.regularization.l2)
    loss = loss + l2_penalty
    # We could add some weight decay as well here, see lasagne.regularization.

    # Create update expressions for training, i.e., how to modify the
    # parameters at each training step.
    parameters = lasagne.layers.get_all_params(network, trainable=True)
    my_updates = lasagne.updates.adam(loss, parameters,
                                      learning_rate=learn_rate)

    # Create a loss expression for validation/testing. The crucial difference
    # here is that we do a deterministic forward pass through the network,
    # disabling dropout layers.
    test_prediction = lasagne.layers.get_output(network, deterministic=True)

    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    train_fn = theano.function(
        inputs=[input_var, mask, rmdoublesmask, target_var],
        outputs=loss,
        updates=my_updates,
        on_unused_input='warn')
    # Compile a second function computing the validation loss and accuracy:
    val_fn = theano.function([input_var, mask], test_prediction,
                             on_unused_input='warn')
    #%%
    # READ THE DATA
    dev_brancharray = numpy.load(os.path.join(path, 'dev/branch_arrays.npy'))
    dev_mask = numpy.load(
        os.path.join(path, 'dev/mask.npy')).astype(numpy.int16)
    dev_label = numpy.load(os.path.join(path, 'dev/padlabel.npy'))
    dev_rmdoublemask = numpy.load(os.path.join(
        path, 'dev/rmdoublemask.npy')).astype(numpy.int16).flatten()
    with open(os.path.join(path, 'dev/ids.pkl'), 'rb') as handle:
        dev_ids_padarray = pickle.load(handle)

    test_brancharray = numpy.load(os.path.join(path, 'test/branch_arrays.npy'))
    test_mask = numpy.load(
        os.path.join(path, 'test/mask.npy')).astype(numpy.int16)
    test_rmdoublemask = numpy.load(os.path.join(
        path, 'test/rmdoublemask.npy')).astype(numpy.int16).flatten()
    with open(os.path.join(path, 'test/ids.pkl'), 'rb') as handle:
        test_ids_padarray = pickle.load(handle)
    #%%
    # start training loop
    # We iterate over epochs:
    for epoch in range(num_epochs):
        # print("Epoch {} ".format(epoch))
        train_err = 0
        # In each epoch, we do a full pass over the training data:
        for batch in iterate_minibatches(brancharray, train_mask,
                                         train_rmdoublemask, train_label,
                                         mb_size, max_seq_len=25,
                                         shuffle=False):
            inputs, mask, rmdmask, targets = batch
            train_err += train_fn(inputs, mask, rmdmask, targets)
        for batch in iterate_minibatches(dev_brancharray, dev_mask,
                                         dev_rmdoublemask, dev_label,
                                         mb_size, max_seq_len=20,
                                         shuffle=False):
            inputs, mask, rmdmask, targets = batch
            train_err += train_fn(inputs, mask, rmdmask, targets)

    # And a full pass over the test data:
    test_ypred = val_fn(test_brancharray, test_mask)
    # get class label instead of probabilities
    new_test_ypred = numpy.argmax(test_ypred, axis=1).astype(numpy.int32)
    # Take mask into account
    acv_prediction = numpy.asarray(new_test_ypred)
    acv_mask = test_mask.flatten()
    clip_dev_ids = [o for o, m in zip(test_ids_padarray, acv_mask) if m == 1]
    clip_dev_prediction = [
        o for o, m in zip(acv_prediction, acv_mask) if m == 1
    ]
    # remove repeating instances
    uniqtwid, uindices2 = numpy.unique(clip_dev_ids, return_index=True)
    uniq_dev_prediction = [clip_dev_prediction[i] for i in uindices2]
    uniq_dev_id = [clip_dev_ids[i] for i in uindices2]

    output = {
        'status': STATUS_OK,
        'Params': params,
        'attachments': {
            'Predictions': pickle.dumps(uniq_dev_prediction),
            'ID': pickle.dumps(uniq_dev_id)
        }
    }
    return output
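# A minimal sketch of re-running the best configuration found by the hyperopt
# search above; `trials` is the Trials object from that search, and the
# attachments are read back from the dict eval_train_model returns.
best_params = trials.best_trial['result']['Params']
final_output = eval_train_model(best_params)
test_predictions = pickle.loads(final_output['attachments']['Predictions'])
test_ids = pickle.loads(final_output['attachments']['ID'])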
def __init__(self, rng, batchsize=100, activation=relu):
    import char_load
    (num_sent, char_cnt, word_cnt, max_word_len, max_sen_len, \
        k_chr, k_wrd, x_chr, x_wrd, y) = char_load.read("tweets_clean.txt")

    dim_word = 30
    dim_char = 5
    cl_word = 300
    cl_char = 50
    k_word = k_wrd
    k_char = k_chr

    data_train_word, \
    data_test_word, \
    data_train_char, \
    data_test_char, \
    target_train, \
    target_test \
        = train_test_split(x_wrd, x_chr, y, random_state=1234, test_size=0.1)

    x_train_word = theano.shared(np.asarray(data_train_word, dtype='int16'),
                                 borrow=True)
    x_train_char = theano.shared(np.asarray(data_train_char, dtype='int16'),
                                 borrow=True)
    y_train = theano.shared(np.asarray(target_train, dtype='int8'),
                            borrow=True)
    x_test_word = theano.shared(np.asarray(data_test_word, dtype='int16'),
                                borrow=True)
    x_test_char = theano.shared(np.asarray(data_test_char, dtype='int16'),
                                borrow=True)
    y_test = theano.shared(np.asarray(target_test, dtype='int8'),
                           borrow=True)

    self.n_train_batches = x_train_word.get_value(borrow=True).shape[0] / batchsize
    self.n_test_batches = x_test_word.get_value(borrow=True).shape[0] / batchsize

    """symbol definition"""
    index = T.iscalar()
    x_wrd = T.wmatrix('x_wrd')
    x_chr = T.wtensor3('x_chr')
    y = T.bvector('y')
    train = T.iscalar('train')

    """network definition"""
    layer_char_embed_input = x_chr  # .reshape((batchsize, max_sen_len, max_word_len))

    layer_char_embed = EmbedIDLayer(
        rng,
        layer_char_embed_input,
        n_input=char_cnt,
        n_output=dim_char
    )

    layer1_input = layer_char_embed.output.reshape(
        (batchsize * max_sen_len, 1, max_word_len, dim_char)
    )

    layer1 = ConvolutionalLayer(
        rng,
        layer1_input,
        filter_shape=(cl_char, 1, k_char, dim_char),  # cl_char is the number of filters
        image_shape=(batchsize * max_sen_len, 1, max_word_len, dim_char)
    )

    layer2 = MaxPoolingLayer(
        layer1.output,
        poolsize=(max_word_len - k_char + 1, 1)
    )

    layer_word_embed_input = x_wrd  # .reshape((batchsize, max_sen_len))

    layer_word_embed = EmbedIDLayer(
        rng,
        layer_word_embed_input,
        n_input=word_cnt,
        n_output=dim_word
    )

    layer3_word_input = layer_word_embed.output.reshape(
        (batchsize, 1, max_sen_len, dim_word))
    layer3_char_input = layer2.output.reshape(
        (batchsize, 1, max_sen_len, cl_char))

    layer3_input = T.concatenate(
        [layer3_word_input, layer3_char_input],
        axis=3
    )  # .reshape((batchsize, 1, max_sen_len, dim_word+cl_char))

    layer3 = ConvolutionalLayer(
        rng,
        layer3_input,
        filter_shape=(cl_word, 1, k_word, dim_word + cl_char),  # 1 is the number of input channels
        image_shape=(batchsize, 1, max_sen_len, dim_word + cl_char),
        activation=activation
    )

    layer4 = MaxPoolingLayer(
        layer3.output,
        poolsize=(max_sen_len - k_word + 1, 1)
    )

    layer5_input = layer4.output.reshape((batchsize, cl_word))

    layer5 = FullyConnectedLayer(
        rng,
        dropout(rng, layer5_input, train),
        n_input=cl_word,
        n_output=50,
        activation=activation
    )

    layer6_input = layer5.output

    layer6 = FullyConnectedLayer(
        rng,
        dropout(rng, layer6_input, train, p=0.1),
        n_input=50,
        n_output=2,
        activation=None
    )

    result = Result(layer6.output, y)
    loss = result.negative_log_likelihood()
    accuracy = result.accuracy()

    params = layer6.params \
        + layer5.params \
        + layer3.params \
        + layer_word_embed.params \
        + layer1.params \
        + layer_char_embed.params

    updates = RMSprop(learning_rate=0.001, params=params).updates(loss)

    self.train_model = theano.function(
        inputs=[index],
        outputs=[loss, accuracy],
        updates=updates,
        givens={
            x_wrd: x_train_word[index * batchsize: (index + 1) * batchsize],
            x_chr: x_train_char[index * batchsize: (index + 1) * batchsize],
            y: y_train[index * batchsize: (index + 1) * batchsize],
            train: np.cast['int32'](1)
        }
    )

    self.test_model = theano.function(
        inputs=[index],
        outputs=[loss, accuracy],
        givens={
            x_wrd: x_test_word[index * batchsize: (index + 1) * batchsize],
            x_chr: x_test_char[index * batchsize: (index + 1) * batchsize],
            y: y_test[index * batchsize: (index + 1) * batchsize],
            train: np.cast['int32'](0)
        }
    )
def run():
    report = file('report-hmdb-tdd.txt', 'w')
    max_time = 200
    configs = []
    cc = create_config
    for d in ['1', '2', '3']:
        configs.append(
            cc('tdd-max-pool-h-4000 ' + d, max_time, 4000, 'hmdb-tdd-1.hdf5',
               {'method': 'max', 'hidden_size': 4000},
               'hidden_2_layer_model', 0.0001))
        configs.append(
            cc('tdd-mean-pool-h-4000 ' + d, max_time, 4000, 'hmdb-tdd.hdf5',
               {'method': 'mean', 'hidden_size': 4000},
               'hidden_2_layer_model', 0.0001))
        configs.append(
            cc('tdd-sum-pool-h-4000 ' + d, max_time, 4000, 'hmdb-tdd.hdf5',
               {'method': 'sum', 'hidden_size': 4000},
               'hidden_2_layer_model', 0.0005))
        configs.append(
            cc('tdd-spyramid-1-h-1000', max_time, 4000, 'hmdb-tdd.hdf5',
               {'levels': 1, 'hidden_size': 1000},
               'temporal_pyramid_model'))
        configs.append(
            cc('tdd-spyramid-4-h-4000 ' + d, max_time, 4000,
               '/ssd2/hmdb/hmdb-tdd.hdf5',
               {'levels': 4, 'hidden_size': 4000},
               'temporal_pyramid_model', 0.0001))

    for d in ['1', '2', '3']:
        for model in ['temporal_learned_model']:
            s = ''  # config-name suffix
            s = s + ' split=' + d
            for num_f in [3]:
                configs.append(
                    cc('tdd-pyramid-1-N-' + str(num_f) + '-h-1000' + s,
                       max_time, 4000, 'hmdb-tdd.hdf5',
                       {'levels': 1, 'hidden_size': 1000, 'N': num_f},
                       model, 0.05))

    for config in configs:
        name = config['name']
        epochs = 250
        subdir = name + "-" + time.strftime("%Y%m%d-%H%M%S")
        if not os.path.isdir(subdir):
            os.mkdir(subdir)
        bs = 100  # int(sys.argv[1])
        max_time = config['max_time']  # int(sys.argv[2])
        feature_dim = config['feature_dim']  # int(sys.argv[3])

        from uniform_dataset import UniformDataset
        data_train = UniformDataset(
            bs=bs, filename=config['filename'], which_sets=['train'],
            sources=['features', 'time_mask', 'labels'])
        data_test = UniformDataset(
            bs=bs, filename=config['filename'], which_sets=['test'],
            sources=['features', 'time_mask', 'labels'])
        train_stream = DataStream.default_stream(
            data_train,
            iteration_scheme=SequentialScheme(data_train.num_examples, bs))
        test_stream = DataStream.default_stream(
            data_test,
            iteration_scheme=SequentialScheme(data_test.num_examples, bs))

        x = T.tensor3('features')
        time_mask = T.wmatrix('time_mask')
        y = T.imatrix('labels')

        mod = importlib.import_module(config['model'])
        classes = 51
        model = mod.TemporalModel([x, time_mask, y], bs, max_time, classes,
                                  feature_dim, **config['model_kwargs'])
        prob, pred, loss, error, acc = model.run(x, time_mask, y)
        prob.name = 'prob'
        acc.name = 'acc'
        pred.name = 'pred'
        loss.name = 'loss'
        error.name = 'error'
        model._outputs = [prob, pred, loss, error, acc]

        params = model.params
        # from solvers.sgd import SGD as solver
        from solvers.RMSProp import RMSProp as solver
        updates = solver(loss, params, lr=config['lr'], clipnorm=10.0)
        for i, u in enumerate(updates):
            if u[0].name == 'g' or u[0].name == 'sigma' or u[0].name == 'd':
                updates[i] = (u[0], T.mean(u[1]).dimshuffle(['x']))
        model._updates = updates

        # ============= TRAIN =========
        plots = [['train_loss', 'test_loss'], ['train_acc', 'test_acc']]
        main_loop = MainLoop(
            model, train_stream,
            [
                FinishAfter(epochs),
                Track(variables=['loss', 'error', 'acc'], prefix='train'),
                DataStreamTrack(test_stream, ['loss', 'error', 'acc'],
                                prefix='test', best_method=[min, min, max]),
                # SaveModel(subdir, name+'.model'),
                TimeProfile(),
                Report(os.path.join(subdir, 'report.txt'), name=name),
                Printing()
            ])
        main_loop.run()

        config['best_acc'] = main_loop.log.current_row['best_test_acc']
        print >> report, config['name'], 'best test acc', config['best_acc']
        report.flush()

    print ''.join(79 * '-')
    print 'FINAL REPORT'
    print ''.join(79 * '-')
    for config in configs:
        print config['name'], 'best test acc', config['best_acc']
def __init__(self, rng, batchsize=100, activation=relu):
    import loader
    (numsent, charcnt, wordcnt, maxwordlen, maxsenlen,\
        kchr, kwrd, xchr, xwrd, y) = loader.read("tweets_clean.txt")

    dimword = 30
    dimchar = 5
    clword = 300
    clchar = 50
    kword = kwrd
    kchar = kchr

    datatrainword,\
    datatestword,\
    datatrainchar,\
    datatestchar,\
    targettrain,\
    targettest\
        = train_test_split(xwrd, xchr, y, random_state=1234, test_size=0.1)

    xtrainword = theano.shared(np.asarray(datatrainword, dtype='int16'),
                               borrow=True)
    xtrainchar = theano.shared(np.asarray(datatrainchar, dtype='int16'),
                               borrow=True)
    ytrain = theano.shared(np.asarray(targettrain, dtype='int8'), borrow=True)
    xtestword = theano.shared(np.asarray(datatestword, dtype='int16'),
                              borrow=True)
    xtestchar = theano.shared(np.asarray(datatestchar, dtype='int16'),
                              borrow=True)
    ytest = theano.shared(np.asarray(targettest, dtype='int8'), borrow=True)

    self.ntrainbatches = xtrainword.get_value(borrow=True).shape[0] / batchsize
    self.ntestbatches = xtestword.get_value(borrow=True).shape[0] / batchsize

    index = T.iscalar()
    xwrd = T.wmatrix('xwrd')
    xchr = T.wtensor3('xchr')
    y = T.bvector('y')
    train = T.iscalar('train')

    layercharembedinput = xchr

    layercharembed = EmbedIDLayer(rng,
                                  layercharembedinput,
                                  ninput=charcnt,
                                  noutput=dimchar)

    layer1input = layercharembed.output.reshape(
        (batchsize * maxsenlen, 1, maxwordlen, dimchar))

    layer1 = ConvolutionalLayer(rng,
                                layer1input,
                                filter_shape=(clchar, 1, kchar, dimchar),
                                image_shape=(batchsize * maxsenlen, 1,
                                             maxwordlen, dimchar))

    layer2 = MaxPoolingLayer(layer1.output,
                             poolsize=(maxwordlen - kchar + 1, 1))

    layerwordembedinput = xwrd

    layerwordembed = EmbedIDLayer(rng,
                                  layerwordembedinput,
                                  ninput=wordcnt,
                                  noutput=dimword)

    layer3wordinput = layerwordembed.output.reshape(
        (batchsize, 1, maxsenlen, dimword))
    layer3charinput = layer2.output.reshape(
        (batchsize, 1, maxsenlen, clchar))

    layer3input = T.concatenate([layer3wordinput, layer3charinput], axis=3)

    layer3 = ConvolutionalLayer(rng,
                                layer3input,
                                filter_shape=(clword, 1, kword,
                                              dimword + clchar),
                                image_shape=(batchsize, 1, maxsenlen,
                                             dimword + clchar),
                                activation=activation)

    layer4 = MaxPoolingLayer(layer3.output,
                             poolsize=(maxsenlen - kword + 1, 1))

    layer5input = layer4.output.reshape((batchsize, clword))

    layer5 = FullyConnectedLayer(rng,
                                 dropout(rng, layer5input, train),
                                 ninput=clword,
                                 noutput=50,
                                 activation=activation)

    layer6input = layer5.output

    layer6 = FullyConnectedLayer(rng,
                                 dropout(rng, layer6input, train, p=0.1),
                                 ninput=50,
                                 noutput=2,
                                 activation=None)

    result = Result(layer6.output, y)
    loss = result.negativeloglikelihood()
    accuracy = result.accuracy()

    params = layer6.params\
        + layer5.params\
        + layer3.params\
        + layerwordembed.params\
        + layer1.params\
        + layercharembed.params

    updates = RMSprop(learningrate=0.001, params=params).updates(loss)

    self.trainmodel = theano.function(
        inputs=[index],
        outputs=[loss, accuracy],
        updates=updates,
        givens={
            xwrd: xtrainword[index * batchsize:(index + 1) * batchsize],
            xchr: xtrainchar[index * batchsize:(index + 1) * batchsize],
            y: ytrain[index * batchsize:(index + 1) * batchsize],
            train: np.cast['int32'](1)
        })

    self.testmodel = theano.function(
        inputs=[index],
        outputs=[loss, accuracy],
        givens={
            xwrd: xtestword[index * batchsize:(index + 1) * batchsize],
            xchr: xtestchar[index * batchsize:(index + 1) * batchsize],
            y: ytest[index * batchsize:(index + 1) * batchsize],
            train: np.cast['int32'](0)
        })
def run():
    report = file('report-hmdb-tdd-binary.txt', 'w')
    max_time = 200
    configs = []
    cc = create_config
    for d in ['1', '2', '3']:
        configs.append(
            cc('tdd-max-pool-h-4000 ' + d, max_time, 4000, 'hmdb-tdd-1.hdf5',
               {'method': 'max', 'hidden_size': 4000},
               'baseline_binary_model', 0.01))
        configs.append(
            cc('tdd-mean-pool-h-4000 ' + d, max_time, 4000, 'hmdb-tdd.hdf5',
               {'method': 'mean', 'hidden_size': 4000},
               'baseline_binary_model', 0.0001))
        configs.append(
            cc('tdd-sum-pool-h-4000 ' + d, max_time, 4000, 'hmdb-tdd.hdf5',
               {'method': 'sum', 'hidden_size': 4000},
               'baseline_binary_model', 0.0005))
        # configs.append(cc('tdd-spyramid-1-h-1000', max_time, 4000, 'hmdb-tdd.hdf5', {'levels': 1, 'hidden_size': 1000}, 'temporal_pyramid_model'))
        # configs.append(cc('tdd-spyramid-4-h-4000 ' + d, max_time, 4000, 'hmdb-tdd.hdf5', {'levels': 3, 'hidden_size': 4000}, 'temporal_pyramid_binary_model', 0.01))

    # for d in ['1', '2', '3']:
    for model in ['binary_learned_model']:  # , 'temporal_random_model']:
        s = ''  # config-name suffix
        s = s + ' split=' + d
        for num_f in [3]:
            configs.append(
                cc('plot-attention-', max_time, 4000, 'hmdb-tdd.hdf5',
                   {'levels': 6, 'hidden_size': 4000, 'N': num_f},
                   model, 0.005))

    for config in configs:
        name = config['name'] + sys.argv[1]
        epochs = 150
        subdir = name + "-" + time.strftime("%Y%m%d-%H%M%S")
        if not os.path.isdir(subdir):
            os.mkdir(subdir)
        bs = 64  # int(sys.argv[1])
        max_time = config['max_time']  # int(sys.argv[2])
        feature_dim = config['feature_dim']  # int(sys.argv[3])

        from uniform_dataset import UniformDataset
        data_train = UniformDataset(
            bs=bs, filename=config['filename'], which_sets=['train'],
            sources=['features', 'time_mask', 'labels'])
        data_test = UniformDataset(
            bs=bs, filename=config['filename'], which_sets=['test'],
            sources=['features', 'time_mask', 'labels'])
        train_stream = DataStream.default_stream(
            data_train,
            iteration_scheme=SequentialScheme(data_train.num_examples, bs))
        test_stream = DataStream.default_stream(
            data_test,
            iteration_scheme=SequentialScheme(data_test.num_examples, bs))

        x = T.tensor3('features')
        time_mask = T.wmatrix('time_mask')
        y = T.imatrix('labels')

        mod = importlib.import_module(config['model'])
        models = []
        b_model = None
        classes = eval(sys.argv[1])
        for clas in classes:
            model = mod.TemporalModel([x, time_mask, y], bs, max_time, clas,
                                      feature_dim, **config['model_kwargs'])
            models.append(model)
            if not b_model:
                b_model = model
                b_model._outputs = []
                b_model._updates = []
            prob, loss, (tp, tn, fp, fn) = model.run(x, time_mask, y)
            prob.name = 'prob_' + str(clas)
            loss.name = 'loss_' + str(clas)
            tp.name = 'tp_' + str(clas)
            tn.name = 'tn_' + str(clas)
            fp.name = 'fp_' + str(clas)
            fn.name = 'fn_' + str(clas)
            b_model._outputs += [prob, loss, tp, tn, fp, fn]
            # for filt in model.temporal_pyramid:
            #     print filt.g.name, filt.d.name, filt.sigma.name
            #     b_model._outputs += [filt.g, filt.d, filt.sigma]

            params = model.params
            # from solvers.sgd import SGD as solver
            from solvers.RMSProp import RMSProp as solver
            updates = solver(loss, params, lr=config['lr'], clipnorm=10.0)
            for i, u in enumerate(updates):
                if u[0].name is None:
                    continue
                if 'g.' in u[0].name or 'sigma.' in u[0].name or 'd.' in u[0].name:
                    updates[i] = (u[0], T.mean(u[1]).dimshuffle(['x']))
            b_model._updates += updates

        # ============= TRAIN =========
        tc = classes
        # plots = [['_plt_g.af-0', '_plt_g.af-1', '_plt_g.af-2'],
        #          ['_plt_d.af-0', '_plt_d.af-1', '_plt_d.af-2'],
        #          ['_plt_sigma.af-0', '_plt_sigma.af-1', '_plt_sigma.af-2']]
        # track_plot = [(x[5:], 'last') for sl in plots for x in sl]
        var = [[
            'loss_' + str(i),
            ('tp_' + str(i), 'sum'),
            ('tn_' + str(i), 'sum'),
            ('fp_' + str(i), 'sum'),
            ('fn_' + str(i), 'sum'),
            ('recall_' + str(i), 'after', 'tp_' + str(i), 'fn_' + str(i),
             lambda x, y: x / (x + y)),
            ('prec_' + str(i), 'after', 'tp_' + str(i), 'fp_' + str(i),
             lambda x, y: x / (x + y))
        ] for i in tc]
        var = [item for sublist in var for item in sublist]
        bm = [[min, max, max, min, min, max, max] for i in tc]
        bm = [item for sublist in bm for item in sublist]

        main_loop = MainLoop(
            b_model, train_stream,
            [
                FinishAfter(epochs),
                Track(variables=var, prefix='train'),
                # Track(variables=track_plot, prefix='_plt'),
                DataStreamTrack(test_stream, var, prefix='test',
                                best_method=bm),
                TimeProfile(),
                SaveAfter(models),
                # PlotLocal(name, subdir, plots),
                Report(os.path.join(subdir, 'report.txt'), name=name),
                Printing()
            ])
        main_loop.run()

        config['best_prec'] = main_loop.log.current_row['best_test_prec']
        print >> report, config['name'], 'best test prec', config['best_prec']
        report.flush()

    print ''.join(79 * '-')
    print 'FINAL REPORT'
    print ''.join(79 * '-')
    for config in configs:
        print config['name'], 'best test prec', config['best_prec']
            sequence_length, stride_length, buckets[bb], batch_size))
    if len(valid_data[bb]) >= batch_size:
        valid_gens.append(WordLMGenerator([valid_data[bb], valid_mask[bb]], glove, \
            sequence_length, stride_length, buckets[bb], batch_size))

# for i in range(len(train_gens)):
#     train_gen = train_gens[i]
#     for index in range(train_gen.max_index):
#         # run minibatch
#         for trainset in train_gen.get_minibatch(index):  # data, mask, label, reset
#             print(i, index)

#================Build graph================#

x = T.ftensor3('X')   # (batch_size, sequence_length, 300)
m = T.wmatrix('M')    # (batch_size, sequence_length)
r = T.wvector('r')    # (batch_size,)

x_ext = T.ftensor3('X_ext')
m_ext = T.wmatrix('M_ext')
y_ext = T.imatrix('Y_ext')
r_ext = T.wvector('r_ext')

encoder = SimpleGraph(experiment_name + '_enc', batch_size)
encoder.add_layer(LSTMRecurrentLayer(input_shape=(300, ),
                                     output_shape=(512, ),
                                     forget_bias_one=True,
                                     peephole=True,
                                     output_return_index=[-1],
                                     save_state_index=stride_length - 1,
                                     also_return_cell=True,
                                     precompute=False,
            sequence_length, stride_length, buckets[bb], batch_size))
    if len(valid_data[bb]) >= batch_size:
        valid_gens.append(WordLMGenerator([valid_data[bb], valid_mask[bb]], glove, \
            sequence_length, stride_length, buckets[bb], batch_size))

# for i in range(len(train_gens)):
#     train_gen = train_gens[i]
#     for index in range(train_gen.max_index):
#         # run minibatch
#         for trainset in train_gen.get_minibatch(index):  # data, mask, label, reset
#             print(i, index)

#================Build graph================#

x = T.ftensor3('X')   # (batch_size, sequence_length, 300)
m = T.wmatrix('M')    # (batch_size, sequence_length)
y = T.imatrix('Y')    # (batch_size, sequence_length)
r = T.wvector('r')    # (batch_size,)

graph = SimpleGraph(experiment_name, batch_size)
graph.add_layer(LSTMRecurrentLayer(input_shape=(300,),
                                   output_shape=(1024,),
                                   forget_bias_one=True,
                                   peephole=True,
                                   output_return_index=None,
                                   save_state_index=stride_length - 1,
                                   precompute=False,
                                   unroll=False,
                                   backward=False), is_start=True)
# graph.add_layer(TimeDistributedDenseLayer((1024,), (512,)))  # not much time difference, and less memory
graph.add_layer(DenseLayer((1024,), (512,)))