def get_split(self, split_num=0):
    split_perm = self.idxs[:, split_num]
    train_idx = split_perm[:-self.num_valid]
    test_idx = split_perm[-1 * self.num_valid:]
    X_train = self.X_data[train_idx]
    y_train = self.y_data[train_idx]
    X_test = self.X_data[test_idx]
    y_test = self.y_data[test_idx]

    def convertbin(y_temp):
        # Map labels from {-1, 1} to {0, 1}.
        y_temp += 1
        y_temp /= 2
        return y_temp

    y_train = convertbin(y_train).reshape(-1, 1)
    y_test = convertbin(y_test).reshape(-1, 1)
    train_dataset = IndexableDataset(indexables=OrderedDict(
        [('features', X_train.astype(np.float32)),
         ('targets', y_train.astype(np.float32))]))
    test_dataset = IndexableDataset(indexables=OrderedDict(
        [('features', X_test.astype(np.float32)),
         ('targets', y_test.astype(np.float32))]))
    p = np.sum(y_train) * 1.0 / X_train.shape[0]
    return train_dataset, test_dataset, np.float32(p)
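# A minimal usage sketch, assuming the object exposing get_split is available
# as `splitter` (a placeholder name, not from the original code): the returned
# IndexableDatasets plug directly into Fuel's DataStream / ShuffledScheme.
from fuel.streams import DataStream
from fuel.schemes import ShuffledScheme

train_dataset, test_dataset, p = splitter.get_split(split_num=0)
stream = DataStream(train_dataset,
                    iteration_scheme=ShuffledScheme(
                        examples=train_dataset.num_examples, batch_size=32))
for features, targets in stream.get_epoch_iterator():
    pass  # each batch is a pair of float32 arrays (features, targets)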
def test_transform_source_batch(self):
    stream_batch = StructuredOneHotEncoding(
        DataStream(IndexableDataset(self.data),
                   iteration_scheme=SequentialScheme(4, 2)),
        num_classes=self.num_classes,
        which_sources=('targets',))
    assert_equal(list(stream_batch.get_epoch_iterator()),
                 [(numpy.ones((2, 2, 2)),
                   numpy.array([[1, 0, 0, 0, 1, 0, 0, 1],
                                [0, 1, 0, 1, 0, 0, 1, 0]])),
                  (numpy.ones((2, 2, 2)),
                   numpy.array([[0, 1, 0, 0, 1, 0, 1, 0],
                                [0, 0, 1, 1, 0, 1, 0, 0]]))])
    stream_batch_invalid = StructuredOneHotEncoding(
        DataStream(IndexableDataset(self.data),
                   iteration_scheme=SequentialScheme(4, 2)),
        num_classes=[2, 3, 3],
        which_sources=('targets',))
    assert_raises(ValueError, list,
                  stream_batch_invalid.get_epoch_iterator())
    stream_batch_negative = StructuredOneHotEncoding(
        DataStream(IndexableDataset(self.neg_data),
                   iteration_scheme=SequentialScheme(4, 2)),
        num_classes=self.num_classes,
        which_sources=('targets',))
    assert_raises(ValueError, list,
                  stream_batch_negative.get_epoch_iterator())
def test_iterate_scheme():
    from fuel.datasets import IndexableDataset
    from fuel.schemes import (SequentialScheme, ShuffledScheme,
                              SequentialExampleScheme, ShuffledExampleScheme)

    seed = 1234
    rng = numpy.random.RandomState(seed)
    features = rng.randint(256, size=(8, 2, 2))
    targets = rng.randint(4, size=(8, 1))
    dataset = IndexableDataset(
        indexables=OrderedDict([('features', features),
                                ('targets', targets)]),
        axis_labels=OrderedDict([('features', ('batch', 'height', 'width')),
                                 ('targets', ('batch', 'index'))]))
    schemes = [SequentialScheme(examples=8, batch_size=5),
               ShuffledScheme(examples=8, batch_size=3),
               SequentialExampleScheme(examples=8),
               ShuffledExampleScheme(examples=8)]
    # for scheme in schemes:
    #     print(list(scheme.get_request_iterator()))
    state = dataset.open()
    scheme = ShuffledScheme(examples=dataset.num_examples, batch_size=3)
    for request in scheme.get_request_iterator():
        data = dataset.get_data(state=state, request=request)
        print(data[0].shape, data[1].shape)
    dataset.close(state)
def indexData(self):
    labCounts = graph_helper.getLabelCounts(
        self.G, self.trainNodes + self.validationNodes)
    trainXY, trainIDs = encode_data_VarLen(
        self.G, self.trainNodes, self.attrKey, self.maxNeighbors,
        usePrevWeights=self.usePrevWeights, useActualLabs=self.useActualLabs,
        onlyLabs=self.onlyLabs, useInputX2=self.useInputX2,
        labCounts=labCounts, dataAug=self.dataAug,
        pageRankOrder=self.pageRankOrder, usePro=self.usePro,
        lastH=self.lastHH, nodeIDs=True)
    validationXY, testIDs = encode_data_VarLen(
        self.G, self.validationNodes, self.attrKey, self.maxNeighbors,
        labCounts=labCounts, usePrevWeights=self.usePrevWeights,
        useActualLabs=self.useActualLabs, onlyLabs=self.onlyLabs,
        useInputX2=self.useInputX2, pageRankOrder=self.pageRankOrder,
        usePro=self.usePro, lastH=self.lastHH, nodeIDs=True)
    self.input_dimx1 = trainXY['x'][0].shape[1]
    if 'x2' in trainXY:
        self.input_dimx2 = trainXY['x2'].shape[1]
    dataset_train = IndexableDataset(trainXY)
    dataset_valid = IndexableDataset(validationXY)
    self.num_examples_train = dataset_train.num_examples
    self.num_examples_valid = dataset_valid.num_examples
    if self.usePro:
        transpose_stream = self.transpose_streamPro
    else:
        transpose_stream = self.transpose_stream
    self.stream_train = DataStream(
        dataset=dataset_train,
        iteration_scheme=ShuffledScheme(
            examples=dataset_train.num_examples, batch_size=self.batch_size))
    self.stream_train = Padding(self.stream_train, mask_sources=['x'])
    self.stream_train = Mapping(self.stream_train, transpose_stream)
    self.stream_valid = DataStream(
        dataset=dataset_valid,
        iteration_scheme=ShuffledScheme(
            examples=dataset_valid.num_examples, batch_size=self.batch_size))
    self.stream_valid = Padding(self.stream_valid, mask_sources=['x'])
    self.stream_valid = Mapping(self.stream_valid, transpose_stream)
def build_2d_datasets(dataset_name, n_train=20):
    if dataset_name not in ['mnist', 'sklearn', 'xor']:
        raise ValueError('This dataset is not supported')
    if dataset_name == 'xor':
        data_x = numpy.random.normal(size=(5000, 2)).astype(
            dtype=fuel.config.floatX)
        which_cluster = (numpy.random.uniform(size=(data_x.shape[0], 2)) > .5)
        data_x += 2. * (2 * which_cluster - 1)
        data_y = (2 * which_cluster - 1).prod(axis=1) * .5 + .5
        data_y = data_y.astype(dtype='int32').reshape((-1, 1))
    if dataset_name == 'sklearn':
        data_x, data_y = make_classification(n_samples=1000, n_features=2,
                                             n_informative=2, n_redundant=0,
                                             n_classes=2)
        data_y = data_y.astype(dtype='int32').reshape((-1, 1))
    if dataset_name == 'mnist':
        dataset = MNIST('train')
        data_mean, data_cov = build_mean_covariance(dataset, 256)
        eigval, eigvec = numpy.linalg.eigh(data_cov)
        features = (dataset.indexables[0] - data_mean).dot(eigvec[:, -2:])
        features_pos = features[dataset.indexables[1][:, 0] == 3]
        features_neg = features[dataset.indexables[1][:, 0] == 5]
        data_x = numpy.zeros(
            (features_pos.shape[0] + features_neg.shape[0], 2))
        data_x[:n_train] = features_pos[:n_train]
        data_x[n_train:(2 * n_train)] = features_neg[:n_train]
        data_x[(2 * n_train):-(features_neg.shape[0] - n_train)] = \
            features_pos[n_train:]
        data_x[-(features_neg.shape[0] - n_train):] = features_neg[n_train:]
        data_y = numpy.zeros(
            (features_pos.shape[0] + features_neg.shape[0], 1))
        data_y[:n_train] = 1
        data_y[n_train:(2 * n_train)] = 0
        data_y[(2 * n_train):-(features_neg.shape[0] - n_train)] = 1
        data_y[-(features_neg.shape[0] - n_train):] = 0
    train_dataset = IndexableDataset({
        'features': data_x[:(2 * n_train)],
        'targets': data_y[:(2 * n_train)]
    })
    test_dataset = IndexableDataset({
        'features': data_x[(2 * n_train):],
        'targets': data_y[(2 * n_train):]
    })
    return train_dataset, test_dataset
def test_dropsources():
    stream = IndexableDataset(indexables=OrderedDict([
        ("valid", np.ones((5, 3, 3))),
        ("drop", np.zeros((5, 3, 3))),
    ])).get_example_stream()
    stream = DropSources(stream, ["drop"])
    assert len(stream.sources) == 1
    assert 'valid' in stream.sources
    data = next(stream.get_epoch_iterator())
    assert len(data) == 1
    assert_allclose(data[0], np.ones((3, 3)))
def test_one_hot_batches_invalid_input(self):
    wrapper = OneHotEncoding(
        DataStream(IndexableDataset(self.data),
                   iteration_scheme=SequentialScheme(4, 2)),
        num_classes=2, which_sources=('targets',))
    assert_raises(ValueError, list, wrapper.get_epoch_iterator())
def test_filter_batches(self):
    data = [1, 2, 3, 4]
    data_filtered = [([3, 4],)]
    stream = DataStream(IndexableDataset(data),
                        iteration_scheme=SequentialScheme(4, 2))
    wrapper = Filter(stream, lambda d: d[0][0] % 3 == 0)
    assert_equal(list(wrapper.get_epoch_iterator()), data_filtered)
def setup_datastream(path, batch_size, sort_batch_count, valid=False):
    A = numpy.load(os.path.join(
        path, 'valid_x_raw.npy' if valid else 'train_x_raw.npy'))
    B = numpy.load(os.path.join(
        path, 'valid_phn.npy' if valid else 'train_phn.npy'))
    C = numpy.load(os.path.join(
        path, 'valid_seq_to_phn.npy' if valid else 'train_seq_to_phn.npy'))
    D = [B[x[0]:x[1], 2] for x in C]

    ds = IndexableDataset({'input': A, 'output': D})
    stream = DataStream(ds, iteration_scheme=ShuffledExampleScheme(len(A)))

    # Bucket by length: read sort_batch_count batches' worth of shuffled
    # examples, sort them by input length, then re-batch and pad.
    stream = Batch(stream,
                   iteration_scheme=ConstantScheme(batch_size *
                                                   sort_batch_count))
    comparison = _balanced_batch_helper(stream.sources.index('input'))
    stream = Mapping(stream, SortMapping(comparison))
    stream = Unpack(stream)
    stream = Batch(stream,
                   iteration_scheme=ConstantScheme(batch_size,
                                                   num_examples=len(A)))
    stream = Padding(stream, mask_sources=['input', 'output'])
    return ds, stream
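# _balanced_batch_helper is not defined in this snippet; the sketch below is
# only an assumption of what it provides: a key object that sorts examples by
# the length of the chosen source, so SortMapping groups similar-length
# sequences before they are re-batched and padded.
class _balanced_batch_helper(object):
    def __init__(self, key):
        self.key = key

    def __call__(self, data):
        # `data` is one example tuple; use the length of the selected source.
        return data[self.key].shape[0]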
def test_predict():
    tempfile_path = os.path.join(gettempdir(), 'test_predict.npz')

    # set up mock datastream
    source = [[1], [2], [3], [4]]
    dataset = IndexableDataset(OrderedDict([('input', source)]))
    scheme = SequentialScheme(dataset.num_examples, batch_size=2)
    data_stream = DataStream(dataset, iteration_scheme=scheme)

    # simulate small "network" that increments the input by 1
    input_tensor = tensor.matrix('input')
    output_tensor = input_tensor + 1
    output_tensor.name = 'output_tensor'

    main_loop = MockMainLoop(extensions=[
        PredictDataStream(data_stream=data_stream,
                          variables=[output_tensor],
                          path=tempfile_path,
                          after_training=True),
        FinishAfter(after_n_epochs=1)
    ])
    main_loop.run()

    # assert resulting prediction is saved
    prediction = numpy.load(tempfile_path)
    assert numpy.all(
        prediction[output_tensor.name] == numpy.array(source) + 1)

    try:
        os.remove(tempfile_path)
    except OSError:
        pass
def test_flatten_batches(self):
    wrapper = Flatten(
        DataStream(IndexableDataset(self.data),
                   iteration_scheme=SequentialScheme(4, 2)),
        which_sources=('features',))
    assert_equal(list(wrapper.get_epoch_iterator()),
                 [(numpy.ones((2, 4)), numpy.array([[0], [1]])),
                  (numpy.ones((2, 4)), numpy.array([[0], [1]]))])
def test_flatten_examples(self):
    wrapper = Flatten(
        DataStream(IndexableDataset(self.data),
                   iteration_scheme=SequentialExampleScheme(4)),
        which_sources=('features',))
    assert_equal(list(wrapper.get_epoch_iterator()),
                 [(numpy.ones(4), 0), (numpy.ones(4), 1)] * 2)
def get_prob(self, model, example_set, scheme, interim_dim=30,
             batch_size=256):
    (mlp, fine_tuner) = model
    dataset_state = example_set.open()
    x = T.matrix('x')
    out = mlp.apply(x)
    pred_fn = theano.function([x], out)

    y = np.zeros((example_set.num_examples))
    print "Number of examples is ", example_set.num_examples
    y_hat = np.zeros((example_set.num_examples, interim_dim))
    for idx, request in enumerate(scheme.get_request_iterator()):
        data = example_set.get_data(state=dataset_state, request=request)
        out_val = pred_fn(data[0])
        end_idx = (idx + 1) * batch_size
        # Note: a final partial batch (end_idx >= num_examples) is skipped.
        if end_idx < example_set.num_examples:
            y[idx * batch_size:end_idx] = data[1].flatten()
            y_hat[idx * batch_size:end_idx] = out_val
    dataset = IndexableDataset(indexables=OrderedDict(
        [('features', y_hat.astype(np.float32)),
         ('targets', y.reshape(-1, 1).astype(np.float32))]))
    return dataset
def test_mean_aggregator():
    num_examples = 4
    batch_size = 2

    features = numpy.array([[0, 3], [2, 9], [2, 4], [5, 1]],
                           dtype=theano.config.floatX)
    dataset = IndexableDataset(OrderedDict([('features', features)]))
    data_stream = DataStream(dataset,
                             iteration_scheme=SequentialScheme(num_examples,
                                                               batch_size))

    x = tensor.matrix('features')
    y = (x**2).mean(axis=0)
    y.name = 'y'
    z = y.sum()
    z.name = 'z'

    y.tag.aggregation_scheme = Mean(y, 1.)
    z.tag.aggregation_scheme = Mean(z, 1.)

    assert_allclose(DatasetEvaluator([y]).evaluate(data_stream)['y'],
                    numpy.array([8.25, 26.75], dtype=theano.config.floatX))
    assert_allclose(DatasetEvaluator([z]).evaluate(data_stream)['z'],
                    numpy.array([35], dtype=theano.config.floatX))
def _test_mean_like_aggregator(scheme, func):
    """Common test function for both Mean and Perplexity."""
    features = numpy.array([[0, 3], [2, 9], [2, 4], [5, 1], [6, 7]],
                           dtype=theano.config.floatX)
    num_examples = features.shape[0]
    batch_size = 2

    dataset = IndexableDataset(OrderedDict([('features', features)]))
    data_stream = DataStream(dataset,
                             iteration_scheme=SequentialScheme(num_examples,
                                                               batch_size))

    x = tensor.matrix('features')
    y = (x**0.5).sum(axis=0)
    y.name = 'y'
    z = y.sum()
    z.name = 'z'

    y.tag.aggregation_scheme = scheme(y, x.shape[0])
    z.tag.aggregation_scheme = scheme(z, x.shape[0])

    y_desired = func((features**0.5).mean(axis=0))
    z_desired = func((features**0.5).sum(axis=1).mean(axis=0))

    assert_allclose(DatasetEvaluator([y]).evaluate(data_stream)['y'],
                    numpy.array(y_desired, dtype=theano.config.floatX))
    assert_allclose(DatasetEvaluator([z]).evaluate(data_stream)['z'],
                    numpy.array(z_desired, dtype=theano.config.floatX))
def get_stream_raw(dataset, which_set, mini_batch_size):
    data = get_data(dataset)

    # dataset is a 3D array of shape: Time X Batch X Features
    dataset = data[which_set]
    time, batch, features = dataset.shape
    nb_mini_batches = batch // mini_batch_size
    dataset = dataset[:, :nb_mini_batches * mini_batch_size, :]

    # Create the target_dataset (features shifted by one time step)
    targets_dataset = dataset[1:, :, :]

    # Cut the dataset into several minibatches.
    # dataset is now 4D (nb_mini_batches X Time X mini_batch_size X Features)
    dataset = numpy.swapaxes(dataset, 0, 1)
    targets_dataset = numpy.swapaxes(targets_dataset, 0, 1)
    dataset = numpy.reshape(dataset, (nb_mini_batches, mini_batch_size,
                                      time, features))
    targets_dataset = numpy.reshape(targets_dataset,
                                    (nb_mini_batches, mini_batch_size,
                                     time - 1, features))
    dataset = numpy.swapaxes(dataset, 1, 2)
    targets_dataset = numpy.swapaxes(targets_dataset, 1, 2)

    # Create fuel dataset
    dataset = IndexableDataset({'features': dataset,
                                'targets': targets_dataset})
    stream = DataStream(
        dataset, iteration_scheme=SequentialExampleScheme(nb_mini_batches))
    return stream
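# A self-contained sketch (synthetic data, assumed sizes) that checks the
# shape bookkeeping performed by get_stream_raw: the columns that do not fill
# a whole minibatch are dropped, and targets are the features shifted by one
# time step.
import numpy

time, batch, features = 5, 7, 2
raw = numpy.arange(time * batch * features, dtype='float32').reshape(
    (time, batch, features))
mini_batch_size = 3
nb = batch // mini_batch_size                      # 2 full minibatches
raw = raw[:, :nb * mini_batch_size, :]             # drop the leftover column
tgt = raw[1:, :, :]                                # next-step targets
raw = numpy.swapaxes(raw, 0, 1).reshape((nb, mini_batch_size, time, features))
tgt = numpy.swapaxes(tgt, 0, 1).reshape((nb, mini_batch_size,
                                         time - 1, features))
raw = numpy.swapaxes(raw, 1, 2)
tgt = numpy.swapaxes(tgt, 1, 2)
assert raw.shape == (nb, time, mini_batch_size, features)
assert tgt.shape == (nb, time - 1, mini_batch_size, features)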
def get_dev_stream(valid_file, **kwargs):
    valid_data = cPickle.load(open(valid_file))
    images = [example[0] for example in valid_data]
    targets = [example[1] for example in valid_data]
    dataset = IndexableDataset(
        OrderedDict([('input', images), ('output', targets)]))
    return DataStream(dataset,
                      iteration_scheme=SequentialExampleScheme(len(images)))
def test_ngram_stream_raises_error_on_batch_stream():
    sentences = [list(numpy.random.randint(10, size=sentence_length))
                 for sentence_length in [3, 5, 7]]
    stream = DataStream(IndexableDataset(sentences),
                        iteration_scheme=SequentialScheme(3, 1))
    assert_raises(ValueError, NGrams, 4, stream)
def setUp(self):
    self.stream = DataStream(
        IndexableDataset(
            OrderedDict([('features', numpy.ones((4, 2, 2))),
                         ('targets', numpy.array([0, 1, 0, 1]))]),
            axis_labels={'features': ('batch', 'width', 'height'),
                         'targets': ('batch',)}),
        iteration_scheme=SequentialScheme(4, 2))
def test_single_mapping_value_error_on_request():
    class IdentitySingleMapping(SingleMapping):
        def mapping(self, source):
            return source

    data_stream = DataStream(IndexableDataset([0, 1, 2]))
    transformer = IdentitySingleMapping(data_stream)
    assert_raises(ValueError, transformer.get_data, [0, 1])
def setUp(self):
    self.string_data = [b'Hello', b'World!']
    self.dataset = IndexableDataset(
        indexables={'words': [numpy.fromstring(s, dtype='uint8')
                              for s in self.string_data]},
        axis_labels={'words': ('batch', 'bytes')})
def test_axis_labels_are_passed_through(self):
    stream = DataStream(
        IndexableDataset({'features': [1, 2, 3, 4]},
                         axis_labels={'features': ('batch',)}),
        iteration_scheme=SequentialScheme(4, 2))
    wrapper = Filter(stream, lambda d: d[0][0] % 3 == 0)
    assert_equal(wrapper.axis_labels, stream.axis_labels)
def load_data(self, data_path):
    logging.info("Loading: " + data_path)
    data = pd.read_csv(data_path, sep="\t", header=None)
    data.columns = ['rel', 'head', 'tail', 'score']
    assert not data.empty
    self.N = len(data)
    return IndexableDataset(data.to_dict('list'))
def test_flatten():
    stream = DataStream(
        IndexableDataset(
            OrderedDict([('features', numpy.ones((4, 2, 2))),
                         ('targets', numpy.array([0, 1, 0, 1]))])),
        iteration_scheme=SequentialScheme(4, 2))
    wrapper = Flatten(stream, which_sources=('features',))
    assert_equal(list(wrapper.get_epoch_iterator()),
                 [(numpy.ones((2, 4)), numpy.array([0, 1])),
                  (numpy.ones((2, 4)), numpy.array([0, 1]))])
def test_axis_labels_on_flatten_examples(self):
    wrapper = Flatten(
        DataStream(IndexableDataset(self.data),
                   iteration_scheme=SequentialExampleScheme(4),
                   axis_labels={'features': ('batch', 'width', 'height'),
                                'targets': ('batch', 'index')}),
        which_sources=('features',))
    assert_equal(wrapper.axis_labels,
                 {'features': ('feature',), 'targets': ('index',)})
def test_axis_labels_on_flatten_batches_with_none(self):
    wrapper = Flatten(
        DataStream(IndexableDataset(self.data),
                   iteration_scheme=SequentialScheme(4, 2),
                   axis_labels={'features': None,
                                'targets': ('batch', 'index')}),
        which_sources=('features',))
    assert_equal(wrapper.axis_labels,
                 {'features': None, 'targets': ('batch', 'index')})
def _construct_dataset(self, dataset):
    '''Construct a Fuel IndexableDataset.

    Each field is keyed by the corresponding name in self.provide_sources.

    :param dataset: A tuple of data
    :return: An IndexableDataset wrapping the given data
    '''
    return IndexableDataset(
        indexables=OrderedDict(zip(self.provide_souces, dataset)))
def test_min_max_aggregators():
    num_examples = 4
    batch_size = 2

    features = numpy.array([[2, 3], [2, 9], [2, 4], [5, 1]],
                           dtype=theano.config.floatX)
    dataset = IndexableDataset(OrderedDict([('features', features)]))
    data_stream = DataStream(dataset,
                             iteration_scheme=SequentialScheme(num_examples,
                                                               batch_size))

    x = tensor.matrix('features')
    y = (x**2).sum(axis=0)
    y.name = 'y'
    z = y.min()
    z.name = 'z'

    y.tag.aggregation_scheme = Maximum(y)
    z.tag.aggregation_scheme = Minimum(z)

    assert_allclose(DatasetEvaluator([y]).evaluate(data_stream)['y'],
                    numpy.array([29, 90], dtype=theano.config.floatX))
    assert_allclose(DatasetEvaluator([z]).evaluate(data_stream)['z'],
                    numpy.array([8], dtype=theano.config.floatX))

    # Make sure accumulators are reset.
    features = numpy.array([[2, 1], [1, 3], [1, -1], [2.5, 1]],
                           dtype=theano.config.floatX)
    dataset = IndexableDataset(OrderedDict([('features', features)]))
    data_stream = DataStream(dataset,
                             iteration_scheme=SequentialScheme(num_examples,
                                                               batch_size))
    assert_allclose(DatasetEvaluator([y]).evaluate(data_stream)['y'],
                    numpy.array([7.25, 10], dtype=theano.config.floatX))
    assert_allclose(DatasetEvaluator([z]).evaluate(data_stream)['z'],
                    numpy.array([2], dtype=theano.config.floatX))
def replaceTestData(self, testNodes, maxNeighbors=1000, maskNames=['x']):
    if self.batchesInferences:
        batch_size = self.batch_size
    else:
        batch_size = 1
    testing, testIDs = encode_data_VarLen(
        self.G, testNodes, self.attrKey, maxNeighbors,
        useActualLabs=self.useActualLabs, useInputX2=self.useInputX2,
        onlyLabs=self.onlyLabs, lastH=self.lastHH, nodeIDs=True)
    dataset_test = IndexableDataset(testing)
    self.stream_test = DataStream(
        dataset=dataset_test,
        iteration_scheme=SequentialScheme(
            examples=dataset_test.num_examples, batch_size=batch_size))
    # Add masks; this has to be done per source to avoid
    # "all dimensions must be equal" errors.
    # TODO: write our own padding transformer; the stock one is awkward here.
    self.stream_test = Padding(self.stream_test, mask_sources=maskNames)
    # Transpose the batches for RNN input.
    self.stream_test = Mapping(self.stream_test, self.transpose_streamTest)
    self.num_examples_test = dataset_test.num_examples
    # Replace the shared data with the test data.
    self.test_all, names = self.iterateShared(self.stream_test,
                                              makeShared=False,
                                              name="test")
    # If we are doing inference in batches:
    if self.batchesInferences:
        for key in self.test_all:
            totalTestBatches = len(self.test_all[key])
            if key != 'nodeID':
                for i in range(0, totalTestBatches):
                    # If the test data has more batches than the shared data
                    # list, append new shared variables; otherwise reset the
                    # existing ones in place.
                    if i >= self.totalBatches:
                        newKey = key + '_myinput'
                        self.sharedData[key].append(
                            shared(self.test_all[key][i],
                                   name=self.sharedName + '_' + newKey +
                                   '_test_' + str(i)))
                    else:
                        self.sharedData[key][i].set_value(
                            self.test_all[key][i], borrow=True)
                self.sharedBatch[key].set_value(
                    self.sharedData[key][0].get_value(borrow=True),
                    borrow=True)
        self.stream_test_int = IntStream(0, totalTestBatches, 1, 'int_stream')
def test_indexable_dataset():
    from fuel.datasets import IndexableDataset

    seed = 1234
    rng = numpy.random.RandomState(seed)
    features = rng.randint(256, size=(8, 2, 2))
    targets = rng.randint(4, size=(8, 1))
    dataset = IndexableDataset(
        indexables=OrderedDict([('features', features),
                                ('targets', targets)]),
        axis_labels=OrderedDict([('features', ('batch', 'height', 'width')),
                                 ('targets', ('batch', 'index'))]))
    state = dataset.open()
    print('State is {}.'.format(state))
    print(dataset.get_data(state=state, request=[1, 0]))
    dataset.close(state=state)
def test_one_hot_examples(self):
    wrapper = OneHotEncoding(
        DataStream(IndexableDataset(self.data),
                   iteration_scheme=SequentialExampleScheme(4)),
        num_classes=4, which_sources=('targets',))
    assert_equal(list(wrapper.get_epoch_iterator()),
                 [(numpy.ones((2, 2)), numpy.array([[1, 0, 0, 0]])),
                  (numpy.ones((2, 2)), numpy.array([[0, 1, 0, 0]])),
                  (numpy.ones((2, 2)), numpy.array([[0, 0, 1, 0]])),
                  (numpy.ones((2, 2)), numpy.array([[0, 0, 0, 1]]))])
def get_stream(trainXY, batch_size=100):
    # trainXY = genSynXY()
    dataset_train = IndexableDataset(trainXY)
    stream_train_1 = DataStream(dataset=dataset_train,
                                iteration_scheme=ShuffledScheme(
                                    examples=dataset_train.num_examples,
                                    batch_size=batch_size))
    stream_train_2 = Padding(stream_train_1)
    # stream_train_1.sources = ('x_mask_o', 'y_mask_o', 'x', 'y')
    stream_train_3 = Mapping(stream_train_2, transpose_stream)
    return (stream_train_3, dataset_train.num_examples)
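# transpose_stream is not defined in these snippets. A common pattern for a
# Mapping applied after Padding is to swap the batch and time axes so that
# recurrent layers receive time-major input; the sketch below is an assumed
# implementation, not the original function.
def transpose_stream(data):
    # Swap (batch, time, ...) -> (time, batch, ...) for every padded source;
    # leave 1-D sources untouched.
    return tuple(source.swapaxes(0, 1) if source.ndim > 1 else source
                 for source in data)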
def test_axis_labels_on_produces_batches(self):
    dataset = IndexableDataset(numpy.eye(2))
    axis_labels = {'data': ('batch', 'features')}
    dataset.axis_labels = axis_labels
    stream = DataStream(dataset, iteration_scheme=SequentialScheme(2, 2))
    assert_equal(stream.axis_labels, axis_labels)
# Add gradient clipping to avoid exploding gradients.
all_grads = [T.clip(g, -5, 5) for g in T.grad(mean_cost, all_parameters)]
all_grads = lasagne.updates.total_norm_constraint(all_grads, 5)
updates = lasagne.updates.adam(all_grads, all_parameters, learning_rate=0.001)

train_func = theano.function([X, Mask, labels], [mean_cost, train_acc],
                             updates=updates)
val_func = theano.function([X, Mask, labels], [val_mcost, val_acc])

# Load the dataset.
Data, Msk, Targets, val_Data, val_Msk, val_tars = load_dataset()
train_set = IndexableDataset(
    indexables=OrderedDict([('features', Data), ('mask', Msk),
                            ('targets', Targets)]),
    axis_labels={'features': ('batch', 'maxlen', 'feat_dim'),
                 'mask': ('batch', 'maxlen'),
                 'targets': ('batch', 'index')})
valid_set = IndexableDataset(
    indexables=OrderedDict([('features', val_Data), ('mask', val_Msk),
                            ('targets', val_tars)]),
    axis_labels={'features': ('batch', 'maxlen', 'feat_dim'),
                 'mask': ('batch', 'maxlen'),
                 'targets': ('batch', 'index')})

num_epochs = 5
epoch = 0
print("Starting training...")
# We iterate over epochs:
val_prev = np.inf
a_prev = -np.inf
while True:
all_grads = [T.clip(g, -5, 5) for g in T.grad(mean_cost, all_parameters)]
all_grads = lasagne.updates.total_norm_constraint(all_grads, 5)
updates = lasagne.updates.adam(all_grads, all_parameters, learning_rate=0.005)

train_func = theano.function([X, Mask, labels], [mean_cost, train_acc],
                             updates=updates)
val_func = theano.function([X, Mask, labels], [val_mcost, val_acc])

num_epochs = 100

# Load the dataset.
Data, Msk, Targets, val_Data, val_Msk, val_tars = load_dataset()
train_set = IndexableDataset(
    indexables=OrderedDict([('features', Data), ('mask', Msk),
                            ('targets', Targets)]),
    axis_labels={'features': ('batch', 'maxlen', 'feat_dim'),
                 'mask': ('batch', 'maxlen'),
                 'targets': ('batch', 'index')})
valid_set = IndexableDataset(
    indexables=OrderedDict([('features', val_Data), ('mask', val_Msk),
                            ('targets', val_tars)]),
    axis_labels={'features': ('batch', 'maxlen', 'feat_dim'),
                 'mask': ('batch', 'maxlen'),
                 'targets': ('batch', 'index')})

trainerr = []
print("Starting training...")
# We iterate over epochs:
for epoch in range(num_epochs):
    # In each epoch, we do a full pass over the training data:
    train_err = 0
    tr_acc = 0