Example #1
    def get_split(self, split_num=0):
        split_perm = self.idxs[:, split_num]
        train_idx = split_perm[:-self.num_valid]
        test_idx = split_perm[-1 * self.num_valid:]
        X_train = self.X_data[train_idx]
        y_train = self.y_data[train_idx]
        X_test = self.X_data[test_idx]
        y_test = self.y_data[test_idx]

        def convertbin(y_temp):
            # map labels from {-1, +1} to {0, 1} without mutating the input array
            return (y_temp + 1.0) / 2.0

        y_train = convertbin(y_train).reshape(-1, 1)
        y_test = convertbin(y_test).reshape(-1, 1)
        train_dataset = IndexableDataset(indexables=OrderedDict(
            [('features', X_train.astype(np.float32)),
             ('targets', y_train.astype(np.float32))]))

        test_dataset = IndexableDataset(indexables=OrderedDict(
            [('features', X_test.astype(np.float32)),
             ('targets', y_test.astype(np.float32))]))

        p = np.sum(y_train) * 1.0 / (X_train.shape[0])
        return train_dataset, test_dataset, np.float32(p)
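
For reference, convertbin above maps labels from {-1, +1} to {0, 1}. A minimal standalone check of that mapping (NumPy only; the data below is invented for illustration):

import numpy as np

y = np.array([-1., 1., -1., 1.], dtype=np.float32)
assert np.array_equal((y + 1.0) / 2.0,
                      np.array([0., 1., 0., 1.], dtype=np.float32))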
Example #2
    def test_transform_source_batch(self):
        stream_batch = StructuredOneHotEncoding(
            DataStream(IndexableDataset(self.data),
                       iteration_scheme=SequentialScheme(4, 2)),
            num_classes=self.num_classes,
            which_sources=('targets', ))

        assert_equal(list(stream_batch.get_epoch_iterator()), [
            (numpy.ones((2, 2, 2)),
             numpy.array([
                 [1, 0, 0, 0, 1, 0, 0, 1],
                 [0, 1, 0, 1, 0, 0, 1, 0],
             ])),
            (numpy.ones((2, 2, 2)),
             numpy.array([
                 [0, 1, 0, 0, 1, 0, 1, 0],
                 [0, 0, 1, 1, 0, 1, 0, 0],
             ])),
        ])

        stream_batch_invalid = StructuredOneHotEncoding(
            DataStream(IndexableDataset(self.data),
                       iteration_scheme=SequentialScheme(4, 2)),
            num_classes=[2, 3, 3],
            which_sources=('targets', ))

        assert_raises(ValueError, list,
                      stream_batch_invalid.get_epoch_iterator())
        stream_batch_negative = StructuredOneHotEncoding(
            DataStream(IndexableDataset(self.neg_data),
                       iteration_scheme=SequentialScheme(4, 2)),
            num_classes=self.num_classes,
            which_sources=('targets', ))

        assert_raises(ValueError, list,
                      stream_batch_negative.get_epoch_iterator())
Example #3
def test_iterate_scheme():
    import numpy
    from collections import OrderedDict
    from fuel.datasets import IndexableDataset
    from fuel.schemes import (SequentialScheme, ShuffledScheme,
                              SequentialExampleScheme, ShuffledExampleScheme)

    seed = 1234
    rng = numpy.random.RandomState(seed)
    features = rng.randint(256, size=(8, 2, 2))
    targets = rng.randint(4, size=(8, 1))

    dataset = IndexableDataset(indexables=OrderedDict([('features', features),
                                                       ('targets', targets)]),
                               axis_labels=OrderedDict([('features', ('batch', 'height', 'width')),
                                                        ('targets', ('batch', 'index'))]))

    schemes = [SequentialScheme(examples=8, batch_size=5),
               ShuffledScheme(examples=8, batch_size=3),
               SequentialExampleScheme(examples=8),
               ShuffledExampleScheme(examples=8)]

    # for scheme in schemes:
    #     print(list(scheme.get_request_iterator()))

    state = dataset.open()
    scheme = ShuffledScheme(examples=dataset.num_examples, batch_size=3)

    for request in scheme.get_request_iterator():
        data = dataset.get_data(state=state, request=request)
        print(data[0].shape, data[1].shape)

    dataset.close(state)
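
As a side note, the request iterators used above differ in what they yield: batch schemes produce lists of indices, example schemes produce single indices. A small self-contained sketch (toy sizes chosen for illustration):

from fuel.schemes import SequentialScheme, SequentialExampleScheme

# batch scheme: one list of indices per minibatch
print(list(SequentialScheme(examples=6, batch_size=2).get_request_iterator()))
# [[0, 1], [2, 3], [4, 5]]

# example scheme: one index per example
print(list(SequentialExampleScheme(examples=6).get_request_iterator()))
# [0, 1, 2, 3, 4, 5]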
Example #4
    def indexData(self):
        labCounts = graph_helper.getLabelCounts(
            self.G, self.trainNodes + self.validationNodes)
        trainXY, trainIDs = encode_data_VarLen(
            self.G,
            self.trainNodes,
            self.attrKey,
            self.maxNeighbors,
            usePrevWeights=self.usePrevWeights,
            useActualLabs=self.useActualLabs,
            onlyLabs=self.onlyLabs,
            useInputX2=self.useInputX2,
            labCounts=labCounts,
            dataAug=self.dataAug,
            pageRankOrder=self.pageRankOrder,
            usePro=self.usePro,
            lastH=self.lastHH,
            nodeIDs=True)
        validationXY, testIDs = encode_data_VarLen(
            self.G,
            self.validationNodes,
            self.attrKey,
            self.maxNeighbors,
            labCounts=labCounts,
            usePrevWeights=self.usePrevWeights,
            useActualLabs=self.useActualLabs,
            onlyLabs=self.onlyLabs,
            useInputX2=self.useInputX2,
            pageRankOrder=self.pageRankOrder,
            usePro=self.usePro,
            lastH=self.lastHH,
            nodeIDs=True)
        self.input_dimx1 = trainXY['x'][0].shape[1]
        if 'x2' in trainXY:
            self.input_dimx2 = trainXY['x2'].shape[1]

        dataset_train = IndexableDataset(trainXY)
        dataset_valid = IndexableDataset(validationXY)
        self.num_examples_train = dataset_train.num_examples
        self.num_examples_valid = dataset_valid.num_examples
        if self.usePro:
            transpose_stream = self.transpose_streamPro
        else:
            transpose_stream = self.transpose_stream

        self.stream_train = DataStream(dataset=dataset_train,
                                       iteration_scheme=ShuffledScheme(
                                           examples=dataset_train.num_examples,
                                           batch_size=self.batch_size))
        self.stream_train = Padding(self.stream_train, mask_sources=['x'])
        self.stream_train = Mapping(self.stream_train, transpose_stream)

        self.stream_valid = DataStream(dataset=dataset_valid,
                                       iteration_scheme=ShuffledScheme(
                                           examples=dataset_valid.num_examples,
                                           batch_size=self.batch_size))
        self.stream_valid = Padding(self.stream_valid, mask_sources=['x'])
        self.stream_valid = Mapping(self.stream_valid, transpose_stream)
Example #5
def build_2d_datasets(dataset_name, n_train=20):
    if dataset_name not in ['mnist', 'sklearn', 'xor']:
        raise ValueError('This dataset is not supported')

    if dataset_name == 'xor':
        data_x = numpy.random.normal(
            size=(5000, 2)).astype(dtype=fuel.config.floatX)
        which_cluster = (numpy.random.uniform(size=(data_x.shape[0], 2)) > .5)
        data_x += 2. * (2 * which_cluster - 1)
        data_y = (2 * which_cluster - 1).prod(axis=1) * .5 + .5
        data_y = data_y.astype(dtype='int32').reshape((-1, 1))
    if dataset_name == 'sklearn':
        data_x, data_y = make_classification(n_samples=1000,
                                             n_features=2,
                                             n_informative=2,
                                             n_redundant=0,
                                             n_classes=2)
        data_y = data_y.astype(dtype='int32').reshape((-1, 1))
    if dataset_name == 'mnist':
        dataset = MNIST('train')
        data_mean, data_cov = build_mean_covariance(dataset, 256)
        eigval, eigvec = numpy.linalg.eigh(data_cov)
        features = (dataset.indexables[0] - data_mean).dot(eigvec[:, -2:])
        features_pos = features[dataset.indexables[1][:, 0] == 3]
        features_neg = features[dataset.indexables[1][:, 0] == 5]

        data_x = numpy.zeros(
            (features_pos.shape[0] + features_neg.shape[0], 2))
        data_x[:n_train] = features_pos[:n_train]
        data_x[n_train:(2 * n_train)] = features_neg[:n_train]
        data_x[(2 * n_train):-(features_neg.shape[0] - n_train)] = \
            features_pos[n_train:]
        data_x[-(features_neg.shape[0] - n_train):] = features_neg[n_train:]

        data_y = numpy.zeros(
            (features_pos.shape[0] + features_neg.shape[0], 1))
        data_y[:n_train] = 1
        data_y[n_train:(2 * n_train)] = 0
        data_y[(2 * n_train):-(features_neg.shape[0] - n_train)] = 1
        data_y[-(features_neg.shape[0] - n_train):] = 0

    train_dataset = IndexableDataset({
        'features': data_x[:(2 * n_train)],
        'targets': data_y[:(2 * n_train)]
    })
    test_dataset = IndexableDataset({
        'features': data_x[(2 * n_train):],
        'targets': data_y[(2 * n_train):]
    })

    return train_dataset, test_dataset
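
A usage sketch for the function above (assuming it and its imports are in scope; the 'xor' branch only needs NumPy and fuel.config):

train_dataset, test_dataset = build_2d_datasets('xor', n_train=20)
# 2 * n_train = 40 training examples; the remaining 4960 go to the test set
print(train_dataset.num_examples, test_dataset.num_examples)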
Example #6
def test_dropsources():
    stream = IndexableDataset(indexables=OrderedDict([
        ("valid", np.ones((5, 3, 3))),
        ("drop", np.zeros((5, 3, 3))),
        ])).get_example_stream()

    stream = DropSources(stream, ["drop"])

    assert len(stream.sources) == 1
    assert 'valid' in stream.sources

    data = next(stream.get_epoch_iterator())
    assert len(data) == 1
    assert_allclose(data[0], np.ones((3, 3)))
Example #7
 def test_one_hot_batches_invalid_input(self):
     wrapper = OneHotEncoding(DataStream(IndexableDataset(self.data),
                                         iteration_scheme=SequentialScheme(
                                             4, 2)),
                              num_classes=2,
                              which_sources=('targets', ))
     assert_raises(ValueError, list, wrapper.get_epoch_iterator())
Example #8
 def test_filter_batches(self):
     data = [1, 2, 3, 4]
     data_filtered = [([3, 4],)]
     stream = DataStream(IndexableDataset(data),
                         iteration_scheme=SequentialScheme(4, 2))
     wrapper = Filter(stream, lambda d: d[0][0] % 3 == 0)
     assert_equal(list(wrapper.get_epoch_iterator()), data_filtered)
Example #9
def setup_datastream(path, batch_size, sort_batch_count, valid=False):
    # raw input sequences (one per utterance)
    A = numpy.load(
        os.path.join(path,
                     ('valid_x_raw.npy' if valid else 'train_x_raw.npy')))
    # phone annotations, one row per phone segment
    B = numpy.load(
        os.path.join(path, ('valid_phn.npy' if valid else 'train_phn.npy')))
    # per-utterance [start, end) row ranges into the phone annotations
    C = numpy.load(
        os.path.join(
            path,
            ('valid_seq_to_phn.npy' if valid else 'train_seq_to_phn.npy')))

    # one label sequence per utterance: column 2 of rows x[0]:x[1] of B
    D = [B[x[0]:x[1], 2] for x in C]

    ds = IndexableDataset({'input': A, 'output': D})
    stream = DataStream(ds, iteration_scheme=ShuffledExampleScheme(len(A)))

    stream = Batch(stream,
                   iteration_scheme=ConstantScheme(batch_size *
                                                   sort_batch_count))
    comparison = _balanced_batch_helper(stream.sources.index('input'))
    stream = Mapping(stream, SortMapping(comparison))
    stream = Unpack(stream)

    stream = Batch(stream,
                   iteration_scheme=ConstantScheme(batch_size,
                                                   num_examples=len(A)))
    stream = Padding(stream, mask_sources=['input', 'output'])

    return ds, stream
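
The Batch/Padding mechanics used above can be seen in isolation on toy data (everything below is illustrative, not taken from the original script): an example stream is grouped into fixed-size batches, and Padding right-pads the variable-length sequences and adds an 'input_mask' source.

import numpy
from fuel.datasets import IndexableDataset
from fuel.streams import DataStream
from fuel.schemes import SequentialExampleScheme, ConstantScheme
from fuel.transformers import Batch, Padding

sequences = [numpy.arange(n, dtype='int32') for n in (3, 5, 2, 4)]
toy = IndexableDataset({'input': sequences})
toy_stream = DataStream(toy, iteration_scheme=SequentialExampleScheme(len(sequences)))
toy_stream = Batch(toy_stream, iteration_scheme=ConstantScheme(2))
toy_stream = Padding(toy_stream, mask_sources=['input'])
for padded, mask in toy_stream.get_epoch_iterator():
    print(padded.shape, mask.shape)  # first batch: (2, 5) (2, 5)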
Example #10
def test_predict():
    tempfile_path = os.path.join(gettempdir(), 'test_predict.npz')

    # set up mock datastream
    source = [[1], [2], [3], [4]]
    dataset = IndexableDataset(OrderedDict([('input', source)]))
    scheme = SequentialScheme(dataset.num_examples, batch_size=2)
    data_stream = DataStream(dataset, iteration_scheme=scheme)

    # simulate small "network" that increments the input by 1
    input_tensor = tensor.matrix('input')
    output_tensor = input_tensor + 1
    output_tensor.name = 'output_tensor'

    main_loop = MockMainLoop(extensions=[
        PredictDataStream(data_stream=data_stream,
                          variables=[output_tensor],
                          path=tempfile_path,
                          after_training=True),
        FinishAfter(after_n_epochs=1)
    ])
    main_loop.run()

    # assert resulting prediction is saved
    prediction = numpy.load(tempfile_path)
    assert numpy.all(prediction[output_tensor.name] == numpy.array(source) + 1)

    try:
        os.remove(tempfile_path)
    except OSError:
        pass
Example #11
 def test_flatten_batches(self):
     wrapper = Flatten(DataStream(IndexableDataset(self.data),
                                  iteration_scheme=SequentialScheme(4, 2)),
                       which_sources=('features', ))
     assert_equal(list(wrapper.get_epoch_iterator()),
                  [(numpy.ones((2, 4)), numpy.array([[0], [1]])),
                   (numpy.ones((2, 4)), numpy.array([[0], [1]]))])
Example #12
 def test_flatten_examples(self):
     wrapper = Flatten(DataStream(
         IndexableDataset(self.data),
         iteration_scheme=SequentialExampleScheme(4)),
                       which_sources=('features', ))
     assert_equal(list(wrapper.get_epoch_iterator()),
                  [(numpy.ones(4), 0), (numpy.ones(4), 1)] * 2)
Example #13
 def get_prob(self,
              model,
              example_set,
              scheme,
              interim_dim=30,
              batch_size=256):
     (mlp, fine_tuner) = model
     dataset_state = example_set.open()
     x = T.matrix('x')
     out = mlp.apply(x)
     pred_fn = theano.function([x], out)
     y = np.zeros((example_set.num_examples))
     print "Number of examples is ", example_set.num_examples
     y_hat = np.zeros((example_set.num_examples, interim_dim))
     for idx, request in enumerate(scheme.get_request_iterator()):
         data = example_set.get_data(state=dataset_state, request=request)
         out_val = pred_fn(data[0])
         end_idx = min((idx + 1) * batch_size, example_set.num_examples)
         y[idx * batch_size:end_idx] = data[1].flatten()
         y_hat[idx * batch_size:end_idx] = out_val
     example_set.close(dataset_state)
     dataset = IndexableDataset(
         indexables=OrderedDict([('features', y_hat.astype(np.float32)),
                                 ('targets',
                                  y.reshape(-1, 1).astype(np.float32))]))
     return dataset
Example #14
def test_mean_aggregator():
    num_examples = 4
    batch_size = 2

    features = numpy.array([[0, 3], [2, 9], [2, 4], [5, 1]],
                           dtype=theano.config.floatX)

    dataset = IndexableDataset(OrderedDict([('features', features)]))

    data_stream = DataStream(dataset,
                             iteration_scheme=SequentialScheme(
                                 num_examples, batch_size))

    x = tensor.matrix('features')
    y = (x**2).mean(axis=0)
    y.name = 'y'
    z = y.sum()
    z.name = 'z'

    y.tag.aggregation_scheme = Mean(y, 1.)
    z.tag.aggregation_scheme = Mean(z, 1.)

    assert_allclose(
        DatasetEvaluator([y]).evaluate(data_stream)['y'],
        numpy.array([8.25, 26.75], dtype=theano.config.floatX))
    assert_allclose(
        DatasetEvaluator([z]).evaluate(data_stream)['z'],
        numpy.array([35], dtype=theano.config.floatX))
Example #15
def _test_mean_like_aggregator(scheme, func):
    """Common test function for both Mean and Perplexity."""
    features = numpy.array([[0, 3], [2, 9], [2, 4], [5, 1], [6, 7]],
                           dtype=theano.config.floatX)
    num_examples = features.shape[0]
    batch_size = 2

    dataset = IndexableDataset(OrderedDict([('features', features)]))

    data_stream = DataStream(dataset,
                             iteration_scheme=SequentialScheme(
                                 num_examples, batch_size))

    x = tensor.matrix('features')
    y = (x**0.5).sum(axis=0)
    y.name = 'y'
    z = y.sum()
    z.name = 'z'

    y.tag.aggregation_scheme = scheme(y, x.shape[0])
    z.tag.aggregation_scheme = scheme(z, x.shape[0])

    y_desired = func((features**0.5).mean(axis=0))
    z_desired = func((features**0.5).sum(axis=1).mean(axis=0))

    assert_allclose(
        DatasetEvaluator([y]).evaluate(data_stream)['y'],
        numpy.array(y_desired, dtype=theano.config.floatX))
    assert_allclose(
        DatasetEvaluator([z]).evaluate(data_stream)['z'],
        numpy.array(z_desired, dtype=theano.config.floatX))
Example #16
def get_stream_raw(dataset, which_set, mini_batch_size):
    data = get_data(dataset)

    # dataset is a 3D array of shape: Time X Batch X Features
    dataset = data[which_set]
    time, batch, features = dataset.shape
    nb_mini_batches = batch // mini_batch_size
    dataset = dataset[:, :nb_mini_batches * mini_batch_size, :]

    # Create the target_dataset
    targets_dataset = dataset[1:, :, :]

    # Cut the dataset into several minibatches
    # dataset is now 4D (nb_mini_batches X Time X mini_batch_size X Features)
    dataset = numpy.swapaxes(dataset, 0, 1)
    targets_dataset = numpy.swapaxes(targets_dataset, 0, 1)
    dataset = numpy.reshape(dataset,
                            (nb_mini_batches, mini_batch_size, time, features))
    targets_dataset = numpy.reshape(
        targets_dataset,
        (nb_mini_batches, mini_batch_size, time - 1, features))
    dataset = numpy.swapaxes(dataset, 1, 2)
    targets_dataset = numpy.swapaxes(targets_dataset, 1, 2)

    # Create fuel dataset
    dataset = IndexableDataset({
        'features': dataset,
        'targets': targets_dataset
    })
    stream = DataStream(
        dataset, iteration_scheme=SequentialExampleScheme(nb_mini_batches))
    return stream
Example #17
def get_dev_stream(valid_file, **kwargs):
    with open(valid_file, 'rb') as f:
        valid_data = cPickle.load(f)
    images = [example[0] for example in valid_data]
    targets = [example[1] for example in valid_data]
    dataset = IndexableDataset(
        OrderedDict([('input', images), ('output', targets)]))
    return DataStream(dataset,
                      iteration_scheme=SequentialExampleScheme(len(images)))
Example #18
def test_ngram_stream_raises_error_on_batch_stream():
    sentences = [
        list(numpy.random.randint(10, size=sentence_length))
        for sentence_length in [3, 5, 7]
    ]
    stream = DataStream(IndexableDataset(sentences),
                        iteration_scheme=SequentialScheme(3, 1))
    assert_raises(ValueError, NGrams, 4, stream)
Example #19
 def setUp(self):
     self.stream = DataStream(
         IndexableDataset(
             OrderedDict([('features', numpy.ones((4, 2, 2))),
                          ('targets', numpy.array([0, 1, 0, 1]))]),
             axis_labels={'features': ('batch', 'width', 'height'),
                          'targets': ('batch',)}),
         iteration_scheme=SequentialScheme(4, 2))
Example #20
def test_single_mapping_value_error_on_request():
    class IdentitySingleMapping(SingleMapping):
        def mapping(self, source):
            return source

    data_stream = DataStream(IndexableDataset([0, 1, 2]))
    transformer = IdentitySingleMapping(data_stream)
    assert_raises(ValueError, transformer.get_data, [0, 1])
Example #21
 def setUp(self):
     self.string_data = [b'Hello', b'World!']
     self.dataset = IndexableDataset(
         indexables={
             'words':
             [numpy.fromstring(s, dtype='uint8') for s in self.string_data]
         },
         axis_labels={'words': ('batch', 'bytes')})
Example #22
 def test_axis_labels_are_passed_through(self):
     stream = DataStream(
         IndexableDataset(
             {'features': [1, 2, 3, 4]},
             axis_labels={'features': ('batch',)}),
         iteration_scheme=SequentialScheme(4, 2))
     wrapper = Filter(stream, lambda d: d[0][0] % 3 == 0)
     assert_equal(wrapper.axis_labels, stream.axis_labels)
Example #23
    def load_data(self, data_path):
        logging.info("Loading: " + data_path)

        data = pd.read_csv(data_path, sep="\t", header=None)
        data.columns = ['rel', 'head', 'tail', 'score']
        assert (not data.empty)
        self.N = len(data)
        return IndexableDataset(data.to_dict('list'))
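
For context, DataFrame.to_dict('list') produces a column-name-to-list mapping, which IndexableDataset accepts directly as named sources. A minimal illustration with invented rows:

import pandas as pd
from fuel.datasets import IndexableDataset

frame = pd.DataFrame({'rel': ['r1', 'r2'], 'head': ['a', 'b'],
                      'tail': ['c', 'd'], 'score': [0.9, 0.1]})
dataset = IndexableDataset(frame.to_dict('list'))
print(dataset.num_examples)              # 2
print(sorted(dataset.provides_sources))  # ['head', 'rel', 'score', 'tail']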
Example #24
def test_flatten():
    stream = DataStream(IndexableDataset(
        OrderedDict([('features', numpy.ones((4, 2, 2))),
                     ('targets', numpy.array([0, 1, 0, 1]))])),
                        iteration_scheme=SequentialScheme(4, 2))
    wrapper = Flatten(stream, which_sources=('features', ))
    assert_equal(list(wrapper.get_epoch_iterator()),
                 [(numpy.ones((2, 4)), numpy.array([0, 1])),
                  (numpy.ones((2, 4)), numpy.array([0, 1]))])
Example #25
 def test_axis_labels_on_flatten_examples(self):
     wrapper = Flatten(
         DataStream(IndexableDataset(self.data),
                    iteration_scheme=SequentialExampleScheme(4),
                    axis_labels={'features': ('batch', 'width', 'height'),
                                 'targets': ('batch', 'index')}),
         which_sources=('features',))
     assert_equal(wrapper.axis_labels, {'features': ('feature',),
                                        'targets': ('index',)})
Example #26
 def test_axis_labels_on_flatten_batches_with_none(self):
     wrapper = Flatten(
         DataStream(IndexableDataset(self.data),
                    iteration_scheme=SequentialScheme(4, 2),
                    axis_labels={'features': None,
                                 'targets': ('batch', 'index')}),
         which_sources=('features',))
     assert_equal(wrapper.axis_labels, {'features': None,
                                        'targets': ('batch', 'index')})
Example #27
    def _construct_dataset(self, dataset):
        '''Construct a Fuel IndexableDataset.

        Each field corresponds to a name in self.provide_sources.
        :param dataset: a tuple of data arrays
        :return: an IndexableDataset
        '''
        return IndexableDataset(
            indexables=OrderedDict(zip(self.provide_souces, dataset)))
Example #28
def test_min_max_aggregators():
    num_examples = 4
    batch_size = 2

    features = numpy.array([[2, 3], [2, 9], [2, 4], [5, 1]],
                           dtype=theano.config.floatX)

    dataset = IndexableDataset(OrderedDict([('features', features)]))

    data_stream = DataStream(dataset,
                             iteration_scheme=SequentialScheme(
                                 num_examples, batch_size))

    x = tensor.matrix('features')
    y = (x**2).sum(axis=0)
    y.name = 'y'
    z = y.min()
    z.name = 'z'

    y.tag.aggregation_scheme = Maximum(y)
    z.tag.aggregation_scheme = Minimum(z)

    assert_allclose(
        DatasetEvaluator([y]).evaluate(data_stream)['y'],
        numpy.array([29, 90], dtype=theano.config.floatX))
    assert_allclose(
        DatasetEvaluator([z]).evaluate(data_stream)['z'],
        numpy.array([8], dtype=theano.config.floatX))

    # Make sure accumulators are reset.
    features = numpy.array([[2, 1], [1, 3], [1, -1], [2.5, 1]],
                           dtype=theano.config.floatX)

    dataset = IndexableDataset(OrderedDict([('features', features)]))

    data_stream = DataStream(dataset,
                             iteration_scheme=SequentialScheme(
                                 num_examples, batch_size))
    assert_allclose(
        DatasetEvaluator([y]).evaluate(data_stream)['y'],
        numpy.array([7.25, 10], dtype=theano.config.floatX))
    assert_allclose(
        DatasetEvaluator([z]).evaluate(data_stream)['z'],
        numpy.array([2], dtype=theano.config.floatX))
Example #29
    def replaceTestData(self, testNodes, maxNeighbors=1000, maskNames=['x']):
        if self.batchesInferences:
            batch_size = self.batch_size
        else:
            batch_size = 1

        testing, testIDs = encode_data_VarLen(self.G,
                                              testNodes,
                                              self.attrKey,
                                              maxNeighbors,
                                              useActualLabs=self.useActualLabs,
                                              useInputX2=self.useInputX2,
                                              onlyLabs=self.onlyLabs,
                                              lastH=self.lastHH,
                                              nodeIDs=True)
        dataset_test = IndexableDataset(testing)
        self.stream_test = DataStream(dataset=dataset_test,
                                      iteration_scheme=SequentialScheme(
                                          examples=dataset_test.num_examples,
                                          batch_size=batch_size))
        # add masks; done per source to avoid an "all dimensions must be equal" error
        # TODO: write a custom padding transformer
        self.stream_test = Padding(self.stream_test, mask_sources=maskNames)
        # transpose for RNN input
        self.stream_test = Mapping(self.stream_test, self.transpose_streamTest)
        self.num_examples_test = dataset_test.num_examples

        #replace shareddata with test_all data
        self.test_all, names = self.iterateShared(self.stream_test,
                                                  makeShared=False,
                                                  name="test")

        #if we are doing test in batches
        if self.batchesInferences:
            for key in self.test_all:
                totalTestBatches = len(self.test_all[key])
                if key != 'nodeID':
                    for i in range(0, totalTestBatches):
                        #if test data has more batches, we add more to shared data list
                        #else we just reset
                        if i >= self.totalBatches:
                            newKey = key + '_myinput'
                            self.sharedData[key].append(
                                shared(self.test_all[key][i],
                                       name=self.sharedName + '_' + newKey +
                                       '_test_' + str(i)))
                        else:
                            self.sharedData[key][i].set_value(
                                self.test_all[key][i], borrow=True)

                    self.sharedBatch[key].set_value(
                        self.sharedData[key][0].get_value(borrow=True),
                        borrow=True)

            self.stream_test_int = IntStream(0, totalTestBatches, 1,
                                             'int_stream')
Example #30
def test_indexable_dataset():
    import numpy
    from collections import OrderedDict
    from fuel.datasets import IndexableDataset

    seed = 1234
    rng = numpy.random.RandomState(seed)
    features = rng.randint(256, size=(8, 2, 2))
    targets = rng.randint(4, size=(8, 1))

    dataset = IndexableDataset(indexables=OrderedDict([('features', features),
                                                       ('targets', targets)]),
                               axis_labels=OrderedDict([('features', ('batch', 'height', 'width')),
                                                        ('targets', ('batch', 'index'))]))

    state = dataset.open()
    print('State is {}.'.format(state))

    print(dataset.get_data(state=state, request=[1, 0]))

    dataset.close(state=state)
Example #31
 def test_one_hot_examples(self):
     wrapper = OneHotEncoding(DataStream(
         IndexableDataset(self.data),
         iteration_scheme=SequentialExampleScheme(4)),
                              num_classes=4,
                              which_sources=('targets', ))
     assert_equal(list(wrapper.get_epoch_iterator()),
                  [(numpy.ones((2, 2)), numpy.array([[1, 0, 0, 0]])),
                   (numpy.ones((2, 2)), numpy.array([[0, 1, 0, 0]])),
                   (numpy.ones((2, 2)), numpy.array([[0, 0, 1, 0]])),
                   (numpy.ones((2, 2)), numpy.array([[0, 0, 0, 1]]))])
Example #32
def get_stream(trainXY, batch_size=100):
    #trainXY=genSynXY()
    dataset_train = IndexableDataset(trainXY)
    stream_train_1 = DataStream(dataset=dataset_train,
                                iteration_scheme=ShuffledScheme(
                                    examples=dataset_train.num_examples,
                                    batch_size=batch_size))
    stream_train_2 = Padding(stream_train_1)
    #stream_train_1.sources=('x_mask_o', 'y_mask_o', 'x', 'y')
    stream_train_3 = Mapping(stream_train_2, transpose_stream)

    return (stream_train_3, dataset_train.num_examples)
Example #33
File: test_streams.py, Project: Afrik/fuel
 def test_axis_labels_on_produces_batches(self):
     dataset = IndexableDataset(numpy.eye(2))
     axis_labels = {'data': ('batch', 'features')}
     dataset.axis_labels = axis_labels
     stream = DataStream(dataset, iteration_scheme=SequentialScheme(2, 2))
     assert_equal(stream.axis_labels, axis_labels)
Example #34
# add gradient clipping to avoid exploding gradients
all_grads = [T.clip(g, -5, 5) for g in T.grad(mean_cost, all_parameters)]
all_grads = lasagne.updates.total_norm_constraint(all_grads, 5)

updates = lasagne.updates.adam(all_grads, all_parameters, learning_rate=0.001)

train_func = theano.function([X, Mask, labels], [mean_cost, train_acc], updates=updates)

val_func = theano.function([X, Mask, labels], [val_mcost, val_acc])


#load the dataset
Data, Msk, Targets, val_Data, val_Msk, val_tars = load_dataset()

train_set = IndexableDataset(
    indexables=OrderedDict([('features', Data), ('mask', Msk), ('targets', Targets)]),
    axis_labels={'features': ('batch', 'maxlen', 'feat_dim'),
                 'mask': ('batch', 'maxlen'),
                 'targets': ('batch', 'index')})

valid_set = IndexableDataset(
    indexables=OrderedDict([('features', val_Data), ('mask', val_Msk), ('targets', val_tars)]),
    axis_labels={'features': ('batch', 'maxlen', 'feat_dim'),
                 'mask': ('batch', 'maxlen'),
                 'targets': ('batch', 'index')})

num_epochs = 5
epoch = 0

print("Starting training...")
# We iterate over epochs:
val_prev = np.inf
a_prev = -np.inf

while True:
Example #35
all_grads = [T.clip(g,-5,5) for g in T.grad(mean_cost, all_parameters)]
all_grads = lasagne.updates.total_norm_constraint(all_grads,5)

updates = lasagne.updates.adam(all_grads, all_parameters, learning_rate=0.005)

train_func = theano.function([X, Mask, labels], [mean_cost, train_acc], updates=updates)

val_func = theano.function([X, Mask, labels], [val_mcost, val_acc])


num_epochs=100
#load the dataset
Data, Msk, Targets, val_Data, val_Msk, val_tars = load_dataset()

train_set = IndexableDataset(
    indexables=OrderedDict([('features', Data), ('mask', Msk), ('targets', Targets)]),
    axis_labels={'features': ('batch', 'maxlen', 'feat_dim'),
                 'mask': ('batch', 'maxlen'),
                 'targets': ('batch', 'index')})

valid_set = IndexableDataset(
    indexables=OrderedDict([('features', val_Data), ('mask', val_Msk), ('targets', val_tars)]),
    axis_labels={'features': ('batch', 'maxlen', 'feat_dim'),
                 'mask': ('batch', 'maxlen'),
                 'targets': ('batch', 'index')})


trainerr=[]

print("Starting training...")
    # We iterate over epochs:
for epoch in range(num_epochs):
    # In each epoch, we do a full pass over the training data:
    train_err = 0
    tr_acc = 0