Example #1
def fit_transform(self, X, y=None, **fit_params):
    print(X.shape)
    nfeat = X.shape[1]
    X = X.astype(theano.config.floatX)
    if y is None:
        # Unsupervised case: a single 'features' source.
        dataset = VectorSpacesDataset(X, (VectorSpace(nfeat), 'features'))
    else:
        # Supervised case: pair the features with a one-column target.
        y = np.reshape(y, (y.shape[0], 1))
        space = CompositeSpace([VectorSpace(nfeat), VectorSpace(1)])
        source = ('features', 'targets')
        data_specs = (space, source)
        dataset = VectorSpacesDataset((X, y), data_specs)
    return dataset
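A minimal sketch of consuming a dataset built this way, assuming pylearn2 and Theano are installed; the feature count, batch size, and source names are illustrative rather than taken from the snippet above:

import numpy as np
import theano
from pylearn2.datasets.vector_spaces_dataset import VectorSpacesDataset
from pylearn2.space import CompositeSpace, VectorSpace

# Build a small supervised dataset the same way fit_transform does.
X = np.random.rand(100, 4).astype(theano.config.floatX)
y = np.random.rand(100, 1).astype(theano.config.floatX)
data_specs = (CompositeSpace([VectorSpace(4), VectorSpace(1)]),
              ('features', 'targets'))
dataset = VectorSpacesDataset((X, y), data_specs)

# Minibatches come back as tuples ordered like the requested data_specs.
it = dataset.iterator(mode='sequential', batch_size=20, data_specs=data_specs)
for features, targets in it:
    print(features.shape, targets.shape)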
Example #2
def test_get_layer_monitor_channels():
    """
    Create an MLP with multiple layer types
    and get the layer monitoring channels for the MLP.
    """
    mlp = MLP(layers=[
        FlattenerLayer(
            CompositeLayer('composite',
                           [Linear(10, 'h0', 0.1),
                            Linear(10, 'h1', 0.1)], {
                                0: [1],
                                1: [0]
                            })),
        Softmax(5, 'softmax', 0.1)
    ],
              input_space=CompositeSpace([VectorSpace(15),
                                          VectorSpace(20)]),
              input_source=('features0', 'features1'))
    dataset = VectorSpacesDataset(
        (np.random.rand(20, 20).astype(theano.config.floatX),
         np.random.rand(20, 15).astype(theano.config.floatX),
         np.random.rand(20, 5).astype(theano.config.floatX)),
        (CompositeSpace(
            [VectorSpace(20), VectorSpace(15),
             VectorSpace(5)]), ('features1', 'features0', 'targets')))
    state_below = mlp.get_input_space().make_theano_batch()
    targets = mlp.get_target_space().make_theano_batch()
    mlp.get_layer_monitoring_channels(state_below=state_below,
                                      state=None,
                                      targets=targets)
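The call above only builds symbolic expressions. A hedged sketch of actually evaluating the channels, assuming each channel is an ordinary Theano expression over these inputs, could look like:

import theano

# get_layer_monitoring_channels returns an OrderedDict of symbolic values.
channels = mlp.get_layer_monitoring_channels(
    state_below=state_below, state=None, targets=targets)
monitor = theano.function(
    list(state_below) + [targets], list(channels.values()),
    on_unused_input='ignore')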
Example #3
def test_multiple_inputs():
    """
    Create a VectorSpacesDataset with two inputs (features0 and features1)
    and train an MLP which takes both inputs for 1 epoch.
    """
    mlp = MLP(layers=[
        FlattenerLayer(
            CompositeLayer('composite',
                           [Linear(10, 'h0', 0.1),
                            Linear(10, 'h1', 0.1)], {
                                0: [1],
                                1: [0]
                            })),
        Softmax(5, 'softmax', 0.1)
    ],
              input_space=CompositeSpace([VectorSpace(15),
                                          VectorSpace(20)]),
              input_source=('features0', 'features1'))
    dataset = VectorSpacesDataset(
        (np.random.rand(20, 20).astype(theano.config.floatX),
         np.random.rand(20, 15).astype(theano.config.floatX),
         np.random.rand(20, 5).astype(theano.config.floatX)),
        (CompositeSpace(
            [VectorSpace(20), VectorSpace(15),
             VectorSpace(5)]), ('features1', 'features0', 'targets')))
    train = Train(dataset, mlp, SGD(0.1, batch_size=5))
    train.algorithm.termination_criterion = EpochCounter(1)
    train.main_loop()
Example #4
def BWD_dataset(portion_to_return,
                total=36772,
                train_val_test=((0.0, 0.1), (0.8, 0.9), (0.9, 1.0)),
                portion_keys=('train', 'valid', 'test')):
    assert all(e[0] >= 0 and e[1] >= 0 for e in train_val_test)
    assert portion_to_return in portion_keys
    (start_frac,
     stop_frac) = train_val_test[portion_keys.index(portion_to_return)]
    start, stop = int(start_frac * total), int(stop_frac * total)
    return VectorSpacesDataset(data=load_data(
        start=start,
        stop=stop,
        filename='res/bowman_wordnet_longer_shuffled_synset_relations.tsv',
        token_map='res/bowman_wordnet_longer_shuffled_synset_relations.map',
        first_column_has_y_label=True,
        first_column_of_map_file_has_index=True,
        return_composite_space_tuples=True),
                               data_specs=BowmanWordnetDataset.data_specs)
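A hedged usage sketch, assuming the res/ files and the load_data helper referenced above are importable; each call returns a VectorSpacesDataset with BowmanWordnetDataset.data_specs:

# Build the three portions defined by train_val_test; each is a slice of
# the same underlying file.
train_set = BWD_dataset('train')
valid_set = BWD_dataset('valid')
test_set = BWD_dataset('test')
print(train_set.get_data_specs())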
Example #5
def test_flattener_layer():
    # To test the FlattenerLayer we create a very simple feed-forward neural
    # network with two parallel linear layers. We then create two separate
    # feed-forward neural networks with single linear layers. In principle,
    # these two models should be identical if we start from the same
    # parameters. This makes it easy to test that the composite layer works
    # as expected.

    # Create network with composite layers.
    mlp_composite = MLP(layers=[
        FlattenerLayer(
            CompositeLayer('composite',
                           [Linear(2, 'h0', 0.1),
                            Linear(2, 'h1', 0.1)], {
                                0: [0],
                                1: [1]
                            }))
    ],
                        input_space=CompositeSpace(
                            [VectorSpace(5), VectorSpace(10)]),
                        input_source=('features0', 'features1'))

    # Create network with single linear layer, corresponding to first
    # layer in the composite network.
    mlp_first_part = MLP(layers=[Linear(2, 'h0', 0.1)],
                         input_space=VectorSpace(5),
                         input_source='features0')

    # Create network with single linear layer, corresponding to second
    # layer in the composite network.
    mlp_second_part = MLP(layers=[Linear(2, 'h1', 0.1)],
                          input_space=VectorSpace(10),
                          input_source='features1')

    # Create dataset which we will test our networks against.
    shared_dataset = np.random.rand(20, 19).astype(theano.config.floatX)

    # Make dataset for composite network.
    dataset_composite = VectorSpacesDataset(
        (shared_dataset[:, 0:5], shared_dataset[:, 5:15],
         shared_dataset[:, 15:19]), (CompositeSpace(
             [VectorSpace(5), VectorSpace(10),
              VectorSpace(4)]), ('features0', 'features1', 'targets')))

    # Make dataset for first single linear layer network.
    dataset_first_part = VectorSpacesDataset(
        (shared_dataset[:, 0:5], shared_dataset[:, 15:17]),
        (CompositeSpace([VectorSpace(5), VectorSpace(2)]),
         ('features0', 'targets')))

    # Make dataset for second single linear layer network.
    dataset_second_part = VectorSpacesDataset(
        (shared_dataset[:, 5:15], shared_dataset[:, 17:19]),
        (CompositeSpace([VectorSpace(10), VectorSpace(2)]),
         ('features1', 'targets')))

    # Initialize all MLPs to start from zero weights.
    mlp_composite.layers[0].raw_layer.layers[0].set_weights(
        mlp_composite.layers[0].raw_layer.layers[0].get_weights() * 0.0)
    mlp_composite.layers[0].raw_layer.layers[1].set_weights(
        mlp_composite.layers[0].raw_layer.layers[1].get_weights() * 0.0)
    mlp_first_part.layers[0].set_weights(
        mlp_first_part.layers[0].get_weights() * 0.0)
    mlp_second_part.layers[0].set_weights(
        mlp_second_part.layers[0].get_weights() * 0.0)

    # Train all models with their respective datasets.
    train_composite = Train(dataset_composite, mlp_composite,
                            SGD(0.0001, batch_size=20))
    train_composite.algorithm.termination_criterion = EpochCounter(1)
    train_composite.main_loop()

    train_first_part = Train(dataset_first_part, mlp_first_part,
                             SGD(0.0001, batch_size=20))
    train_first_part.algorithm.termination_criterion = EpochCounter(1)
    train_first_part.main_loop()

    train_second_part = Train(dataset_second_part, mlp_second_part,
                              SGD(0.0001, batch_size=20))
    train_second_part.algorithm.termination_criterion = EpochCounter(1)
    train_second_part.main_loop()

    # Check that the composite feed-forward neural network has learned the
    # same parameters as each individual feed-forward neural network.
    np.testing.assert_allclose(
        mlp_composite.layers[0].raw_layer.layers[0].get_weights(),
        mlp_first_part.layers[0].get_weights())
    np.testing.assert_allclose(
        mlp_composite.layers[0].raw_layer.layers[1].get_weights(),
        mlp_second_part.layers[0].get_weights())

    # Check that we get the same output given the same input on a randomly
    # generated dataset.
    X_composite = mlp_composite.get_input_space().make_theano_batch()
    X_first_part = mlp_first_part.get_input_space().make_theano_batch()
    X_second_part = mlp_second_part.get_input_space().make_theano_batch()

    fprop_composite = theano.function(X_composite,
                                      mlp_composite.fprop(X_composite))
    fprop_first_part = theano.function([X_first_part],
                                       mlp_first_part.fprop(X_first_part))
    fprop_second_part = theano.function([X_second_part],
                                        mlp_second_part.fprop(X_second_part))

    X_data = np.random.random(size=(10, 15)).astype(theano.config.floatX)
    y_data = np.random.randint(low=0, high=10, size=(10, 4))

    np.testing.assert_allclose(
        fprop_composite(X_data[:, 0:5], X_data[:, 5:15])[:, 0:2],
        fprop_first_part(X_data[:, 0:5]))
    np.testing.assert_allclose(
        fprop_composite(X_data[:, 0:5], X_data[:, 5:15])[:, 2:4],
        fprop_second_part(X_data[:, 5:15]))

    # Finally, check that the internal FlattenerLayer behaves as we would
    # expect. First, retrieve the FlattenerLayer.
    fl = mlp_composite.layers[0]

    # Check that it agrees on the input space.
    assert mlp_composite.get_input_space() == fl.get_input_space()

    # Check that it agrees on the parameters.
    for i in range(0, 4):
        np.testing.assert_allclose(fl.get_params()[i].eval(),
                                   mlp_composite.get_params()[i].eval())
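As a hedged follow-up check, reusing mlp_composite from above: the FlattenerLayer concatenates the two Linear(2, ...) outputs, so the composite network's output space should have total dimension 4.

# The composite outputs (2 + 2 units) are flattened into one 4-dim space.
assert mlp_composite.get_output_space().get_total_dimension() == 4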
Example #6
def create_dataset(schema, tables, ids, n_classes, which=None):
    all_instances = psda.generate_instances_for_appliances_by_dataids(
        schema, tables, ['use', 'air1', 'furnace1'], ids, sample_rate='15T')

    energy_arrays = []
    temperature_arrays = []
    time_arrays = []
    weekday_arrays = []
    target_arrays = []
    sorted_classes = np.linspace(0, 1, n_classes + 1)[:-1]
    for instances, dataid in zip(all_instances, ids):
        # format use correctly
        use = instances[0].traces[0]
        use.series.fillna(0, inplace=True)
        use.series = use.series.astype(float).clip(0.0000001)
        use_windows = use.get_windows(window_length, window_stride)

        # create features sources
        energy_arrays.append(use_windows)
        temperature_arrays.append(np.tile([70], (use_windows.shape[0], 1)))
        time_arrays.append(np.tile([12], (use_windows.shape[0], 1)))
        weekday_arrays.append(
            np.tile([1, 0, 0, 0, 0, 0, 0], (use_windows.shape[0], 1)))

        # determine targets
        air1 = instances[1].traces[0]
        furnace1 = instances[2].traces[0]
        total_air = da.utils.aggregate_traces([air1, furnace1], {})
        total_air.series.fillna(0, inplace=True)
        total_air.series = total_air.series.astype(float)
        ratio_series = total_air.series / use.series
        ratios = da.appliance.ApplianceTrace(ratio_series, {})
        ratio_windows = ratios.get_windows(window_length, window_stride)
        ratio_windows = ratio_windows[:, prediction_index].clip(0, 1)
        classes = np.searchsorted(sorted_classes, ratio_windows,
                                  side='right') - 1
        target_arrays.append(classes_to_onehot(classes, n_classes))

    # create data tuple
    energy_arrays = np.concatenate(energy_arrays, axis=0)[:, :, np.newaxis,
                                                          np.newaxis]
    temperature_arrays = np.concatenate(temperature_arrays, axis=0)
    time_arrays = np.concatenate(time_arrays, axis=0)
    weekday_arrays = csr_matrix(np.concatenate(weekday_arrays, axis=0))
    target_arrays = csr_matrix(np.concatenate(target_arrays, axis=0))
    data = (energy_arrays, temperature_arrays, time_arrays, weekday_arrays,
            target_arrays)

    # define the data specs
    space = CompositeSpace([
        Conv2DSpace(shape=[10, 1], num_channels=1),
        VectorSpace(dim=1),
        VectorSpace(dim=1),
        VectorSpace(dim=7, sparse=True),
        VectorSpace(dim=n_classes, sparse=True)
    ])
    source = ('features0', 'features1', 'features2', 'features3', 'targets')
    data_specs = (space, source)
    dataset = VectorSpacesDataset(data=data, data_specs=data_specs)
    # Write the pickled dataset in binary mode.
    with open(os.path.join(args.data_dir, args.prefix + '_' + which + '.pkl'),
              'wb') as f:
        pickle.dump(dataset, f)
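A minimal sketch of reading one of the pickled datasets back; the path layout mirrors the dump above, and args is whichever argument object the surrounding script defines:

import os
import pickle

# Open in binary mode to match the 'wb' used when dumping above.
path = os.path.join(args.data_dir, args.prefix + '_train.pkl')
with open(path, 'rb') as f:
    dataset = pickle.load(f)
print(dataset.get_data_specs())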
def load_dataset(which_set, dataset_types):

    # We need at least 2 dataset types; otherwise this function is useless.
    assert len(dataset_types) > 1
    print("loading..", which_set)

    if which_set == 'test':
        start_set = 0
        stop_set = 10000
    elif which_set == 'valid':
        which_set = 'train'
        start_set = 40000
        stop_set = 50000
    else:
        # train
        start_set = 0
        stop_set = 40000

    n_classes = 10

    data = []
    for prepro in dataset_types:

        if prepro == 'gcn':
            print "LOADING GCN..."
            input_data = CIFAR10(which_set=which_set,
                                 start=start_set,
                                 stop=stop_set,
                                 gcn=55.,
                                 axes=['b', 0, 1, 'c'])
            # gcn_data = input_data.get_topological_view()
            data.append(input_data.get_topological_view())

        if prepro == 'toronto':
            print "LOADING TOR..."
            input_data = CIFAR10(which_set=which_set,
                                 start=start_set,
                                 stop=stop_set,
                                 axes=['b', 0, 1, 'c'],
                                 toronto_prepro=1)
            # tor_data = input_data.get_topological_view()
            data.append(input_data.get_topological_view())

        if prepro == 'zca':
            print "LOADING ZCA..."

            data_dir = string_utils.preprocess('${PYLEARN2_DATA_PATH}/cifar10')
            input_data = ZCA_Dataset(
                preprocessed_dataset=serial.load(data_dir +
                                                 "/pylearn2_gcn_whitened/" +
                                                 which_set + ".pkl"),
                preprocessor=serial.load(
                    data_dir + "/pylearn2_gcn_whitened/preprocessor.pkl"),
                start=start_set,
                stop=stop_set,
                axes=['b', 0, 1, 'c'])
            # zca_data = input_data.get_topological_view()
            data.append(input_data.get_topological_view())

    target_data = OneHotFormatter(n_classes).format(input_data.y,
                                                    mode="concatenate")
    data.append(target_data)

    data_source = []
    for i in range(len(dataset_types)):
        data_source.append('features' + str(i))
    data_source.append('targets')

    ################################## DEFINE SPACES ##################################
    spaces = []
    # add input spaces as b01c
    for i in range(0, len(dataset_types)):
        spaces.append(
            Conv2DSpace(shape=(32, 32), num_channels=3, axes=('b', 0, 1, 'c')))
    # add output space
    spaces.append(VectorSpace(n_classes))

    dataset = VectorSpacesDataset(tuple(data),
                                  (CompositeSpace(spaces), tuple(data_source)))

    return dataset
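A hedged sketch of calling load_dataset; it assumes CIFAR-10 is available under PYLEARN2_DATA_PATH (and, for the 'zca' option, the gcn-whitened pickles as well):

# Combine two preprocessed views of CIFAR-10 plus one-hot targets.
train_set = load_dataset('train', ['gcn', 'toronto'])
valid_set = load_dataset('valid', ['gcn', 'toronto'])
print(train_set.get_data_specs())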
def generate(opc):
    """
    Summary (Generates a dataset with the chosen transformation).

    Parameters
    ----------
    opc: string
        Only two options, shifts or rotations.
    """
    dim = 19  # outer square
    # A bigger image is used to avoid empty pixels in the
    # borders.
    reg = 13  # inner square
    total = 20000  # Number of training examples

    im1 = numpy.zeros((total, reg, reg, 1), dtype='float32')
    im2 = numpy.zeros((total, reg, reg, 1), dtype='float32')
    Y = numpy.zeros((total, 1), dtype='uint8')
    rng = make_np_rng(9001, [1, 2, 3], which_method="uniform")
    transformation = opc

    if transformation == 'shifts':
        # Shifts
        # only shifts between [-3, +3] pixels
        shifts = list(itertools.product(range(-3, 4), range(-3, 4)))
        t = 0
        while t < total:
            x = rng.uniform(0, 1, (dim, dim))
            x = numpy.ceil(x * 255)
            im_x = x[3:16, 3:16][:, :, None]
            ind = rng.randint(0, len(shifts))
            Y[t] = ind
            txy = shifts[ind]
            tx, ty = txy
            im_y = x[(3 + tx):(16 + tx), (3 + ty):(16 + ty)][:, :, None]
            im1[t, :] = im_x
            im2[t, :] = im_y
            t += 1
    else:
        assert transformation == 'rotations'
        # Rotations
        from PIL import Image
        # import cv2
        angs = numpy.linspace(0, 359, 90)
        t = 0
        while t < total:
            x = rng.uniform(0, 1, (dim, dim))
            x = numpy.ceil(x * 255)
            im_x = x[3:16, 3:16][:, :, None]
            ind = rng.randint(0, len(angs))
            Y[t] = ind
            ang = angs[ind]
            y = numpy.asarray(Image.fromarray(x).rotate(ang))
            # scale = 1
            # M1 = cv2.getRotationMatrix2D((dim/2, dim/2), ang, scale)
            # y = cv2.warpAffine(x, M1, (dim, dim))
            im_y = y[3:16, 3:16][:, :, None]
            im1[t, :] = im_x
            im2[t, :] = im_y
            t += 1

    view_converter = dense_design_matrix.DefaultViewConverter((reg, reg, 1))

    design_X = view_converter.topo_view_to_design_mat(im1)
    design_Y = view_converter.topo_view_to_design_mat(im2)

    # Normalize data:
    pipeline = preprocessing.Pipeline()
    gcn = preprocessing.GlobalContrastNormalization(sqrt_bias=10.,
                                                    use_std=True)
    pipeline.items.append(gcn)
    XY = numpy.concatenate((design_X, design_Y), 0)
    XY_ImP = dense_design_matrix.DenseDesignMatrix(X=XY)
    XY_ImP.apply_preprocessor(preprocessor=pipeline, can_fit=True)

    X1 = XY_ImP.X[0:design_X.shape[0], :]
    X2 = XY_ImP.X[design_X.shape[0]:, :]

    # As a Conv2DSpace
    topo_X1 = view_converter.design_mat_to_topo_view(X1)
    topo_X2 = view_converter.design_mat_to_topo_view(X2)
    axes = ('b', 0, 1, 'c')
    data_specs = (CompositeSpace([
        Conv2DSpace((reg, reg), num_channels=1, axes=axes),
        Conv2DSpace((reg, reg), num_channels=1, axes=axes),
        VectorSpace(1)
    ]), ('featuresX', 'featuresY', 'targets'))
    train = VectorSpacesDataset((topo_X1, topo_X2, Y), data_specs=data_specs)

    # As a VectorSpace:
    # data_specs = (CompositeSpace([VectorSpace(reg * reg),
    #                               VectorSpace(reg * reg),
    #                               VectorSpace(1)]),
    #               ('featuresX', 'featuresY', 'targets'))
    # train = VectorSpacesDataset(data=(X1, X2, Y), data_specs=data_specs)

    import os

    save_path = os.path.dirname(os.path.realpath(__file__))
    serial.save(os.path.join(save_path, 'train_preprocessed.pkl'), train)
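A minimal sketch of loading the serialized dataset again, assuming generate() has already been run from the same directory:

import os

from pylearn2.utils import serial

# data_specs should report the two Conv2DSpaces and the VectorSpace(1) target.
save_path = os.path.dirname(os.path.realpath(__file__))
train = serial.load(os.path.join(save_path, 'train_preprocessed.pkl'))
print(train.get_data_specs())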