Example #1
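# This snippet is cut off at the top: it assumes input/target variables, a network
# whose weight matrices are W1 and W2, a cost named 'cost', an error_rate named
# 'error_rate', and a ComputationGraph cg have already been defined. A minimal
# hypothetical sketch of that assumed setup follows; the layer sizes are
# placeholders, not the real dimensions of the mushroom data.
from theano import tensor
from blocks.algorithms import GradientDescent, Scale
from blocks.bricks import MLP, Rectifier, Softmax
from blocks.bricks.cost import CategoricalCrossEntropy, MisclassificationRate
from blocks.initialization import IsotropicGaussian, Constant
from blocks.graph import ComputationGraph
from blocks.filter import VariableFilter
from blocks.roles import WEIGHT

x = tensor.matrix('features')
y = tensor.lmatrix('targets')

mlp = MLP(activations=[Rectifier(), Softmax()], dims=[112, 50, 2],
          weights_init=IsotropicGaussian(0.01), biases_init=Constant(0))
mlp.initialize()
y_hat = mlp.apply(x)

cost = CategoricalCrossEntropy().apply(y.flatten(), y_hat)
cost.name = 'cost'
error_rate = MisclassificationRate().apply(y.flatten(), y_hat)
error_rate.name = 'error_rate'

cg = ComputationGraph(cost)
W1, W2 = VariableFilter(roles=[WEIGHT])(cg.variables)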
algorithm = GradientDescent(cost=cost,
                            parameters=cg.parameters,
                            step_rule=Scale(learning_rate=0.1))

train_set = H5PYDataset('mushrooms.hdf5', which_sets=('train',))
train_stream = DataStream.default_stream(
    train_set, iteration_scheme=SequentialScheme(
        train_set.num_examples, batch_size=128))

test_set = H5PYDataset('mushrooms.hdf5', which_sets=('test',))
test_stream = DataStream.default_stream(
    test_set, iteration_scheme=SequentialScheme(
        test_set.num_examples, batch_size=128))

main = MainLoop(
    model=Model(cost),
    data_stream=train_stream,
    algorithm=algorithm,
    extensions=[
        FinishAfter(after_n_epochs=10),
        Printing(),
        TrainingDataMonitoring([cost, error_rate], after_batch=True, prefix='train'),
        DataStreamMonitoring([cost, error_rate], after_batch=True, data_stream=test_stream, prefix='test'),
        Plot('Train',
             channels=[['train_cost', 'test_cost'], ['train_error_rate', 'test_error_rate']])
    ])

main.run()

hinton(W1.get_value())
hinton(W2.get_value())
Example #2
# Define a multi-layer neural network; the layer-to-layer computation was defined earlier.
# The activations list gives each layer's non-linear transformation: every layer of the
# multilayer perceptron first performs a linear computation and then applies a non-linearity to its result.
# x is the input to the multilayer perceptron.
mlp = MLP(activations=[Rectifier(), Softmax()], dims=[784, 100, 10]).apply(x)

# After the whole network has been defined, the initial values of its linear transformations' parameters must be set.

input_to_hidden.weights_init = IsotropicGaussian(0.01)
input_to_hidden.biases_init = Constant(0)
hidden_to_output.weights_init = IsotropicGaussian(0.01)
hidden_to_output.biases_init = Constant(0)
# Apply the initialization. This step is required; without it the settings above have no effect.
input_to_hidden.initialize()
hidden_to_output.initialize()

print(W1.get_value())

# Now start training the model. The built-in MNIST dataset is used here; to use another dataset, preprocess it with Fuel (see the sketch after the data stream below).
mnist = MNIST(("train",))
# Define the iteration scheme: mini-batch computation with 256 examples per mini-batch, which yields the data stream data_stream.
data_stream = Flatten(DataStream.default_stream(mnist, iteration_scheme=SequentialScheme(mnist.num_examples, batch_size=256)))
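# Hypothetical sketch of the Fuel preprocessing mentioned above: an in-memory numpy
# array can be wrapped as a dataset with fuel.datasets.IndexableDataset and streamed
# like MNIST. The arrays, names and sizes below are made up for illustration.
from collections import OrderedDict
import numpy
from fuel.datasets import IndexableDataset

custom_features = numpy.random.rand(1000, 784).astype('float32')
custom_targets = numpy.random.randint(0, 10, size=(1000, 1)).astype('uint8')
custom_set = IndexableDataset(indexables=OrderedDict([
    ('features', custom_features),
    ('targets', custom_targets)]))
custom_stream = Flatten(DataStream.default_stream(
    custom_set,
    iteration_scheme=SequentialScheme(custom_set.num_examples, batch_size=256)))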

# Define how the cost is optimized; SGD (gradient descent with a fixed learning rate) is used here.
#algorithm = GradientDescent(cost = cost, parameters = [cg.parameters], step_rule = Scale(learning_rate = 0.01))
algorithm = GradientDescent(cost=cost, parameters=cg.parameters, step_rule=Scale(learning_rate=0.01))  # use gradient descent to compute the updates

print "------"

# Monitor the chosen quantities with DataStreamMonitoring, validating on the test set so the algorithm's performance on it can be inspected during training.
mnist_test = MNIST(("test",))
#data_stream_test = Flatten(DataStream.default_stream(mnist_test, iteration_scheme = SequentialScheme(mnist_test.num_examples, batch_size= 1024)))
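# The example is cut off here. A minimal, assumed completion following the usual
# Blocks pattern shown in the other examples: monitor the cost on the test stream,
# then run the main loop for a few epochs (the epoch count is illustrative).
from blocks.extensions import FinishAfter, Printing
from blocks.extensions.monitoring import DataStreamMonitoring
from blocks.main_loop import MainLoop

data_stream_test = Flatten(DataStream.default_stream(
    mnist_test,
    iteration_scheme=SequentialScheme(mnist_test.num_examples, batch_size=1024)))
monitor = DataStreamMonitoring(
    variables=[cost], data_stream=data_stream_test, prefix="test")
main_loop = MainLoop(
    data_stream=data_stream,
    algorithm=algorithm,
    extensions=[monitor, FinishAfter(after_n_epochs=5), Printing()])
main_loop.run()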
Example #3
train_set = H5PYDataset('mushrooms.hdf5', which_sets=('train', ))
train_stream = DataStream.default_stream(train_set,
                                         iteration_scheme=SequentialScheme(
                                             train_set.num_examples,
                                             batch_size=128))

test_set = H5PYDataset('mushrooms.hdf5', which_sets=('test', ))
test_stream = DataStream.default_stream(test_set,
                                        iteration_scheme=SequentialScheme(
                                            test_set.num_examples,
                                            batch_size=128))

main = MainLoop(model=Model(cost),
                data_stream=train_stream,
                algorithm=algorithm,
                extensions=[
                    FinishAfter(after_n_epochs=10),
                    Printing(),
                    TrainingDataMonitoring([cost, error_rate],
                                           after_batch=True,
                                           prefix='train'),
                    DataStreamMonitoring([cost, error_rate],
                                         after_batch=True,
                                         data_stream=test_stream,
                                         prefix='test'),
                    Plot('Train',
                         channels=[['train_cost', 'test_cost'],
                                   ['train_error_rate', 'test_error_rate']])
                ])

main.run()

hinton(W1.get_value())
hinton(W2.get_value())
def run_experiment():

    np.random.seed(42)

    X = tensor.tensor4('features')
    nbr_channels = 3
    image_shape = (5, 5)

    conv_layers = [ ConvolutionalLayer( filter_size=(2,2),
                                        num_filters=10,
                                        activation=Rectifier().apply,
                                        border_mode='valid',
                                        pooling_size=(1,1),
                                        weights_init=Uniform(width=0.1),
                                        #biases_init=Uniform(width=0.01),
                                        biases_init=Constant(0.0),
                                        name='conv0')]
    conv_sequence = ConvolutionalSequence(  conv_layers,
                                            num_channels=nbr_channels,
                                            image_size=image_shape)
    #conv_sequence.push_allocation_config()
    conv_sequence.initialize()
    
    flattener = Flattener()
    conv_output = conv_sequence.apply(X)
    y_hat = flattener.apply(conv_output)
    # Whatever. Not important since we're not going to actually train anything.
    cost = tensor.sqr(y_hat).sum()


    #L_grads_method_02 = [tensor.grad(cost, v) for v in VariableFilter(roles=[FILTER, BIAS])(ComputationGraph([y_hat]).variables)]
    L_grads_method_02 = [tensor.grad(cost, v) for v in VariableFilter(roles=[BIAS])(ComputationGraph([y_hat]).variables)]
    # works on the sum of the gradients in a mini-batch
    sum_square_norm_gradients_method_02 = sum([tensor.sqr(g).sum() for g in L_grads_method_02])


    D_by_layer = get_conv_layers_transformation_roles(ComputationGraph(conv_output))
    individual_sum_square_norm_gradients_method_00 = get_sum_square_norm_gradients_conv_transformations(D_by_layer, cost)


    # why does this thing depend on N again ?
    # I don't think I've used a cost that divides by N.

    N = 2
    Xtrain = np.random.randn(N, nbr_channels, image_shape[0], image_shape[1]).astype(np.float32)
    #Xtrain[1:,:,:,:] = 0.0
    Xtrain[:,:,:,:] = 1.0

    convolution_filter_variable = VariableFilter(roles=[FILTER])(ComputationGraph([y_hat]).variables)[0]
    convolution_filter_variable_value = convolution_filter_variable.get_value()
    convolution_filter_variable_value[:,:,:,:] = 1.0
    #convolution_filter_variable_value[0,0,:,:] = 1.0
    convolution_filter_variable.set_value(convolution_filter_variable_value)

    f = theano.function([X],
                        [cost,
                            individual_sum_square_norm_gradients_method_00,
                            sum_square_norm_gradients_method_02])


    [c, v0, gs2] = f(Xtrain)

    #print "[c, v0, gs2]"
    L_c, L_v0, L_gs2 = ([], [], [])
    for n in range(N):
        [nc, nv0, ngs2] = f(Xtrain[n,:, :, :].reshape((1, Xtrain.shape[1], Xtrain.shape[2], Xtrain.shape[3])))
        L_c.append(nc)
        L_v0.append(nv0)
        L_gs2.append(ngs2)

    print "Cost for whole mini-batch in single shot : %f." % c
    print "Cost for whole mini-batch accumulated    : %f." % sum(L_c)
    print ""
    print "Square-norm of all gradients for each data point in single shot :"
    print v0.reshape((1,-1))
    print "Square-norm of all gradients for each data point iteratively :"
    print np.array(L_gs2).reshape((1,-1))
    print ""
    print "Difference max abs : %f." % np.max(np.abs(v0 - np.array(L_gs2)))
    print ""
    print "Ratios : "
    print np.array(L_gs2).reshape((1,-1)) / v0.reshape((1,-1))
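# A hypothetical, self-contained sketch of the idea behind the per-example check
# above: for a layer z = xW + b, the gradient of the cost w.r.t. b from a single
# example equals the gradient w.r.t. that example's pre-activation row, so
# per-example squared gradient norms can be read off grad(cost, z) without running
# N separate backward passes. All names below are illustrative, not taken from the
# helper functions used in run_experiment.
import numpy as np
import theano
import theano.tensor as tensor

x = tensor.matrix('x')                                    # (N, d_in)
W = theano.shared(np.random.randn(3, 4).astype('float32'), name='W')
b = theano.shared(np.zeros(4, dtype='float32'), name='b')
z = tensor.dot(x, W) + b                                  # pre-activation, (N, d_out)
cost = tensor.sqr(z).sum()

dz = tensor.grad(cost, z)                                 # one gradient row per example
per_example_sq_norm = tensor.sqr(dz).sum(axis=1)          # ||grad_b cost_n||^2 for each n
f = theano.function([x], per_example_sq_norm)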
Example #6
def train(train_set, test_set):
    x = tensor.matrix('features')
    y = tensor.lmatrix('targets')

    l1 = Linear(
            name='input_to_hidden',
            input_dim=2,
            output_dim=3,
            weights_init=IsotropicGaussian(0.1),
            biases_init=Constant(0)
    )
    l1.initialize()
    h = Logistic().apply(l1.apply(x))

    l2 = Linear(
            name='hidden_to_output',
            input_dim=l1.output_dim,
            output_dim=2,
            weights_init=IsotropicGaussian(0.1),
            biases_init=Constant(0)
    )
    l2.initialize()
    y_hat = Softmax().apply(l2.apply(h))

    cost = CategoricalCrossEntropy().apply(y.flatten(), y_hat)

    error = MisclassificationRate().apply(y.flatten(), y_hat)
    error.name = 'misclassification_rate'

    cg = ComputationGraph(cost)

    W1, W2 = VariableFilter(roles=[WEIGHT])(cg.variables)
    cost = cost + 1e-8 * (W1 ** 2).sum() + 1e-8 * (W2 ** 2).sum()
    cost.name = 'cost_with_regularization'

    print('W1', W1.get_value())
    print('W2', W2.get_value())

    algorithm = GradientDescent(
            cost=cost,
            parameters=cg.parameters,
            step_rule=RMSProp()
    )

    data_stream_train = Flatten(
            DataStream.default_stream(
                    train_set,
                    iteration_scheme=ShuffledScheme(train_set.num_examples, batch_size=4)
            )
    )

    data_stream_test = Flatten(
            DataStream.default_stream(
                    test_set,
                    iteration_scheme=SequentialScheme(test_set.num_examples, batch_size=1)
            )
    )

    monitor = DataStreamMonitoring(
            variables=[cost, error],
            data_stream=data_stream_test,
            prefix="test"
    )

    main_loop = MainLoop(
            data_stream=data_stream_train,
            algorithm=algorithm,
            extensions=[
                monitor,
                FinishAfter(after_n_epochs=100),
                Printing(),
                # ProgressBar()
            ]
    )

    main_loop.run()
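# Hypothetical usage sketch: train() expects Fuel datasets exposing 'features'
# (2-dimensional inputs) and 'targets' sources. The toy arrays below are made up
# for illustration; any dataset with matching sources would work the same way.
from collections import OrderedDict
import numpy
from fuel.datasets import IndexableDataset

rng = numpy.random.RandomState(0)
features = rng.rand(100, 2).astype('float32')
targets = (features.sum(axis=1) > 1.0).astype('int64').reshape(-1, 1)
train_set = IndexableDataset(OrderedDict([('features', features[:80]),
                                          ('targets', targets[:80])]))
test_set = IndexableDataset(OrderedDict([('features', features[80:]),
                                         ('targets', targets[80:])]))
train(train_set, test_set)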