Example #1
def main(save_to, num_batches, continue_=False):
    mlp = MLP([Tanh(), Identity()], [1, 10, 1],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0), seed=1)
    mlp.initialize()
    x = tensor.vector('numbers')
    y = tensor.vector('roots')
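    # [:, None] below adds a trailing axis, turning the (batch,) vectors
    # into the (batch, 1) matrices the bricks expect.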
    cost = SquaredError().apply(y[:, None], mlp.apply(x[:, None]))
    cost.name = "cost"

    main_loop = MainLoop(
        GradientDescent(
            cost=cost, params=ComputationGraph(cost).parameters,
            step_rule=Scale(learning_rate=0.001)),
        get_data_stream(range(100)),
        model=Model(cost),
        extensions=([LoadFromDump(save_to)] if continue_ else []) + [
            Timing(),
            FinishAfter(after_n_batches=num_batches),
            DataStreamMonitoring(
                [cost], get_data_stream(range(100, 200)),
                prefix="test"),
            TrainingDataMonitoring([cost], after_epoch=True),
            Dump(save_to),
            Printing()])
    main_loop.run()
    return main_loop
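
These snippets never define get_data_stream. A minimal sketch of what it could look like, assuming Fuel's IterableDataset with a ConstantScheme batcher and an arbitrary batch size of 10 (the original project's helper may differ):

import numpy
from fuel.datasets import IterableDataset
from fuel.schemes import ConstantScheme
from fuel.streams import DataStream
from fuel.transformers import Batch

def get_data_stream(iterable):
    # Pair each number with its square root, under the source names
    # 'numbers' and 'roots' that the tensor.vector variables above expect.
    numbers = numpy.asarray(list(iterable), dtype='float32')
    dataset = IterableDataset({'numbers': numbers,
                               'roots': numpy.sqrt(numbers)})
    # Group single examples into minibatches of 10.
    return Batch(DataStream(dataset), ConstantScheme(10))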
Example #2
def main(save_to, num_batches, continue_=False):
    mlp = MLP([Tanh(), Identity()], [1, 10, 1],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0),
              seed=1)
    mlp.initialize()
    x = tensor.vector('numbers')
    y = tensor.vector('roots')
    cost = SquaredError().apply(y[:, None], mlp.apply(x[:, None]))
    cost.name = "cost"

    main_loop = MainLoop(
        GradientDescent(cost=cost,
                        params=ComputationGraph(cost).parameters,
                        step_rule=Scale(learning_rate=0.001)),
        get_data_stream(range(100)),
        model=Model(cost),
        extensions=([LoadFromDump(save_to)] if continue_ else []) + [
            Timing(),
            FinishAfter(after_n_batches=num_batches),
            DataStreamMonitoring(
                [cost], get_data_stream(range(100, 200)), prefix="test"),
            TrainingDataMonitoring([cost], after_epoch=True),
            Dump(save_to),
            Printing()
        ])
    main_loop.run()
    return main_loop
Example #3
    def apply(self, input_, target):
        x_to_h = Linear(name='x_to_h',
                        input_dim=self.dims[0],
                        output_dim=self.dims[1] * 4)
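        # The 4x factor matches the LSTM brick's input layout: input gate,
        # forget gate, cell candidate and output gate pre-activations, each
        # of width dims[1].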
        pre_rnn = x_to_h.apply(input_)
        pre_rnn.name = 'pre_rnn'
        rnn = LSTM(activation=Tanh(),
                   dim=self.dims[1], name=self.name)
        h, _ = rnn.apply(pre_rnn)
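        # LSTM.apply returns (hidden states, memory cells); only the hidden
        # states are used here.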
        h.name = 'h'
        h_to_y = Linear(name='h_to_y',
                        input_dim=self.dims[1],
                        output_dim=self.dims[2])
        y_hat = h_to_y.apply(h)
        y_hat.name = 'y_hat'

        cost = SquaredError().apply(target, y_hat)
        cost.name = 'MSE'

        self.outputs = {}
        self.outputs['y_hat'] = y_hat
        self.outputs['cost'] = cost
        self.outputs['pre_rnn'] = pre_rnn
        self.outputs['h'] = h

        # Initialization
        for brick in (rnn, x_to_h, h_to_y):
            brick.weights_init = IsotropicGaussian(0.01)
            brick.biases_init = Constant(0)
            brick.initialize()
Example #4
    def apply(self, input_, target):
        x_to_h = Linear(name='x_to_h',
                        input_dim=self.dims[0],
                        output_dim=self.dims[1] * 4)
        pre_rnn = x_to_h.apply(input_)
        pre_rnn.name = 'pre_rnn'
        rnn = LSTM(activation=Tanh(), dim=self.dims[1], name=self.name)
        h, _ = rnn.apply(pre_rnn)
        h.name = 'h'
        h_to_y = Linear(name='h_to_y',
                        input_dim=self.dims[1],
                        output_dim=self.dims[2])
        y_hat = h_to_y.apply(h)
        y_hat.name = 'y_hat'

        cost = SquaredError().apply(target, y_hat)
        cost.name = 'MSE'

        self.outputs = {}
        self.outputs['y_hat'] = y_hat
        self.outputs['cost'] = cost
        self.outputs['pre_rnn'] = pre_rnn
        self.outputs['h'] = h

        # Initialization
        for brick in (rnn, x_to_h, h_to_y):
            brick.weights_init = IsotropicGaussian(0.01)
            brick.biases_init = Constant(0)
            brick.initialize()
Example #5
def get_costs(presoft, args):
    if has_indices(args.dataset):
        # Targets: (Time X Batch)
        y = tensor.lmatrix('targets')
        y_mask = tensor.ones_like(y, dtype=floatX)
        y_mask = tensor.set_subtensor(
            y_mask[:args.context, :],
            tensor.zeros_like(y_mask[:args.context, :], dtype=floatX))

        time, batch, feat = presoft.shape
        cross_entropy = Softmax().categorical_cross_entropy(
            y.flatten() * y_mask.reshape((batch * time,)),
            presoft.reshape((batch * time, feat)) *
            y_mask.reshape((batch * time, 1)))

        # renormalization
        renormalized_cross_entropy = cross_entropy * (
            tensor.sum(tensor.ones_like(y_mask)) / tensor.sum(y_mask))
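        # e.g. a (5 steps x 2 examples) mask with the first timestep zeroed
        # gives a factor of 10/8, restoring the mean over the timesteps
        # that actually contribute to the cost.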

        # BPC: Bits Per Character (dividing by log(2) converts nats to bits)
        unregularized_cost = renormalized_cross_entropy / tensor.log(2)
        unregularized_cost.name = "cross_entropy"

    else:
        # Targets: (Time X Batch X Features)
        y = tensor.tensor3('targets', dtype=floatX)
        y_mask = tensor.ones_like(y[:, :, 0], dtype=floatX)
        y_mask = tensor.set_subtensor(
            y_mask[:args.context, :],
            tensor.zeros_like(y_mask[:args.context, :], dtype=floatX))

        if args.used_inputs is not None:
            y_mask = tensor.set_subtensor(
                y_mask[:args.used_inputs, :],
                tensor.zeros_like(y_mask[:args.used_inputs, :], dtype=floatX))
        # SquaredError does not work on 3D tensors
        target = (y * y_mask.dimshuffle(0, 1, 'x'))
        values = (presoft[:-1, :, :] * y_mask.dimshuffle(0, 1, 'x'))

        target = target.reshape(
            (target.shape[0] * target.shape[1], target.shape[2]))

        values = values.reshape(
            (values.shape[0] * values.shape[1], values.shape[2]))

        unregularized_cost = SquaredError().apply(target, values)
        # renormalization
        unregularized_cost = unregularized_cost * (
            tensor.sum(tensor.ones_like(y_mask)) / tensor.sum(y_mask))
        unregularized_cost.name = "mean_squared_error"

    # TODO: add regularisation for the cost
    # tensor.log(1) equals zero; it is added only so that `cost` is a
    # separate variable from `unregularized_cost` for monitoring.
    cost = unregularized_cost + tensor.log(1)
    cost.name = "regularized_cost"
    return cost, unregularized_cost
Example #6
def get_costs(presoft, args):
    if has_indices(args.dataset):
        # Targets: (Time X Batch)
        y = tensor.lmatrix('targets')
        y_mask = tensor.ones_like(y, dtype=floatX)
        y_mask = tensor.set_subtensor(
            y_mask[:args.context, :],
            tensor.zeros_like(y_mask[:args.context, :], dtype=floatX))

        time, batch, feat = presoft.shape
        cross_entropy = Softmax().categorical_cross_entropy(
            (y.flatten() *
                y_mask.reshape((batch * time, ))),
            (presoft.reshape((batch * time, feat)) *
                y_mask.reshape((batch * time, 1))))

        # renormalization
        renormalized_cross_entropy = cross_entropy * (
            tensor.sum(tensor.ones_like(y_mask)) /
            tensor.sum(y_mask))

        # BPC: Bits Per Character (dividing by log(2) converts nats to bits)
        unregularized_cost = renormalized_cross_entropy / tensor.log(2)
        unregularized_cost.name = "cross_entropy"

    else:
        # Targets: (Time X Batch X Features)
        y = tensor.tensor3('targets', dtype=floatX)
        y_mask = tensor.ones_like(y[:, :, 0], dtype=floatX)
        y_mask = tensor.set_subtensor(
            y_mask[:args.context, :],
            tensor.zeros_like(y_mask[:args.context, :], dtype=floatX))

        if args.used_inputs is not None:
            y_mask = tensor.set_subtensor(
                y_mask[:args.used_inputs, :],
                tensor.zeros_like(y_mask[:args.used_inputs, :],
                                  dtype=floatX))
        # SquaredError does not work on 3D tensors
        target = (y * y_mask.dimshuffle(0, 1, 'x'))
        values = (presoft[:-1, :, :] * y_mask.dimshuffle(0, 1, 'x'))

        target = target.reshape((target.shape[0] * target.shape[1],
                                 target.shape[2]))

        values = values.reshape((values.shape[0] * values.shape[1],
                                 values.shape[2]))

        unregularized_cost = SquaredError().apply(target, values)
        # renormalization
        unregularized_cost = unregularized_cost * (
            tensor.sum(tensor.ones_like(y_mask)) /
            tensor.sum(y_mask))
        unregularized_cost.name = "mean_squared_error"

    # TODO: add regularisation for the cost
    # tensor.log(1) equals zero; it is added only so that `cost` is a
    # separate variable from `unregularized_cost` for monitoring.
    cost = unregularized_cost + tensor.log(1)
    cost.name = "regularized_cost"
    return cost, unregularized_cost
Example #7
    def train(self):

        x = self.sharedBatch['x']
        x.name = 'x_myinput'
        xmini = self.sharedBatch['xmini']
        xmini.name = 'xmini_myinput'
        y = self.sharedBatch['y']
        y.name = 'y_myinput'

        # We need to provide the LSTM layer with inputs of size 4 * lstm_dim;
        # see the LSTM layer documentation for the explanation.
        x_to_h = Linear(self.input_dimx,
                        self.dim,
                        name='x_to_h',
                        weights_init=IsotropicGaussian(),
                        biases_init=Constant(0.0))
        xmini_to_h = Linear(self.input_dimxmini,
                            self.mini_dim,
                            name='xmini_to_h',
                            weights_init=IsotropicGaussian(),
                            biases_init=Constant(0.0))

        rnnwmini = RNNwMini(dim=self.dim,
                            mini_dim=self.mini_dim,
                            summary_dim=self.summary_dim)

        h_to_o = Linear(self.summary_dim,
                        1,
                        name='h_to_o',
                        weights_init=IsotropicGaussian(),
                        biases_init=Constant(0.0))

        x_transform = x_to_h.apply(x)
        xmini_transform = xmini_to_h.apply(xmini)

        h = rnnwmini.apply(x=x_transform, xmini=xmini_transform)

        # Only the hidden-state values at the last timestep are used for
        # the classification.
        y_hat = h_to_o.apply(h[-1])
        #y_hat = Logistic().apply(y_hat)

        cost = SquaredError().apply(y, y_hat)
        cost.name = 'cost'

        rnnwmini.initialize()
        x_to_h.initialize()
        xmini_to_h.initialize()
        h_to_o.initialize()

        self.f = theano.function(inputs=[], outputs=y_hat)
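        # inputs=[] works because x, xmini and y live in shared variables,
        # so the compiled function needs no explicit arguments.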

        #print("self.f === ")
        #print(self.f())
        #print(self.f().shape)
        #print("====")

        self.cg = ComputationGraph(cost)
        m = Model(cost)

        algorithm = GradientDescent(cost=cost,
                                    parameters=self.cg.parameters,
                                    step_rule=RMSProp(learning_rate=0.01),
                                    on_unused_sources='ignore')
        valid_monitor = DataStreamMonitoringShared(
            variables=[cost],
            data_stream=self.stream_valid_int,
            prefix="valid",
            sharedBatch=self.sharedBatch,
            sharedData=self.sharedData)
        train_monitor = TrainingDataMonitoring(variables=[cost],
                                               prefix="train",
                                               after_epoch=True)

        sharedVarMonitor = SwitchSharedReferences(self.sharedBatch,
                                                  self.sharedData)
        tBest = self.track_best('valid_cost', self.cg)
        self.tracker = tBest[0]
        extensions = [sharedVarMonitor, valid_monitor] + tBest

        if self.debug:
            extensions.append(Printing())

        self.algorithm = algorithm
        self.extensions = extensions
        self.model = m
        self.mainloop = MainLoop(self.algorithm,
                                 self.stream_train_int,
                                 extensions=self.extensions,
                                 model=self.model)
        self.main_loop(True)
Example #8
lstm.biases_init = Constant(0.)
lstm.initialize()

#ComputationGraph(encode.apply(x)).get_theano_function()(features_test)[0].shape
#ComputationGraph(lstm.apply(encoded)).get_theano_function()(features_test)
#ComputationGraph(decode.apply(hiddens[-1])).get_theano_function()(features_test)[0].shape

#ComputationGraph(SquaredError().apply(y, y_hat.flatten())).get_theano_function()(features_test, targets_test)[0].shape

encoded = encode.apply(x)
#hiddens = lstm.apply(encoded, gates.apply(x))
hiddens = lstm.apply(encoded)
y_hat = decode.apply(hiddens[-1])

cost = SquaredError().apply(y, y_hat)
cost.name = 'cost'

#ipdb.set_trace()

#ComputationGraph(y_hat).get_theano_function()(features_test)[0].shape
#ComputationGraph(cost).get_theano_function()(features_test, targets_test)[0].shape

cg = ComputationGraph(cost)

#cg = ComputationGraph(hiddens).get_theano_function()
#ipdb.set_trace()
algorithm = GradientDescent(cost=cost,
                            params=cg.parameters,
                            step_rule=CompositeRule([StepClipping(5.0),
                                                     Scale(0.01)]))
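# Note: CompositeRule applies its rules in sequence, so each gradient step
# is first clipped to an L2 norm of 5.0 by StepClipping and then scaled by
# the 0.01 learning rate.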
Example #9
    # MODEL SETUP
    textRNN = TextRNN(dim_in=VECTOR_SIZE, dim_hidden=HIDDEN_UNITS, dim_out=VECTOR_SIZE)

    output = textRNN.run(inputs=x)
    #get_states_and_output = T.function([x, x_mask], [output])

    # COST SETUP
    #y_hat = np.float32(np.ones((3,1)))
    labels = np.float32([data[1] for data in dataset])
    inputs_data = np.float32([data[0] for data in dataset])
    test_labels = np.float32([data[1] for data in test_dataset])
    test_inputs_data = np.float32([data[0] for data in test_dataset])

    cost = SquaredError().apply(y, output)
    cost.name = 'MSE_with_regularization'
    cg = ComputationGraph(cost)

    #inputs = VariableFilter(roles=[INPUT], bricks=[SimpleRecurrent])(cg.variables)
    #inputs = [inputs[0]]
    #cg_dropout = apply_dropout(cg, inputs, 0.5)
    #fprop_dropout = T.function([cg_dropout.inputs], [cg_dropout.outputs[0]])
    #dropped_out = VariableFilter(roles=[DROPOUT])(cg.variables)
    #inputs_referenced = [var.tag.replacement_of for var in dropped_out]
    #set(inputs) == set(inputs_referenced)

    get_states_and_output = T.function([x], [output])

    #W = VariableFilter(roles=[WEIGHT])(cg.variables)
    #W = W
    #cost = cost + 0.005 * (W ** 2).sum()
Example #10
def train_lstm(train, test, input_dim,
               hidden_dimension, columns, epochs,
               save_file, execution_name, batch_size, plot):
    stream_train = build_stream(train, batch_size, columns)
    stream_test = build_stream(test, batch_size, columns)

    # The train stream will return (TimeSequence, BatchSize, Dimensions)
    # and the test stream will return (TimeSequence, BatchSize, 1).

    x = T.tensor3('x')
    y = T.tensor3('y')

    y = y.reshape((y.shape[1], y.shape[0], y.shape[2]))
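    # NB: reshape reorders elements in memory rather than transposing;
    # to swap the time and batch axes, dimshuffle(1, 0, 2) would be the
    # usual operation.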

    # input_dim = 6
    # output_dim = 1
    linear_lstm = LinearLSTM(input_dim, 1, hidden_dimension,
                             # print_intermediate=True,
                             print_attrs=['__str__', 'shape'])

    y_hat = linear_lstm.apply(x)
    linear_lstm.initialize()

    c_test = AbsolutePercentageError().apply(y, y_hat)
    c_test.name = 'mape'

    c = SquaredError().apply(y, y_hat)
    c.name = 'cost'

    cg = ComputationGraph(c_test)

    def one_perc_min(current_value, best_value):
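        # Treat the new value as the best unless it is more than ~1% worse
        # than the previous best, tolerating small regressions.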
        if (1 - best_value / current_value) > 0.01:
            return best_value
        else:
            return current_value

    extensions = []

    extensions.append(DataStreamMonitoring(variables=[c, c_test],
                                           data_stream=stream_test,
                                           prefix='test',
                                           after_epoch=False,
                                           every_n_epochs=100))

    extensions.append(TrainingDataMonitoring(variables=[c_test],
                                             prefix='train',
                                             after_epoch=True))

    extensions.append(FinishAfter(after_n_epochs=epochs))

    # extensions.append(Printing())
    # extensions.append(ProgressBar())

    extensions.append(TrackTheBest('test_mape', choose_best=one_perc_min))
    extensions.append(TrackTheBest('test_cost', choose_best=one_perc_min))
    extensions.append(FinishIfNoImprovementAfter('test_cost_best_so_far', epochs=500))

    # Save only the parameters, not the whole main loop, and only when
    # test_cost_best_so_far appears in the log.
    checkpoint = Checkpoint(save_file, save_main_loop=False, after_training=False)
    checkpoint.add_condition(['after_epoch'], predicate=OnLogRecord('test_cost_best_so_far'))
    extensions.append(checkpoint)

    if BOKEH_AVAILABLE and plot:
        extensions.append(Plot(execution_name, channels=[[  # 'train_cost',
                                                          'test_cost']]))

    step_rule = Adam()

    algorithm = GradientDescent(cost=c_test, parameters=cg.parameters, step_rule=step_rule)
    main_loop = MainLoop(algorithm, stream_train, model=Model(c_test), extensions=extensions)
    main_loop.run()

    test_mape = 0
    if main_loop.log.status.get('best_test_mape', None) is None:
        with open(save_file, 'rb') as f:
            parameters = load_parameters(f)
            model = main_loop.model
            model.set_parameter_values(parameters)
            ev = DatasetEvaluator([c_test])
            test_mape = ev.evaluate(stream_test)['mape']
    else:
        test_mape = main_loop.log.status['best_test_mape']

    return test_mape, main_loop.log.status['epochs_done']
Example #11
hidden_dims = [int(dim) for dim in args.dim.split(",")]

if args.batchnorm:
    network = BatchNormalizedMLP
else:
    network = MLP
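# BatchNormalizedMLP is a drop-in replacement for MLP that batch-normalizes
# each layer's pre-activations, so both accept the same activations/dims
# arguments below.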

autoencoder = network(
    activations=[Tanh() for _ in xrange(len(hidden_dims))] + [Identity()],
    dims=[input_dim] + hidden_dims + [input_dim],
    weights_init=Uniform(width=0.02), biases_init=Constant(0))
autoencoder.initialize()

hopefully_states_again = autoencoder.apply(states)

cost = SquaredError().apply(hopefully_states_again, states)
cost.name = "squared_error"
cost_model = Model(cost)

algorithm = GradientDescent(cost=cost, parameters=cost_model.parameters,
                            step_rule=Adam())

# handle data
data = H5PYDataset(args.file, which_sets=("train",), load_in_memory=True)
# trash data for testing
"""
dataraw = numpy.zeros((10000, 512), dtype="float32")
for row in xrange(dataraw.shape[0]):
    dataraw[row] = numpy.random.rand(512)
data = OrderedDict()
data["act_seqs"] = dataraw
data = IndexableDataset(data)
Example #12
lstm.biases_init = Constant(0.)
lstm.initialize()

#ComputationGraph(encode.apply(x)).get_theano_function()(features_test)[0].shape
#ComputationGraph(lstm.apply(encoded)).get_theano_function()(features_test)
#ComputationGraph(decode.apply(hiddens[-1])).get_theano_function()(features_test)[0].shape

#ComputationGraph(SquaredError().apply(y, y_hat.flatten())).get_theano_function()(features_test, targets_test)[0].shape

encoded = encode.apply(x)
#hiddens = lstm.apply(encoded, gates.apply(x))
hiddens = lstm.apply(encoded)
y_hat = decode.apply(hiddens[-1])

cost = SquaredError().apply(y, y_hat)
cost.name = 'cost'

#ipdb.set_trace()

#ComputationGraph(y_hat).get_theano_function()(features_test)[0].shape
#ComputationGraph(cost).get_theano_function()(features_test, targets_test)[0].shape

cg = ComputationGraph(cost)

#cg = ComputationGraph(hiddens).get_theano_function()
#ipdb.set_trace()
algorithm = GradientDescent(cost=cost,
                            params=cg.parameters,
                            step_rule=CompositeRule(
                                [StepClipping(5.0),
                                 Scale(0.01)]))