Example #1
    def __init__(self, config, **kwargs):
        super(Model, self).__init__(config, **kwargs)

        self.dest_mlp = MLP(
            activations=[Rectifier()
                         for _ in config.dim_hidden_dest] + [Softmax()],
            dims=[config.dim_hidden[-1]] + config.dim_hidden_dest +
            [config.dim_output_dest],
            name='dest_mlp')
        self.time_mlp = MLP(
            activations=[Rectifier()
                         for _ in config.dim_hidden_time] + [Softmax()],
            dims=[config.dim_hidden[-1]] + config.dim_hidden_time +
            [config.dim_output_time],
            name='time_mlp')

        self.dest_classes = theano.shared(numpy.array(
            config.dest_tgtcls, dtype=theano.config.floatX),
                                          name='dest_classes')
        self.time_classes = theano.shared(numpy.array(
            config.time_tgtcls, dtype=theano.config.floatX),
                                          name='time_classes')

        self.inputs.append('input_time')
        self.children.extend([self.dest_mlp, self.time_mlp])
Example #2
def create_vae(x=None, batch=batch_size):
    x = T.matrix('features') if x is None else x
    x = x / 255.

    encoder = MLP(
        activations=[Rectifier(), Logistic()],
        dims=[img_dim**2, hidden_dim, 2*latent_dim],
        weights_init=IsotropicGaussian(std=0.01, mean=0),
        biases_init=Constant(0.01),
        name='encoder'
    )
    encoder.initialize()
    z_param = encoder.apply(x)
    z_mean, z_log_std = z_param[:,latent_dim:], z_param[:,:latent_dim]
    z = Sampling(theano_seed=seed).apply([z_mean, z_log_std], batch=batch)

    decoder = MLP(
        activations=[Rectifier(), Logistic()],
        dims=[latent_dim, hidden_dim, img_dim**2],
        weights_init=IsotropicGaussian(std=0.01, mean=0),
        biases_init=Constant(0.01),
        name='decoder'
    )
    decoder.initialize()
    x_reconstruct = decoder.apply(z)

    cost = VAEloss().apply(x, x_reconstruct, z_mean, z_log_std)
    cost.name = 'vae_cost'
    return cost
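
VAEloss is project-specific and not shown above; a minimal sketch of the evidence lower bound it presumably computes (a binary cross-entropy reconstruction term plus the Gaussian KL term), written in raw Theano with the same variable names as create_vae:

def vae_cost_sketch(x, x_reconstruct, z_mean, z_log_std):
    # Reconstruction term: per-feature binary cross-entropy, summed over features.
    recon = T.nnet.binary_crossentropy(x_reconstruct, x).sum(axis=1)
    # KL(q(z|x) || N(0, I)) for a diagonal Gaussian given mean and log-std.
    kl = -0.5 * T.sum(1 + 2 * z_log_std - z_mean ** 2 - T.exp(2 * z_log_std),
                      axis=1)
    return (recon + kl).mean()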
Example #3
def setup_model():
    # shape: T x B x F
    input_ = T.tensor3('features')
    # shape: B
    target = T.lvector('targets')
    model = LSTMAttention(input_dim=10000,
                          dim=500,
                          mlp_hidden_dims=[2000, 500, 4],
                          batch_size=100,
                          image_shape=(100, 100),
                          patch_shape=(28, 28),
                          weights_init=IsotropicGaussian(0.01),
                          biases_init=Constant(0))
    model.initialize()
    h, c = model.apply(input_)
    classifier = MLP([Rectifier(), Softmax()], [500, 100, 10],
                     weights_init=IsotropicGaussian(0.01),
                     biases_init=Constant(0))
    classifier.initialize()

    probabilities = classifier.apply(h[-1])
    cost = CategoricalCrossEntropy().apply(target, probabilities)
    error_rate = MisclassificationRate().apply(target, probabilities)

    return cost, error_rate
Example #4
    def __init__(self, stack_dim=500, **kwargs):
        """Sole constructor.
        
        Args:
            stack_dim (int): Size of vectors on the stack.
        """
        super(PushDownSequenceContentAttention, self).__init__(**kwargs)
        self.stack_dim = stack_dim
        self.max_stack_depth = 25

        self.stack_op_names = self.state_names + ['weighted_averages']

        self.stack_pop_transformer = MLP(activations=[Logistic()], dims=None)
        self.stack_pop_transformers = Parallel(
            input_names=self.stack_op_names,
            prototype=self.stack_pop_transformer,
            name="stack_pop")

        self.stack_push_transformer = MLP(activations=[Logistic()], dims=None)
        self.stack_push_transformers = Parallel(
            input_names=self.stack_op_names,
            prototype=self.stack_push_transformer,
            name="stack_push")

        self.stack_input_transformer = Linear()
        self.stack_input_transformers = Parallel(
            input_names=self.stack_op_names,
            prototype=self.stack_input_transformer,
            name="stack_input")
        self.children.append(self.stack_pop_transformers)
        self.children.append(self.stack_push_transformers)
        self.children.append(self.stack_input_transformers)
Example #5
def generation(z_list, n_latent, hu_decoder, n_out, y):
    logger.info('in generation: n_latent: %d, hu_decoder: %d', n_latent,
                hu_decoder)
    if hu_decoder == 0:
        return generation_simple(z_list, n_latent, n_out, y)
    mlp1 = MLP(activations=[Rectifier()],
               dims=[n_latent, hu_decoder],
               name='latent_to_hidDecoder')
    initialize([mlp1])
    hid_to_out = Linear(name='hidDecoder_to_output',
                        input_dim=hu_decoder,
                        output_dim=n_out)
    initialize([hid_to_out])
    mysigmoid = Logistic(name='y_hat_vae')
    agg_logpy_xz = 0.
    agg_y_hat = 0.
    for i, z in enumerate(z_list):
        y_hat = mysigmoid.apply(hid_to_out.apply(
            mlp1.apply(z)))  # reconstructed y
        agg_logpy_xz += cross_entropy_loss(y_hat, y)
        agg_y_hat += y_hat

    agg_logpy_xz /= len(z_list)
    agg_y_hat /= len(z_list)
    return agg_y_hat, agg_logpy_xz
Example #6
def build_classifier(dimension):
  mlp = MLP([Tanh(), Tanh(), Softmax()], [784, 100, 50, 10],
            weights_init=IsotropicGaussian(0.01),
            biases_init=Constant(0))

  mlp.initialize()
  return mlp
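
build_classifier only constructs and initializes the brick; a sketch of applying it (assuming from theano import tensor, as in the other examples; note that build_classifier ignores its dimension argument and hard-codes the 784-100-50-10 dims):

x = tensor.matrix('features')   # batch of flattened 28x28 images
mlp = build_classifier(784)
probs = mlp.apply(x)            # batch x 10 class probabilities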
Example #7
def setup_model(configs):
    tensor5 = theano.tensor.TensorType(config.floatX, (False,) * 5)
    # shape: T x B x C x X x Y
    input_ = tensor5("features")
    tensor3 = theano.tensor.TensorType(config.floatX, (False,) * 3)
    locs = tensor3("locs")
    # shape: B x Classes
    target = T.ivector("targets")

    model = LSTMAttention(configs, weights_init=Glorot(), biases_init=Constant(0))
    model.initialize()

    (h, c, location, scale, alpha, patch, downn_sampled_input, conved_part_1, conved_part_2, pre_lstm) = model.apply(
        input_, locs
    )

    model.location = location
    model.scale = scale
    model.alpha = alpha
    model.patch = patch

    classifier = MLP(
        [Rectifier(), Softmax()], configs["classifier_dims"], weights_init=Glorot(), biases_init=Constant(0)
    )
    classifier.initialize()

    probabilities = classifier.apply(h[-1])
    cost = CategoricalCrossEntropy().apply(target, probabilities)
    cost.name = "CE"
    error_rate = MisclassificationRate().apply(target, probabilities)
    error_rate.name = "ER"
    model.cost = cost
    model.error_rate = error_rate
    model.probabilities = probabilities

    if configs["load_pretrained"]:
        blocks_model = Model(model.cost)
        all_params = blocks_model.parameters
        with open("VGG_CNN_params.npz") as f:
            loaded = np.load(f)
            all_conv_params = loaded.keys()
            for param in all_params:
                if param.name in loaded.keys():
                    assert param.get_value().shape == loaded[param.name].shape
                    param.set_value(loaded[param.name])
                    all_conv_params.pop(all_conv_params.index(param.name))
        print "the following parameters did not match: " + str(all_conv_params)

    if configs["test_model"]:
        print "TESTING THE MODEL: CHECK THE INPUT SIZE!"
        cg = ComputationGraph(model.cost)
        f = theano.function(cg.inputs, [model.cost], on_unused_input="ignore", allow_input_downcast=True)
        data = configs["get_streams"](configs["batch_size"])[0].get_epoch_iterator().next()
        f(data[1], data[0], data[2])

        print "Test passed! ;)"

    model.monitorings = [cost, error_rate]

    return model
Example #8
File: attention.py Project: jych/blocks
    def __init__(self,
                 state_names,
                 state_dims,
                 sequence_dim,
                 match_dim,
                 state_transformer=None,
                 sequence_transformer=None,
                 energy_computer=None,
                 weights_init=None,
                 biases_init=None,
                 **kwargs):
        super(SequenceContentAttention, self).__init__(**kwargs)
        update_instance(self, locals())

        self.state_transformers = Parallel(state_names,
                                           self.state_transformer,
                                           name="state_trans")
        if not self.sequence_transformer:
            self.sequence_transformer = MLP([Identity()], name="seq_trans")
        if not self.energy_computer:
            self.energy_computer = MLP([Identity()], name="energy_comp")
        self.children = [
            self.state_transformers, self.sequence_transformer,
            self.energy_computer
        ]
Example #9
    def __init__(self, n_out, dwin, vector_size, n_hidden_layer, **kwargs):
        super(ConvPoolNlp, self).__init__(**kwargs)
        self.vector_size = vector_size
        self.n_hidden_layer = n_hidden_layer
        self.dwin = dwin
        self.n_out = n_out

        self.rectifier = Rectifier()
        """
	self.convolution = Convolutional(filter_size=(1,self.filter_size),num_filters=self.num_filter,num_channels=1,
					weights_init=IsotropicGaussian(0.01), use_bias=False)
	"""
        # second dimension is of fixed size sum(vector_size) less the filter_size borders
        self.mlp = MLP(activations=[Rectifier()] * len(self.n_hidden_layer) +
                       [Identity()],
                       dims=[self.n_out] + self.n_hidden_layer + [2],
                       weights_init=IsotropicGaussian(0.01),
                       biases_init=Constant(0.))

        self.parameters = []
        self.children = []
        #self.children.append(self.lookup)
        #self.children.append(self.convolution)
        self.children.append(self.mlp)
        self.children.append(self.rectifier)
Example #10
def build_mlp(features_car_cat, features_car_int, features_nocar_cat,
              features_nocar_int, features_cp, features_hascar, means, labels):

    features = tensor.concatenate([
        features_hascar,
        means['cp'][features_cp[:, 0]],
        means['dep'][features_cp[:, 1]],
    ], axis=1)

    mlp = MLP(activations=[Rectifier(), Rectifier(), None],
              dims=[5, 50, 50, 1],
              weights_init=IsotropicGaussian(.1),
              biases_init=Constant(0),
              name='mlp')
    mlp.initialize()

    prediction = mlp.apply(features)

    cost = MAPECost().apply(labels, prediction)

    cg = ComputationGraph(cost)
    input_var = VariableFilter(roles=[INPUT])(cg.variables)
    print(input_var)

    cg_dropout1 = apply_dropout(cg, [input_var[3], input_var[5]], .4)
    cost_dropout1 = cg_dropout1.outputs[0]

    return prediction, cost_dropout1, cg_dropout1.parameters, cost
Example #11
def test_pylearn2_training():
    # Construct the model
    mlp = MLP(activations=[Sigmoid(), Sigmoid()],
              dims=[784, 100, 784],
              weights_init=IsotropicGaussian(),
              biases_init=Constant(0.01))
    mlp.initialize()
    cost = SquaredError()

    block_cost = BlocksCost(cost)
    block_model = BlocksModel(mlp, (VectorSpace(dim=784), 'features'))

    # Load the data
    rng = numpy.random.RandomState(14)
    train_dataset = random_dense_design_matrix(rng, 1024, 784, 10)
    valid_dataset = random_dense_design_matrix(rng, 1024, 784, 10)

    # Silence Pylearn2's logger
    logger = logging.getLogger(pylearn2.__name__)
    logger.setLevel(logging.ERROR)

    # Training algorithm
    sgd = SGD(learning_rate=0.01,
              cost=block_cost,
              batch_size=128,
              monitoring_dataset=valid_dataset)
    train = Train(train_dataset, block_model, algorithm=sgd)
    train.main_loop(time_budget=3)
Example #12
def main(save_to, num_epochs):
    mlp = MLP([Tanh(), Softmax()], [784, 100, 10],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0))
    mlp.initialize()
    x = tensor.matrix('features')
    y = tensor.lmatrix('targets')
    probs = mlp.apply(tensor.flatten(x, outdim=2))
    cost = CategoricalCrossEntropy().apply(y.flatten(), probs)
    error_rate = MisclassificationRate().apply(y.flatten(), probs)

    cg = ComputationGraph([cost])
    W1, W2 = VariableFilter(roles=[WEIGHT])(cg.variables)
    cost = cost + .00005 * (W1**2).sum() + .00005 * (W2**2).sum()
    cost.name = 'final_cost'

    mnist_train = MNIST(("train", ))
    mnist_test = MNIST(("test", ))

    algorithm = GradientDescent(cost=cost,
                                parameters=cg.parameters,
                                step_rule=Scale(learning_rate=0.1))
    extensions = [
        Timing(),
        FinishAfter(after_n_epochs=num_epochs),
        DataStreamMonitoring([cost, error_rate],
                             Flatten(DataStream.default_stream(
                                 mnist_test,
                                 iteration_scheme=SequentialScheme(
                                     mnist_test.num_examples, 500)),
                                     which_sources=('features', )),
                             prefix="test"),
        TrainingDataMonitoring([
            cost, error_rate,
            aggregation.mean(algorithm.total_gradient_norm)
        ],
                               prefix="train",
                               after_epoch=True),
        Checkpoint(save_to),
        Printing()
    ]

    if BLOCKS_EXTRAS_AVAILABLE:
        extensions.append(
            Plot('MNIST example',
                 channels=[[
                     'test_final_cost',
                     'test_misclassificationrate_apply_error_rate'
                 ], ['train_total_gradient_norm']]))

    main_loop = MainLoop(algorithm,
                         Flatten(DataStream.default_stream(
                             mnist_train,
                             iteration_scheme=SequentialScheme(
                                 mnist_train.num_examples, 50)),
                                 which_sources=('features', )),
                         model=Model(cost),
                         extensions=extensions)

    main_loop.run()
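
For reference, a hypothetical invocation of this training entry point (the checkpoint filename is illustrative):

main(save_to='mnist.pkl', num_epochs=5)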
Example #13
    def create_base_model(self, x, y, input_dim, interim_dim=30):

        # Create the output of the MLP
        mlp = MLP([Tanh(), Tanh(), Tanh()], [input_dim, 60, 60, interim_dim],
                  weights_init=IsotropicGaussian(0.001),
                  biases_init=Constant(0))
        mlp.initialize()
        inter = mlp.apply(x)

        fine_tuner = MLP([Logistic()], [interim_dim, 1],
                         weights_init=IsotropicGaussian(0.001),
                         biases_init=Constant(0))
        fine_tuner.initialize()
        probs = fine_tuner.apply(inter)
        #sq_err = BinaryCrossEntropy()
        err = T.sqr(y.flatten() - probs.flatten())
        # cost = T.mean(err * y.flatten() * (1 - self.p) + err *
        #              (1 - y.flatten()) * self.p)
        cost = T.mean(err)
        #cost = sq_err.apply(probs.flatten(), y.flatten())
        # cost = T.mean(y.flatten() * T.log(probs.flatten()) +
        #              (1 - y.flatten()) * T.log(1 - probs.flatten()))
        cost.name = 'cost'
        pred_out = probs > 0.5
        mis_cost = T.sum(T.neq(y.flatten(), pred_out.flatten()))
        mis_cost.name = 'MisclassificationRate'
        return mlp, fine_tuner, cost, mis_cost
Example #14
    def create_model(self, x, y, input_dim, tol=10e-5):

        # Create the output of the MLP
        mlp = MLP(
            [Rectifier(), Rectifier(), Logistic()], [input_dim, 100, 100, 1],
            weights_init=IsotropicGaussian(0.01),
            biases_init=Constant(0))
        mlp.initialize()
        probs = mlp.apply(x)
        y = y.dimshuffle(0, 'x')
        # Create the if-else cost function
        true_p = (T.sum(y * probs) + tol) * 1.0 / (T.sum(y) + tol)
        true_n = (T.sum((1 - y) * (1 - probs)) + tol) * \
            1.0 / (T.sum(1 - y) + tol)
        #p = (T.sum(y) + tol) / (y.shape[0] + tol)
        theta = (1 - self.p) / self.p
        numerator = (1 + self.beta**2) * true_p
        denominator = self.beta**2 + theta + true_p - theta * true_n

        Fscore = numerator / denominator

        cost = -1 * Fscore
        cost.name = "cost"

        return mlp, cost, probs
Example #15
    def __init__(self, config, **kwargs):
        super(Model, self).__init__(**kwargs)
        self.config = config

        self.pre_context_embedder = ContextEmbedder(
            config.pre_embedder, name='pre_context_embedder')
        self.post_context_embedder = ContextEmbedder(
            config.post_embedder, name='post_context_embedder')

        in1 = 2 + sum(x[2] for x in config.pre_embedder.dim_embeddings)
        self.input_to_rec = MLP(activations=[Tanh()],
                                dims=[in1, config.hidden_state_dim],
                                name='input_to_rec')

        self.rec = LSTM(dim=config.hidden_state_dim, name='recurrent')

        in2 = config.hidden_state_dim + sum(
            x[2] for x in config.post_embedder.dim_embeddings)
        self.rec_to_output = MLP(activations=[Tanh()],
                                 dims=[in2, 2],
                                 name='rec_to_output')

        self.sequences = ['latitude', 'latitude_mask', 'longitude']
        self.context = self.pre_context_embedder.inputs + self.post_context_embedder.inputs
        self.inputs = self.sequences + self.context
        self.children = [
            self.pre_context_embedder, self.post_context_embedder,
            self.input_to_rec, self.rec, self.rec_to_output
        ]

        self.initial_state_ = shared_floatx_zeros((config.hidden_state_dim, ),
                                                  name="initial_state")
        self.initial_cells = shared_floatx_zeros((config.hidden_state_dim, ),
                                                 name="initial_cells")
Example #16
def test_pylearn2_training():
    # Construct the model
    mlp = MLP(activations=[Sigmoid(), Sigmoid()], dims=[784, 100, 784],
              weights_init=IsotropicGaussian(), biases_init=Constant(0.01))
    mlp.initialize()
    cost = SquaredError()

    # Load the data
    rng = numpy.random.RandomState(14)
    train_dataset = random_dense_design_matrix(rng, 1024, 784, 10)
    valid_dataset = random_dense_design_matrix(rng, 1024, 784, 10)

    x = tensor.matrix('features')
    block_cost = Pylearn2Cost(cost.apply(x, mlp.apply(x)))
    block_model = Pylearn2Model(mlp)

    # Silence Pylearn2's logger
    logger = logging.getLogger(pylearn2.__name__)
    logger.setLevel(logging.ERROR)

    # Training algorithm
    sgd = SGD(learning_rate=0.01, cost=block_cost, batch_size=128,
              monitoring_dataset=valid_dataset)
    train = Pylearn2Train(train_dataset, block_model, algorithm=sgd)
    train.main_loop(time_budget=3)
Example #17
def build_mlp(features_car_cat, features_car_int, features_nocar_cat,
              features_nocar_int, features_cp, features_hascar, means, labels):

    prediction, _, _, _ = \
            build_mlp_onlyloc(features_car_cat, features_car_int,
                              features_nocar_cat, features_nocar_int, features_cp, features_hascar,
                              means, labels)

    mlp_crm = MLP(activations=[None],
                  dims=[1, 1],
                  weights_init=IsotropicGaussian(.1),
                  biases_init=Constant(0),
                  name='mlp_crm')
    mlp_crm.initialize()
    crm = features_nocar_int[:, 0][:, None]

    prediction = prediction * mlp_crm.apply(crm)

    cost = MAPECost().apply(labels, prediction)

    cg = ComputationGraph(cost)
    input_var = VariableFilter(roles=[INPUT])(cg.variables)
    print(input_var)

    cg_dropout = apply_dropout(cg, [input_var[7], input_var[5]], .4)
    cost_dropout = cg_dropout.outputs[0]

    return prediction, cost_dropout, cg_dropout.parameters, cost
Example #18
File: custom.py Project: anirudh9119/play
    def __init__(self, mlp, frame_size=401, k=20, const=1e-5, **kwargs):
        super(SPF0Emitter, self).__init__(**kwargs)
        self.mlp = mlp
        input_dim = self.mlp.output_dim
        self.const = const
        self.frame_size = frame_size

        mlp_gmm = GMMMLP(mlp=mlp,
                         dim=(frame_size - 2) * k,
                         k=k,
                         const=const)

        self.gmm_emitter = GMMEmitter(gmmmlp=mlp_gmm,
                                      output_size=frame_size - 2,
                                      k=k,
                                      name="gmm_emitter")

        self.mu = MLP(activations=[Identity()],
                      dims=[input_dim, 1],
                      name=self.name + "_mu")
        self.sigma = MLP(activations=[SoftPlus()],
                         dims=[input_dim, 1],
                         name=self.name + "_sigma")
        self.binary = MLP(activations=[Logistic()],
                 dims=[input_dim, 1],
                 name=self.name + "_binary")

        self.children = [self.mlp, self.mu, self.sigma,
            self.binary, self.gmm_emitter]
Example #19
def main(save_to, num_batches, continue_=False):
    mlp = MLP([Tanh(), Identity()], [1, 10, 1],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0),
              seed=1)
    mlp.initialize()
    x = tensor.vector('numbers')
    y = tensor.vector('roots')
    cost = SquaredError().apply(y[:, None], mlp.apply(x[:, None]))
    cost.name = "cost"

    main_loop = MainLoop(
        GradientDescent(cost=cost,
                        params=ComputationGraph(cost).parameters,
                        step_rule=Scale(learning_rate=0.001)),
        get_data_stream(range(100)),
        model=Model(cost),
        extensions=([LoadFromDump(save_to)] if continue_ else []) + [
            Timing(),
            FinishAfter(after_n_batches=num_batches),
            DataStreamMonitoring(
                [cost], get_data_stream(range(100, 200)), prefix="test"),
            TrainingDataMonitoring([cost], after_epoch=True),
            Dump(save_to),
            Printing()
        ])
    main_loop.run()
    return main_loop
Example #20
def build_mlp(features_int, features_cat, labels, labels_mean):

    inputs = tensor.concatenate([features_int, features_cat], axis=1)

    mlp = MLP(activations=[Rectifier(),
                           Rectifier(),
                           Rectifier(), None],
              dims=[337, 800, 1200, 1],
              weights_init=IsotropicGaussian(),
              biases_init=Constant(1))
    mlp.initialize()

    prediction = mlp.apply(inputs)
    cost = MAPECost().apply(prediction, labels, labels_mean)

    cg = ComputationGraph(cost)
    #cg_dropout0   = apply_dropout(cg, [VariableFilter(roles=[INPUT])(cg.variables)[1]], .2)
    cg_dropout1 = apply_dropout(cg, [
        VariableFilter(roles=[OUTPUT])(cg.variables)[1],
        VariableFilter(roles=[OUTPUT])(cg.variables)[3],
        VariableFilter(roles=[OUTPUT])(cg.variables)[5]
    ], .2)
    cost_dropout1 = cg_dropout1.outputs[0]

    return cost_dropout1, cg_dropout1.parameters, cost  #cost, cg.parameters, cost #
Example #21
def construct_model(input_dim, output_dim):
    # Construct the model
    r = tensor.fmatrix('r')
    x = tensor.fmatrix('x')
    y = tensor.ivector('y')

    # input_dim must be nr
    mlp = MLP(activations=activation_functions,
              dims=[input_dim] + hidden_dims + [2])

    weights = mlp.apply(r)

    final = tensor.dot(x, weights)

    cost = Softmax().categorical_cross_entropy(y, final).mean()

    pred = final.argmax(axis=1)
    error_rate = tensor.neq(y, pred).mean()

    # Initialize parameters
    for brick in [mlp]:
        brick.weights_init = IsotropicGaussian(0.01)
        brick.biases_init = Constant(0.001)
        brick.initialize()

    # apply noise
    cg = ComputationGraph([cost, error_rate])
    noise_vars = VariableFilter(roles=[WEIGHT])(cg)
    apply_noise(cg, noise_vars, noise_std)
    [cost, error_rate] = cg.outputs

    return cost, error_rate
Example #22
File: sqrt.py Project: basaundi/blocks
def main(save_to, num_batches, continue_=False):
    mlp = MLP([Tanh(), Identity()], [1, 10, 1],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0), seed=1)
    mlp.initialize()
    x = tensor.vector('numbers')
    y = tensor.vector('roots')
    cost = SquaredError().apply(y[:, None], mlp.apply(x[:, None]))
    cost.name = "cost"

    main_loop = MainLoop(
        GradientDescent(
            cost=cost, params=ComputationGraph(cost).parameters,
            step_rule=Scale(learning_rate=0.001)),
        get_data_stream(range(100)),
        model=Model(cost),
        extensions=([LoadFromDump(save_to)] if continue_ else []) +
        [Timing(),
            FinishAfter(after_n_batches=num_batches),
            DataStreamMonitoring(
                [cost], get_data_stream(range(100, 200)),
                prefix="test"),
            TrainingDataMonitoring([cost], after_epoch=True),
            Dump(save_to),
            Printing()])
    main_loop.run()
    return main_loop
Example #23
def test_mlp_use_bias_pushed_when_not_explicitly_specified():
    mlp = MLP(activations=[Tanh(), Tanh(), None],
              dims=[4, 5, 6, 7],
              prototype=Linear(use_bias=False),
              use_bias=True)
    mlp.push_allocation_config()
    assert all(lin.use_bias for lin in mlp.linear_transformations)
Example #24
def build_model_mnist():

    # CNN
    filter_size = (5, 5)
    activation = Rectifier().apply
    pooling_size = (2, 2)
    num_filters = 50
    layer0 = ConvolutionalLayer(activation=activation, filter_size=filter_size, num_filters=num_filters,
                              pooling_size=pooling_size,
                              weights_init=Uniform(width=0.1),
                              biases_init=Uniform(width=0.01), name="layer_0")

    filter_size = (3, 3)
    activation = Rectifier().apply
    num_filters = 20
    layer1 = ConvolutionalLayer(activation=activation, filter_size=filter_size, num_filters=num_filters,
                              pooling_size=pooling_size,
                              weights_init=Uniform(width=0.1),
                              biases_init=Uniform(width=0.01), name="layer_1")

    conv_layers = [layer0, layer1]
    convnet = ConvolutionalSequence(conv_layers, num_channels= 1,
                                    image_size=(28, 28))

    convnet.initialize()
    output_dim = np.prod(convnet.get_dim('output'))
    mlp = MLP(activations=[Identity()], dims=[output_dim, 10],
                        weights_init=Uniform(width=0.1),
                        biases_init=Uniform(width=0.01), name="layer_2")
    mlp.initialize()

    classifier = Classifier(convnet, mlp)
    classifier.initialize()
    return classifier
Example #25
 def __init__(self, attended_dim, **kwargs):
     super(GRUInitialState, self).__init__(**kwargs)
     self.attended_dim = attended_dim
     self.initial_transformer = MLP(activations=[Tanh()],
                                    dims=[attended_dim, self.dim],
                                    name='state_initializer')
     self.children.append(self.initial_transformer)
Example #26
File: rnn.py Project: JimStearns206/taxi
    def __init__(self, config, **kwargs):
        super(Model, self).__init__(**kwargs)
        self.config = config

        self.pre_context_embedder = ContextEmbedder(config.pre_embedder, name='pre_context_embedder')
        self.post_context_embedder = ContextEmbedder(config.post_embedder, name='post_context_embedder')

        in1 = 2 + sum(x[2] for x in config.pre_embedder.dim_embeddings)
        self.input_to_rec = MLP(activations=[Tanh()], dims=[in1, config.hidden_state_dim], name='input_to_rec')

        self.rec = LSTM(
                dim = config.hidden_state_dim,
                name = 'recurrent'
            )

        in2 = config.hidden_state_dim + sum(x[2] for x in config.post_embedder.dim_embeddings)
        self.rec_to_output = MLP(activations=[Tanh()], dims=[in2, 2], name='rec_to_output')

        self.sequences = ['latitude', 'latitude_mask', 'longitude']
        self.context = self.pre_context_embedder.inputs + self.post_context_embedder.inputs
        self.inputs = self.sequences + self.context
        self.children = [ self.pre_context_embedder, self.post_context_embedder, self.input_to_rec, self.rec, self.rec_to_output ]

        self.initial_state_ = shared_floatx_zeros((config.hidden_state_dim,),
                name="initial_state")
        self.initial_cells = shared_floatx_zeros((config.hidden_state_dim,),
                name="initial_cells")
Example #27
class AttentionReader(Initializable):
    def __init__(self, x_dim, dec_dim, channels, height, width, N, **kwargs):
        super(AttentionReader, self).__init__(name="reader", **kwargs)

        self.img_height = height
        self.img_width = width
        self.N = N
        self.x_dim = x_dim
        self.dec_dim = dec_dim
        self.output_dim = 2*channels*N*N

        self.zoomer = ZoomableAttentionWindow(channels, height, width, N)
        self.readout = MLP(activations=[Identity()], dims=[dec_dim, 5], **kwargs)

        self.children = [self.readout]

    def get_dim(self, name):
        if name == 'input':
            return self.dec_dim
        elif name == 'x_dim':
            return self.x_dim
        elif name == 'output':
            return self.output_dim
        else:
            raise ValueError
            
    @application(inputs=['x', 'x_hat', 'h_dec'], outputs=['r'])
    def apply(self, x, x_hat, h_dec):
        l = self.readout.apply(h_dec)

        center_y, center_x, delta, sigma, gamma = self.zoomer.nn2att(l)

        w     = gamma * self.zoomer.read(x    , center_y, center_x, delta, sigma)
        w_hat = gamma * self.zoomer.read(x_hat, center_y, center_x, delta, sigma)
        
        return T.concatenate([w, w_hat], axis=1)

    @application(inputs=['x', 'x_hat', 'h_dec'], outputs=['r','center_y', 'center_x', 'delta'])
    def apply_detailed(self, x, x_hat, h_dec):
        l = self.readout.apply(h_dec)

        center_y, center_x, delta, sigma, gamma = self.zoomer.nn2att(l)

        w     = gamma * self.zoomer.read(x    , center_y, center_x, delta, sigma)
        w_hat = gamma * self.zoomer.read(x_hat, center_y, center_x, delta, sigma)
        
        r = T.concatenate([w, w_hat], axis=1)
        return r, center_y, center_x, delta

    @application(inputs=['x', 'h_dec'], outputs=['r','center_y', 'center_x', 'delta'])
    def apply_simple(self, x, h_dec):
        l = self.readout.apply(h_dec)

        center_y, center_x, delta, sigma, gamma = self.zoomer.nn2att(l)

        r     = gamma * self.zoomer.read(x    , center_y, center_x, delta, sigma)

        return r, center_y, center_x, delta
Example #28
File: main.py Project: mohammadpz/rna
def setup_model(configs):

    tensor5 = theano.tensor.TensorType(config.floatX, (False,) * 5)
    # shape: T x B x C x X x Y
    input_ = tensor5('features')
    # shape: B x Classes
    target = T.lmatrix('targets')

    model = LSTMAttention(
        configs,
        weights_init=Glorot(),
        biases_init=Constant(0))
    model.initialize()

    (h, c, location, scale, patch, downn_sampled_input,
        conved_part_1, conved_part_2, pre_lstm) = model.apply(input_)

    classifier = MLP(
        [Rectifier(), Logistic()],
        configs['classifier_dims'],
        weights_init=Glorot(),
        biases_init=Constant(0))
    classifier.initialize()

    probabilities = classifier.apply(h[-1])
    cost = BinaryCrossEntropy().apply(target, probabilities)
    cost.name = 'CE'
    error_rate = MisclassificationRate().apply(target, probabilities)
    error_rate.name = 'ER'
    model.cost = cost

    if configs['load_pretrained']:
        blocks_model = Model(model.cost)
        all_params = blocks_model.parameters
        with open('VGG_CNN_params.npz', 'rb') as f:
            loaded = np.load(f)
            all_conv_params = list(loaded.keys())
            for param in all_params:
                if param.name in loaded.keys():
                    assert param.get_value().shape == loaded[param.name].shape
                    param.set_value(loaded[param.name])
                    all_conv_params.remove(param.name)
        print("the following parameters did not match: " + str(all_conv_params))

    if configs['test_model']:
        cg = ComputationGraph(model.cost)
        f = theano.function(cg.inputs, [model.cost],
                            on_unused_input='ignore',
                            allow_input_downcast=True)
        data = np.random.randn(10, 40, 3, 224, 224)
        targs = np.random.randn(40, 101)
        f(data, targs)
        print "Test passed! ;)"

    model.monitorings = [cost, error_rate]

    return model
Example #29
def test_snapshot():
    x = tensor.matrix('x')
    linear = MLP([Identity(), Identity()], [10, 10, 10],
                 weights_init=Constant(1), biases_init=Constant(2))
    linear.initialize()
    y = linear.apply(x)
    cg = ComputationGraph(y)
    snapshot = cg.get_snapshot(dict(x=numpy.zeros((1, 10), dtype=floatX)))
    assert len(snapshot) == 14
Example #30
def test_extract_parameter_values():
    mlp = MLP([Identity(), Identity()], [10, 20, 10])
    mlp.allocate()
    param_values = extract_parameter_values(mlp)
    assert len(param_values) == 4
    assert isinstance(param_values['/mlp/linear_0.W'], numpy.ndarray)
    assert isinstance(param_values['/mlp/linear_0.b'], numpy.ndarray)
    assert isinstance(param_values['/mlp/linear_1.W'], numpy.ndarray)
    assert isinstance(param_values['/mlp/linear_1.b'], numpy.ndarray)
Example #31
class MLP_conv_dense(Initializable):
    def __init__(self, n_layers_conv, n_layers_dense_lower, n_layers_dense_upper,
        n_hidden_conv, n_hidden_dense_lower, n_hidden_dense_lower_output, n_hidden_dense_upper,
        spatial_width, n_colors, n_temporal_basis):
        """
        The multilayer perceptron, that provides temporal weighting coefficients for mu and sigma
        images. This consists of a lower segment with a convolutional MLP, and optionally with a
        dense MLP in parallel. The upper segment then consists of a per-pixel dense MLP
        (convolutional MLP with 1x1 kernel).
        """
        super(MLP_conv_dense, self).__init__()

        self.n_colors = n_colors
        self.spatial_width = spatial_width
        self.n_hidden_dense_lower = n_hidden_dense_lower
        self.n_hidden_dense_lower_output = n_hidden_dense_lower_output
        self.n_hidden_conv = n_hidden_conv

        ## the lower layers
        self.mlp_conv = MultiLayerConvolution(n_layers_conv, n_hidden_conv, spatial_width, n_colors)
        self.children = [self.mlp_conv]
        if n_hidden_dense_lower > 0 and n_layers_dense_lower > 0:
            n_input = n_colors*spatial_width**2
            n_output = n_hidden_dense_lower_output*spatial_width**2
            self.mlp_dense_lower = MLP([dense_nonlinearity] * n_layers_dense_lower,
                [n_input] + [n_hidden_dense_lower] * (n_layers_dense_lower-1) + [n_output],
                name='MLP dense lower', weights_init=Orthogonal(), biases_init=Constant(0))
            self.children.append(self.mlp_dense_lower)
        else:
            n_hidden_dense_lower_output = 0

        ## the upper layers (applied to each pixel independently)
        n_output = n_colors*n_temporal_basis*2 # "*2" for both mu and sigma
        self.mlp_dense_upper = MLP([dense_nonlinearity] * (n_layers_dense_upper-1) + [Identity()],
            [n_hidden_conv+n_hidden_dense_lower_output] +
            [n_hidden_dense_upper] * (n_layers_dense_upper-1) + [n_output],
            name='MLP dense upper', weights_init=Orthogonal(), biases_init=Constant(0))
        self.children.append(self.mlp_dense_upper)

    @application
    def apply(self, X):
        """
        Take in noisy input image and output temporal coefficients for mu and sigma.
        """
        Y = self.mlp_conv.apply(X)
        Y = Y.dimshuffle(0,2,3,1)
        if self.n_hidden_dense_lower > 0:
            n_images = X.shape[0]
            X = X.reshape((n_images, self.n_colors*self.spatial_width**2))
            Y_dense = self.mlp_dense_lower.apply(X)
            Y_dense = Y_dense.reshape((n_images, self.spatial_width, self.spatial_width,
                self.n_hidden_dense_lower_output))
            Y = T.concatenate([Y/T.sqrt(self.n_hidden_conv),
                Y_dense/T.sqrt(self.n_hidden_dense_lower_output)], axis=3)
        Z = self.mlp_dense_upper.apply(Y)
        return Z
Example #32
 def create_model(self):
     x = self.x
     input_dim = self.input_dim
     mlp = MLP([Logistic(), Logistic(), Tanh()], [input_dim, 100, 100, 1],
               weights_init=IsotropicGaussian(0.001),
               biases_init=Constant(0))
     mlp.initialize()
     self.mlp = mlp
     probs = mlp.apply(x)
     return probs
Example #33
def test_inject_parameter_values():
    mlp = MLP([Identity()], [10, 10])
    mlp.allocate()
    param_values = {
        '/mlp/linear_0.W': 2 * numpy.ones((10, 10), dtype=floatX),
        '/mlp/linear_0.b': 3 * numpy.ones(10, dtype=floatX)
    }
    inject_parameter_values(mlp, param_values)
    assert numpy.all(mlp.linear_transformations[0].params[0].get_value() == 2)
    assert numpy.all(mlp.linear_transformations[0].params[1].get_value() == 3)
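
Examples #30 and #33 compose into a parameter round trip; a sketch (assuming the same extract_parameter_values/inject_parameter_values helpers and default brick names) of copying weights between two identically-shaped MLPs:

source = MLP([Identity()], [10, 10])
source.allocate()
target = MLP([Identity()], [10, 10])
target.allocate()
# Keys like '/mlp/linear_0.W' line up because both bricks use default names.
inject_parameter_values(target, extract_parameter_values(source))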
Example #34
def test_fully_layer():
	batch_size = 2
	x = T.tensor4()
	y = T.ivector()
	V = 200
	layer_conv = Convolutional(filter_size=(5,5),num_filters=V,
				name="toto",
				weights_init=IsotropicGaussian(0.01),
				biases_init=Constant(0.0))
	# try with no bias
	activation = Rectifier()
	pool = MaxPooling(pooling_size=(2,2))

	convnet = ConvolutionalSequence([layer_conv, activation, pool], num_channels=15,
					image_size=(10,10),
					name="conv_section")
	convnet.push_allocation_config()
	convnet.initialize()
	output=convnet.apply(x)
	batch_size=output.shape[0]
	output_dim=np.prod(convnet.get_dim('output'))
	result_conv = output.reshape((batch_size, output_dim))
	mlp=MLP(activations=[Rectifier().apply], dims=[output_dim, 10],
				weights_init=IsotropicGaussian(0.01),
				biases_init=Constant(0.0))
	mlp.initialize()
	output=mlp.apply(result_conv)
	cost = T.mean(Softmax().categorical_cross_entropy(y.flatten(), output))
	cg = ComputationGraph(cost)
	W = VariableFilter(roles=[WEIGHT])(cg.variables)
	B = VariableFilter(roles=[BIAS])(cg.variables)
	W = W[0]; b = B[0]

	inputs_fully = VariableFilter(roles=[INPUT], bricks=[Linear])(cg)
	outputs_fully = VariableFilter(roles=[OUTPUT], bricks=[Linear])(cg)
	var_input=inputs_fully[0]
	var_output=outputs_fully[0]
	
	[d_W,d_S,d_b] = T.grad(cost, [W, var_output, b])

	d_b = d_b.dimshuffle(('x',0))
	d_p = T.concatenate([d_W, d_b], axis=0)
	x_value = 1e3*np.random.ranf((2,15, 10, 10))
	f = theano.function([x,y], [var_input, d_S, d_p], allow_input_downcast=True, on_unused_input='ignore')
	A, B, C = f(x_value, [5, 0])
	A = np.concatenate([A, np.ones((2,1))], axis=1)
	print('A', A.shape)
	print('B', B.shape)
	print('C', C.shape)

	print(lin.norm(C - np.dot(np.transpose(A), B), 'fro'))

	return
Example #35
    def create_model(self):
        input_dim = self.input_dim
        x = self.x
        y = self.y
        p = self.p
        mask = self.mask
        hidden_dim = self.hidden_dim
        embedding_dim = self.embedding_dim
        lookup = LookupTable(self.dict_size,
                             embedding_dim,
                             weights_init=IsotropicGaussian(0.001),
                             name='LookupTable')
        x_to_h = Linear(embedding_dim,
                        hidden_dim * 4,
                        name='x_to_h',
                        weights_init=IsotropicGaussian(0.001),
                        biases_init=Constant(0.0))
        lstm = LSTM(hidden_dim,
                    name='lstm',
                    weights_init=IsotropicGaussian(0.001),
                    biases_init=Constant(0.0))
        h_to_o = MLP([Logistic()], [hidden_dim, 1],
                     weights_init=IsotropicGaussian(0.001),
                     biases_init=Constant(0),
                     name='h_to_o')

        lookup.initialize()
        x_to_h.initialize()
        lstm.initialize()
        h_to_o.initialize()

        embed = lookup.apply(x).reshape(
            (x.shape[0], x.shape[1], self.embedding_dim))
        embed.name = "embed_vec"
        x_transform = x_to_h.apply(embed.transpose(1, 0, 2))
        x_transform.name = "Transformed X"
        self.lookup = lookup
        self.x_to_h = x_to_h
        self.lstm = lstm
        self.h_to_o = h_to_o

        #if mask is None:
        h, c = lstm.apply(x_transform)
        #else:
        #h, c = lstm.apply(x_transform, mask=mask)
        h.name = "hidden_state"
        c.name = "cell state"
        # only values of hidden units of the last timeframe are used for
        # the classification
        indices = T.sum(mask, axis=0) - 1
        rel_hid = h[indices, T.arange(h.shape[1])]
        out = self.h_to_o.apply(rel_hid)

        probs = out
        return probs
Example #36
def main(save_to, num_epochs, bokeh=False):
    mlp = MLP([Tanh(), Softmax()], [784, 100, 10],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0))
    mlp.initialize()
    x = tensor.matrix('features')
    y = tensor.lmatrix('targets')
    probs = mlp.apply(x)
    cost = CategoricalCrossEntropy().apply(y.flatten(), probs)
    error_rate = MisclassificationRate().apply(y.flatten(), probs)

    cg = ComputationGraph([cost])
    W1, W2 = VariableFilter(roles=[WEIGHT])(cg.variables)
    cost = cost + .00005 * (W1 ** 2).sum() + .00005 * (W2 ** 2).sum()
    cost.name = 'final_cost'

    mnist_train = MNIST("train")
    mnist_test = MNIST("test")

    algorithm = GradientDescent(
        cost=cost, params=cg.parameters,
        step_rule=Scale(learning_rate=0.1))
    extensions = [Timing(),
                  FinishAfter(after_n_epochs=num_epochs),
                  DataStreamMonitoring(
                      [cost, error_rate],
                      DataStream(mnist_test,
                                 iteration_scheme=SequentialScheme(
                                     mnist_test.num_examples, 500)),
                      prefix="test"),
                  TrainingDataMonitoring(
                      [cost, error_rate,
                       aggregation.mean(algorithm.total_gradient_norm)],
                      prefix="train",
                      after_epoch=True),
                  Checkpoint(save_to),
                  Printing()]

    if bokeh:
        extensions.append(Plot(
            'MNIST example',
            channels=[
                ['test_final_cost',
                 'test_misclassificationrate_apply_error_rate'],
                ['train_total_gradient_norm']]))

    main_loop = MainLoop(
        algorithm,
        DataStream(mnist_train,
                   iteration_scheme=SequentialScheme(
                       mnist_train.num_examples, 50)),
        model=Model(cost),
        extensions=extensions)

    main_loop.run()
Example #37
File: model.py Project: v-mipeng/Hashtag
    def _build_bricks(self, *args, **kwargs):
        # Build lookup tables
        self.word_embed = self._embed(len(self.dataset.word2index),
                                      self.config.word_embed_dim,
                                      name='word_embed')

        self.hashtag_embed = self._embed(len(self.dataset.hashtag2index),
                                         self.config.lstm_dim,
                                         name='hashtag_embed')
        # Build text encoder
        self.mlstm_ins = Linear(input_dim=self.config.word_embed_dim,
                                output_dim=4 * self.config.lstm_dim,
                                name='mlstm_in')
        self.mlstm_ins.weights_init = IsotropicGaussian(
            std=numpy.sqrt(2) /
            numpy.sqrt(self.config.word_embed_dim + self.config.lstm_dim))
        self.mlstm_ins.biases_init = Constant(0)
        self.mlstm_ins.initialize()
        self.mlstm = MLSTM(self.config.lstm_time,
                           self.config.lstm_dim,
                           shared=False)
        self.mlstm.weights_init = IsotropicGaussian(
            std=numpy.sqrt(2) /
            numpy.sqrt(self.config.word_embed_dim + self.config.lstm_dim))
        self.mlstm.biases_init = Constant(0)
        self.mlstm.initialize()
        self.hashtag2word = MLP(
            activations=[Tanh('hashtag2word_tanh')],
            dims=[self.config.lstm_dim, self.config.word_embed_dim],
            name='hashtag2word_mlp')
        self.hashtag2word.weights_init = IsotropicGaussian(
            std=1 / numpy.sqrt(self.config.word_embed_dim))
        self.hashtag2word.biases_init = Constant(0)
        self.hashtag2word.initialize()
        self.hashtag2word_bias = Bias(dim=1, name='hashtag2word_bias')
        self.hashtag2word_bias.biases_init = Constant(0)
        self.hashtag2word_bias.initialize()
        #Build character embedding
        self.char_embed = self._embed(len(self.dataset.char2index),
                                      self.config.char_embed_dim,
                                      name='char_embed')
        # Build sparse word encoder
        self.rnn_ins = Linear(input_dim=self.config.char_embed_dim,
                              output_dim=self.config.word_embed_dim,
                              name='rnn_in')
        self.rnn_ins.weights_init = IsotropicGaussian(
            std=numpy.sqrt(2) / numpy.sqrt(self.config.char_embed_dim +
                                           self.config.word_embed_dim))
        self.rnn_ins.biases_init = Constant(0)
        self.rnn_ins.initialize()
        self.rnn = SimpleRecurrent(dim=self.config.word_embed_dim,
                                   activation=Tanh())
        self.rnn.weights_init = IsotropicGaussian(
            std=1 / numpy.sqrt(self.config.word_embed_dim))
        self.rnn.initialize()
Example #38
    def __init__(self, input_dim, hidden_dim, **kwargs):
        super(VariationalAutoEncoder, self).__init__(**kwargs)

        encoder_mlp = MLP([Sigmoid(), Identity()],
                          [input_dim, 101, None])
        decoder_mlp = MLP([Sigmoid(), Sigmoid()],
                          [hidden_dim, 101, input_dim])
        self.hidden_dim = hidden_dim
        self.encoder = VAEEncoder(encoder_mlp, hidden_dim)
        self.decoder = VAEDecoder(decoder_mlp)
        self.children = [self.encoder, self.decoder]
Example #39
def test_serialization():
    # Create a simple brick with two parameters
    mlp = MLP(activations=[None, None], dims=[10, 10, 10],
              weights_init=Constant(1.), use_bias=False)
    mlp.initialize()
    W = mlp.linear_transformations[1].W
    W.set_value(W.get_value() * 2)

    # Check the data using numpy.load
    with NamedTemporaryFile(delete=False) as f:
        dump(mlp, f)
    numpy_data = numpy.load(f.name)
    assert set(numpy_data.keys()) == \
        set(['mlp-linear_0.W', 'mlp-linear_1.W', 'pkl'])
    assert_allclose(numpy_data['mlp-linear_0.W'], numpy.ones((10, 10)))
    assert numpy_data['mlp-linear_0.W'].dtype == theano.config.floatX

    # Ensure that it can be unpickled
    mlp = load(f.name)
    assert_allclose(mlp.linear_transformations[1].W.get_value(),
                    numpy.ones((10, 10)) * 2)

    # Ensure that only parameters are saved as NPY files
    mlp.random_data = numpy.random.rand(10)
    with NamedTemporaryFile(delete=False) as f:
        dump(mlp, f)
    numpy_data = numpy.load(f.name)
    assert set(numpy_data.keys()) == \
        set(['mlp-linear_0.W', 'mlp-linear_1.W', 'pkl'])

    # Ensure that parameters can be loaded with correct names
    parameter_values = load_parameter_values(f.name)
    assert set(parameter_values.keys()) == \
        set(['/mlp/linear_0.W', '/mlp/linear_1.W'])

    # Ensure that duplicate names are dealt with
    for child in mlp.children:
        child.name = 'linear'
    with NamedTemporaryFile(delete=False) as f:
        dump(mlp, f)
    numpy_data = numpy.load(f.name)
    assert set(numpy_data.keys()) == \
        set(['mlp-linear.W', 'mlp-linear.W_2', 'pkl'])

    # Ensure warnings are raised when __main__ namespace objects are dumped
    foo.__module__ = '__main__'
    import __main__
    __main__.__dict__['foo'] = foo
    mlp.foo = foo
    with NamedTemporaryFile(delete=False) as f:
        with warnings.catch_warnings(record=True) as w:
            dump(mlp, f)
            assert len(w) == 1
            assert '__main__' in str(w[-1].message)
Example #40
    def __init__(self, emb_dim, dim, dropout=0.0,
            def_word_gating="none",
            dropout_type="per_unit", compose_type="sum",
            word_dropout_weighting="no_weighting",
            shortcut_unk_and_excluded=False,
            num_input_words=-1, exclude_top_k=-1, vocab=None,
            **kwargs):

        self._dropout = dropout
        self._num_input_words = num_input_words
        self._exclude_top_K = exclude_top_k
        self._dropout_type = dropout_type
        self._compose_type = compose_type
        self._vocab = vocab
        self._shortcut_unk_and_excluded = shortcut_unk_and_excluded
        self._word_dropout_weighting = word_dropout_weighting
        self._def_word_gating = def_word_gating

        if def_word_gating not in {"none", "self_attention"}:
            raise NotImplementedError()

        if word_dropout_weighting not in {"no_weighting"}:
            raise NotImplementedError("Not implemented " + word_dropout_weighting)

        if dropout_type not in {"per_unit", "per_example", "per_word"}:
            raise NotImplementedError()

        children = []

        if self._def_word_gating == "self_attention":
            self._gate_mlp = Linear(dim, dim)
            self._gate_act = Logistic()
            children.extend([self._gate_mlp, self._gate_act])

        if compose_type == 'fully_connected_linear':
            self._def_state_compose = MLP(activations=[None],
                dims=[emb_dim + dim, emb_dim])
            children.append(self._def_state_compose)
        if compose_type == "gated_sum" or compose_type == "gated_transform_and_sum":
            if dropout_type == "per_word" or dropout_type == "per_example":
                raise RuntimeError("I dont think this combination makes much sense")

            self._compose_gate_mlp = Linear(dim + emb_dim, emb_dim,
                                            name='gate_linear')
            self._compose_gate_act = Logistic()
            children.extend([self._compose_gate_mlp, self._compose_gate_act])
        if compose_type == 'sum':
            if not emb_dim == dim:
                raise ValueError("Embedding has different dim! Cannot use compose_type='sum'")
        if compose_type == 'transform_and_sum' or compose_type == "gated_transform_and_sum":
            self._def_state_transform = Linear(dim, emb_dim, name='state_transform')
            children.append(self._def_state_transform)

        super(MeanPoolCombiner, self).__init__(children=children, **kwargs)
Example #41
 def __init__(self,
              representation_dim,
              representation_name='initial_state_representation',
              **kwargs):
     super(GRUSpecialInitialState, self).__init__(**kwargs)
     self.representation_dim = representation_dim
     self.representation_name = representation_name
     self.initial_transformer = MLP(activations=[Tanh()],
                                    dims=[representation_dim, self.dim],
                                    name='state_initializer')
     self.children.append(self.initial_transformer)
Example #42
    def __init__(self, attended_dim, context_dim, **kwargs):
        super(GRUInitialStateWithInitialStateConcatContext,
              self).__init__(**kwargs)
        self.attended_dim = attended_dim
        self.context_dim = context_dim

        self.initial_transformer = MLP(
            activations=[Tanh(), Tanh(), Tanh()],
            dims=[attended_dim + context_dim, 1000, 500, self.dim],
            name='state_initializer')
        self.children.append(self.initial_transformer)
Example #43
 def build_model(self, hidden_dim):
     board_input = T.vector('input')
     mlp = MLP(activations=[LeakyRectifier(0.1), LeakyRectifier(0.1)],
               dims=[9, hidden_dim,  9],
               weights_init=IsotropicGaussian(0.00001),
               biases_init=Constant(0.01))
     output = mlp.apply(board_input)
     masked_output = Softmax().apply(output * T.eq(board_input, 0) * 1000)
     mlp.initialize()
     cost, chosen = self.get_cost(masked_output)
     return board_input, mlp, cost, chosen, output
Example #44
File: util.py Project: xlhdh/sp2016.11-731
    def __init__(self, attended_dim, **kwargs):
        super(LSTM2GO, self).__init__(**kwargs)
        self.attended_dim = attended_dim
        self.initial_transformer_s = MLP(activations=[Tanh()],
                                       dims=[attended_dim, self.dim],
                                       name='state_initializer')
        self.children.append(self.initial_transformer_s)

        self.initial_transformer_c = MLP(activations=[Tanh()],
                                       dims=[attended_dim, self.dim],
                                       name='cell_initializer')
        self.children.append(self.initial_transformer_c)
Example #45
def prior_network(x, n_input, hu_encoder, n_latent):
    logger.info('In prior_network: n_input: %d, hu_encoder: %d', n_input, hu_encoder)
    mlp1 = MLP(activations=[Rectifier()], dims=[n_input, hu_encoder], name='prior_in_to_hidEncoder')
    initialize([mlp1])
    h_encoder = mlp1.apply(x)
    h_encoder = debug_print(h_encoder, 'h_encoder', False)
    lin1 = Linear(name='prior_hiddEncoder_to_latent_mu', input_dim=hu_encoder, output_dim=n_latent)
    lin2 = Linear(name='prior_hiddEncoder_to_latent_sigma', input_dim=hu_encoder, output_dim=n_latent)
    initialize([lin1])
    initialize([lin2], rndstd=0.001)
    mu = lin1.apply(h_encoder)
    log_sigma = lin2.apply(h_encoder)
    return mu, log_sigma
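
prior_network returns only the Gaussian parameters; drawing a latent sample from them is typically done with the reparameterization trick, sketched here in raw Theano (MRG_RandomStreams and the fixed seed are assumptions):

import theano.tensor as T
from theano.sandbox.rng_mrg import MRG_RandomStreams

def sample_latent(mu, log_sigma, seed=123):
    srng = MRG_RandomStreams(seed)
    # z = mu + sigma * eps, with eps ~ N(0, I)
    return mu + T.exp(log_sigma) * srng.normal(mu.shape)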
Example #46
 def apply(self, input_, target):
     mlp = MLP(self.non_lins, self.dims,
               weights_init=IsotropicGaussian(0.01),
               biases_init=Constant(0),
               name=self.name)
     mlp.initialize()
     probs = mlp.apply(T.flatten(input_, outdim=2))
     probs.name = 'probs'
     cost = CategoricalCrossEntropy().apply(target.flatten(), probs)
     cost.name = "CE"
     self.outputs = {}
     self.outputs['probs'] = probs
     self.outputs['cost'] = cost
Example #47
def test_serialization():

    # Create a simple MLP to dump.
    mlp = MLP(activations=[None, None], dims=[10, 10, 10],
              weights_init=Constant(1.), use_bias=False)
    mlp.initialize()
    W = mlp.linear_transformations[1].W
    W.set_value(W.get_value() * 2)

    # Ensure warnings are raised when __main__ namespace objects are dumped.
    foo.__module__ = '__main__'
    import __main__
    __main__.__dict__['foo'] = foo
    mlp.foo = foo
    with NamedTemporaryFile(delete=False) as f:
        with warnings.catch_warnings(record=True) as w:
            dump(mlp.foo, f)
            assert len(w) == 1
            assert '__main__' in str(w[-1].message)

    # Check the parameters.
    with NamedTemporaryFile(delete=False) as f:
        dump(mlp, f, parameters=[mlp.children[0].W, mlp.children[1].W])
    with open(f.name, 'rb') as ff:
        numpy_data = load_parameters(ff)
    assert set(numpy_data.keys()) == \
        set(['/mlp/linear_0.W', '/mlp/linear_1.W'])
    assert_allclose(numpy_data['/mlp/linear_0.W'], numpy.ones((10, 10)))
    assert numpy_data['/mlp/linear_0.W'].dtype == theano.config.floatX

    # Ensure that it can be unpickled.
    with open(f.name, 'rb') as ff:
        mlp = load(ff)
    assert_allclose(mlp.linear_transformations[1].W.get_value(),
                    numpy.ones((10, 10)) * 2)

    # Ensure that duplicate names are dealt with.
    for child in mlp.children:
        child.name = 'linear'
    with NamedTemporaryFile(delete=False) as f:
        dump(mlp, f, parameters=[mlp.children[0].W, mlp.children[1].W])
    with open(f.name, 'rb') as ff:
        numpy_data = load_parameters(ff)
    assert set(numpy_data.keys()) == \
        set(['/mlp/linear.W', '/mlp/linear.W_2'])

    # Check when we don't dump the main object.
    with NamedTemporaryFile(delete=False) as f:
        dump(None, f, parameters=[mlp.children[0].W, mlp.children[1].W])
    with tarfile.open(f.name, 'r') as tarball:
        assert set(tarball.getnames()) == set(['_parameters'])
Example #48
def construct_model(input_dim, output_dim):
    # Construct the model
    r = tensor.fmatrix('r')
    x = tensor.fmatrix('x')
    y = tensor.ivector('y')

    nx = x.shape[0]
    nj = x.shape[1]  # also is r.shape[0]
    nr = r.shape[1]

    # r is nj x nr
    # x is nx x nj
    # y is nx x 1

    # r_rep is nx x nj x nr
    r_rep = r[None, :, :].repeat(axis=0, repeats=nx)
    # x3 is nx x nj x 1
    x3 = x[:, :, None]

    # concat is nx x nj x (nr + 1)
    concat = tensor.concatenate([r_rep, x3], axis=2)
    mlp_input = concat.reshape((nx * nj, nr + 1))

    # input_dim must be nr
    mlp = MLP(activations=activation_functions,
              dims=[input_dim+1] + hidden_dims + [output_dim])

    activations = mlp.apply(mlp_input)

    act_sh = activations.reshape((nx, nj, output_dim))
    final = act_sh.mean(axis=1)

    cost = Softmax().categorical_cross_entropy(y, final).mean()

    pred = final.argmax(axis=1)
    error_rate = tensor.neq(y, pred).mean()

    # Initialize parameters
    for brick in [mlp]:
        brick.weights_init = IsotropicGaussian(0.01)
        brick.biases_init = Constant(0.001)
        brick.initialize()

    # apply noise
    cg = ComputationGraph([cost, error_rate])
    noise_vars = VariableFilter(roles=[WEIGHT])(cg)
    apply_noise(cg, noise_vars, noise_std)
    [cost_reg, error_rate_reg] = cg.outputs

    return cost_reg, error_rate_reg, cost, error_rate
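One plausible way to wire these outputs into training with Blocks is to descend on the noise-regularized cost while keeping the clean variants for monitoring; a sketch under that assumption (the dimensions are illustrative, and GradientDescent's keyword is `parameters` in recent Blocks releases, `params` in older ones):

from blocks.algorithms import GradientDescent, Scale

cost_reg, error_rate_reg, cost, error_rate = construct_model(50, 2)  # illustrative dims
cg = ComputationGraph(cost_reg)
algorithm = GradientDescent(cost=cost_reg, parameters=cg.parameters,
                            step_rule=Scale(learning_rate=0.01))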
Example #49
0
File: attention.py Project: jych/blocks
    def __init__(self, state_names, state_dims, sequence_dim, match_dim,
                 state_transformer=None, sequence_transformer=None,
                 energy_computer=None, weights_init=None, biases_init=None,
                 **kwargs):
        super(SequenceContentAttention, self).__init__(**kwargs)
        update_instance(self, locals())

        self.state_transformers = Parallel(state_names, self.state_transformer,
                                           name="state_trans")
        if not self.sequence_transformer:
            self.sequence_transformer = MLP([Identity()], name="seq_trans")
        if not self.energy_computer:
            self.energy_computer = MLP([Identity()], name="energy_comp")
        self.children = [self.state_transformers, self.sequence_transformer,
                         self.energy_computer]
Example #51
0
def create_model():
    """Create the deep autoencoder model with Blocks, and load MNIST."""
    mlp = MLP(activations=[Logistic(), Logistic(), Logistic(), None,
                           Logistic(), Logistic(), Logistic(), Logistic()],
              dims=[784, 1000, 500, 250, 30, 250, 500, 1000, 784],
              weights_init=Sparse(15, IsotropicGaussian()),
              biases_init=Constant(0))
    mlp.initialize()

    x = tensor.matrix('features')
    x_hat = mlp.apply(tensor.flatten(x, outdim=2))
    squared_err = SquaredError().apply(tensor.flatten(x, outdim=2), x_hat)
    cost = BinaryCrossEntropy().apply(tensor.flatten(x, outdim=2), x_hat)

    return x, cost, squared_err
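Since create_model returns symbolic variables, evaluating the autoencoder is just a matter of compiling them; a small sketch (the function name below is an assumption):

import theano

x, cost, squared_err = create_model()
# cost (binary cross-entropy) drives training; squared_err is returned
# purely so reconstruction quality can be monitored alongside it.
eval_fn = theano.function([x], [cost, squared_err])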
Example #52
0
def setup_model():
    # shape: T x B x F
    input_ = T.tensor3('features')
    # shape: B
    target = T.lvector('targets')
    model = LSTMAttention(dim=256,
                          mlp_hidden_dims=[256, 4],
                          batch_size=100,
                          image_shape=(64, 64),
                          patch_shape=(16, 16),
                          weights_init=Glorot(),
                          biases_init=Constant(0))
    model.initialize()
    h, c, location, scale = model.apply(input_)
    classifier = MLP([Rectifier(), Softmax()], [256 * 2, 200, 10],
                     weights_init=Glorot(),
                     biases_init=Constant(0))
    model.h = h
    model.c = c
    model.location = location
    model.scale = scale
    classifier.initialize()

    probabilities = classifier.apply(T.concatenate([h[-1], c[-1]], axis=1))
    cost = CategoricalCrossEntropy().apply(target, probabilities)
    error_rate = MisclassificationRate().apply(target, probabilities)
    model.cost = cost

    location_x_0_avg = T.mean(location[0, :, 0])
    location_x_0_avg.name = 'location_x_0_avg'
    location_x_10_avg = T.mean(location[10, :, 0])
    location_x_10_avg.name = 'location_x_10_avg'
    location_x_20_avg = T.mean(location[-1, :, 0])
    location_x_20_avg.name = 'location_x_20_avg'

    scale_x_0_avg = T.mean(scale[0, :, 0])
    scale_x_0_avg.name = 'scale_x_0_avg'
    scale_x_10_avg = T.mean(scale[10, :, 0])
    scale_x_10_avg.name = 'scale_x_10_avg'
    scale_x_20_avg = T.mean(scale[-1, :, 0])
    scale_x_20_avg.name = 'scale_x_20_avg'

    monitorings = [error_rate,
                   location_x_0_avg, location_x_10_avg, location_x_20_avg,
                   scale_x_0_avg, scale_x_10_avg, scale_x_20_avg]
    model.monitorings = monitorings

    return model
Example #53
0
    def __init__(
        self,
        input_dim,
        h0_dim,
        s0_dim,
        h1_dim,
        output_dim,
    ):
        super(SeqToSeqLSTM, self).__init__()
        self.h0__input = MLP(
            [Tanh()],
            dims=[
                input_dim,
                h0_dim
            ],
            weights_init=init.IsotropicGaussian(0.01),
            biases_init=init.IsotropicGaussian(0.3),
            name='MLP:h0__input'
        )
        self.s0__h0_input = LSTMLayer(
            input_dim=h0_dim + input_dim,
            state_dim=s0_dim,
            name='LSTMLayer:s0__h0_input'
        )

        self.h1__s0_h0_input = MLP(
            [Tanh()],
            dims=[
                s0_dim + h0_dim + input_dim,
                h1_dim
            ],
            weights_init=init.IsotropicGaussian(0.01),
            biases_init=init.Constant(0.0),
            name='MLP:h1__s0_h0_input'
        )
        self.output__h1_s0_h0_input = Linear(
            input_dim=h1_dim + s0_dim + h0_dim + input_dim,
            output_dim=output_dim,
            weights_init=init.IsotropicGaussian(0.01),
            biases_init=init.Constant(0.0),
            name='Linear:output__h1_s0_h0_input'
        )
        self.children = [
            self.h0__input,
            self.s0__h0_input,
            self.h1__s0_h0_input,
            self.output__h1_s0_h0_input
        ]
Example #54
0
class DGSRNN(BaseRecurrent, Initializable):
    def __init__(self, input_dim, state_dim, act, transition_h, tr_h_activations, **kwargs):
        super(DGSRNN, self).__init__(**kwargs)

        self.input_dim = input_dim
        self.state_dim = state_dim

        logistic = Logistic()

        self.inter = MLP(dims=[input_dim + state_dim] + transition_h,
                         activations=tr_h_activations,
                         name='inter')
        self.reset = MLP(dims=[transition_h[-1], state_dim],
                         activations=[logistic],
                         name='reset')
        self.update = MLP(dims=[transition_h[-1], state_dim],
                          activations=[act],
                          name='update')

        self.children = [self.inter, self.reset, self.update, logistic, act] + tr_h_activations

        # init state
        self.params = [shared_floatx_zeros((state_dim,), name='init_state')]
        add_role(self.params[0], INITIAL_STATE)

    def get_dim(self, name):
        if name == 'state':
            return self.state_dim
        return super(DGSRNN, self).get_dim(name)

    @recurrent(sequences=['inputs', 'drop_updates_mask'], states=['state'],
               outputs=['state', 'reset'], contexts=[])
    def apply(self, inputs=None, drop_updates_mask=None, state=None):
        inter_v = self.inter.apply(tensor.concatenate([inputs, state], axis=1))
        reset_v = self.reset.apply(inter_v)
        update_v = self.update.apply(inter_v)

        reset_v = reset_v * drop_updates_mask

        new_state = state * (1 - reset_v) + reset_v * update_v

        return new_state, reset_v

    @application
    def initial_state(self, state_name, batch_size, *args, **kwargs):
        return tensor.repeat(self.params[0][None, :],
                             repeats=batch_size,
                             axis=0)
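A minimal sketch of applying this brick to a sequence; the dimensions, activation choices, and mask layout here are assumptions:

from blocks.bricks import Tanh
from blocks.initialization import IsotropicGaussian, Constant

x = tensor.tensor3('x')        # (time, batch, input_dim)
mask = tensor.tensor3('mask')  # (time, batch, state_dim); 1 keeps an update, 0 drops it
rnn = DGSRNN(input_dim=100, state_dim=50,
             act=Tanh(name='update_act'), transition_h=[70],
             tr_h_activations=[Tanh(name='inter_act')],
             weights_init=IsotropicGaussian(0.01), biases_init=Constant(0),
             name='dgsrnn')
rnn.initialize()
states, resets = rnn.apply(inputs=x, drop_updates_mask=mask)  # scans over time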
Example #55
0
def setupNN(NNParam):
    NNWidth = NNParam['NNWidth']
    WeightStdDev = NNParam['WeightStdDev']
    L2Weight = NNParam['L2Weight']
    DropOutProb = NNParam['DropOutProb']
    InitialLearningRate = NNParam['InitialLearningRate']
    # x0..x3, y_b, shuffIdx and the data arrays tX/tYb/vX/vYb are symbolic
    # variables and shared datasets defined at module level, outside this excerpt.
    x = theano.tensor.concatenate([x0, x1, x2, x3], axis=1)
    mlp = MLP(activations=[Rectifier(), Rectifier(), Rectifier(), Rectifier(), Rectifier()], dims=[69*4, NNWidth, NNWidth, NNWidth, NNWidth, 100],
           weights_init=IsotropicGaussian(WeightStdDev),
           biases_init=Constant(0))

    x_forward = mlp.apply(x)
    mlp_sm = MLP(activations=[None], dims=[100, 39],
           weights_init=IsotropicGaussian(WeightStdDev),
           biases_init=Constant(0))
    y_hat_b = Softmax().apply(mlp_sm.apply(x_forward))
    mlp.initialize()
    mlp_sm.initialize()
    cg = blocks.graph.ComputationGraph(y_hat_b)
    parameters = list(cg.parameters)
    weights = VariableFilter(roles=[blocks.roles.WEIGHT])(cg.variables)
    cg_dropout = blocks.graph.apply_dropout(cg, [weights[3]], DropOutProb)
    y_hat_b_do = cg_dropout.outputs[0]
    pred_b = theano.tensor.argmax(cg.outputs[0], axis=1)
    # Note: despite the name, this measures the fraction of *correct*
    # predictions (accuracy), since it averages eq() rather than neq().
    err_b = theano.tensor.mean(theano.tensor.eq(pred_b, y_b))
    cW = 0
    for W in weights:
        cW += (W ** 2).sum()
    cost = (theano.tensor.mean(
        theano.tensor.nnet.categorical_crossentropy(y_hat_b_do, y_b))
        + cW * L2Weight)


    Learning_Rate_Decay = numpy.float32(0.98)
    learning_rate_theano = theano.shared(numpy.float32(InitialLearningRate),
                                         name='learning_rate')

    learning_rate_update = theano.function(
        inputs=[], outputs=learning_rate_theano,
        updates=[(learning_rate_theano,
                  learning_rate_theano * Learning_Rate_Decay)])
    update_proc = momentum_sgd(cost, parameters, 0.8, learning_rate_theano)

    # train
    training_proc = theano.function(
        inputs=[shuffIdx], outputs=cost, updates=update_proc,
        givens={x0: tX[theano.tensor.flatten(shuffIdx[:, 0])],
                x1: tX[theano.tensor.flatten(shuffIdx[:, 1])],
                x2: tX[theano.tensor.flatten(shuffIdx[:, 2])],
                x3: tX[theano.tensor.flatten(shuffIdx[:, 3])],
                y_b: tYb[theano.tensor.flatten(shuffIdx[:, 1])]})
    # test
    test_on_testing_proc = theano.function(
        inputs=[shuffIdx], outputs=[err_b],
        givens={x0: vX[shuffIdx[:, 0]], x1: vX[shuffIdx[:, 1]],
                x2: vX[shuffIdx[:, 2]], x3: vX[shuffIdx[:, 3]],
                y_b: vYb[shuffIdx[:, 1]]})

    test_on_training_proc = theano.function(
        inputs=[shuffIdx], outputs=[err_b],
        givens={x0: tX[shuffIdx[:, 0]], x1: tX[shuffIdx[:, 1]],
                x2: tX[shuffIdx[:, 2]], x3: tX[shuffIdx[:, 3]],
                y_b: tYb[shuffIdx[:, 1]]})

    forward_proc = theano.function(inputs=[x0, x1, x2, x3],
                                   outputs=[x_forward])
    return (learning_rate_update, training_proc, test_on_testing_proc,
            test_on_training_proc, forward_proc)
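A hypothetical driver loop for the procedures returned above; the epoch count and the batching helper are assumptions, not part of the snippet:

(lr_update, train_proc, test_on_test, test_on_train,
 forward) = setupNN(NNParam)
for epoch in range(30):
    for batch in make_shuffled_index_batches():  # assumed helper yielding shuffIdx arrays
        train_proc(batch)
    lr_update()  # decays the shared learning rate by 0.98 once per epoch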
Example #56
0
class topicalq_transformer(Initializable):

    def __init__(self, vocab_size, topical_embedding_dim, state_dim,
                 word_num, batch_size, **kwargs):
        super(topicalq_transformer, self).__init__(**kwargs)
        self.vocab_size = vocab_size
        self.word_embedding_dim = topical_embedding_dim
        self.state_dim = state_dim
        self.word_num = word_num
        self.batch_size = batch_size
        self.look_up = LookupTable(name='topical_embeddings')
        self.transformer = MLP(activations=[Tanh()],
                               dims=[self.word_embedding_dim * self.word_num,
                                     self.state_dim],
                               name='topical_transformer')
        self.children = [self.look_up, self.transformer]

    def _push_allocation_config(self):
        self.look_up.length = self.vocab_size
        self.look_up.dim = self.word_embedding_dim

    # do we have to push_config? remain unsure
    @application(inputs=['source_topical_word_sequence'],
                 outputs=['topical_embedding'])
    def apply(self, source_topical_word_sequence):
        # Time as first dimension
        source_topical_word_sequence = source_topical_word_sequence.T
        word_topical_embeddings = self.look_up.apply(
            source_topical_word_sequence)
        word_topical_embeddings = word_topical_embeddings.swapaxes(0, 1)
        # requires testing
        concatenated_topical_embeddings = tensor.reshape(
            word_topical_embeddings,
            [word_topical_embeddings.shape[0],
             word_topical_embeddings.shape[1] *
             word_topical_embeddings.shape[2]])
        topical_embedding = self.transformer.apply(
            concatenated_topical_embeddings)
        return topical_embedding
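A short usage sketch: the brick expects a (batch_size, word_num) matrix of word indices and returns a (batch_size, state_dim) embedding. The sizes and initialization schemes below are assumptions:

from blocks.initialization import IsotropicGaussian, Constant

topic_words = tensor.lmatrix('topic_words')  # (batch_size, word_num) indices
tq = topicalq_transformer(vocab_size=30000, topical_embedding_dim=100,
                          state_dim=500, word_num=10, batch_size=80,
                          weights_init=IsotropicGaussian(0.01),
                          biases_init=Constant(0), name='topicalq')
tq.initialize()
topical_embedding = tq.apply(topic_words)  # (batch_size, state_dim)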
Example #57
0
File: emitters.py Project: yingzha/tsa-rnn
class SingleSoftmax(Initializable):
    def __init__(self, hidden_dim, n_classes, **kwargs):
        super(SingleSoftmax, self).__init__(**kwargs)

        self.hidden_dim = hidden_dim
        self.n_classes = n_classes

        self.mlp = MLP(activations=[Rectifier(), Softmax()],
                       dims=[hidden_dim, hidden_dim/2, self.n_classes],
                       weights_init=Orthogonal(),
                       biases_init=Constant(0))
        self.softmax = Softmax()

        self.children = [self.mlp, self.softmax]

    # some day: @application(...) def feedback(self, h)

    @application(inputs=['cs', 'y'], outputs=['cost'])
    def cost(self, cs, y, n_patches):
        energies = [self.mlp.apply(cs[:, t, :])
                    for t in xrange(n_patches)]
        cross_entropies = [self.softmax.categorical_cross_entropy(y.flatten(), energy)
                           for energy in energies]
        error_rates = [T.neq(y, energy.argmax(axis=1)).mean(axis=0)
                       for energy in energies]
        # train on final prediction
        cost = util.named(cross_entropies[-1], "cost")
        # monitor final prediction
        self.add_auxiliary_variable(cross_entropies[-1], name="cross_entropy")
        self.add_auxiliary_variable(error_rates[-1], name="error_rate")
        return cost
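A brief usage sketch (the shapes and dimensions are assumptions): `cs` holds one hidden vector per glimpse, and the returned cost is the cross-entropy of the final prediction:

cs = T.tensor3('cs')  # (batch, n_patches, hidden_dim)
y = T.lvector('y')    # (batch,) integer labels
emitter = SingleSoftmax(hidden_dim=512, n_classes=10, name='emitter')
emitter.initialize()
cost = emitter.cost(cs, y, n_patches=8)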
Example #58
0
    def __init__(self, attended_dim, **kwargs):
        super(GRUInitialState, self).__init__(**kwargs)
        self.attended_dim = attended_dim
        self.initial_transformer = MLP(activations=[Tanh()],
                                       dims=[attended_dim, self.dim],
                                       name='state_initializer')
        self.children.append(self.initial_transformer)
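In the blocks-examples machine-translation setup, a brick like this is typically completed by an initial-states application that feeds part of the attended sequence through the MLP above. A minimal sketch of that companion method, assuming a (time, batch, features) layout where the backward encoder states occupy the last attended_dim features:

    @application
    def initial_states(self, batch_size, *args, **kwargs):
        attended = kwargs['attended']
        # Transform the backward encoder state at the first time step
        # into the decoder's initial hidden state.
        initial_state = self.initial_transformer.apply(
            attended[0, :, -self.attended_dim:])
        return initial_state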