Beispiel #1
0
 def lllistool(i, inp, func):
     """Build layer ``i`` and return its symbolic output.

     A ``Linear`` brick mapping ``DIMS[i]`` inputs to
     ``DIMS[i+1] * NUMS[i+1]`` outputs is created and initialized; what
     happens to its output depends on ``func``.

     Parameters
     ----------
     i : int
         Layer index; selects the entries of ``DIMS`` and ``NUMS`` to use.
     inp : variable
         Input the linear brick is applied to.
     func : class or None
         ``SimpleRecurrent``, ``LSTM``, ``SequenceGenerator``, any other
         brick class that can be constructed without arguments, or
         ``None`` for a purely linear layer.

     Returns
     -------
     ret : variable or None
         Output of the layer.  ``None`` for ``SequenceGenerator``, whose
         branch only constructs the brick.

     Notes
     -----
     For ``LSTM`` the entry ``NUMS[i+1]`` is multiplied by 4 in place, so
     calling this function again with the same ``i`` compounds the factor.
     """
     if func == LSTM:
         NUMS[i+1] *= 4
     # Recurrent bricks scale the initialization by input + output size.
     if func == SimpleRecurrent or func == LSTM:
         sdim = DIMS[i] + DIMS[i+1]
     else:
         sdim = DIMS[i]
     std = sdim ** (-0.5)
     l = Linear(input_dim=DIMS[i], output_dim=DIMS[i+1] * NUMS[i+1],
                weights_init=IsotropicGaussian(std=std),
                biases_init=IsotropicGaussian(std=std),
                name='Lin{}'.format(i))
     l.initialize()
     if func == SimpleRecurrent:
         gong = func(dim=DIMS[i+1], activation=Rectifier(),
                     weights_init=IsotropicGaussian(std=std))
         gong.initialize()
         ret = gong.apply(l.apply(inp))
     elif func == LSTM:
         gong = func(dim=DIMS[i+1], activation=Tanh(),
                     weights_init=IsotropicGaussian(std=std))
         gong.initialize()
         print(inp)
         # Two zero matrices of shape (inp.shape[1], DIMS[i+1]) are passed
         # after the input (presumably initial states and cells -- confirm
         # against the LSTM brick's apply signature).
         ret, _ = gong.apply(
             l.apply(inp),
             T.zeros((inp.shape[1], DIMS[i+1])),
             T.zeros((inp.shape[1], DIMS[i+1])),
         )
     elif func == SequenceGenerator:
         # The generator is only constructed; nothing is applied to it.
         func(
             readout=None,
             transition=SimpleRecurrent(dim=100, activation=Rectifier(),
                                        weights_init=IsotropicGaussian(std=0.1)))
         ret = None
     elif func is None:
         ret = l.apply(inp)
     else:
         gong = func()
         ret = gong.apply(l.apply(inp))
     return ret
Beispiel #2
0
    def __init__(self, n_out, dwin, vector_size, n_hidden_layer, **kwargs):
        """Store the hyper-parameters and create the child bricks.

        ``n_hidden_layer`` is concatenated with lists, so it must be a list
        of hidden layer sizes.  The MLP maps ``n_out`` inputs through those
        hidden layers (rectifier activations) to 2 outputs (identity).
        """
        super(ConvPoolNlp, self).__init__(**kwargs)
        self.n_out = n_out
        self.dwin = dwin
        self.n_hidden_layer = n_hidden_layer
        self.vector_size = vector_size

        self.rectifier = Rectifier()
        # Disabled convolution, kept for reference:
        #   self.convolution = Convolutional(
        #       filter_size=(1, self.filter_size),
        #       num_filters=self.num_filter, num_channels=1,
        #       weights_init=IsotropicGaussian(0.01), use_bias=False)
        # second dimension is of fixed size sum(vect_size) less the
        # filter_size borders
        hidden_activations = [Rectifier()] * len(self.n_hidden_layer)
        layer_dims = [self.n_out] + self.n_hidden_layer + [2]
        self.mlp = MLP(activations=hidden_activations + [Identity()],
                       dims=layer_dims,
                       weights_init=IsotropicGaussian(0.01),
                       biases_init=Constant(0.))

        # Only the MLP and the rectifier are registered as children; the
        # lookup and convolution bricks are not.
        self.parameters = []
        self.children = []
        for brick in (self.mlp, self.rectifier):
            self.children.append(brick)
Beispiel #3
0
def create_kim_cnn(layer0_input, embedding_size, input_len, config, pref):
    '''
        One layer convolution with different filter-sizes and maxpooling

        For every filter width listed (whitespace separated) under
        config[pref + '_filterwidth'] a convolution spanning the full
        embedding size is applied to layer0_input, max-pooled over all
        positions, rectified and flattened to two dimensions.  The number
        of filters per width is num_filters * width, capped by
        config[pref + '_maxfilter'].

        Returns (outpools, name_rep_len): the pooled features of all widths
        concatenated along axis 1, and the total number of filters.

        Raises ValueError when no filter width is configured.
    '''
    filter_width_list = [
        int(fw) for fw in config[pref + '_filterwidth'].split()
    ]
    print(filter_width_list)
    num_filters = int(config[pref + '_num_filters'])
    if not filter_width_list:
        raise ValueError(
            "no filter widths configured under '%s_filterwidth'" % pref)
    max_filters = int(config[pref + '_maxfilter'])

    totfilters = 0
    pooled = []
    for fw in filter_width_list:
        # Number of positions a filter of this width can take.
        num_feature_map = input_len - fw + 1
        conv = Convolutional(image_size=(input_len, embedding_size),
                             filter_size=(fw, embedding_size),
                             num_filters=min(max_filters, num_filters * fw),
                             num_channels=1)
        totfilters += conv.num_filters
        initialize2(conv, num_feature_map)
        conv.name = pref + 'conv_' + str(fw)
        convout = conv.apply(layer0_input)
        # Pool over every position so one value per filter remains.
        pool_layer = MaxPooling(pooling_size=(num_feature_map, 1))
        pool_layer.name = pref + 'pool_' + str(fw)
        act = Rectifier()
        act.name = pref + 'act_' + str(fw)
        pooled.append(act.apply(pool_layer.apply(convout)).flatten(2))

    if len(pooled) == 1:
        outpools = pooled[0]
    else:
        outpools = T.concatenate(pooled, axis=1)
    name_rep_len = totfilters
    return outpools, name_rep_len
Beispiel #4
0
def test_convolutional_sequence_tied_biases_pushed_if_explicitly_set():
    """An explicit ``tied_biases`` on the sequence overrides its children.

    The flag is checked on every ``Convolutional`` child after
    ``allocate()``, once with the sequence set to ``False`` (children
    created with ``True``) and once with the sequence set to ``True``
    (children created with their default).
    """
    cnn = ConvolutionalSequence(sum([[
        Convolutional(filter_size=(1, 1), num_filters=1, tied_biases=True),
        Rectifier()
    ] for _ in range(3)], []),
                                num_channels=1,
                                image_size=(1, 1),
                                tied_biases=False)
    cnn.allocate()
    # all(...) is required: asserting the bare list only checks that it is
    # non-empty and would pass whatever the flags are.
    assert all(
        not child.tied_biases for child in cnn.children
        if isinstance(child, Convolutional)
    )

    cnn = ConvolutionalSequence(sum(
        [[Convolutional(filter_size=(1, 1), num_filters=1),
          Rectifier()] for _ in range(3)], []),
                                num_channels=1,
                                image_size=(1, 1),
                                tied_biases=True)
    cnn.allocate()
    assert all(
        child.tied_biases for child in cnn.children
        if isinstance(child, Convolutional)
    )
Beispiel #5
0
    def __init__(self, dim, mini_dim, summary_dim, **kwargs):
        """Create the two recurrent layers and the linear map between them.

        ``recurrent_layer`` has ``summary_dim`` units and
        ``mini_recurrent_layer`` has ``mini_dim`` units; ``mini_to_main``
        is a linear brick from ``dim + mini_dim`` to ``summary_dim``.
        All three are registered as children.
        """
        super(RNNwMini, self).__init__(**kwargs)
        self.dim, self.mini_dim, self.summary_dim = dim, mini_dim, summary_dim

        def rectified_recurrent(size, name):
            # Every call gets its own activation and initializer objects.
            return SimpleRecurrent(dim=size,
                                   activation=Rectifier(),
                                   name=name,
                                   weights_init=IsotropicGaussian(),
                                   biases_init=Constant(0.0))

        self.recurrent_layer = rectified_recurrent(self.summary_dim,
                                                   'recurrent_layer')
        self.mini_recurrent_layer = rectified_recurrent(
            self.mini_dim, 'mini_recurrent_layer')

        joint_dim = self.dim + self.mini_dim
        self.mini_to_main = Linear(joint_dim,
                                   self.summary_dim,
                                   name='mini_to_main',
                                   weights_init=IsotropicGaussian(),
                                   biases_init=Constant(0.0))

        self.children = [self.recurrent_layer,
                         self.mini_recurrent_layer,
                         self.mini_to_main]
Beispiel #6
0
def create_vae(x=None, batch=batch_size):
    """Build the cost of a variational auto-encoder.

    Parameters
    ----------
    x : variable, optional
        Input features; a matrix named ``'features'`` is created when
        omitted.  The input is divided by 255 before encoding.
    batch : int, optional
        Batch size handed to the sampling brick.  Defaults to the
        module-level ``batch_size``.

    Returns
    -------
    cost : variable
        The ``VAEloss`` cost, named ``'vae_cost'``.
    """
    x = T.matrix('features') if x is None else x
    x = x / 255.

    encoder = MLP(
        activations=[Rectifier(), Logistic()],
        dims=[img_dim**2, hidden_dim, 2*latent_dim],
        weights_init=IsotropicGaussian(std=0.01, mean=0),
        biases_init=Constant(0.01),
        name='encoder'
    )
    encoder.initialize()
    z_param = encoder.apply(x)
    # The encoder output is split in two halves: the last latent_dim
    # columns are used as the mean, the first latent_dim as the log std.
    z_mean, z_log_std = z_param[:, latent_dim:], z_param[:, :latent_dim]
    # Use the caller-supplied batch size (it used to be ignored in favour
    # of the global batch_size).
    z = Sampling(theano_seed=seed).apply([z_mean, z_log_std], batch=batch)

    decoder = MLP(
        activations=[Rectifier(), Logistic()],
        dims=[latent_dim, hidden_dim, img_dim**2],
        weights_init=IsotropicGaussian(std=0.01, mean=0),
        biases_init=Constant(0.01),
        name='decoder'
    )
    decoder.initialize()
    x_reconstruct = decoder.apply(z)

    cost = VAEloss().apply(x, x_reconstruct, z_mean, z_log_std)
    cost.name = 'vae_cost'
    return cost
Beispiel #7
0
def create_kim_cnn(layer0_input, embedding_size, input_len, config, pref):
    '''
        One layer convolution with different filter-sizes and maxpooling

        For every filter width listed (whitespace separated) under
        config[pref + '_filterwidth'] a convolution spanning the full
        embedding size is applied to layer0_input, max-pooled over all
        positions, rectified and flattened to two dimensions.  The number
        of filters per width is num_filters * width, capped by
        config[pref + '_maxfilter'].

        Returns (outpools, name_rep_len): the pooled features of all widths
        concatenated along axis 1, and the total number of filters.

        Raises ValueError when no filter width is configured.
    '''
    filter_width_list = [int(fw) for fw in config[pref + '_filterwidth'].split()]
    print(filter_width_list)
    num_filters = int(config[pref + '_num_filters'])
    if not filter_width_list:
        raise ValueError(
            "no filter widths configured under '%s_filterwidth'" % pref)
    max_filters = int(config[pref + '_maxfilter'])

    totfilters = 0
    pooled = []
    for fw in filter_width_list:
        # Number of positions a filter of this width can take.
        num_feature_map = input_len - fw + 1
        conv = Convolutional(
            image_size=(input_len, embedding_size),
            filter_size=(fw, embedding_size),
            num_filters=min(max_filters, num_filters * fw),
            num_channels=1
        )
        totfilters += conv.num_filters
        initialize2(conv, num_feature_map)
        conv.name = pref + 'conv_' + str(fw)
        convout = conv.apply(layer0_input)
        # Pool over every position so one value per filter remains.
        pool_layer = MaxPooling(
            pooling_size=(num_feature_map, 1)
        )
        pool_layer.name = pref + 'pool_' + str(fw)
        act = Rectifier()
        act.name = pref + 'act_' + str(fw)
        pooled.append(act.apply(pool_layer.apply(convout)).flatten(2))

    if len(pooled) == 1:
        outpools = pooled[0]
    else:
        outpools = T.concatenate(pooled, axis=1)
    name_rep_len = totfilters
    return outpools, name_rep_len
Beispiel #8
0
    def create_model(self, x, y, input_dim, tol=10e-5):
        """Build an MLP and a negative F-score cost for it.

        The MLP has two rectifier hidden layers of 100 units and a single
        logistic output.  ``tol`` is added to the numerators and
        denominators of the rate estimates.  ``self.p`` and ``self.beta``
        are read to weight the score.

        Returns the tuple ``(mlp, cost, probs)``; ``cost`` is named
        ``"cost"``.
        """
        # Network producing one probability per example.
        mlp = MLP([Rectifier(), Rectifier(), Logistic()],
                  [input_dim, 100, 100, 1],
                  weights_init=IsotropicGaussian(0.01),
                  biases_init=Constant(0))
        mlp.initialize()
        probs = mlp.apply(x)

        # Give the targets a trailing broadcastable axis.
        y = y.dimshuffle(0, 'x')

        # Soft true-positive and true-negative rates.
        positive_mass = T.sum(y * probs) + tol
        positive_count = T.sum(y) + tol
        true_p = positive_mass * 1.0 / positive_count

        negative_mass = T.sum((1 - y) * (1 - probs)) + tol
        negative_count = T.sum(1 - y) + tol
        true_n = negative_mass * 1.0 / negative_count

        # self.p is used as given; it is not estimated from y here.
        theta = (1 - self.p) / self.p
        beta_sq_num = self.beta**2
        numerator = (1 + beta_sq_num) * true_p
        denominator = self.beta**2 + theta + true_p - theta * true_n

        Fscore = numerator / denominator

        # The score is negated to obtain a cost.
        cost = -1 * Fscore
        cost.name = "cost"

        return mlp, cost, probs
Beispiel #9
0
def build_mlp(features_cat, features_int, labels):
    """Build a gated two-branch MLP and its MAPE cost with dropout.

    The categorical branch (320 -> 50, logistic) multiplies the interval
    branch (19 -> 50 -> 50, rectifier) element-wise; the product feeds a
    50 -> 50 -> 1 MLP whose prediction is scored with ``MAPECost``.

    Returns
    -------
    tuple
        ``(cost_dropout1, parameters, cost)``: the cost of the graph with
        dropout applied, the parameters of that graph, and the cost
        without dropout.
    """
    mlp_int = MLP(activations=[Rectifier(), Rectifier()],
                  dims=[19, 50, 50],
                  weights_init=IsotropicGaussian(),
                  biases_init=Constant(0),
                  name='mlp_interval')
    mlp_int.initialize()
    mlp_cat = MLP(activations=[Logistic()],
                  dims=[320, 50],
                  weights_init=IsotropicGaussian(),
                  biases_init=Constant(0),
                  name='mlp_categorical')
    mlp_cat.initialize()

    mlp = MLP(activations=[Rectifier(), None],
              dims=[50, 50, 1],
              weights_init=IsotropicGaussian(),
              biases_init=Constant(0))
    mlp.initialize()

    gated = mlp_cat.apply(features_cat) * mlp_int.apply(features_int)
    prediction = mlp.apply(gated)
    cost = MAPECost().apply(prediction, labels)

    cg = ComputationGraph(cost)
    print(cg.variables)

    # Dropout (rate .2) on the OUTPUT-role variables at positions 1 and 3;
    # the filter is evaluated once instead of once per index.
    outputs = VariableFilter(roles=[OUTPUT])(cg.variables)
    cg_dropout1 = apply_dropout(cg, [outputs[1], outputs[3]], .2)
    cost_dropout1 = cg_dropout1.outputs[0]

    return cost_dropout1, cg_dropout1.parameters, cost
Beispiel #10
0
def create_lenet_5():
    """Create and initialize a LeNet brick for 28x28 single-channel images.

    Two 5x5 convolution layers (6 and 16 feature maps) with 2x2 pooling
    are followed by an MLP with hidden layers of 120 and 84 units and a
    10-way softmax.  Rectifiers are used everywhere else.

    Returns
    -------
    convnet : LeNet
        The initialized network.
    """
    feature_maps = [6, 16]
    mlp_hiddens = [120, 84]
    conv_sizes = [5, 5]
    pool_sizes = [2, 2]
    image_size = (28, 28)
    output_size = 10

    # The above are from LeCun's paper. The blocks example had:
    #    feature_maps = [20, 50]
    #    mlp_hiddens = [500]

    # Use ReLUs everywhere and softmax for the final prediction
    conv_activations = [Rectifier() for _ in feature_maps]
    mlp_activations = [Rectifier() for _ in mlp_hiddens] + [Softmax()]
    # zip() is materialised so the sizes are real lists rather than
    # one-shot iterators on Python 3.
    convnet = LeNet(conv_activations, 1, image_size,
                    filter_sizes=list(zip(conv_sizes, conv_sizes)),
                    feature_maps=feature_maps,
                    pooling_sizes=list(zip(pool_sizes, pool_sizes)),
                    top_mlp_activations=mlp_activations,
                    top_mlp_dims=mlp_hiddens + [output_size],
                    border_mode='valid',
                    weights_init=Uniform(width=.2),
                    biases_init=Constant(0))
    # We push initialization config to set different initialization schemes
    # for convolutional layers.
    convnet.push_initialization_config()
    convnet.layers[0].weights_init = Uniform(width=.2)
    convnet.layers[1].weights_init = Uniform(width=.09)
    convnet.top_mlp.linear_transformations[0].weights_init = Uniform(width=.08)
    convnet.top_mlp.linear_transformations[1].weights_init = Uniform(width=.11)
    convnet.initialize()

    return convnet
    def __init__(self, config, **kwargs):
        """Add destination and time classifier heads to the base model.

        Both heads are MLPs with rectifier hidden layers and a softmax
        output, fed with ``config.dim_hidden[-1]`` inputs.  The target
        classes from ``config`` are stored as shared variables, and
        ``'input_time'`` is appended to ``self.inputs``.
        """
        super(Model, self).__init__(config, **kwargs)

        def softmax_head(hidden_dims, output_dim, name):
            # Rectifier on every hidden layer, softmax on the output.
            head_activations = [Rectifier() for _ in hidden_dims] + [Softmax()]
            head_dims = [config.dim_hidden[-1]] + hidden_dims + [output_dim]
            return MLP(activations=head_activations,
                       dims=head_dims,
                       name=name)

        def shared_classes(values, name):
            # Shared variable holding the classes in Theano's float type.
            as_array = numpy.array(values, dtype=theano.config.floatX)
            return theano.shared(as_array, name=name)

        self.dest_mlp = softmax_head(config.dim_hidden_dest,
                                     config.dim_output_dest,
                                     'dest_mlp')
        self.time_mlp = softmax_head(config.dim_hidden_time,
                                     config.dim_output_time,
                                     'time_mlp')

        self.dest_classes = shared_classes(config.dest_tgtcls, 'dest_classes')
        self.time_classes = shared_classes(config.time_tgtcls, 'time_classes')

        self.inputs.append('input_time')
        self.children.extend([self.dest_mlp, self.time_mlp])
Beispiel #12
0
def build_mlp(features_int, features_cat, labels, labels_mean):
    """Build a single MLP on the concatenated features with a MAPE cost.

    The interval and categorical features are concatenated along axis 1,
    passed through the MLP, and the prediction is scored with
    ``MAPECost`` against ``labels`` and ``labels_mean``.

    Returns a tuple ``(cost_dropout1, parameters, cost)``: the cost of the
    graph with dropout applied, the parameters of that graph, and the cost
    without dropout.
    """

    inputs = tensor.concatenate([features_int, features_cat], axis=1)

    # NOTE(review): four activations are listed, but dims has four entries
    # and therefore describes only three layers -- this looks inconsistent;
    # confirm the intended architecture against the MLP brick.
    mlp = MLP(activations=[Rectifier(),
                           Rectifier(),
                           Rectifier(), None],
              dims=[337, 800, 1200, 1],
              weights_init=IsotropicGaussian(),
              biases_init=Constant(1))
    mlp.initialize()

    prediction = mlp.apply(inputs)
    cost = MAPECost().apply(prediction, labels, labels_mean)

    cg = ComputationGraph(cost)
    # Disabled variant applying dropout to an INPUT-role variable:
    #cg_dropout0   = apply_dropout(cg, [VariableFilter(roles=[INPUT])(cg.variables)[1]], .2)
    # Dropout (rate .2) on the OUTPUT-role variables at positions 1, 3, 5.
    cg_dropout1 = apply_dropout(cg, [
        VariableFilter(roles=[OUTPUT])(cg.variables)[1],
        VariableFilter(roles=[OUTPUT])(cg.variables)[3],
        VariableFilter(roles=[OUTPUT])(cg.variables)[5]
    ], .2)
    cost_dropout1 = cg_dropout1.outputs[0]

    return cost_dropout1, cg_dropout1.parameters, cost  # alternative without dropout: cost, cg.parameters, cost
Beispiel #13
0
 def __init__(self, filter_size, num_filters, num_channels,
              noise_batch_size, image_size=(None, None), step=(1, 1),
              border_mode='valid', tied_biases=True, prior_mean=0,
              prior_noise_level=0, **kwargs):
     """Create the child bricks and store the configuration.

     A convolution, a rectifier and a second convolution named ``'mask'``
     are created and appended to the ``children`` keyword argument before
     the parent constructor runs; every remaining argument is stored on
     the instance under the same name.
     """
     self.convolution = Convolutional()
     self.rectifier = Rectifier()
     self.mask = Convolutional(name='mask')

     # Extend a caller-supplied children list instead of replacing it.
     child_bricks = kwargs.setdefault('children', [])
     child_bricks.extend([self.convolution, self.rectifier, self.mask])
     super(NoisyConvolutional2, self).__init__(**kwargs)

     # Convolution geometry.
     self.filter_size = filter_size
     self.num_filters = num_filters
     self.num_channels = num_channels
     # Noise settings.
     self.noise_batch_size = noise_batch_size
     # Remaining convolution options.
     self.image_size = image_size
     self.step = step
     self.border_mode = border_mode
     self.tied_biases = tied_biases
     # Prior settings.
     self.prior_mean = prior_mean
     self.prior_noise_level = prior_noise_level
Beispiel #14
0
def build_mlp(features_car_cat, features_car_int, features_nocar_cat,
              features_nocar_int, features_cp, features_hascar, means, labels):
    """Build an MLP on location-based features with a MAPE cost.

    The input is the concatenation (axis 1) of ``features_hascar`` with
    rows of ``means['cp']`` and ``means['dep']`` selected by the first and
    second column of ``features_cp``.  The car / no-car feature arguments
    are accepted but not used here.

    Returns
    -------
    tuple
        ``(prediction, cost_dropout1, parameters, cost)``: the MLP output,
        the cost of the graph with dropout applied, the parameters of that
        graph, and the cost without dropout.
    """
    features = tensor.concatenate([
        features_hascar, means['cp'][features_cp[:, 0]],
        means['dep'][features_cp[:, 1]]
    ],
                                  axis=1)

    mlp = MLP(activations=[Rectifier(), Rectifier(), None],
              dims=[5, 50, 50, 1],
              weights_init=IsotropicGaussian(.1),
              biases_init=Constant(0),
              name='mlp')
    mlp.initialize()

    prediction = mlp.apply(features)

    cost = MAPECost().apply(labels, prediction)

    cg = ComputationGraph(cost)
    input_var = VariableFilter(roles=[INPUT])(cg.variables)
    # Written as a call so the statement is valid on Python 2 and 3.
    print(input_var)

    # Dropout (rate .4) on the INPUT-role variables at positions 3 and 5.
    cg_dropout1 = apply_dropout(cg, [input_var[3], input_var[5]], .4)
    cost_dropout1 = cg_dropout1.outputs[0]

    return prediction, cost_dropout1, cg_dropout1.parameters, cost
Beispiel #15
0
def test_defaults_sequence2():
    """A DefaultsSequence mixing conv and dense bricks yields shape (1, 12).

    The sequence is configured with constant initializers, compiled, and
    run on an all-ones input of shape (1, 3, 4, 4).
    """
    bricks = [
        Convolutional(num_filters=10, stride=(2, 2), filter_size=(3, 3)),
        BatchNormalization(),
        Rectifier(),
        Flattener(),
        Linear(output_dim=10),
        BatchNormalization(),
        Rectifier(),
        Linear(output_dim=12),
        BatchNormalization(),
        Rectifier(),
    ]
    seq = DefaultsSequence(input_dim=(3, 4, 4), lists=bricks)

    # Configure initialization on the sequence, then push it down.
    seq.weights_init = Constant(1.0)
    seq.biases_init = Constant(0.0)
    seq.push_allocation_config()
    seq.push_initialization_config()
    seq.initialize()

    x = T.tensor4('input')
    compiled = theano.function([x], [seq.apply(x)])

    ones_batch = np.ones((1, 3, 4, 4), dtype=theano.config.floatX)
    res = compiled(ones_batch)[0]
    assert_allclose(res.shape, (1, 12))
Beispiel #16
0
def setup_ff_network(in_dim, out_dim, num_layers, num_neurons):
    """Setup a feedforward neural network.

    Every layer, including the output layer, uses a rectifier activation.
    The returned network is configured but not initialized.

    Parameters
    ----------
    in_dim : int
        input dimension of network
    out_dim : int
        output dimension of network
    num_layers : int
        number of hidden layers
    num_neurons : int
        number of neurons of each layer

    Returns
    -------
    net : object
        network structure
    """
    # One activation for the output layer plus one per hidden layer.
    activations = [Rectifier()]
    activations.extend(Rectifier() for _ in range(num_layers))
    dims = [in_dim] + [num_neurons] * num_layers + [out_dim]

    net = MLP(activations=activations,
              dims=dims,
              weights_init=IsotropicGaussian(),
              biases_init=Constant(0.01))

    return net
Beispiel #17
0
    def __init__(self, image_dimension, **kwargs):
        """Assemble the convolutional stack, flattener and top MLP.

        Three blocks of 3x3 'half'-padded convolutions with rectifiers are
        built (2 x 32, 3 x 64 and 4 x 128 filters), each followed by 2x2
        max-pooling.  The sequence takes 3 input channels and
        ``image_dimension`` as image size.  Remaining keyword arguments
        are forwarded to the parent constructor.
        """
        # (number of convolutions, filters per convolution) for each block.
        block_spec = ((2, 32), (3, 64), (4, 128))

        layers = []
        for num_convs, num_filters in block_spec:
            for _ in range(num_convs):
                layers.append(
                    Convolutional((3, 3), num_filters, border_mode='half'))
                layers.append(Rectifier())
            # Every block is closed by a maxpool with size=(2, 2).
            layers.append(MaxPooling((2, 2)))

        self.conv_sequence = ConvolutionalSequence(
            layers, 3, image_size=image_dimension)

        flattener = Flattener()

        self.top_mlp = MLP(activations=[Rectifier(), Logistic()],
                           dims=[500, 1])

        application_methods = [self.conv_sequence.apply,
                               flattener.apply,
                               self.top_mlp.apply]

        super(VGGNet, self).__init__(application_methods,
                                     biases_init=Constant(0),
                                     weights_init=Uniform(width=.1),
                                     **kwargs)
def test_fully_layer():
	batch_size=2
	x = T.tensor4();
	y = T.ivector()
	V = 200
	layer_conv = Convolutional(filter_size=(5,5),num_filters=V,
				name="toto",
				weights_init=IsotropicGaussian(0.01),
				biases_init=Constant(0.0))
	# try with no bias
	activation = Rectifier()
	pool = MaxPooling(pooling_size=(2,2))

	convnet = ConvolutionalSequence([layer_conv, activation, pool], num_channels=15,
					image_size=(10,10),
					name="conv_section")
	convnet.push_allocation_config()
	convnet.initialize()
	output=convnet.apply(x)
	batch_size=output.shape[0]
	output_dim=np.prod(convnet.get_dim('output'))
	result_conv = output.reshape((batch_size, output_dim))
	mlp=MLP(activations=[Rectifier().apply], dims=[output_dim, 10],
				weights_init=IsotropicGaussian(0.01),
				biases_init=Constant(0.0))
	mlp.initialize()
	output=mlp.apply(result_conv)
	cost = T.mean(Softmax().categorical_cross_entropy(y.flatten(), output))
	cg = ComputationGraph(cost)
	W = VariableFilter(roles=[WEIGHT])(cg.variables)
	B = VariableFilter(roles=[BIAS])(cg.variables)
	W = W[0]; b = B[0]

	inputs_fully = VariableFilter(roles=[INPUT], bricks=[Linear])(cg)
	outputs_fully = VariableFilter(roles=[OUTPUT], bricks=[Linear])(cg)
	var_input=inputs_fully[0]
	var_output=outputs_fully[0]
	
	[d_W,d_S,d_b] = T.grad(cost, [W, var_output, b])

	d_b = d_b.dimshuffle(('x',0))
	d_p = T.concatenate([d_W, d_b], axis=0)
	x_value = 1e3*np.random.ranf((2,15, 10, 10))
	f = theano.function([x,y], [var_input, d_S, d_p], allow_input_downcast=True, on_unused_input='ignore')
	A, B, C= f(x_value, [5, 0])
	A = np.concatenate([A, np.ones((2,1))], axis=1)
	print 'A', A.shape
	print 'B', B.shape
	print 'C', C.shape

	print lin.norm(C - np.dot(np.transpose(A), B), 'fro')

	return
	
	"""
Beispiel #19
0
    def __init__(self, filter_size, num_filters, num_channels,
                 batch_size=None, mid_noise=False, out_noise=False,
                 tied_noise=False, tied_sigma=False, noise_rate=None,
                 noise_batch_size=None, prior_noise_level=None,
                 image_size=(None, None), step=(1, 1), **kwargs):
        """Store the configuration and create the bricks of both stages.

        Each of the two stages has a batch normalization, a rectifier and
        a convolution; a ``SpatialNoise`` brick is added to the first
        stage when ``mid_noise`` is true and to the second when
        ``out_noise`` is true.  The created bricks are appended to the
        ``children`` keyword argument before the parent constructor runs.
        ``border_mode`` is fixed to ``'half'`` and ``tied_biases`` to
        ``True``.
        """
        self.filter_size = filter_size
        self.num_filters = num_filters
        self.batch_size = batch_size
        self.num_channels = num_channels
        self.image_size = image_size
        self.mid_noise = mid_noise
        self.noise_batch_size = noise_batch_size
        self.noise_rate = noise_rate
        self.step = step
        self.border_mode = 'half'
        self.tied_biases = True

        def optional_noise(name, enabled):
            # Returns a noise brick, or None when that noise is disabled.
            if not enabled:
                return None
            return SpatialNoise(name=name,
                                noise_rate=self.noise_rate,
                                tied_noise=tied_noise,
                                tied_sigma=tied_sigma,
                                prior_noise_level=prior_noise_level)

        # First stage.
        self.b0 = SpatialBatchNormalization(name='b0')
        self.r0 = Rectifier(name='r0')
        self.n0 = optional_noise('n0', mid_noise)
        self.c0 = Convolutional(name='c0')
        # Second stage.
        self.b1 = SpatialBatchNormalization(name='b1')
        self.r1 = Rectifier(name='r1')
        self.n1 = optional_noise('n1', out_noise)
        self.c1 = Convolutional(name='c1')

        # Register every brick that was created, skipping absent noise.
        candidates = (self.c0, self.b0, self.r0, self.n0,
                      self.c1, self.b1, self.r1, self.n1)
        present = []
        for brick in candidates:
            if brick is not None:
                present.append(brick)
        kwargs.setdefault('children', []).extend(present)
        super(ResidualConvolutional, self).__init__(**kwargs)
Beispiel #20
0
def build_mlp(features_car_cat, features_car_int, features_nocar_cat,
              features_nocar_int, features_cp, features_hascar, means, labels):
    """Build the combined car / no-car / location model with a MAPE cost.

    The prediction is the no-car MLP output, plus the car MLP output gated
    by ``features_hascar``, plus the prediction returned by
    ``build_mlp_onlyloc``; the sum is multiplied by a 1 -> 1 linear MLP
    applied to the first column of ``features_nocar_int``.

    Returns
    -------
    tuple
        ``(prediction, cost_dropout1, parameters, cost)``: the final
        prediction, the cost of the graph with dropout applied, the
        parameters of that graph, and the cost without dropout.
    """
    mlp_car = MLP(activations=[Rectifier(), Rectifier(), None],
                  dims=[8 + 185, 200, 200, 1],
                  weights_init=IsotropicGaussian(.1),
                  biases_init=Constant(0),
                  name='mlp_interval_car')
    mlp_car.initialize()
    mlp_nocar = MLP(activations=[Rectifier(), Rectifier(), None],
                    dims=[5 + 135, 200, 200, 1],
                    weights_init=IsotropicGaussian(.1),
                    biases_init=Constant(0),
                    name='mlp_interval_nocar')
    mlp_nocar.initialize()

    feature_car = tensor.concatenate((features_car_cat, features_car_int),
                                     axis=1)
    feature_nocar = tensor.concatenate(
        (features_nocar_cat, features_nocar_int), axis=1)
    prediction = mlp_nocar.apply(feature_nocar)
    # gating with the last feature : does the dude own a car
    prediction += tensor.addbroadcast(features_hascar,
                                      1) * mlp_car.apply(feature_car)

    # Only the prediction of the location-only model is used.
    prediction_loc, _, _, _, = \
            build_mlp_onlyloc(features_car_cat, features_car_int,
                              features_nocar_cat, features_nocar_int,
                              features_cp, features_hascar,
                              means, labels)
    prediction += prediction_loc

    # add crm
    mlp_crm = MLP(activations=[None],
                  dims=[1, 1],
                  weights_init=IsotropicGaussian(.1),
                  biases_init=Constant(0),
                  name='mlp_crm')
    mlp_crm.initialize()
    # First column of the no-car interval features, kept two-dimensional.
    crm = features_nocar_int[:, 0][:, None]
    prediction = prediction * mlp_crm.apply(crm)

    cost = MAPECost().apply(labels, prediction)

    cg = ComputationGraph(cost)
    input_var = VariableFilter(roles=[INPUT])(cg.variables)
    # Written as a call so the statement is valid on Python 2 and 3.
    print(input_var)

    # Dropout (rate .4) on the INPUT-role variables at positions 6 and 7.
    cg_dropout1 = apply_dropout(cg, [input_var[6], input_var[7]], .4)
    cost_dropout1 = cg_dropout1.outputs[0]

    return prediction, cost_dropout1, cg_dropout1.parameters, cost
Beispiel #21
0
 def apply_cnn(self, l_emb1, l_size1, l_emb2, l_size2, r_emb1, r_size1,
               r_emb2, r_size2, embedding_size, mycnf):
     """Apply left/right convolutions of several widths to two inputs.

     For every width in ``mycnf['cnn_config']['filter_sizes']`` one
     convolution is created for the left embeddings and one for the right
     embeddings.  Each is applied to the corresponding embedding of both
     inputs (``*_emb1`` and ``*_emb2``), max-pooled over all positions and
     flattened; left and right results are concatenated along axis 1.

     All four sizes must be equal, because every embedding is reshaped
     with the same length.

     Returns
     -------
     tuple
         ``(outpools1, outpools2, fv_len)``: the features of the first and
         second input, and the feature length (twice the number of filters
         per width, summed over widths).

     Raises
     ------
     ValueError
         If no filter size is configured.
     """
     assert l_size1 == r_size1
     assert l_size2 == r_size2
     # Was ``l_size1 == l_size1``, which can never fail.
     assert l_size1 == l_size2
     max_len = l_size1
     filter_sizes = mycnf['cnn_config']['filter_sizes']
     num_filters = mycnf['cnn_config']['num_filters']
     if not filter_sizes:
         raise ValueError("mycnf['cnn_config']['filter_sizes'] is empty")

     def as_conv_input(emb):
         # (batch, 1 channel, max_len, embedding_size)
         return emb.flatten().reshape(
             (emb.shape[0], 1, max_len, embedding_size))

     # The reshaped inputs do not depend on the filter width.
     l_convinp1 = as_conv_input(l_emb1)
     l_convinp2 = as_conv_input(l_emb2)
     r_convinp1 = as_conv_input(r_emb1)
     r_convinp2 = as_conv_input(r_emb2)

     fv_len = 0
     pools1 = []
     pools2 = []
     for fw in filter_sizes:
         conv_left = ConvolutionalActivation(
             activation=Rectifier().apply,
             filter_size=(fw, embedding_size),
             num_filters=num_filters,
             num_channels=1,
             image_size=(max_len, embedding_size),
             name="conv" + str(fw) + l_emb1.name,
             seed=self.curSeed)
         conv_right = ConvolutionalActivation(
             activation=Rectifier().apply,
             filter_size=(fw, embedding_size),
             num_filters=num_filters,
             num_channels=1,
             image_size=(max_len, embedding_size),
             name="conv" + str(fw) + r_emb1.name,
             seed=self.curSeed)
         # Pool over every position the filter can take.
         pooling = MaxPooling((max_len - fw + 1, 1), name="pool" + str(fw))
         initialize([conv_left, conv_right])
         l_pool1 = pooling.apply(conv_left.apply(l_convinp1)).flatten(2)
         l_pool2 = pooling.apply(conv_left.apply(l_convinp2)).flatten(2)
         r_pool1 = pooling.apply(conv_right.apply(r_convinp1)).flatten(2)
         r_pool2 = pooling.apply(conv_right.apply(r_convinp2)).flatten(2)
         pools1.append(T.concatenate([l_pool1, r_pool1], axis=1))
         pools2.append(T.concatenate([l_pool2, r_pool2], axis=1))
         fv_len += conv_left.num_filters * 2

     if len(pools1) == 1:
         outpools1, outpools2 = pools1[0], pools2[0]
     else:
         outpools1 = T.concatenate(pools1, axis=1)
         outpools2 = T.concatenate(pools2, axis=1)
     return outpools1, outpools2, fv_len
Beispiel #22
0
 def generate_elementary_block3(self, index, to_index):
     """Return the list of bricks forming one elementary block.

     The list holds, in order: a 1x1 convolution (512 -> 128 channels), a
     ``ParallelSum3`` brick, a rectifier, a 7x7 convolution (128 -> 64), a
     rectifier, a 7x7 convolution (64 -> 128) and a 1x1 convolution
     (128 -> 1).  Brick names are derived from ``index`` and ``to_index``.
     """
     def conv(name, size, pad, filters, channels):
         # All convolutions share bias handling and initialization scheme;
         # each call creates its own initializer objects.
         return Convolutional(filter_size=(size, size),
                              num_filters=filters,
                              border_mode=(pad, pad),
                              use_bias=True,
                              tied_biases=True,
                              name=name,
                              biases_init=Constant(0.),
                              weights_init=IsotropicGaussian(0.01),
                              num_channels=channels)

     number_of_channels = 512
     idx = str(index)
     link = idx + 'to' + str(to_index)

     bricks = []
     bricks.append(conv('fconv7_' + idx, 1, 0, 128, number_of_channels))
     bricks.append(ParallelSum3())
     bricks.append(Rectifier(name='relu7_' + idx))
     bricks.append(conv('fconv7_' + link + '_step1', 7, 3, 64, 128))
     bricks.append(Rectifier(name='relu7_' + link + '_step1'))
     bricks.append(conv('fconv7_' + link + '_step2', 7, 3, 128, 64))
     bricks.append(conv('fconv7_output_' + idx, 1, 0, 1, 128))
     return bricks
Beispiel #23
0
def test_convolutional_sequence_with_no_input_size():
    """A sequence without an image size works only with tied biases.

    With untied biases initialization must fail with a ValueError about
    the bias size; with tied biases initialization and application must
    succeed and produce a 4-dimensional output.
    """
    # suppose x is outputted by some RNN
    x = tensor.tensor4('x')
    filter_size = (1, 1)
    num_filters = 2
    num_channels = 1
    pooling_size = (1, 1)
    conv = Convolutional(filter_size,
                         num_filters,
                         tied_biases=False,
                         weights_init=Constant(1.),
                         biases_init=Constant(1.))
    act = Rectifier()
    pool = MaxPooling(pooling_size)

    bad_seq = ConvolutionalSequence([conv, act, pool],
                                    num_channels,
                                    tied_biases=False)
    # Raw string: "\S" is a regex class, not a string escape.
    assert_raises_regexp(ValueError, r'Cannot infer bias size \S+',
                         bad_seq.initialize)

    seq = ConvolutionalSequence([conv, act, pool],
                                num_channels,
                                tied_biases=True)
    try:
        seq.initialize()
        out = seq.apply(x)
    except TypeError:
        assert False, "This should have succeeded"

    assert out.ndim == 4
Beispiel #24
0
def generation(z_list, n_latent, hu_decoder, n_out, y):
    """Decode every latent sample and average outputs and losses.

    When ``hu_decoder`` is 0 the work is delegated to
    ``generation_simple``.  Otherwise each ``z`` goes through a rectifier
    layer (``n_latent`` -> ``hu_decoder``), a linear layer
    (``hu_decoder`` -> ``n_out``) and a logistic; the outputs and their
    ``cross_entropy_loss`` against ``y`` are averaged over ``z_list``.

    Returns the tuple ``(agg_y_hat, agg_logpy_xz)``.
    """
    logger.info('in generation: n_latent: %d, hu_decoder: %d', n_latent,
                hu_decoder)
    if hu_decoder == 0:
        return generation_simple(z_list, n_latent, n_out, y)

    latent_to_hidden = MLP(activations=[Rectifier()],
                           dims=[n_latent, hu_decoder],
                           name='latent_to_hidDecoder')
    initialize([latent_to_hidden])
    hidden_to_output = Linear(name='hidDecoder_to_output',
                              input_dim=hu_decoder,
                              output_dim=n_out)
    initialize([hidden_to_output])
    squash = Logistic(name='y_hat_vae')

    loss_total = 0.
    y_hat_total = 0.
    for z in z_list:
        hidden = latent_to_hidden.apply(z)
        y_hat = squash.apply(hidden_to_output.apply(hidden))  # reconstructed x
        loss_total += cross_entropy_loss(y_hat, y)
        y_hat_total += y_hat

    # Turn the sums into means over the samples.
    loss_total /= len(z_list)
    y_hat_total /= len(z_list)
    return y_hat_total, loss_total
Beispiel #25
0
def build_model(images, labels):
    """Assemble the conv-net + batch-normalized MLP and return its cost.

    The network is a convolutional sequence, a flattener and a two-layer
    ``BatchNormalizedMLP`` (Rectifier, then Softmax) chained in a
    ``FeedforwardSequence``. The returned cost is the categorical
    cross-entropy of the prediction against ``labels.flatten()`` plus
    0.01 times the mean squared value of each of the two MLP weight
    matrices.

    Parameters
    ----------
    images
        Symbolic input fed to the network.
    labels
        Symbolic targets; flattened before the cross-entropy is computed.

    Returns
    -------
    The regularized symbolic cost.
    """
    # Bottom: convolutional feature extractor.
    conv_net = convolutional_sequence((3, 3), 16, (160, 160))
    conv_net._push_allocation_config()

    # Middle: flatten the feature maps for the dense layers.
    flatten = Flattener()

    # Top: MLP whose input size is the total number of conv outputs.
    flat_dim = numpy.prod(conv_net.get_dim('output'))
    classifier = BatchNormalizedMLP(
        [Rectifier(name='non_linear_9'), Softmax(name='non_linear_11')],
        [flat_dim, 1024, 10],
        weights_init=IsotropicGaussian(),
        biases_init=Constant(0))

    # Chain the three stages and initialize them together.
    network = FeedforwardSequence(
        [conv_net.apply, flatten.apply, classifier.apply])
    network.push_initialization_config()
    network.initialize()

    prediction = network.apply(images)
    cost_noreg = CategoricalCrossEntropy().apply(labels.flatten(), prediction)

    # Weight penalty on both linear layers of the MLP.
    weights = Selector([classifier]).get_parameters('W')
    mlp_brick_name = 'batchnormalizedmlp'
    first_w = weights['/%s/linear_0.W' % mlp_brick_name]
    second_w = weights['/%s/linear_1.W' % mlp_brick_name]
    first_penalty = .01 * (first_w ** 2).mean()
    second_penalty = .01 * (second_w ** 2).mean()

    return cost_noreg + first_penalty + second_penalty
Beispiel #26
0
def test_convolutional_layer():
    """Check ConvolutionalLayer output on an all-ones input.

    With weights fixed at 1 and biases at 5, every output value is expected
    to equal ``prod(filter_size) * num_channels + 5`` and the output shape
    is expected to be ``(batch_size, num_filters, 5, 4)``.
    """
    num_channels = 4
    batch_size = 5
    pooling_size = 3
    num_filters = 3
    filter_size = (3, 3)
    image_size = (17, 13)

    inputs = tensor.tensor4('x')
    relu = Rectifier().apply
    layer = ConvolutionalLayer(relu,
                               filter_size,
                               num_filters, (pooling_size, pooling_size),
                               num_channels,
                               image_size=image_size,
                               weights_init=Constant(1.),
                               biases_init=Constant(5.))
    layer.initialize()

    compiled = function([inputs], layer.apply(inputs))

    x_val = numpy.ones((batch_size, num_channels) + image_size,
                       dtype=theano.config.floatX)
    ones_out = numpy.ones((batch_size, num_filters, 5, 4))
    expected = numpy.prod(filter_size) * num_channels * ones_out + 5
    assert_allclose(compiled(x_val), expected)
Beispiel #27
0
def create_OLD_kim_cnn(layer0_input, embedding_size, input_len, config, pref):
    '''
        One convolution layer with one branch per configured filter width.

        For every width ``fw`` listed in ``config[pref + '_filterwidth']``
        (whitespace separated integers) a ``Convolutional`` brick with a
        ``(fw, embedding_size)`` filter is applied to ``layer0_input``,
        max-pooled over all ``input_len - fw + 1`` positions, rectified and
        flattened. The per-width results are concatenated along axis 1.

        layer0_input   symbolic input; presumably shaped
                       (batch, 1, input_len, embedding_size), matching the
                       num_channels/image_size given to the convolution
                       -- TODO confirm against callers
        embedding_size second dimension of each filter and of the image
        input_len      first dimension of the image
        config         mapping providing pref + '_filterwidth' and
                       pref + '_num_filters'
        pref           prefix for the config keys and activation names

        Returns (concatenated features, total number of filters).
        Raises ValueError when no filter width is configured.
    '''
    filter_width_list = [
        int(fw) for fw in config[pref + '_filterwidth'].split()
    ]
    # Single-argument call form prints identically on Python 2 and 3.
    print(filter_width_list)
    num_filters = int(config[pref + '_num_filters'])
    if not filter_width_list:
        raise ValueError(
            "no filter widths configured under %r" % (pref + '_filterwidth'))

    branches = []
    for fw in filter_width_list:
        num_feature_map = input_len - fw + 1  # positions left after the conv
        conv = Convolutional(filter_size=(fw, embedding_size),
                             num_filters=num_filters,
                             num_channels=1,
                             image_size=(input_len, embedding_size),
                             name="conv" + str(fw))
        # Pool over every remaining position so one value per filter is left.
        pooling = MaxPooling((num_feature_map, 1), name="pool" + str(fw))
        initialize([conv])

        branches.append(
            Flattener(name="flat" + str(fw)).apply(
                Rectifier(name=pref + 'act_' + str(fw)).apply(
                    pooling.apply(conv.apply(layer0_input)))))

    # Join all branches in one operation instead of re-concatenating the
    # growing result on every iteration; a single branch is returned as is.
    if len(branches) == 1:
        outpools = branches[0]
    else:
        outpools = T.concatenate(branches, axis=1)
    name_rep_len = num_filters * len(filter_width_list)
    return outpools, name_rep_len
Beispiel #28
0
def test_convolutional_sequence():
    """Check channel propagation and output of a two-brick ConvolutionalSequence.

    After ``push_allocation_config`` the first brick must see the sequence's
    4 input channels and the second the first brick's 5 filters. With all
    weights at 1, first-layer biases at 5 and the second layer's bias
    disabled, an all-ones input must give ``(9 * 4 + 5) * 4 * 5`` everywhere
    in an output of shape ``(batch_size, 4, 4, 3)``.
    """
    num_channels = 4
    pooling_size = 3
    batch_size = 5
    inputs = tensor.tensor4('x')
    relu = Rectifier().apply

    first = ConvolutionalLayer(relu, (3, 3),
                               5, (pooling_size, pooling_size),
                               weights_init=Constant(1.),
                               biases_init=Constant(5.))
    second = ConvolutionalActivation(relu, (2, 2),
                                     4,
                                     weights_init=Constant(1.))

    stack = ConvolutionalSequence([first, second],
                                  num_channels,
                                  image_size=(17, 13))
    stack.push_allocation_config()
    assert first.num_channels == 4
    assert second.num_channels == 5

    # Switch the second convolution's bias off before the graph is built.
    second.convolution.use_bias = False
    outputs = stack.apply(inputs)
    stack.initialize()
    compiled = function([inputs], outputs)

    x_val = numpy.ones((batch_size, 4, 17, 13), dtype=theano.config.floatX)
    expected_value = (9 * 4 + 5) * 4 * 5
    y_val = numpy.ones((batch_size, 4, 4, 3)) * expected_value
    assert_allclose(compiled(x_val), y_val)
Beispiel #29
0
def create_cnn_general(embedded_x, mycnf, max_len, embedding_size, inp_conv=False):
    """Build one conv + max-pool branch per filter size and concatenate them.

    Parameters
    ----------
    embedded_x
        Symbolic input. Its ``name`` is appended to every brick name. Unless
        ``inp_conv`` is true it is flattened and reshaped to
        ``(embedded_x.shape[0], 1, max_len, embedding_size)`` first.
    mycnf : mapping
        Must provide ``mycnf['cnn_config']['filter_sizes']`` and
        ``mycnf['cnn_config']['num_filters']``.
    max_len, embedding_size : int
        Image size handed to each convolution.
    inp_conv : bool
        When true, ``embedded_x`` is fed to the convolution as is.

    Returns
    -------
    tuple
        ``(features concatenated along axis 1, summed filter count)``.
    """
    cnn_cfg = mycnf['cnn_config']
    widths = cnn_cfg['filter_sizes']
    n_filters = cnn_cfg['num_filters']

    total_features = 0
    for idx, width in enumerate(widths):
        suffix = str(width) + embedded_x.name
        branch = ConvolutionalActivation(
            activation=Rectifier().apply,
            filter_size=(width, embedding_size),
            num_filters=n_filters,
            num_channels=1,
            image_size=(max_len, embedding_size),
            name="conv" + suffix)
        # Pool across all positions left after the convolution.
        pool = MaxPooling((max_len - width + 1, 1), name="pool" + suffix)
        initialize([branch])

        if inp_conv:
            conv_in = embedded_x
        else:
            batch = embedded_x.shape[0]
            conv_in = embedded_x.flatten().reshape(
                (batch, 1, max_len, embedding_size))
        pooled = pool.apply(branch.apply(conv_in)).flatten(2)

        if idx:
            outpools = T.concatenate([outpools, pooled], axis=1)
        else:
            outpools = pooled
        total_features += branch.num_filters
    return outpools, total_features
    def __init__(self,
                 input_dim,
                 dim,
                 mlp_hidden_dims,
                 batch_size,
                 image_shape,
                 patch_shape,
                 activation=None,
                 **kwargs):
        """Create the activation, MLP and cropper children of the brick.

        Parameters
        ----------
        input_dim : int
            Added to ``dim`` to form the MLP's input dimension.
        dim : int
            Stored as ``self.dim``.
        mlp_hidden_dims : list of int
            Remaining MLP dimensions; all layers but the last use a
            Rectifier, the last has no activation.
        batch_size, image_shape, patch_shape
            Stored on the instance; ``patch_shape`` is also given to the
            cropper.
        activation : brick, optional
            Falls back to ``Tanh()`` when not given.
        **kwargs
            Passed to the parent constructor. ``self.weights_init``,
            ``self.biases_init`` and ``self.name`` are read right after it
            runs -- presumably set there from these kwargs; confirm.
        """
        super(LSTMAttention, self).__init__(**kwargs)
        self.dim = dim
        self.image_shape = image_shape
        self.patch_shape = patch_shape
        self.batch_size = batch_size

        n_rectified = len(mlp_hidden_dims) - 1
        mlp = MLP([Rectifier()] * n_rectified + [None],
                  [input_dim + dim] + mlp_hidden_dims,
                  weights_init=self.weights_init,
                  biases_init=self.biases_init,
                  name=self.name + '_mlp')

        cropper = LocallySoftRectangularCropper(
            patch_shape=patch_shape,
            hyperparameters={"cutoff": 3, "batched_window": True},
            kernel=Gaussian())

        self.children = [activation or Tanh(), mlp, cropper]
Beispiel #31
0
def construct_mlp(name,
                  hidden_dims,
                  input_dim,
                  initargs,
                  batch_normalize,
                  activations=None):
    """Build an MLP whose activations are wrapped in ``NormalizedActivation``.

    Parameters
    ----------
    name : str
        Name of the resulting MLP brick.
    hidden_dims : sequence of int
        Layer sizes after the input. When empty, a
        ``FeedforwardIdentity(dim=input_dim)`` is returned instead.
    input_dim : int
        Input dimension of the first layer.
    initargs : dict
        Extra keyword arguments forwarded to ``MLP``.
    batch_normalize
        Forwarded to every ``NormalizedActivation``.
    activations : brick or iterable of bricks, optional
        One activation per hidden layer. A single non-iterable value is
        repeated for every layer (the same object is reused); when omitted,
        a fresh ``Rectifier`` is created per layer.

    Returns
    -------
    The MLP, with ``use_bias`` switched off on every linear transformation.
    """
    # ``collections.Iterable`` was removed in Python 3.10; the ABC lives in
    # ``collections.abc``. Fall back to the old location on Python 2.
    try:
        from collections.abc import Iterable
    except ImportError:
        from collections import Iterable

    if not hidden_dims:
        return FeedforwardIdentity(dim=input_dim)

    if not activations:
        activations = [Rectifier() for _ in hidden_dims]
    elif not isinstance(activations, Iterable):
        activations = [activations] * len(hidden_dims)
    assert len(activations) == len(hidden_dims)

    # list() lets callers pass a tuple of hidden sizes as well.
    dims = [input_dim] + list(hidden_dims)
    wrapped_activations = [
        NormalizedActivation(shape=[hidden_dim],
                             name="activation_%i" % i,
                             batch_normalize=batch_normalize,
                             activation=activation)
        for i, (hidden_dim,
                activation) in enumerate(zip(hidden_dims, activations))
    ]
    mlp = MLP(name=name,
              activations=wrapped_activations,
              dims=dims,
              **initargs)
    # biases are handled by our activation function
    for layer in mlp.linear_transformations:
        layer.use_bias = False
    return mlp
Beispiel #32
0
    def __init__(self, filter_size, num_filters, num_channels,
                 batch_size=None,
                 mid_noise=False,
                 out_noise=False,
                 tied_noise=False,
                 tied_sigma=False,
                 noise_rate=None,
                 noise_batch_size=None,
                 prior_noise_level=None,
                 image_size=(None, None), step=(1, 1),
                 **kwargs):
        """Store the configuration and create the child bricks.

        Two stages are created, each with a ``SpatialBatchNormalization``,
        a ``Rectifier``, an optional ``SpatialNoise`` and a
        ``Convolutional`` brick (``b0/r0/n0/c0`` and ``b1/r1/n1/c1``).
        ``n0`` exists only when ``mid_noise`` is truthy and ``n1`` only when
        ``out_noise`` is truthy; otherwise the attribute is ``None`` and the
        brick is left out of the children. ``border_mode`` is fixed to
        ``'half'`` and ``tied_biases`` to ``True``.

        Every brick that exists is appended to ``kwargs['children']`` before
        the parent constructor is called.
        """
        def make_noise(brick_name, enabled):
            # Build a SpatialNoise brick only when the stage asks for one.
            if not enabled:
                return None
            return SpatialNoise(name=brick_name, noise_rate=self.noise_rate,
                                tied_noise=tied_noise, tied_sigma=tied_sigma,
                                prior_noise_level=prior_noise_level)

        self.filter_size = filter_size
        self.num_filters = num_filters
        self.batch_size = batch_size
        self.num_channels = num_channels
        self.image_size = image_size
        self.mid_noise = mid_noise
        self.noise_batch_size = noise_batch_size
        self.noise_rate = noise_rate
        self.step = step
        self.border_mode = 'half'
        self.tied_biases = True
        # NOTE(review): `depth` is never read in this constructor.
        depth = 2

        # First stage.
        self.b0 = SpatialBatchNormalization(name='b0')
        self.r0 = Rectifier(name='r0')
        self.n0 = make_noise('n0', mid_noise)
        self.c0 = Convolutional(name='c0')
        # Second stage.
        self.b1 = SpatialBatchNormalization(name='b1')
        self.r1 = Rectifier(name='r1')
        self.n1 = make_noise('n1', out_noise)
        self.c1 = Convolutional(name='c1')

        stages = (self.c0, self.b0, self.r0, self.n0,
                  self.c1, self.b1, self.r1, self.n1)
        children = kwargs.setdefault('children', [])
        children.extend([brick for brick in stages if brick is not None])
        super(ResidualConvolutional, self).__init__(**kwargs)
Beispiel #33
0
def create_yy_cnn(numConvLayer, conv_input, embedding_size, input_len, config, pref):
    '''
     Stack of convolutions, one filter width per layer, max-pooled at the end.

     Filter widths come from config[pref + '_filterwidth'] (whitespace
     separated integers, exactly numConvLayer of them). A layer with width fw
     gets min(config[pref + '_maxfilter'], config[pref + '_num_filters'] * fw)
     filters. After each convolution the result is reshaped to a single
     channel so the next layer sees the previous layer's filters as its
     second image dimension.

     Returns (rectified max-pooled output of the last layer,
              number of filters of the last layer).
    '''
    widths = [int(tok) for tok in config[pref + '_filterwidth'].split()]
    base_filters = int(config[pref + '_num_filters'])
    assert len(widths) == numConvLayer

    layers = []
    heights = []
    height = input_len
    for layer_idx in range(numConvLayer):
        fw = widths[layer_idx]
        filter_cap = int(config[pref + '_maxfilter'])
        layer = Convolutional(
            image_size=(height, embedding_size),
            filter_size=(fw, embedding_size),
            num_filters=min(filter_cap, base_filters * fw),
            num_channels=1
        )
        # Rows remaining after this convolution.
        height = height - fw + 1
        heights.append(height)
        # The next layer's filters span all of this layer's filters.
        embedding_size = layer.num_filters
        layers.append(layer)

    initialize(layers)

    signal = conv_input
    for idx, (layer, out_height) in enumerate(zip(layers, heights)):
        layer.name = pref+'_conv' + str(idx)
        signal = layer.apply(signal)
        signal = signal.flatten().reshape(
            (signal.shape[0], 1, out_height, layer.num_filters))

    # `fw` still holds the last layer's filter width here.
    pool_layer = MaxPooling(pooling_size=(height, 1))
    pool_layer.name = pref+'_pool_' + str(fw)
    act = Rectifier()
    act.name = 'act_' + str(fw)
    pooled = pool_layer.apply(signal).flatten(2)
    return act.apply(pooled), layers[-1].num_filters
Beispiel #34
0
 def __init__(self, filter_size, num_filters, num_channels, noise_batch_size,
              image_size=(None, None), step=(1, 1), border_mode='valid',
              tied_biases=True,
              prior_mean=0, prior_noise_level=0, **kwargs):
     """Create the child bricks and store the configuration.

     Three children are created and appended to ``kwargs['children']``
     before the parent constructor runs: an unnamed ``Convolutional``, a
     ``Rectifier`` and a ``Convolutional`` named ``'mask'``. Every
     constructor argument is then stored on the instance under its own
     name.
     """
     self.convolution = Convolutional()
     self.rectifier = Rectifier()
     self.mask = Convolutional(name='mask')
     owned = [self.convolution, self.rectifier, self.mask]
     kwargs.setdefault('children', []).extend(owned)
     super(NoisyConvolutional2, self).__init__(**kwargs)

     settings = (
         ('filter_size', filter_size),
         ('num_filters', num_filters),
         ('num_channels', num_channels),
         ('noise_batch_size', noise_batch_size),
         ('image_size', image_size),
         ('step', step),
         ('border_mode', border_mode),
         ('tied_biases', tied_biases),
         ('prior_mean', prior_mean),
         ('prior_noise_level', prior_noise_level),
     )
     for attr, value in settings:
         setattr(self, attr, value)
Beispiel #35
0
def main():
    """Train a 1-D convolutional binary classifier on IMDB with GloVe inputs.

    The model is three ``Conv1D`` + Rectifier stages (max pooling after the
    first), a mean over axis 1, a 128-unit hidden layer and a single
    sigmoid output trained with binary cross-entropy. Data is served by two
    Fuel server subprocesses (train and test) whose ports are selected by
    ``theano.config.device``; only ``gpu0`` and ``gpu1`` are configured, any
    other device raises ``KeyError``.

    The server subprocesses are terminated when training finishes or fails,
    so they neither outlive the run nor block interpreter exit.
    """
    x = T.tensor3('features')
    m = T.matrix('features_mask')
    y = T.imatrix('targets')
    # The model does not otherwise use the mask. NOTE(review): this adds the
    # mask mean to every feature value -- presumably only so the graph
    # consumes the 'features_mask' source; confirm this is intended.
    x = m.mean() + x

    embedding_size = 50
    glove_version = "vectors.6B.50d.txt"
    wstd = 0.02

    conv1 = Conv1D(filter_length=5, num_filters=128, input_dim=embedding_size,
                   weights_init=IsotropicGaussian(std=wstd),
                   biases_init=Constant(0.0))
    conv1.initialize()
    o = conv1.apply(x)
    o = Rectifier(name="conv1red").apply(o)
    o = MaxPooling1D(pooling_length=5).apply(o)

    conv2 = Conv1D(filter_length=5, num_filters=128, input_dim=128,
                   weights_init=IsotropicGaussian(std=wstd),
                   biases_init=Constant(0.0),
                   step=3,
                   name="conv2")
    conv2.initialize()
    o = conv2.apply(o)
    o = Rectifier(name="conv2rec").apply(o)

    conv3 = Conv1D(filter_length=5, num_filters=128, input_dim=128,
                   weights_init=IsotropicGaussian(std=wstd),
                   biases_init=Constant(0.0),
                   step=3,
                   name="conv3")
    conv3.initialize()
    o = conv3.apply(o)
    o = Rectifier(name="conv3rec").apply(o)

    # The fork outputs are currently unused (they were meant to feed a
    # recurrent layer). The brick is still created and initialized as before
    # so the rest of the model is constructed in the same sequence.
    fork = Fork(weights_init=IsotropicGaussian(0.02),
                biases_init=Constant(0.),
                input_dim=128,
                output_dims=[128]*3,
                output_names=['inputs', 'reset_inputs', 'update_inputs'])
    fork.initialize()
    inputs, reset_inputs, update_inputs = fork.apply(o)

    out = o.mean(axis=1)

    hidden = Linear(
        input_dim=128,
        output_dim=128,
        weights_init=Uniform(std=0.01),
        biases_init=Constant(0.))
    hidden.initialize()
    o = hidden.apply(out)
    o = Rectifier().apply(o)

    score_layer = Linear(
        input_dim=128,
        output_dim=1,
        weights_init=IsotropicGaussian(std=wstd),
        biases_init=Constant(0.),
        name="linear2")
    score_layer.initialize()
    o = score_layer.apply(o)

    probs = Sigmoid().apply(o)

    cost = - (y * T.log(probs) + (1-y) * T.log(1 - probs)).mean()
    cost.name = 'cost'
    misclassification = (y * (probs < 0.5) + (1-y) * (probs > 0.5)).mean()
    misclassification.name = 'misclassification'

    cg = ComputationGraph([cost])
    params = cg.parameters

    algorithm = GradientDescent(
        cost=cost,
        params=params,
        step_rule=CompositeRule([
            StepClipping(threshold=10),
            AdaM(),
        ]))

    # Single-argument print calls behave the same on Python 2 and 3.
    print("setting up data")
    ports = {
        'gpu0_train': 5557,
        'gpu0_test': 5558,
        'gpu1_train': 5559,
        'gpu1_test': 5560,
    }

    batch_size = 16

    def start_server(port, which_set):
        """Serve padded GloVe batches of ``which_set`` on ``port`` (blocks)."""
        fuel.server.logger.setLevel('WARN')

        dataset = IMDBText(which_set)
        n_train = dataset.num_examples
        stream = DataStream(
            dataset=dataset,
            iteration_scheme=ShuffledScheme(
                examples=n_train,
                batch_size=batch_size))
        print("loading glove")
        glove = GloveTransformer(glove_version, data_stream=stream)
        padded = Padding(
            data_stream=glove,
            mask_sources=('features',))

        fuel.server.start_server(padded, port=port, hwm=20)

    # Resolve both ports first so an unsupported device fails before any
    # subprocess has been started.
    train_port = ports[theano.config.device + '_train']
    test_port = ports[theano.config.device + '_test']
    train_p = Process(target=start_server, args=(train_port, 'train'))
    test_p = Process(target=start_server, args=(test_port, 'test'))

    try:
        train_p.start()
        test_p.start()

        sources = ('features', 'features_mask', 'targets')
        train_stream = ServerDataStream(sources, port=train_port)
        test_stream = ServerDataStream(sources, port=test_port)

        print("setting up model")

        n_examples = 25000
        model = Model(cost)
        extensions = [
            EpochProgress(batch_per_epoch=n_examples // batch_size + 1),
            TrainingDataMonitoring(
                [cost, misclassification],
                prefix='train',
                after_epoch=True),
            DataStreamMonitoring(
                [cost, misclassification],
                data_stream=test_stream,
                prefix='test',
                after_epoch=True),
            Timing(),
            Printing(),
            Plot(theano.config.device+"_result",
                 channels=[['test_misclassification',
                            'train_misclassification']],
                 after_epoch=True),
        ]

        main_loop = MainLoop(
            model=model,
            data_stream=train_stream,
            algorithm=algorithm,
            extensions=extensions)
        main_loop.run()
    finally:
        # The data servers never return on their own; stop them explicitly.
        for server in (train_p, test_p):
            if server.is_alive():
                server.terminate()
                server.join()
Beispiel #36
0
def main_run(_config, _log):
    """Train a GRU binary classifier on IMDB using GloVe word vectors.

    Parameters
    ----------
    _config : dict
        Experiment configuration. The keys read here are ``rnn_input_dim``,
        ``rnn_dim`` and ``wstd``.
    _log
        Logger; only ``info`` is called on it.

    Notes
    -----
    * The features are mapped by a rectified linear layer, run through a
      ``GatedRecurrentFull`` brick, and the last recurrent output is scored
      by a linear layer followed by a sigmoid.
    * Dropout (probability 0.5) is applied to the mapped features and to the
      last recurrent output. The optimizer and the ``Model`` use the
      dropped-out cost, while the monitoring extensions receive the original
      ``cost`` and ``misclassification`` variables.
    * Two Fuel server subprocesses are started and stored in the module
      globals ``train_p`` and ``test_p``; this function does not stop them.
    * Ports are selected by ``theano.config.device``; a device without an
      entry in the port table raises ``KeyError``.
    """
    from collections import namedtuple

    c = namedtuple("Config", _config.keys())(*_config.values())

    _log.info("Running with " + str(_config))

    import theano
    from theano import tensor as T
    import numpy as np

    from dataset import IMDBText, GloveTransformer

    from blocks.initialization import Uniform, Constant, IsotropicGaussian, NdarrayInitialization, Identity, Orthogonal
    from blocks.bricks.recurrent import LSTM, SimpleRecurrent, GatedRecurrent
    from blocks.bricks.parallel import Fork

    from blocks.bricks import Linear, Sigmoid, Tanh, Rectifier
    from blocks import bricks

    from blocks.extensions import Printing, Timing
    from blocks.extensions.monitoring import DataStreamMonitoring, TrainingDataMonitoring

    from blocks.extensions.plot import Plot
    from plot import PlotHistogram

    from blocks.algorithms import GradientDescent, Adam, Scale, StepClipping, CompositeRule, AdaDelta
    from blocks.graph import ComputationGraph, apply_dropout
    from blocks.main_loop import MainLoop
    from blocks.model import Model

    from cuboid.algorithms import AdaM, NAG
    from cuboid.extensions import EpochProgress

    from fuel.streams import DataStream, ServerDataStream
    from fuel.transformers import Padding

    from fuel.schemes import ShuffledScheme
    from Conv1D import Conv1D, MaxPooling1D
    from schemes import BatchwiseShuffledScheme
    from bricks import WeightedSigmoid, GatedRecurrentFull

    from multiprocessing import Process
    import fuel
    # `fuel.server` is used below; import the submodule explicitly instead
    # of relying on another import having loaded it.
    import fuel.server
    import logging
    from initialization import SumInitialization

    from transformers import DropSources

    global train_p
    global test_p

    x = T.tensor3("features")
    y = T.imatrix("targets")

    dropout_variables = []
    embedding_size = 300
    glove_version = "glove.6B.300d.txt"

    gloveMapping = Linear(
        input_dim=embedding_size,
        output_dim=c.rnn_input_dim,
        weights_init=Orthogonal(),
        biases_init=Constant(0.0),
        name="gloveMapping",
    )
    gloveMapping.initialize()
    o = gloveMapping.apply(x)
    o = Rectifier(name="gloveRec").apply(o)
    dropout_variables.append(o)

    # Auxiliary ("deeply supervised") head on the summed mapped vectors. Its
    # output is currently not part of the cost; the bricks are still built
    # and initialized so the model is constructed in the same sequence.
    summed_mapped_glove = o.sum(axis=1)  # take out the sequence
    glove_out = Linear(
        input_dim=c.rnn_input_dim,
        output_dim=1,  # a dimension must be an integer (was 1.0)
        weights_init=IsotropicGaussian(c.wstd),
        biases_init=Constant(0.0),
        name="mapping_to_output",
    )
    glove_out.initialize()
    deeply_sup_0 = glove_out.apply(summed_mapped_glove)
    deeply_sup_probs = Sigmoid(name="deeply_sup_softmax").apply(deeply_sup_0)

    input_dim = c.rnn_input_dim
    hidden_dim = c.rnn_dim

    gru = GatedRecurrentFull(
        hidden_dim=hidden_dim,
        activation=Tanh(),
        gate_activation=Sigmoid(),
        state_to_state_init=SumInitialization([Identity(1.0), IsotropicGaussian(c.wstd)]),
        state_to_reset_init=IsotropicGaussian(c.wstd),
        state_to_update_init=IsotropicGaussian(c.wstd),
        input_to_state_transform=Linear(
            input_dim=input_dim,
            output_dim=hidden_dim,
            weights_init=IsotropicGaussian(c.wstd),
            biases_init=Constant(0.0),
        ),
        input_to_update_transform=Linear(
            input_dim=input_dim,
            output_dim=hidden_dim,
            weights_init=IsotropicGaussian(c.wstd),
            biases_init=Constant(-1.0),
        ),
        input_to_reset_transform=Linear(
            input_dim=input_dim,
            output_dim=hidden_dim,
            weights_init=IsotropicGaussian(c.wstd),
            biases_init=Constant(-2.0),
        ),
    )
    gru.initialize()
    # Swap the first two axes before feeding the recurrent brick.
    rnn_in = o.dimshuffle(1, 0, 2)
    rnn_out = gru.apply(rnn_in)
    state_to_state = gru.rnn.state_to_state
    state_to_state.name = "state_to_state"
    # Keep only the last entry along the first axis of the recurrent output.
    o = rnn_out[-1]
    dropout_variables.append(o)

    score_layer = Linear(
        input_dim=hidden_dim,
        output_dim=1,
        weights_init=IsotropicGaussian(std=c.wstd),
        biases_init=Constant(0.0),
        name="linear2",
    )
    score_layer.initialize()
    o = score_layer.apply(o)
    probs = Sigmoid().apply(o)

    cost = -(y * T.log(probs) + (1 - y) * T.log(1 - probs)).mean()
    cost.name = "cost"
    misclassification = (y * (probs < 0.5) + (1 - y) * (probs > 0.5)).mean()
    misclassification.name = "misclassification"

    cg = ComputationGraph([cost])
    cg = apply_dropout(cg, variables=dropout_variables, drop_prob=0.5)
    params = cg.parameters

    algorithm = GradientDescent(
        cost=cg.outputs[0],
        params=params,
        step_rule=CompositeRule(
            [
                StepClipping(threshold=4),
                Adam(learning_rate=0.002, beta1=0.1, beta2=0.001),
            ]
        ),
    )

    # Single-argument print calls behave the same on Python 2 and 3.
    print("setting up data")
    ports = {
        "gpu0_train": 5557,
        "gpu0_test": 5558,
        "cuda0_train": 5557,
        "cuda0_test": 5558,
        "opencl0:0_train": 5557,
        "opencl0:0_test": 5558,
        "gpu1_train": 5559,
        "gpu1_test": 5560,
    }

    batch_size = 40

    def start_server(port, which_set):
        """Serve padded GloVe batches of ``which_set`` on ``port`` (blocks)."""
        fuel.server.logger.setLevel("WARN")
        dataset = IMDBText(which_set, sorted=True)

        n_train = dataset.num_examples
        scheme = BatchwiseShuffledScheme(examples=n_train, batch_size=batch_size)

        stream = DataStream(dataset=dataset, iteration_scheme=scheme)
        print("loading glove")
        glove = GloveTransformer(glove_version, data_stream=stream)
        padded = Padding(
            data_stream=glove,
            mask_sources=("features",),
        )

        # The model takes no mask input, so drop the mask Padding adds.
        padded = DropSources(padded, ["features_mask"])

        fuel.server.start_server(padded, port=port, hwm=20)

    train_port = ports[theano.config.device + "_train"]
    train_p = Process(target=start_server, args=(train_port, "train"))
    train_p.start()

    test_port = ports[theano.config.device + "_test"]
    test_p = Process(target=start_server, args=(test_port, "test"))
    test_p.start()

    train_stream = ServerDataStream(("features", "targets"), port=train_port)
    test_stream = ServerDataStream(("features", "targets"), port=test_port)

    print("setting up model")

    n_examples = 25000
    # Report the same value that is handed to EpochProgress below; the old
    # message divided by (batch_size + 1) and so disagreed with it.
    batches_per_epoch = n_examples // batch_size + 1
    print("Batches per epoch %d" % batches_per_epoch)
    monitor_rate = 50

    model = Model(cg.outputs[0])
    extensions = []
    extensions.append(EpochProgress(batch_per_epoch=batches_per_epoch))
    extensions.append(TrainingDataMonitoring([cost, misclassification], prefix="train", every_n_batches=monitor_rate))

    extensions.append(
        DataStreamMonitoring(
            [cost, misclassification],
            data_stream=test_stream,
            prefix="test",
            after_epoch=True,
            before_first_epoch=False,
        )
    )

    extensions.append(Timing())
    extensions.append(Printing())

    extensions.append(
        Plot(
            theano.config.device + "_result",
            channels=[["train_cost"], ["train_misclassification"]],
            every_n_batches=monitor_rate,
        )
    )

    main_loop = MainLoop(model=model, data_stream=train_stream, algorithm=algorithm, extensions=extensions)
    main_loop.run()
Beispiel #37
0
class NoisyConvolutional2(Initializable, Feedforward, Random):
    """Convolutional transformation sent through a learned noisy channel.

    Applies the noise after the Relu rather than before it: the output is
    ``relu(conv(x)) + exp(log_sigma) * N``, where ``log_sigma`` is derived
    from a 1x1 "mask" convolution of the rectified features and ``N`` is a
    stored noise tensor.

    Parameters
    ----------
    filter_size, num_filters, num_channels, image_size, step, border_mode,
    tied_biases
        Pushed unchanged to the inner ``Convolutional`` brick.
    noise_batch_size : int
        Leading dimension of the stored noise tensor ``N``.
    prior_mean : float
        Mean of the prior used in the KL term.
    prior_noise_level : float
        Log standard deviation of the prior; also the offset from which the
        clipped mask output is subtracted to obtain ``log_sigma``.
    """
    @lazy(allocation=[
        'filter_size', 'num_filters', 'num_channels', 'noise_batch_size'])
    def __init__(self, filter_size, num_filters, num_channels, noise_batch_size,
                 image_size=(None, None), step=(1, 1), border_mode='valid',
                 tied_biases=True,
                 prior_mean=0, prior_noise_level=0, **kwargs):
        self.convolution = Convolutional()
        self.rectifier = Rectifier()
        self.mask = Convolutional(name='mask')
        children = [self.convolution, self.rectifier, self.mask]
        kwargs.setdefault('children', []).extend(children)
        super(NoisyConvolutional2, self).__init__(**kwargs)
        self.filter_size = filter_size
        self.num_filters = num_filters
        self.num_channels = num_channels
        self.noise_batch_size = noise_batch_size
        self.image_size = image_size
        self.step = step
        self.border_mode = border_mode
        self.tied_biases = tied_biases
        self.prior_mean = prior_mean
        self.prior_noise_level = prior_noise_level

    def _push_allocation_config(self):
        """Copy this brick's configuration to the two inner convolutions."""
        # The main convolution mirrors this brick's own settings.
        for attr in ('filter_size', 'num_filters', 'num_channels',
                     'image_size', 'step', 'border_mode', 'tied_biases'):
            setattr(self.convolution, attr, getattr(self, attr))
        # The mask is a 1x1 convolution over the main convolution's output.
        # Its image size is read from the main convolution, which therefore
        # has to be configured first.
        self.mask.filter_size = (1, 1)
        self.mask.num_filters = self.num_filters
        self.mask.num_channels = self.num_filters
        self.mask.image_size = self.convolution.get_dim('output')[1:]
        self.mask.tied_biases = self.tied_biases

    def _allocate(self):
        """Allocate the zero-filled noise tensor ``N`` and register it."""
        out_shape = self.convolution.get_dim('output')
        N = shared_floatx_zeros((self.noise_batch_size,) + out_shape, name='N')
        add_role(N, NOISE)
        self.parameters.append(N)

    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_, application_call):
        """Apply convolution and rectifier, then add scaled stored noise.

        Parameters
        ----------
        input_ : :class:`~tensor.TensorVariable`
            The input on which to apply the transformations

        Returns
        -------
        output : :class:`~tensor.TensorVariable`
            The rectified convolution output plus ``exp(log_sigma)`` times
            the stored noise. The elementwise KL term is attached to the
            application call as the auxiliary variable ``nits``.
        """
        pre_noise = self.rectifier.apply(self.convolution.apply(input_))
        # log-sigma = prior level minus the mask output, clipped to [-16, 16].
        noise_level = (self.prior_noise_level
                - tensor.clip(self.mask.apply(pre_noise), -16, 16))
        noise_level = copy_and_tag_noise(
                noise_level, self, LOG_SIGMA, 'log_sigma')
        # Allow incomplete batches by just taking the noise that is needed
        noise = self.parameters[0][:noise_level.shape[0], :, :, :]
        # Closed-form KL divergence between N(pre_noise, exp(noise_level)^2)
        # and the prior N(prior_mean, exp(prior_noise_level)^2).
        kl = (
            self.prior_noise_level - noise_level
            + 0.5 * (
                tensor.exp(2 * noise_level)
                + (pre_noise - self.prior_mean) ** 2
                ) / tensor.exp(2 * self.prior_noise_level)
            - 0.5
            )
        application_call.add_auxiliary_variable(kl, roles=[NITS], name='nits')
        return pre_noise + tensor.exp(noise_level) * noise

    def get_dim(self, name):
        """Return the dimension of ``name``.

        ``input_`` and ``output`` are answered by the inner convolution and
        ``nits`` has the shape of the output; anything else is delegated to
        the parent class.
        """
        if name in ('input_', 'output'):
            return self.convolution.get_dim(name)
        if name == 'nits':
            return self.convolution.get_dim('output')
        return super(NoisyConvolutional2, self).get_dim(name)

    @property
    def num_output_channels(self):
        """Number of output channels, equal to ``num_filters``."""
        return self.num_filters
Beispiel #38
0
class ResidualConvolutional(Initializable):
    @lazy(allocation=['filter_size', 'num_filters', 'num_channels'])
    def __init__(self, filter_size, num_filters, num_channels,
                 batch_size=None,
                 mid_noise=False,
                 out_noise=False,
                 tied_noise=False,
                 tied_sigma=False,
                 noise_rate=None,
                 noise_batch_size=None,
                 prior_noise_level=None,
                 image_size=(None, None), step=(1, 1),
                 **kwargs):
        """Create the child bricks of a two-convolution residual unit.

        Two stages are built, each made of a spatial batch normalization,
        a rectifier, an optional ``SpatialNoise`` brick and a convolution
        (``b0``/``r0``/``n0``/``c0`` and ``b1``/``r1``/``n1``/``c1``).
        The convolutions are created unconfigured; their settings are
        assigned in ``_push_allocation_config``.

        Parameters
        ----------
        filter_size, num_filters, num_channels
            Stored on the brick and later pushed to the convolutions.
        batch_size
            Stored on the brick and later pushed to the convolutions.
        mid_noise : bool
            If true, create the first-stage noise brick ``n0``.
        out_noise : bool
            If true, create the second-stage noise brick ``n1``.
        tied_noise, tied_sigma, noise_rate, prior_noise_level
            Forwarded unchanged to every ``SpatialNoise`` that is created.
        noise_batch_size
            Stored on the brick and later pushed to the noise bricks.
        image_size : tuple
            Spatial size of the input; used to build the input dimensions.
        step : tuple
            Step of the first convolution; also used to subsample the
            shortcut in ``apply``.
        **kwargs
            Passed on to the parent constructor.  The child bricks are
            appended to ``kwargs['children']``.
        """
        self.filter_size = filter_size
        self.num_filters = num_filters
        self.batch_size = batch_size
        self.num_channels = num_channels
        self.image_size = image_size
        self.mid_noise = mid_noise
        self.noise_batch_size = noise_batch_size
        self.noise_rate = noise_rate
        self.step = step
        self.border_mode = 'half'
        self.tied_biases = True

        def make_noise(name, enabled):
            # Build a noise brick only when its stage has noise enabled.
            if not enabled:
                return None
            return SpatialNoise(name=name, noise_rate=self.noise_rate,
                                tied_noise=tied_noise, tied_sigma=tied_sigma,
                                prior_noise_level=prior_noise_level)

        self.b0 = SpatialBatchNormalization(name='b0')
        self.r0 = Rectifier(name='r0')
        self.n0 = make_noise('n0', mid_noise)
        self.c0 = Convolutional(name='c0')
        self.b1 = SpatialBatchNormalization(name='b1')
        self.r1 = Rectifier(name='r1')
        self.n1 = make_noise('n1', out_noise)
        self.c1 = Convolutional(name='c1')
        # Disabled noise bricks are None and must not be registered.
        kwargs.setdefault('children', []).extend([c for c in [
            self.c0, self.b0, self.r0, self.n0,
            self.c1, self.b1, self.r1, self.n1] if c is not None])
        super(ResidualConvolutional, self).__init__(**kwargs)

    def get_dim(self, name):
        """Return the dimensions of the named input or output.

        ``input_`` is ``(num_channels,) + image_size``; ``output`` is taken
        from the second convolution ``c1``.  Any other name is delegated
        to the parent class.
        """
        if name == 'input_':
            # tuple() lets image_size be given as a list as well.
            return (self.num_channels,) + tuple(self.image_size)
        if name == 'output':
            return self.c1.get_dim(name)
        # The fallback previously named ``ResidualConvolutionalUnit`` in
        # super(), which is not this class, so unknown names failed here
        # instead of reaching the parent's get_dim.
        return super(ResidualConvolutional, self).get_dim(name)

    @property
    def num_output_channels(self):
        """Number of output channels, which is ``num_filters``."""
        return self.num_filters

    def _push_allocation_config(self):
        """Copy this brick's settings onto its child bricks.

        The first stage is configured from the input dimensions.  The
        second stage is configured from the output dimensions reported by
        the first convolution, so ``c0`` must be fully configured before
        ``b1``, ``n1`` and ``c1`` are touched.
        """
        def setup_noise(noise, channels, image_size):
            # Noise bricks only receive their shape-related settings here.
            noise.noise_batch_size = self.noise_batch_size
            noise.num_channels = channels
            noise.image_size = image_size

        def setup_conv(conv, channels, image_size, step):
            # Both convolutions share filter size, filter count and border
            # mode, and run without a bias term.
            conv.filter_size = self.filter_size
            conv.batch_size = self.batch_size
            conv.num_channels = channels
            conv.num_filters = self.num_filters
            conv.border_mode = self.border_mode
            conv.image_size = image_size
            conv.step = step
            conv.use_bias = False
            conv.push_allocation_config()

        # First stage: operates on the brick's input.
        self.b0.input_dim = self.get_dim('input_')
        self.b0.push_allocation_config()
        if self.r0:
            self.r0.push_allocation_config()
        if self.n0:
            setup_noise(self.n0, self.num_channels, self.image_size)
        setup_conv(self.c0, self.num_channels, self.image_size, self.step)

        # Second stage: operates on the first convolution's output.
        mid_dim = self.c0.get_dim('output')
        mid_image_size = mid_dim[1:]
        self.b1.input_dim = mid_dim
        self.b1.push_allocation_config()
        self.r1.push_allocation_config()
        if self.n1:
            setup_noise(self.n1, self.num_filters, mid_image_size)
        # The second convolution always uses a unit step.
        setup_conv(self.c1, self.num_filters, mid_image_size, (1, 1))

    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_):
        """Apply the residual unit and add the shortcut connection.

        The residual branch runs batch normalization, rectifier, optional
        noise and convolution twice.  The shortcut is the input itself,
        subsampled by ``step`` and zero-padded or truncated along the
        channel axis so that it can be added to the residual.

        Parameters
        ----------
        input_ : :class:`~tensor.TensorVariable`
            A 4D tensor indexed as (batch, channel, row, column).

        Returns
        -------
        output : :class:`~tensor.TensorVariable`
            The sum of the adjusted shortcut and the residual branch.
        """
        shortcut = input_
        # Batchnorm, then Relu, then Convolution
        first_conv = self.b0.apply(input_)
        first_conv = self.r0.apply(first_conv)
        if self.n0:
            first_conv = self.n0.apply(first_conv)
        first_conv = self.c0.apply(first_conv)
        # Batchnorm, then Relu, then Convolution (second time)
        second_conv = self.b1.apply(first_conv)
        second_conv = self.r1.apply(second_conv)
        if self.n1:
            second_conv = self.n1.apply(second_conv)
        # The second convolution is configured and registered as a child,
        # and get_dim('output') is taken from it, but it was never applied.
        residual = self.c1.apply(second_conv)

        # Apply stride and zero-padding to match shortcut to output
        if self.step and self.step != (1, 1):
            shortcut = shortcut[:, :, ::self.step[0], ::self.step[1]]
        if self.num_filters > self.num_channels:
            # Pad the missing channels with zeros.
            padshape = (residual.shape[0],
                        self.num_filters - self.num_channels,
                        residual.shape[2], residual.shape[3])
            shortcut = tensor.concatenate(
                [shortcut, tensor.zeros(padshape, dtype=residual.dtype)],
                axis=1)
        elif self.num_filters < self.num_channels:
            # Keep only the first num_filters channels.  Slicing to
            # num_channels, as before, left the shortcut unchanged and
            # wider than the residual.
            shortcut = shortcut[:, :self.num_filters, :, :]

        return shortcut + residual