def __init__(self, rng, input, input_shape, activation=tanh, n_kernels=4, n_out=300,
             pool_out=False, mean_pool=True, sum_out=False, concat_out=False,
             skip_gram=False, mask=None, use_bias=False):
    """Allocate a trigram layer with shared variable internal parameters."""
    self.input = input
    self.activation = activation

    # initialize weights with random values
    n_in, n_out, fan_in, fan_out = get_input_info(input_shape, sum_out=sum_out,
                                                  ngram=3, n_out=n_out)
    W_values = get_W_values(rng=rng, activation=activation, fan_in=fan_in, fan_out=fan_out,
                            n_in=n_in, n_out=n_out, n_kernels=n_kernels)
    # give T2 and T3 their own copies so the three shared variables do not
    # silently alias the same ndarray under borrow=True
    self.T1 = theano.shared(W_values, borrow=True, name="T1")
    self.T2 = theano.shared(W_values.copy(), borrow=True, name="T2")
    self.T3 = theano.shared(W_values.copy(), borrow=True, name="T3")
    self.b = theano.shared(np.zeros(shape=(n_out,), dtype=theano.config.floatX),
                           borrow=True, name="b_cov")

    # a trigram window trims 2 positions from the output (4 when skip_gram widens the window)
    offset = 4 if skip_gram else 2
    self.mask = mask[:, :-offset] if mask is not None else None

    # project every word with each position-specific tensor, then slice so that
    # position i of `left`, `center` and `right` all refer to the same trigram
    left = T.dot(input, self.T1)[:, :-offset]
    center = T.dot(input, self.T2)[:, offset // 2: -offset // 2]
    right = T.dot(input, self.T3)[:, offset:]
    cov_out = left + center + right + self.b if use_bias else left + center + right
    activation_out = activation(cov_out)

    if concat_out:
        # concatenate the output of each kernel
        trigram_sum = T.sum(activation_out, axis=1)
        self.output = trigram_sum.flatten(2)
    else:
        # pool within each word position, then sum or average over positions (axis 1)
        pooled_out = T.mean(activation_out, axis=2) if mean_pool else T.max(activation_out, axis=2)
        if self.mask is not None:
            # ignore padded positions when summing and averaging
            trigram_sum = (pooled_out * self.mask[:, :, None]).sum(axis=1)
            trigram_avg = trigram_sum / self.mask.sum(axis=1)[:, None]
        else:
            trigram_sum = T.sum(pooled_out, axis=1)
            trigram_avg = trigram_sum / input_shape[0]
        if pool_out:
            self.output = pooled_out
        elif sum_out:
            self.output = trigram_sum
        else:
            self.output = trigram_avg

    self.params = [self.T1, self.T2, self.T3, self.b] if use_bias else [self.T1, self.T2, self.T3]
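
# Illustrative sketch (not part of the original layer): how the `offset` slicing
# above lines up the three projections. With offset = 2, output position i of
# `left`, `center` and `right` corresponds to words i, i + 1 and i + 2, i.e. one
# trigram per output position, which is also why the mask is trimmed by the same
# offset. Plain NumPy index arithmetic is used so the example stands alone; the
# function name and shapes are stand-ins, not part of the repository code.
def _trigram_alignment_demo():
    import numpy as np
    words = np.arange(8)                       # token positions 0..7 of one sentence
    offset = 2                                 # 4 if skip_gram widened the window
    left = words[:-offset]                     # 0, 1, 2, 3, 4, 5
    center = words[offset // 2:-offset // 2]   # 1, 2, 3, 4, 5, 6
    right = words[offset:]                     # 2, 3, 4, 5, 6, 7
    # each output position i therefore combines the trigram (i, i + 1, i + 2)
    return list(zip(left, center, right))
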
def __init__(self, rng, input, input_shape, activation=relu, n_kernels=4, n_out=300,
             pool_out=False, sum_out=True, mean_pool=True, concat_out=False,
             mask=None, skip_gram=False, use_bias=False):
    """Allocate a UnigramLayer with shared variable internal parameters."""
    self.input = input
    self.activation = activation
    self.mask = mask

    # initialize weights with random values
    n_in, n_out, fan_in, fan_out = get_input_info(input_shape, sum_out=sum_out,
                                                  ngram=1, n_out=n_out)
    W_values = get_W_values(rng=rng, activation=activation, fan_in=fan_in, fan_out=fan_out,
                            n_in=n_in, n_out=n_out, n_kernels=n_kernels)
    self.W = theano.shared(W_values, borrow=True, name="W_cov")
    self.b = theano.shared(np.zeros(shape=(n_out,), dtype=theano.config.floatX),
                           borrow=True, name="b_cov")

    # project every word independently (window size 1, so no position slicing is needed)
    cov_out = T.dot(input, self.W) + self.b if use_bias else T.dot(input, self.W)
    activation_out = activation(cov_out)

    if concat_out:
        # concatenate the output of each kernel
        unigram_sum = T.sum(activation_out, axis=1)
        self.output = unigram_sum.flatten(2)
    else:
        # pool within each word position, then sum or average over positions (axis 1)
        pooled_out = T.mean(activation_out, axis=2) if mean_pool else T.max(activation_out, axis=2)
        if self.mask is not None:
            # ignore padded positions when summing and averaging
            unigram_sum = (pooled_out * self.mask[:, :, None]).sum(axis=1)
            unigram_avg = unigram_sum / self.mask.sum(axis=1)[:, None]
        else:
            unigram_sum = T.sum(pooled_out, axis=1)
            unigram_avg = unigram_sum / input_shape[0]
        if pool_out:
            self.output = pooled_out
        elif sum_out:
            self.output = unigram_sum
        else:
            self.output = unigram_avg

    self.params = [self.W, self.b] if use_bias else [self.W]
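
# Illustrative sketch (not part of the original layer): the masked averaging used
# in both layers. Padded positions are zeroed out by the mask before summing, and
# the divisor is the number of real tokens per sentence rather than the padded
# length. The function name and shapes are stand-ins; pooled_out here plays the
# role of the (batch, positions, features) tensor produced after pooling.
def _masked_average_demo():
    import numpy as np
    pooled_out = np.ones((2, 4, 3))                   # batch=2, 4 positions, 3 features
    mask = np.array([[1, 1, 1, 0],                    # first sentence: 3 real tokens
                     [1, 1, 0, 0]], dtype=float)      # second sentence: 2 real tokens
    unigram_sum = (pooled_out * mask[:, :, None]).sum(axis=1)
    unigram_avg = unigram_sum / mask.sum(axis=1)[:, None]
    # every entry is 1.0: padding inflates neither the sum nor the divisor
    return unigram_avg
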