Example #1
    def __init__(self, emb_dim, dim, num_input_words, 
                 num_output_words, vocab, 
                 **kwargs):
        if emb_dim == 0:
            emb_dim = dim
        if num_input_words == 0:
            num_input_words = vocab.size()
        if num_output_words == 0:
            num_output_words = vocab.size()

        self._num_input_words = num_input_words
        self._num_output_words = num_output_words
        self._vocab = vocab

        self._word_to_id = WordToIdOp(self._vocab)

        children = []

        self._main_lookup = LookupTable(self._num_input_words, emb_dim, name='main_lookup')
        self._encoder_fork = Linear(emb_dim, 4 * dim, name='encoder_fork')
        self._encoder_rnn = LSTM(dim, name='encoder_rnn')
        self._decoder_fork = Linear(emb_dim, 4 * dim, name='decoder_fork')
        self._decoder_rnn = LSTM(dim, name='decoder_rnn')
        children.extend([self._main_lookup,
                         self._encoder_fork, self._encoder_rnn,
                         self._decoder_fork, self._decoder_rnn])
        self._pre_softmax = Linear(dim, self._num_output_words)
        self._softmax = NDimensionalSoftmax()
        children.extend([self._pre_softmax, self._softmax])

        super(LanguageModel, self).__init__(children=children, **kwargs)
Example #2
def rating_cost(pred_score,
                true_ratings,
                input_masks,
                output_masks,
                D,
                d,
                std=1.0,
                alpha=0.01):

    pred_score_cum = T.extra_ops.cumsum(pred_score, axis=2)
    prob_item_ratings = NDimensionalSoftmax(name='rating_cost_sf').apply(
        pred_score_cum, extra_ndim=1)
    accu_prob_1N = T.extra_ops.cumsum(prob_item_ratings, axis=2)
    accu_prob_N1 = T.extra_ops.cumsum(prob_item_ratings[:, :, ::-1],
                                      axis=2)[:, :, ::-1]
    mask1N = T.extra_ops.cumsum(true_ratings[:, :, ::-1], axis=2)[:, :, ::-1]
    maskN1 = T.extra_ops.cumsum(true_ratings, axis=2)
    cost_ordinal_1N = -T.sum(
        (T.log(prob_item_ratings) - T.log(accu_prob_1N)) * mask1N, axis=2)
    cost_ordinal_N1 = -T.sum(
        (T.log(prob_item_ratings) - T.log(accu_prob_N1)) * maskN1, axis=2)
    cost_ordinal = cost_ordinal_1N + cost_ordinal_N1
    nll_item_ratings = -(true_ratings * T.log(prob_item_ratings)).sum(axis=2)
    nll = std * nll_item_ratings.sum(
        axis=1) * 1.0 * D / (D - d + 1e-6) + alpha * cost_ordinal.sum(
            axis=1) * 1.0 * D / (D - d + 1e-6)
    cost = T.mean(nll)
    return cost, nll, nll_item_ratings, cost_ordinal_1N, cost_ordinal_N1, prob_item_ratings
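A minimal usage sketch for rating_cost, not part of the original example: the shapes are assumptions inferred from the call sites further down this page (scores and one-hot targets of shape (batch, items, rating levels), masks of shape (batch, items)), and the imports are the ones the function body itself relies on.

import theano
import theano.tensor as T
from blocks.bricks import NDimensionalSoftmax  # used inside rating_cost

pred_score = T.tensor3('pred_score')      # predicted scores, (batch, items, rating levels)
true_ratings = T.tensor3('true_ratings')  # one-hot targets over rating levels
input_masks = T.matrix('input_masks')     # (batch, items)
output_masks = T.matrix('output_masks')   # (batch, items)
d = input_masks.sum(axis=1)
D = (input_masks + output_masks).sum(axis=1)

cost, nll, nll_item, c_1N, c_N1, probs = rating_cost(
    pred_score, true_ratings, input_masks, output_masks, D, d)
f_cost = theano.function(
    [pred_score, true_ratings, input_masks, output_masks], cost)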
Example #3
def softmax_layer(h, y, x_mask, y_mask, lens, vocab_size, hidden_size,
                  boosting):
    hidden_to_output = Linear(name='hidden_to_output',
                              input_dim=hidden_size,
                              output_dim=vocab_size)
    initialize([hidden_to_output])
    linear_output = hidden_to_output.apply(h)
    linear_output.name = 'linear_output'
    softmax = NDimensionalSoftmax()

    #y_hat = softmax.apply(linear_output, extra_ndim=1)
    #y_hat.name = 'y_hat'
    cost_a = softmax.categorical_cross_entropy(y, linear_output, extra_ndim=1)
    #produces correct average
    cost_a = cost_a * y_mask

    if boosting:
        #boosting step, must divide by length here
        lensMat = T.tile(lens, (y.shape[0], 1))
        cost_a = cost_a / lensMat

    #only count cost of correctly masked entries
    cost = cost_a.sum() / y_mask.sum()

    cost.name = 'cost'

    return (linear_output, cost)
Example #4
def softmax_layer(h, y, vocab_size, hidden_size):
    hidden_to_output = Linear(name='hidden_to_output',
                              input_dim=hidden_size,
                              output_dim=vocab_size)
    initialize([hidden_to_output])
    linear_output = hidden_to_output.apply(h)
    linear_output.name = 'linear_output'
    softmax = NDimensionalSoftmax()
    y_hat = softmax.apply(linear_output, extra_ndim=1)
    y_hat.name = 'y_hat'
    cost = softmax.categorical_cross_entropy(y, linear_output,
                                             extra_ndim=1).mean()
    cost.name = 'cost'
    return y_hat, cost
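For reference, a small self-contained sketch of the pattern the softmax_layer helpers above implement: scores of shape (time, batch, vocab) are normalised over the last axis with extra_ndim=1, and the cross-entropy is taken against an integer target matrix of shape (time, batch). The concrete dimensions and initialisers below are illustrative assumptions, not taken from any of the quoted projects.

import numpy
import theano
from theano import tensor
from blocks.bricks import Linear, NDimensionalSoftmax
from blocks.initialization import IsotropicGaussian, Constant

h = tensor.tensor3('h')   # hidden states, (time, batch, hidden)
y = tensor.lmatrix('y')   # target word ids, (time, batch)

hidden_to_output = Linear(name='hidden_to_output', input_dim=8, output_dim=5,
                          weights_init=IsotropicGaussian(0.01),
                          biases_init=Constant(0))
hidden_to_output.initialize()
softmax = NDimensionalSoftmax()

linear_output = hidden_to_output.apply(h)           # (time, batch, vocab) scores
y_hat = softmax.apply(linear_output, extra_ndim=1)  # softmax over the last axis
cost = softmax.categorical_cross_entropy(y, linear_output, extra_ndim=1).mean()

f = theano.function([h, y], [y_hat, cost])
probs, loss = f(numpy.random.randn(7, 3, 8).astype(theano.config.floatX),
                numpy.random.randint(0, 5, size=(7, 3)).astype('int64'))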
Example #5
def create_rnn(hidden_dim, vocab_dim, mode="rnn"):
    # input
    x = tensor.imatrix('inchar')
    y = tensor.imatrix('outchar')

    #
    W = LookupTable(
        name="W1",
        #dim = hidden_dim*4,
        dim=hidden_dim,
        length=vocab_dim,
        weights_init=initialization.IsotropicGaussian(0.01),
        biases_init=initialization.Constant(0))
    if mode == "lstm":
        # Long Short Term Memory
        H = LSTM(hidden_dim,
                 name='H',
                 weights_init=initialization.IsotropicGaussian(0.01),
                 biases_init=initialization.Constant(0.0))
    else:
        # recurrent history weight
        H = SimpleRecurrent(
            name="H",
            dim=hidden_dim,
            activation=Tanh(),
            weights_init=initialization.IsotropicGaussian(0.01))
    #
    S = Linear(name="W2",
               input_dim=hidden_dim,
               output_dim=vocab_dim,
               weights_init=initialization.IsotropicGaussian(0.01),
               biases_init=initialization.Constant(0))

    A = NDimensionalSoftmax(name="softmax")

    initLayers([W, H, S])
    activations = W.apply(x)
    hiddens = H.apply(activations)  #[0]
    activations2 = S.apply(hiddens)
    y_hat = A.apply(activations2, extra_ndim=1)
    cost = A.categorical_cross_entropy(y, activations2, extra_ndim=1).mean()

    cg = ComputationGraph(cost)
    #print VariableFilter(roles=[WEIGHT])(cg.variables)
    #W1,H,W2 = VariableFilter(roles=[WEIGHT])(cg.variables)

    layers = (x, W, H, S, A, y)

    return cg, layers, y_hat, cost
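create_rnn relies on an initLayers helper that is not shown in this snippet. A minimal stand-in, an assumption rather than the original implementation, simply calls initialize() on each brick once its weights_init/biases_init attributes have been set, mirroring what the other examples on this page do explicitly:

def initLayers(layers):
    # initialize each brick's parameters from its *_init attributes
    for layer in layers:
        layer.initialize()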
Example #6
 def __init__(self,
              lm_costs_name,
              lm_weight,
              normalize_am_weights=False,
              normalize_lm_weights=False,
              normalize_tot_weights=True,
              am_beta=1.0,
              **kwargs):
     super(ShallowFusionReadout, self).__init__(**kwargs)
     self.lm_costs_name = lm_costs_name
     self.lm_weight = lm_weight
     self.normalize_am_weights = normalize_am_weights
     self.normalize_lm_weights = normalize_lm_weights
     self.normalize_tot_weights = normalize_tot_weights
     self.am_beta = am_beta
     self.softmax = NDimensionalSoftmax()
     self.children += [self.softmax]
Example #7
def softmax_output_layer(x, h, y, in_size, out_size, hidden_size, pred):
    if connect_h_to_o:
        hidden_to_output = Linear(name='hidden_to_output' + str(pred),
                                  input_dim=hidden_size * len(h),
                                  output_dim=out_size)
        hiddens = T.concatenate([hidden for hidden in h], axis=2)
    else:
        hidden_to_output = Linear(name='hidden_to_output' + str(pred),
                                  input_dim=hidden_size,
                                  output_dim=out_size)
        hiddens = h[-1]
    initialize([hidden_to_output])
    linear_output = hidden_to_output.apply(hiddens)
    linear_output.name = 'linear_output'
    softmax = NDimensionalSoftmax()
    extra_ndim = 1 if single_dim_out else 2
    y_hat = softmax.apply(linear_output, extra_ndim=extra_ndim)
    cost = softmax.categorical_cross_entropy(y,
                                             linear_output,
                                             extra_ndim=extra_ndim).mean()

    return y_hat, cost
Example #8
    def softmax_layer(self, h, y):
        """
        Perform Softmax over the hidden state in order to
        predict the next word in the sequence and compute
        the loss.
        :param h: The hidden state sequence
        :param y: The target words
        """
        hidden_to_output = Linear(name='hidden_to_output', input_dim=self.hidden_size,
                                  output_dim=self.vocab_size)
        initialize(hidden_to_output, sqrt(6.0 / (self.hidden_size + self.vocab_size)))

        linear_output = hidden_to_output.apply(h)
        linear_output.name = 'linear_output'
        softmax = NDimensionalSoftmax(name="lm_softmax")
        y_hat = softmax.log_probabilities(linear_output, extra_ndim=1)
        y_hat.name = 'y_hat'

        cost = softmax.categorical_cross_entropy(y, linear_output, extra_ndim=1).mean()

        cost.name = 'cost'
        return y_hat, cost
Example #9
    def __init__(self, visual_dim, textual_dim, output_dim, hidden_size,
                 init_ranges, **kwargs):
        (visual_range, textual_range, linear_range_1, linear_range_2,
         linear_range_3) = init_ranges
        manager_dim = visual_dim + textual_dim
        visual_mlp = MLPGenreClassifier(
            visual_dim,
            output_dim,
            hidden_size, [linear_range_1, linear_range_2, linear_range_3],
            name='visual_mlp')
        textual_mlp = MLPGenreClassifier(
            textual_dim,
            output_dim,
            hidden_size, [linear_range_1, linear_range_2, linear_range_3],
            name='textual_mlp')
        # manager_mlp = MLPGenreClassifier(manager_dim, 2, hidden_size, [
        # linear_range_1, linear_range_2, linear_range_3], output_act=Softmax,
        # name='manager_mlp')
        bn = BatchNormalization(input_dim=manager_dim, name='bn3')
        manager_mlp = Sequence([
            Linear(manager_dim,
                   2,
                   name='linear_output',
                   use_bias=False,
                   weights_init=initialization.Uniform(
                       width=linear_range_1)).apply,
        ],
                               name='manager_mlp')
        fork = Fork(
            input_dim=manager_dim,
            output_dims=[2] * output_dim,
            prototype=manager_mlp,
            output_names=['linear_' + str(i) for i in range(output_dim)])

        children = [visual_mlp, textual_mlp, fork, bn, NDimensionalSoftmax()]
        kwargs.setdefault('use_bias', False)
        kwargs.setdefault('children', children)
        super(MoEClassifier, self).__init__(**kwargs)
Example #10
    def __init__(self, mlp, dim, k, const=1e-5, **kwargs):
        super(GMMMLP, self).__init__(**kwargs)

        self.dim = dim
        self.const = const
        self.k = k
        input_dim = mlp.output_dim
        self.mu = MLP(activations=[Identity()],
                      dims=[input_dim, dim],
                      name=self.name + "_mu")
        self.sigma = MLP(activations=[SoftPlus()],
                         dims=[input_dim, dim],
                         name=self.name + "_sigma")

        self.coeff = MLP(activations=[Identity()],
                         dims=[input_dim, k],
                         name=self.name + "_coeff")

        self.coeff2 = NDimensionalSoftmax()
        self.mlp = mlp
        self.children = [
            self.mlp, self.mu, self.sigma, self.coeff, self.coeff2
        ]
Example #11
    def __init__(self,
                 emb_dim,
                 emb_def_dim,
                 dim,
                 num_input_words,
                 def_num_input_words,
                 num_output_words,
                 vocab,
                 retrieval=None,
                 def_reader='LSTM',
                 standalone_def_lookup=True,
                 standalone_def_rnn=True,
                 disregard_word_embeddings=False,
                 compose_type='sum',
                 very_rare_threshold=[10],
                 cache_size=0,
                 **kwargs):
        # TODO(tombosc): document
        if emb_dim == 0:
            emb_dim = dim
        if emb_def_dim == 0:
            emb_def_dim = emb_dim
        if num_input_words == 0:
            num_input_words = vocab.size()
        if def_num_input_words == 0:
            def_num_input_words = num_input_words

        if (num_input_words !=
                def_num_input_words) and (not standalone_def_lookup):
            raise NotImplementedError()

        self._very_rare_threshold = very_rare_threshold
        self._num_input_words = num_input_words
        self._num_output_words = num_output_words
        self._vocab = vocab
        self._retrieval = retrieval
        self._disregard_word_embeddings = disregard_word_embeddings
        self._compose_type = compose_type

        self._word_to_id = WordToIdOp(self._vocab)
        self._word_to_count = WordToCountOp(self._vocab)

        children = []
        self._cache = None
        if cache_size > 0:
            #TODO(tombosc) do we implement cache as LookupTable or theano matrix?
            #self._cache = theano.shared(np.zeros((def_num_input_words, emb_dim)))
            self._cache = LookupTable(cache_size,
                                      emb_dim,
                                      name='cache_def_embeddings')
            children.append(self._cache)

        if self._retrieval:
            self._retrieve = RetrievalOp(retrieval)

        self._main_lookup = LookupTable(self._num_input_words,
                                        emb_dim,
                                        name='main_lookup')
        self._main_fork = Linear(emb_dim, 4 * dim, name='main_fork')
        self._main_rnn = DebugLSTM(
            dim, name='main_rnn')  # TODO(tombosc): use regular LSTM?
        children.extend([self._main_lookup, self._main_fork, self._main_rnn])
        if self._retrieval:
            if standalone_def_lookup:
                lookup = None
            else:
                if emb_dim != emb_def_dim:
                    raise ValueError(
                        "emb_dim != emb_def_dim: cannot share lookup")
                lookup = self._main_lookup

            if def_reader == 'LSTM':
                if standalone_def_rnn:
                    fork_and_rnn = None
                else:
                    fork_and_rnn = (self._main_fork, self._main_rnn)
                self._def_reader = LSTMReadDefinitions(def_num_input_words,
                                                       emb_def_dim,
                                                       dim,
                                                       vocab,
                                                       lookup,
                                                       fork_and_rnn,
                                                       cache=self._cache)

            elif def_reader == 'mean':
                self._def_reader = MeanPoolReadDefinitions(
                    def_num_input_words,
                    emb_def_dim,
                    dim,
                    vocab,
                    lookup,
                    translate=(emb_def_dim != dim),
                    normalize=False)
            else:
                raise Exception("def reader not understood")

            self._combiner = MeanPoolCombiner(dim=dim,
                                              emb_dim=emb_dim,
                                              compose_type=compose_type)

            children.extend([self._def_reader, self._combiner])

        self._pre_softmax = Linear(dim, self._num_output_words)
        self._softmax = NDimensionalSoftmax()
        children.extend([self._pre_softmax, self._softmax])

        super(LanguageModel, self).__init__(children=children, **kwargs)
Example #12
    def __init__(self,
                 input1_size,
                 input2_size,
                 lookup1_dim=200,
                 lookup2_dim=200,
                 hidden_size=512):
        self.hidden_size = hidden_size
        self.input1_size = input1_size
        self.input2_size = input2_size
        self.lookup1_dim = lookup1_dim
        self.lookup2_dim = lookup2_dim

        x1 = tensor.lmatrix('durations')
        x2 = tensor.lmatrix('syllables')
        y = tensor.lmatrix('pitches')

        lookup1 = LookupTable(dim=self.lookup1_dim,
                              length=self.input1_size,
                              name='lookup1',
                              weights_init=initialization.Uniform(width=0.01),
                              biases_init=Constant(0))
        lookup1.initialize()
        lookup2 = LookupTable(dim=self.lookup2_dim,
                              length=self.input2_size,
                              name='lookup2',
                              weights_init=initialization.Uniform(width=0.01),
                              biases_init=Constant(0))
        lookup2.initialize()
        merge = Merge(['lookup1', 'lookup2'],
                      [self.lookup1_dim, self.lookup2_dim],
                      self.hidden_size,
                      weights_init=initialization.Uniform(width=0.01),
                      biases_init=Constant(0))
        merge.initialize()
        recurrent_block = LSTM(
            dim=self.hidden_size,
            activation=Tanh(),
            weights_init=initialization.Uniform(width=0.01)
        )  #RecurrentStack([LSTM(dim=self.hidden_size, activation=Tanh())] * 3)
        recurrent_block.initialize()
        linear = Linear(input_dim=self.hidden_size,
                        output_dim=self.input1_size,
                        weights_init=initialization.Uniform(width=0.01),
                        biases_init=Constant(0))
        linear.initialize()
        softmax = NDimensionalSoftmax()

        l1 = lookup1.apply(x1)
        l2 = lookup2.apply(x2)
        m = merge.apply(l1, l2)
        h = recurrent_block.apply(m)
        a = linear.apply(h)

        y_hat = softmax.apply(a, extra_ndim=1)
        # ValueError: x must be 1-d or 2-d tensor of floats. Got TensorType(float64, 3D)

        self.Cost = softmax.categorical_cross_entropy(y, a,
                                                      extra_ndim=1).mean()

        self.ComputationGraph = ComputationGraph(self.Cost)

        self.Model = Model(y_hat)
Example #13
        H1 = dims[i]
        H2 = dims[i + 1]
        width = 2 * np.sqrt(6) / np.sqrt(H1 + H2)
        #         std = np.sqrt(2. / dim)
        linear_layers[i].weights_init = Uniform(width=width)

#     NADE_CF_model.children[0].weights_init = Constant(1)
#     NADE_CF_model.children[0].biases_init = Constant(1.5)
#     NADE_CF_model.children[1].weights_init = Constant(2)
#     NADE_CF_model.children[1].biases_init = Constant(2.5)
    NADE_CF_model.initialize()
    NADE_CF_model.children[-1].parameters[-1].set_value(
        init_b.astype(theano.config.floatX))
    y = NADE_CF_model.apply(input_ratings_cum)
    y_cum = T.extra_ops.cumsum(y, axis=2)
    predicted_ratings = NDimensionalSoftmax().apply(y_cum, extra_ndim=1)
    d = input_masks.sum(axis=1)
    D = (input_masks + output_masks).sum(axis=1)
    #     ratings = T.tensor3(name="ratings", dtype=theano.config.floatX)
    cost, nll, nll_item_ratings, cost_ordinal_1N, cost_ordinal_N1, prob_item_ratings = rating_cost(
        y,
        output_ratings,
        input_masks,
        output_masks,
        D,
        d,
        alpha=alpha,
        std=std)
    cost.name = 'cost'

    cg = ComputationGraph(cost)
Example #14
    def __init__(self,
                 emb_dim,
                 dim,
                 num_input_words,
                 num_output_words,
                 vocab,
                 proximity_coef=0,
                 proximity_distance='l2',
                 encoder='lstm',
                 decoder='lstm',
                 shared_rnn=False,
                 translate_layer=None,
                 word_dropout=0.,
                 tied_in_out=False,
                 vocab_keys=None,
                 seed=0,
                 reconstruction_coef=1.,
                 provide_targets=False,
                 **kwargs):
        """
        translate_layer: either a string naming the activation function to use,
                         or a list containing the activations for an MLP
        """
        if emb_dim == 0:
            emb_dim = dim
        if num_input_words == 0:
            num_input_words = vocab.size()
        if num_output_words == 0:
            num_output_words = vocab.size()

        self._word_dropout = word_dropout

        self._tied_in_out = tied_in_out

        if not encoder:
            if proximity_coef:
                raise ValueError("Err: meaningless penalty term (no encoder)")
            if not vocab_keys:
                raise ValueError("Err: specify a key vocabulary (no encoder)")

        if tied_in_out and num_input_words != num_output_words:
            raise ValueError("Can't tie in and out embeddings. Different "
                             "vocabulary size")
        if shared_rnn and (encoder != 'lstm' or decoder != 'lstm'):
            raise ValueError(
                "can't share RNN because either encoder or decoder"
                "is not an RNN")
        if shared_rnn and decoder == 'lstm_c':
            raise ValueError(
                "can't share RNN because the decoder takes different"
                "inputs")
        if word_dropout < 0 or word_dropout > 1:
            raise ValueError("invalid value for word dropout",
                             str(word_dropout))
        if proximity_distance not in ['l1', 'l2', 'cos']:
            raise ValueError(
                "unrecognized distance: {}".format(proximity_distance))

        if proximity_coef and emb_dim != dim and not translate_layer:
            raise ValueError(
                """if proximity penalisation, emb_dim should equal dim or 
                              there should be a translate layer""")

        if encoder not in [
                None, 'lstm', 'bilstm', 'mean', 'weighted_mean', 'max_bilstm',
                'bilstm_sum', 'max_bilstm_sum'
        ]:
            raise ValueError('encoder not recognized')
        if decoder not in ['skip-gram', 'lstm', 'lstm_c']:
            raise ValueError('decoder not recognized')

        self._proximity_distance = proximity_distance
        self._decoder = decoder
        self._encoder = encoder
        self._num_input_words = num_input_words
        self._num_output_words = num_output_words
        self._vocab = vocab
        self._proximity_coef = proximity_coef
        self._reconstruction_coef = reconstruction_coef
        self._provide_targets = provide_targets

        self._word_to_id = WordToIdOp(self._vocab)
        if vocab_keys:
            self._key_to_id = WordToIdOp(vocab_keys)

        children = []

        if encoder or (not encoder and decoder in ['lstm', 'lstm_c']):
            self._main_lookup = LookupTable(self._num_input_words,
                                            emb_dim,
                                            name='main_lookup')
            children.append(self._main_lookup)
        if provide_targets:
            # this is useful to simulate Hill's baseline without pretrained embeddings
            # in the encoder, only as targets for the encoder.
            self._target_lookup = LookupTable(self._num_input_words,
                                              emb_dim,
                                              name='target_lookup')
            children.append(self._target_lookup)
        if not encoder:
            self._key_lookup = LookupTable(vocab_keys.size(),
                                           emb_dim,
                                           name='key_lookup')
            children.append(self._key_lookup)
        elif encoder == 'lstm':
            self._encoder_fork = Linear(emb_dim, 4 * dim, name='encoder_fork')
            self._encoder_rnn = LSTM(dim, name='encoder_rnn')
            children.extend([self._encoder_fork, self._encoder_rnn])
        elif encoder in ['bilstm', 'max_bilstm']:
            # dim is the dim of the concatenated vector
            self._encoder_fork = Linear(emb_dim, 2 * dim, name='encoder_fork')
            self._encoder_rnn = Bidirectional(LSTM(dim / 2,
                                                   name='encoder_rnn'))
            children.extend([self._encoder_fork, self._encoder_rnn])
        elif encoder in ['bilstm_sum', 'max_bilstm_sum']:
            self._encoder_fork = Linear(emb_dim, 4 * dim, name='encoder_fork')
            self._encoder_rnn = BidirectionalSum(LSTM(dim, name='encoder_rnn'))
            children.extend([self._encoder_fork, self._encoder_rnn])
        elif encoder == 'mean':
            pass
        elif encoder == 'weighted_mean':
            self._encoder_w = MLP([Logistic()], [dim, 1],
                                  name="encoder_weights")
            children.extend([self._encoder_w])
        else:
            raise NotImplementedError()

        if decoder in ['lstm', 'lstm_c']:
            dim_after_translate = emb_dim
            if shared_rnn:
                self._decoder_fork = self._encoder_fork
                self._decoder_rnn = self._encoder_rnn
            else:
                if decoder == 'lstm_c':
                    dim_2 = dim + emb_dim
                else:
                    dim_2 = dim
                self._decoder_fork = Linear(dim_2,
                                            4 * dim,
                                            name='decoder_fork')
                self._decoder_rnn = LSTM(dim, name='decoder_rnn')
            children.extend([self._decoder_fork, self._decoder_rnn])
        elif decoder == 'skip-gram':
            dim_after_translate = emb_dim

        self._translate_layer = None
        activations = {'sigmoid': Logistic(), 'tanh': Tanh(), 'linear': None}

        if translate_layer:
            if type(translate_layer) == str:
                translate_layer = [translate_layer]
            assert (type(translate_layer) == list)
            activations_translate = [activations[a] for a in translate_layer]
            dims_translate = [
                dim,
            ] * len(translate_layer) + [dim_after_translate]
            self._translate_layer = MLP(activations_translate,
                                        dims_translate,
                                        name="translate_layer")
            children.append(self._translate_layer)

        if not self._tied_in_out:
            self._pre_softmax = Linear(emb_dim, self._num_output_words)
            children.append(self._pre_softmax)
        if decoder in ['lstm', 'lstm_c']:
            self._softmax = NDimensionalSoftmax()
        elif decoder in ['skip-gram']:
            self._softmax = Softmax()
        children.append(self._softmax)

        super(Seq2Seq, self).__init__(children=children, **kwargs)
Example #15
 def __init__(self,
              reward_brick,
              compute_targets,
              compute_policy,
              solve_bellman,
              freeze_actor,
              freeze_critic,
              critic_uses_actor_states,
              critic_uses_groundtruth,
              critic=None,
              critic_burnin_steps=None,
              critic_policy_t=None,
              entropy_reward_coof=None,
              cross_entropy_reward_coof=None,
              discount=None,
              value_penalty=None,
              value_softmax=False,
              same_value_for_wrong=False,
              accumulate_outputs=False,
              use_value_biases=None,
              actor_grad_estimate=None,
              bos_token=None,
              **kwargs):
     super(ActorCriticReadout, self).__init__(**kwargs)
     self.reward_brick = reward_brick
     self.critic = critic
     self.freeze_actor = freeze_actor
     self.freeze_critic = freeze_critic
     self.critic_uses_actor_states = critic_uses_actor_states
     self.critic_uses_groundtruth = (critic_uses_groundtruth
                                     if critic_uses_groundtruth is not None
                                     else True)
     self.critic_burnin_steps = (critic_burnin_steps
                                 if critic_burnin_steps is not None else 0)
     self.value_summand = Linear(output_dim=1, name='summand')
     self.softmax_t = 1.
     self.critic_policy_t = (critic_policy_t
                             if critic_policy_t is not None else 1.0)
     self.epsilon = 0.
     self.discount = (discount if discount is not None else 1.)
     self.entropy_reward_coof = (entropy_reward_coof
                                 if entropy_reward_coof is not None else 0.)
     self.cross_entropy_reward_coof = (cross_entropy_reward_coof
                                       if cross_entropy_reward_coof
                                       is not None else 0.)
     self.value_penalty = value_penalty
     self.value_softmax = value_softmax
     self.same_value_for_wrong = same_value_for_wrong
     self.compute_targets = compute_targets
     self.compute_policy = compute_policy
     self.solve_bellman = solve_bellman
     self.accumulate_outputs = accumulate_outputs
     self.use_value_biases = (use_value_biases
                              if use_value_biases is not None else True)
     self.actor_grad_estimate = (actor_grad_estimate
                                 if actor_grad_estimate else 'all_actions')
     self.bos_token = bos_token
     self.softmax = NDimensionalSoftmax()
     self.children += [reward_brick, self.value_summand, self.softmax]
     if self.critic:
         self.children.append(self.critic)
     self.costs.inputs += ['attended', 'attended_mask']
Example #16
    def __init__(self, mlp, target_size, frame_size, k, frnn_hidden_size, \
            frnn_step_size, const=1e-5, **kwargs):

        super(FRNNEmitter, self).__init__(**kwargs)

        self.mlp = mlp
        self.target_size = target_size
        self.frame_size = frame_size
        self.k = k
        self.frnn_hidden_size = frnn_hidden_size
        self.const = const
        self.input_dim = self.mlp.output_dim

        self.frnn_step_size = frnn_step_size

        # adding a step if the division is not exact.
        self.number_of_steps = frame_size // frnn_step_size
        self.last_steps = frame_size % frnn_step_size
        if self.last_steps != 0:
            self.number_of_steps += 1

        self.mu = MLP(activations=[Identity()],
                dims=[frnn_hidden_size, k*frnn_step_size],
                name=self.name + "_mu")
        self.sigma = MLP(activations=[SoftPlus()],
                dims=[frnn_hidden_size, k*frnn_step_size],
                name=self.name + "_sigma")

        self.coeff = MLP(activations=[Identity()],
                dims=[frnn_hidden_size, k],
                name=self.name + "_coeff")

        self.coeff2 = NDimensionalSoftmax()

        self.frnn_initial_state = Linear(
            input_dim=self.input_dim,
            output_dim=frnn_hidden_size,
            name="frnn_initial_state")

        #self.frnn_hidden = Linear(
        #    input_dim=frnn_hidden_size,
        #    output_dim=frnn_hidden_size,
        #    activation=Tanh(),
        #    name="frnn_hidden")

        self.frnn_activation = Tanh(
            name="frnn_activation")

        self.frnn_linear_transition_state = Linear(
            input_dim=frnn_hidden_size,
            output_dim=frnn_hidden_size,
            name="frnn_linear_transition_state")

        self.frnn_linear_transition_input = Linear(
            input_dim=self.frnn_step_size,
            output_dim=frnn_hidden_size,
            name="frnn_linear_transition_input")

        #self.frnn_linear_transition_output = Linear (
        #    input_dim = frnn_hidden_size,
        #    output_dim = self.rnn_hidden_dim,
        #    name="frnn_linear_transition_output")

        self.children = [self.mlp, self.mu, self.sigma, self.coeff,
                         self.coeff2, self.frnn_initial_state,
                         self.frnn_activation,
                         self.frnn_linear_transition_state,
                         self.frnn_linear_transition_input]
Example #17
 def __init__(self, initial_output=0, **kwargs):
     self.initial_output = initial_output
     self.softmax = NDimensionalSoftmax()
     children = [self.softmax]
     kwargs.setdefault('children', []).extend(children)
     super(SoftmaxEmitter, self).__init__(**kwargs)
Example #18
    def __init__(
            self,
            dim,
            emb_dim,
            vocab,
            def_emb_translate_dim=-1,
            def_dim=-1,
            encoder='bilstm',
            bn=True,
            def_reader=None,
            def_combiner=None,
            dropout=0.5,
            num_input_words=-1,
            # Others
            **kwargs):

        self._dropout = dropout
        self._vocab = vocab
        self._emb_dim = emb_dim
        self._def_reader = def_reader
        self._def_combiner = def_combiner

        if encoder != 'bilstm':
            raise NotImplementedError()

        if def_emb_translate_dim < 0:
            self.def_emb_translate_dim = emb_dim
        else:
            self.def_emb_translate_dim = def_emb_translate_dim

        if def_dim < 0:
            self._def_dim = emb_dim
        else:
            self._def_dim = def_dim

        if num_input_words > 0:
            logger.info("Restricting vocab to " + str(num_input_words))
            self._num_input_words = num_input_words
        else:
            self._num_input_words = vocab.size()

        children = []

        if self.def_emb_translate_dim != self._emb_dim:
            self._translate_pre_def = Linear(input_dim=emb_dim,
                                             output_dim=def_emb_translate_dim)
            children.append(self._translate_pre_def)
        else:
            self._translate_pre_def = None

        ## Embedding
        self._lookup = LookupTable(self._num_input_words,
                                   emb_dim,
                                   weights_init=GlorotUniform())
        children.append(self._lookup)

        if def_reader:
            self._final_emb_dim = self._def_dim
            self._def_reader = def_reader
            self._def_combiner = def_combiner
            children.extend([self._def_reader, self._def_combiner])
        else:
            self._final_emb_dim = self._emb_dim

        ## BiLSTM
        self._hyp_bidir_fork = Linear(
            self._def_dim if def_reader else self._emb_dim,
            4 * dim,
            name='hyp_bidir_fork')
        self._hyp_bidir = Bidirectional(LSTM(dim), name='hyp_bidir')
        self._prem_bidir_fork = Linear(
            self._def_dim if def_reader else self._emb_dim,
            4 * dim,
            name='prem_bidir_fork')
        self._prem_bidir = Bidirectional(LSTM(dim), name='prem_bidir')
        children.extend([self._hyp_bidir_fork, self._hyp_bidir])
        children.extend([self._prem_bidir, self._prem_bidir_fork])

        ## BiLSTM no. 2 (encoded attentioned embeddings)
        self._hyp_bidir_fork2 = Linear(8 * dim,
                                       4 * dim,
                                       name='hyp_bidir_fork2')
        self._hyp_bidir2 = Bidirectional(LSTM(dim), name='hyp_bidir2')
        self._prem_bidir_fork2 = Linear(8 * dim,
                                        4 * dim,
                                        name='prem_bidir_fork2')
        self._prem_bidir2 = Bidirectional(LSTM(dim), name='prem_bidir2')
        children.extend([self._hyp_bidir_fork2, self._hyp_bidir2])
        children.extend([self._prem_bidir2, self._prem_bidir_fork2])

        self._rnns = [
            self._prem_bidir2, self._hyp_bidir2, self._prem_bidir,
            self._hyp_bidir
        ]

        ## MLP
        if bn:
            self._mlp = BatchNormalizedMLP([Tanh()], [8 * dim, dim],
                                           conserve_memory=False,
                                           name="mlp")
            self._pred = BatchNormalizedMLP([Softmax()], [dim, 3],
                                            conserve_memory=False,
                                            name="pred_mlp")
        else:
            self._mlp = MLP([Tanh()], [8 * dim, dim], name="mlp")
            self._pred = MLP([Softmax()], [dim, 3], name="pred_mlp")

        children.append(self._mlp)
        children.append(self._pred)

        ## Softmax
        self._ndim_softmax = NDimensionalSoftmax()
        children.append(self._ndim_softmax)

        super(ESIM, self).__init__(children=children, **kwargs)
Example #19
    lstm = AssociativeLSTM(activation=Tanh(),
                           dim=h_dim,
                           num_copies=num_copies,
                           use_W_xu=use_W_xu,
                           name="lstm")
else:
    lstm = LSTM(activation=Tanh(),
                dim=h_dim,
                bias=bias,
                name="lstm")
h, c = lstm.apply(x_transform)
h_to_o = Linear(name='h_to_o',
                input_dim=h_dim,
                output_dim=o_dim)
o = h_to_o.apply(h)
o = NDimensionalSoftmax().apply(o, extra_ndim=1)

for brick in (lstm, x_to_h, h_to_o):
    brick.weights_init = Glorot()
    brick.biases_init = Constant(0)
    brick.initialize()

cost = CategoricalCrossEntropy().apply(y, o)
cost.name = 'CE'

print('Building training process...')
shapes = []
for param in ComputationGraph(cost).parameters:
    # shapes.append((param.name, param.eval().shape))
    shapes.append(np.prod(list(param.eval().shape)))
print "Total number of parameters: " + str(np.sum(shapes))
Example #20
# ******************* Model *******************
recognizer = SimpleSpeechRecognizer(transition=transition,
                                    dims_transition=conf.dims_transition,
                                    num_features=num_features,
                                    num_classes=num_classes)

#recognizer = SpeechRecognizer(
#    num_features=num_features, dims_bottom=[],
#    dims_bidir=conf.dims_transition, dims_top=[num_classes],
#    bidir_trans=GatedRecurrent, bottom_activation=None)

# ******************* output *******************
y_hat = recognizer.apply(x, x_m)
y_hat.name = 'outputs'
y_hat_softmax = NDimensionalSoftmax().apply(y_hat, extra_ndim=y_hat.ndim - 2)
y_hat_softmax.name = 'outputs_softmax'

# there is one cost function for training and another for monitoring, because the training one
# is more stable for computing gradients and also seems more memory efficient, but it does not
# compute the true cost.
if conf.task == 'CTC':
    cost_train = ctc.pseudo_cost(y, y_hat, y_m, x_m).mean()
    cost_train.name = "cost_train"

    cost_monitor = ctc.cost(y, y_hat_softmax, y_m, x_m).mean()
    cost_monitor.name = "cost_monitor"
elif conf.task == 'framewise':
    cost_train = categorical_crossentropy_batch().apply(y_hat_softmax, y, x_m)
    cost_train.name = 'cost'
    cost_monitor = cost_train
else:
Example #21
linear_input.initialize()

rnn = SimpleRecurrent(name='hidden',
                      dim=hidden_layer_dim,
                      activation=Tanh(),
                      weights_init=initialization.Uniform(width=0.01))
rnn.initialize()

linear_output = Linear(name='linear_output',
                       input_dim=hidden_layer_dim,
                       output_dim=train_dataset.durations_vocab_size(),
                       weights_init=initialization.Uniform(width=0.01),
                       biases_init=Constant(0))
linear_output.initialize()

softmax = NDimensionalSoftmax(name='ndim_softmax')

activation_input = lookup_input.apply(x)
hidden = rnn.apply(linear_input.apply(activation_input))
activation_output = linear_output.apply(hidden)
y_est = softmax.apply(activation_output, extra_ndim=1)

cost = softmax.categorical_cross_entropy(y, activation_output,
                                         extra_ndim=1).mean()

from blocks.graph import ComputationGraph
from blocks.algorithms import GradientDescent, Adam

cg = ComputationGraph([cost])

step_rules = [RMSProp(learning_rate=0.002, decay_rate=0.95), StepClipping(1.0)]
Example #22
 def __init__(self, initial_output=0, **kwargs):
     self.initial_output = initial_output
     self.softmax = NDimensionalSoftmax()
     children = [self.softmax] + kwargs.get('children', [])
     super(SoftmaxEmitter, self).__init__(children=children, **kwargs)
Example #23
    def __init__(self, dim, emb_dim, readout_dims, num_input_words,
                 def_num_input_words, vocab, use_definitions, def_word_gating,
                 compose_type, coattention, def_reader, reuse_word_embeddings,
                 random_unk, **kwargs):
        self._vocab = vocab
        if emb_dim == 0:
            emb_dim = dim
        if num_input_words == 0:
            num_input_words = vocab.size()
        if def_num_input_words == 0:
            def_num_input_words = num_input_words

        self._coattention = coattention
        self._num_input_words = num_input_words
        self._use_definitions = use_definitions
        self._random_unk = random_unk
        self._reuse_word_embeddings = reuse_word_embeddings

        lookup_num_words = num_input_words
        if reuse_word_embeddings:
            lookup_num_words = max(num_input_words, def_num_input_words)
        if random_unk:
            lookup_num_words = vocab.size()

        # Dima: we can have slightly less copy-paste here if we
        # copy the RecurrentFromFork class from my other projects.
        children = []
        self._lookup = LookupTable(lookup_num_words, emb_dim)
        self._encoder_fork = Linear(emb_dim, 4 * dim, name='encoder_fork')
        self._encoder_rnn = LSTM(dim, name='encoder_rnn')
        self._question_transform = Linear(dim, dim, name='question_transform')
        self._bidir_fork = Linear(3 * dim if coattention else 2 * dim,
                                  4 * dim,
                                  name='bidir_fork')
        self._bidir = Bidirectional(LSTM(dim), name='bidir')
        children.extend([
            self._lookup, self._encoder_fork, self._encoder_rnn,
            self._question_transform, self._bidir, self._bidir_fork
        ])

        activations = [Rectifier()] * len(readout_dims) + [None]
        readout_dims = [2 * dim] + readout_dims + [1]
        self._begin_readout = MLP(activations,
                                  readout_dims,
                                  name='begin_readout')
        self._end_readout = MLP(activations, readout_dims, name='end_readout')
        self._softmax = NDimensionalSoftmax()
        children.extend(
            [self._begin_readout, self._end_readout, self._softmax])

        if self._use_definitions:
            # A potential bug here: we pass the same vocab to the def reader.
            # If a different token is reserved for UNK in text and in the definitions,
            # we can be screwed.
            def_reader_class = eval(def_reader)
            def_reader_kwargs = dict(
                num_input_words=def_num_input_words,
                dim=dim,
                emb_dim=emb_dim,
                vocab=vocab,
                lookup=self._lookup if reuse_word_embeddings else None)
            if def_reader_class == MeanPoolReadDefinitions:
                def_reader_kwargs.update(dict(normalize=True, translate=False))
            self._def_reader = def_reader_class(**def_reader_kwargs)
            self._combiner = MeanPoolCombiner(dim=dim,
                                              emb_dim=emb_dim,
                                              def_word_gating=def_word_gating,
                                              compose_type=compose_type)
            children.extend([self._def_reader, self._combiner])

        super(ExtractiveQAModel, self).__init__(children=children, **kwargs)

        # create default input variables
        self.contexts = tensor.lmatrix('contexts')
        self.context_mask = tensor.matrix('contexts_mask')
        self.questions = tensor.lmatrix('questions')
        self.question_mask = tensor.matrix('questions_mask')
        self.answer_begins = tensor.lvector('answer_begins')
        self.answer_ends = tensor.lvector('answer_ends')
        input_vars = [
            self.contexts, self.context_mask, self.questions,
            self.question_mask, self.answer_begins, self.answer_ends
        ]
        if self._use_definitions:
            self.defs = tensor.lmatrix('defs')
            self.def_mask = tensor.matrix('def_mask')
            self.contexts_def_map = tensor.lmatrix('contexts_def_map')
            self.questions_def_map = tensor.lmatrix('questions_def_map')
            input_vars.extend([
                self.defs, self.def_mask, self.contexts_def_map,
                self.questions_def_map
            ])
        self.input_vars = OrderedDict([(var.name, var) for var in input_vars])
Example #24
out3 = Fire((55,55), 128, 32, 32, 32, out2, 300)
out31 = MaxPooling((3,3), step=(2,2), padding=(1,1), name='poolLow').apply(out3)
out4 = Fire((28,28), 256, 32, 32, 32, out31, 45)
out5 = Fire((28,28), 256, 48, 48, 48, out4, 500)
out6 = Fire((28,28), 384, 48, 48, 48, out5, 65)
out7 = Fire((28,28), 384, 64, 64, 64, out6, 700)
out71 = MaxPooling((3,3), step=(2,2), padding=(1,1), name='poolLow2').apply(out7)
out8 = Fire((14,14), 512, 64, 64, 64, out71, 85)

#LAST LAYERS
conv_layers1 = list([Convolutional(filter_size=(1,1), num_filters=2, name='Convx2'), BatchNormalization(name='batch_vx2'), Rectifier(),
    AveragePooling((14,14), name='MaxPol1')])
conv_sequence1 = ConvolutionalSequence(conv_layers1, num_channels=512, image_size=(14,14), weights_init=Orthogonal(), use_bias=False, name='ConvSeq3')
conv_sequence1.initialize()
out_soft1 = Flattener(name='Flatt1').apply(conv_sequence1.apply(out8))
predict1 = NDimensionalSoftmax(name='Soft1').apply(out_soft1)
cost = CategoricalCrossEntropy(name='Cross1').apply(y.flatten(), predict1).copy(name='cost')
error = MisclassificationRate().apply(y.flatten(), predict1)

# Little trick to plot the error rate in two different plots (we can't use the same data twice in the plot for an unknown reason)
error_rate = error.copy(name='error_rate')
error_rate2 = error.copy(name='error_rate2')
cg = ComputationGraph([cost, error_rate])

########### GET THE DATA #####################
stream_train = ServerDataStream(('image_features','targets'), False, port=5512, hwm=40)
stream_valid = ServerDataStream(('image_features','targets'), False, port=5513, hwm=40)

########### DEFINE THE ALGORITHM #############
algorithm = GradientDescent(cost=cost, parameters=cg.parameters, step_rule=Momentum(learning_rate=0.01, momentum=0.9))
extensions = [Timing(),
Example #25
    MaxPooling((2, 2), name='MaxPol1'),
    Convolutional(filter_size=(1, 1), num_filters=1024, name='Convx3'),
    Rectifier(),
    MaxPooling((2, 2), name='MaxPol2'),
    Convolutional(filter_size=(1, 1), num_filters=2, name='Convx4'),
    Rectifier(),
])
conv_sequence1 = ConvolutionalSequence(conv_layers1,
                                       num_channels=512,
                                       image_size=(10, 10),
                                       weights_init=Orthogonal(),
                                       use_bias=False,
                                       name='ConvSeq3')
conv_sequence1.initialize()
out_soft1 = Flattener(name='Flatt1').apply(conv_sequence1.apply(out5))
predict1 = NDimensionalSoftmax(name='Soft1').apply(out_soft1)
cost1 = CategoricalCrossEntropy(name='Cross1').apply(
    y.flatten(), predict1).copy(name='cost1')

#SECOND SOFTMAX
conv_layers2 = list([
    MaxPooling((2, 2), name='MaxPol2'),
    Convolutional(filter_size=(1, 1), num_filters=128, name='Convx21'),
    Rectifier(),
    MaxPooling((2, 2), name='MaxPol11'),
    Convolutional(filter_size=(1, 1), num_filters=1024, name='Convx31'),
    Rectifier(),
    MaxPooling((2, 2), name='MaxPol21'),
    Convolutional(filter_size=(1, 1), num_filters=2, name='Convx41'),
    Rectifier(),
])
Example #26
    def __init__(self, input_sources_list, input_sources_vocab_size_list,
                 output_source, output_source_vocab_size,
                 lookup_dim=200, hidden_size=256, recurrent_stack_size=1):

        self.InputSources = input_sources_list
        self.InputSourcesVocab = input_sources_vocab_size_list
        self.OutputSource = output_source
        self.OutputSourceVocab = output_source_vocab_size

        inputs = [tensor.lmatrix(source) for source in input_sources_list]
        output = tensor.lmatrix(output_source)

        lookups = self.get_lookups(lookup_dim, input_sources_vocab_size_list)

        for lookup in lookups:
            lookup.initialize()

        merge = Merge([lookup.name for lookup in lookups], [lookup.dim for lookup in lookups], hidden_size,
                              weights_init=initialization.Uniform(width=0.01),
                              biases_init=Constant(0))
        merge.initialize()

        linear0 = Linear(input_dim=hidden_size, output_dim=hidden_size,
                        weights_init=initialization.Uniform(width=0.01),
                        biases_init=Constant(0), name='linear0')
        linear0.initialize()

        recurrent_blocks = []

        for i in range(recurrent_stack_size):
            recurrent_blocks.append(SimpleRecurrent(
                dim=hidden_size, activation=Tanh(),
                weights_init=initialization.Uniform(width=0.01),
                use_bias=False))

        for i, recurrent_block in enumerate(recurrent_blocks):
            recurrent_block.name = 'recurrent'+str(i+1)
            recurrent_block.initialize()

        linear_out = Linear(input_dim=hidden_size, output_dim=output_source_vocab_size,
                              weights_init=initialization.Uniform(width=0.01),
                              biases_init=Constant(0), name='linear_out')
        linear_out.initialize()
        softmax = NDimensionalSoftmax(name='softmax')

        lookup_outputs = [lookup.apply(input) for lookup, input in zip(lookups, inputs)]

        m = merge.apply(*lookup_outputs)
        r = linear0.apply(m)
        for block in recurrent_blocks:
            r = block.apply(r)
        a = linear_out.apply(r)

        self.Cost = softmax.categorical_cross_entropy(output, a, extra_ndim=1).mean()
        self.Cost.name = 'cost'

        y_hat = softmax.apply(a, extra_ndim=1)
        y_hat.name = 'y_hat'

        self.ComputationGraph = ComputationGraph(self.Cost)

        self.Function = None
        self.MainLoop = None
        self.Model = Model(y_hat)
Example #27
 def __init__(self, *args, **kwargs):
     self.softmax = NDimensionalSoftmax()
     super(MinRiskInitialContextSequenceGenerator,
           self).__init__(*args, **kwargs)
     self.children.append(self.softmax)
Example #28
    def fit(self, trainset, retrain=True):
        batch_size = self.batch_size
        n_iter = self.n_iter
        look_ahead = self.look_ahead
        lr = self.lr
        b1 = self.b1
        b2 = self.b2
        epsilon = self.epsilon
        hidden_size = self.hidden_size
        activation_function = self.activation_function
        drop_rate = self.drop_rate
        weight_decay = self.weight_decay
        optimizer = self.optimizer
        std = self.std
        alpha = self.alpha
        polyak_mu = self.polyak_mu
        rating_category = self.rating_category
        item_num = self.item_num
        user_num = self.user_num
        trainset = self.load_dataset(which_set=['train'],
                                     sources=('input_ratings',
                                              'output_ratings', 'input_masks',
                                              'output_masks'))
        validset = self.load_dataset(which_set=['valid'],
                                     sources=('input_ratings',
                                              'output_ratings', 'input_masks',
                                              'output_masks'))

        train_loop_stream = ForceFloatX(data_stream=MovieLensTransformer(
            data_stream=Trainer_MovieLensTransformer(data_stream=DataStream(
                dataset=trainset,
                iteration_scheme=ShuffledScheme(trainset.num_examples,
                                                batch_size)))))

        valid_monitor_stream = ForceFloatX(data_stream=MovieLensTransformer(
            data_stream=DataStream(dataset=validset,
                                   iteration_scheme=ShuffledScheme(
                                       validset.num_examples, batch_size))))

        rating_freq = np.zeros((user_num, rating_category))
        init_b = np.zeros((user_num, rating_category))
        for batch in valid_monitor_stream.get_epoch_iterator():
            inp_r, out_r, inp_m, out_m = batch
            rating_freq += inp_r.sum(axis=0)

        log_rating_freq = np.log(rating_freq + 1e-8)
        log_rating_freq_diff = np.diff(log_rating_freq, axis=1)
        init_b[:, 1:] = log_rating_freq_diff
        init_b[:, 0] = log_rating_freq[:, 0]
        #     init_b = np.log(rating_freq / (rating_freq.sum(axis=1)[:, None] + 1e-8) +1e-8)  * (rating_freq>0)

        new_items = np.where(rating_freq.sum(axis=1) == 0)[0]
        self.new_items = new_items
        input_ratings = T.tensor3(name='input_ratings',
                                  dtype=theano.config.floatX)
        output_ratings = T.tensor3(name='output_ratings',
                                   dtype=theano.config.floatX)
        input_masks = T.matrix(name='input_masks', dtype=theano.config.floatX)
        output_masks = T.matrix(name='output_masks',
                                dtype=theano.config.floatX)

        input_ratings_cum = T.extra_ops.cumsum(input_ratings[:, :, ::-1],
                                               axis=2)[:, :, ::-1]

        #     hidden_size = [256]
        if activation_function == 'reclin':
            act = Rectifier
        elif activation_function == 'tanh':
            act = Tanh
        elif activation_function == 'sigmoid':
            act = Logistic
        else:
            act = Softplus
        layers_act = [act('layer_%d' % i) for i in range(len(hidden_size))]
        NADE_CF_model = tabula_NADE(activations=layers_act,
                                    input_dim0=user_num,
                                    input_dim1=rating_category,
                                    other_dims=hidden_size,
                                    batch_size=batch_size,
                                    weights_init=Uniform(std=0.05),
                                    biases_init=Constant(0.0))
        NADE_CF_model.push_initialization_config()
        dims = [user_num] + hidden_size + [user_num]
        linear_layers = [
            layer for layer in NADE_CF_model.children if 'linear' in layer.name
        ]
        assert len(linear_layers) == len(dims) - 1
        for i in range(len(linear_layers)):
            H1 = dims[i]
            H2 = dims[i + 1]
            width = 2 * np.sqrt(6) / np.sqrt(H1 + H2)
            #         std = np.sqrt(2. / dim)
            linear_layers[i].weights_init = Uniform(width=width)
        NADE_CF_model.initialize()
        NADE_CF_model.children[-1].parameters[-1].set_value(
            init_b.astype(theano.config.floatX))
        y = NADE_CF_model.apply(input_ratings_cum)
        y_cum = T.extra_ops.cumsum(y, axis=2)
        predicted_ratings = NDimensionalSoftmax().apply(y_cum, extra_ndim=1)
        d = input_masks.sum(axis=1)
        D = (input_masks + output_masks).sum(axis=1)
        cost, nll, nll_item_ratings, cost_ordinal_1N, cost_ordinal_N1, prob_item_ratings = rating_cost(
            y,
            output_ratings,
            input_masks,
            output_masks,
            D,
            d,
            alpha=alpha,
            std=std)
        cost.name = 'cost'

        cg = ComputationGraph(cost)
        if weight_decay > 0.0:
            all_weights = VariableFilter(roles=[WEIGHT])(cg.variables)
            l2_weights = T.sum([(W**2).sum() for W in all_weights])
            l2_cost = cost + weight_decay * l2_weights
            l2_cost.name = 'l2_decay_' + cost.name
            cg = ComputationGraph(l2_cost)
        if drop_rate > 0.0:
            dropped_layer = VariableFilter(roles=[INPUT],
                                           bricks=NADE_CF_model.children)(
                                               cg.variables)
            dropped_layer = [
                layer for layer in dropped_layer if 'linear' in layer.name
            ]
            dropped_layer = dropped_layer[1:]
            cg_dropout = apply_dropout(cg, dropped_layer, drop_rate)
        else:
            cg_dropout = cg
        training_cost = cg_dropout.outputs[0]
        lr0 = T.scalar(name='learning_rate', dtype=theano.config.floatX)
        input_list = [input_ratings, input_masks, output_ratings, output_masks]
        if optimizer == 'Adam':
            f_get_grad, f_update_parameters, shared_gradients = Adam_optimizer(
                input_list, training_cost, cg_dropout.parameters, lr0, b1, b2,
                epsilon)
        elif optimizer == 'Adadelta':
            f_get_grad, f_update_parameters, shared_gradients = Adadelta_optimizer(
                input_list, training_cost, cg_dropout.parameters, lr, epsilon)
        else:
            f_get_grad, f_update_parameters, shared_gradients = SGD_optimizer(
                input_list, training_cost, cg_dropout.parameters, lr0, b1)

        # collect all parameters of the model's bricks for Polyak averaging
        param_list = []
        for child in NADE_CF_model.children:
            param_list.extend(child.parameters)
        f_update_polyak, shared_polyak = polyak(param_list, mu=polyak_mu)

        f_monitor = theano.function(inputs=[input_ratings],
                                    outputs=[predicted_ratings])
        nb_of_epocs_without_improvement = 0
        best_valid_error = np.Inf
        epoch = 0
        best_model = cp.deepcopy(NADE_CF_model)
        best_polyak = cp.deepcopy(shared_polyak)
        start_training_time = t.time()
        lr_tracer = []
        rate_score = np.array(list(range(1, rating_category + 1)), np.float32)
        best_epoch = -1
        while epoch < n_iter and nb_of_epocs_without_improvement < look_ahead:
            print('Epoch {0}'.format(epoch))
            epoch += 1
            start_time_epoch = t.time()
            cost_train = []
            squared_error_train = []
            n_sample_train = []
            cntt = 0
            train_time = 0
            for batch in train_loop_stream.get_epoch_iterator():

                inp_r, out_r, inp_m, out_m = batch
                train_t = t.time()
                cost_value = f_get_grad(inp_r, inp_m, out_r, out_m)
                train_time += t.time() - train_t
                #             pred_ratings = f_monitor(inp_r)
                if optimizer == 'Adadelta':
                    f_update_parameters()
                else:
                    f_update_parameters(lr)
                f_update_polyak()
                pred_ratings = f_monitor(inp_r)
                true_r = out_r.argmax(axis=2) + 1
                pred_r = (pred_ratings[0] *
                          rate_score[np.newaxis, np.newaxis, :]).sum(axis=2)
                pred_r[:, new_items] = 3
                mask = out_r.sum(axis=2)
                se = np.sum(np.square(true_r - pred_r) * mask)
                n = np.sum(mask)
                squared_error_train.append(se)
                n_sample_train.append(n)
                cost_train.append(cost_value)
                cntt += 1

            cost_train = np.array(cost_train).mean()
            squared_error_ = np.array(squared_error_train).sum()
            n_samples = np.array(n_sample_train).sum()
            train_RMSE = np.sqrt(squared_error_ / (n_samples * 1.0 + 1e-8))

            print('\tTraining   ...')
            print('Train     :', "RMSE: {0:.6f}".format(train_RMSE),
                  " Cost Error: {0:.6f}".format(cost_train),
                  "Train Time: {0:.6f}".format(train_time),
                  get_done_text(start_time_epoch))

            print('\tValidating ...')
            start_time = t.time()
            squared_error_valid = []
            n_sample_valid = []
            valid_time = 0
            for batch in valid_monitor_stream.get_epoch_iterator():
                inp_r, out_r, inp_m, out_m = batch
                valid_t = t.time()
                pred_ratings = f_monitor(inp_r)
                valid_time += t.time() - valid_t
                true_r = out_r.argmax(axis=2) + 1
                pred_r = (pred_ratings[0] *
                          rate_score[np.newaxis, np.newaxis, :]).sum(axis=2)

                pred_r[:, new_items] = 3
                mask = out_r.sum(axis=2)
                se = np.sum(np.square(true_r - pred_r) * mask)
                n = np.sum(mask)
                squared_error_valid.append(se)
                n_sample_valid.append(n)

            squared_error_ = np.array(squared_error_valid).sum()
            n_samples = np.array(n_sample_valid).sum()
            valid_RMSE = np.sqrt(squared_error_ / (n_samples * 1.0 + 1e-8))
            print('Validation:', " RMSE: {0:.6f}".format(valid_RMSE),
                  "Valid Time: {0:.6f}".format(valid_time),
                  get_done_text(start_time))
            if valid_RMSE < best_valid_error:
                best_epoch = epoch
                nb_of_epocs_without_improvement = 0
                best_valid_error = valid_RMSE
                del best_model
                del best_polyak
                gc.collect()

                best_model = cp.deepcopy(NADE_CF_model)
                best_polyak = cp.deepcopy(shared_polyak)
                print('\n\n Got a good one')
            else:
                nb_of_epocs_without_improvement += 1
                if optimizer == 'Adadelta':
                    pass
                elif nb_of_epocs_without_improvement == look_ahead and lr > 1e-5:
                    nb_of_epocs_without_improvement = 0
                    lr /= 4
                    print("learning rate is now %s" % lr)
            lr_tracer.append(lr)

        print('\n### Training, n_layers=%d' % (len(hidden_size)),
              get_done_text(start_training_time))

        best_y = best_model.apply(input_ratings_cum)
        best_y_cum = T.extra_ops.cumsum(best_y, axis=2)
        best_predicted_ratings = NDimensionalSoftmax().apply(best_y_cum,
                                                             extra_ndim=1)
        self.f_monitor_best = theano.function(inputs=[input_ratings],
                                              outputs=[best_predicted_ratings])
        self.best_valid_error = best_valid_error
        self.best_epoch = best_epoch
        self.best_model = best_model
        self.best_polyak = best_polyak
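
A minimal usage sketch (not part of the collected examples): assuming a fitted instance `model` of the class above and a test batch `inp_r` of shape (batch, n_items, rating_category), the stored `f_monitor_best` function returns the per-rating probabilities of the best model, which can be turned into expected ratings the same way the training loop does.

import numpy as np

def expected_ratings(model, inp_r, rating_category=5):
    # hypothetical helper, not from the original example:
    # f_monitor_best returns a one-element list of probabilities with
    # shape (batch, n_items, rating_category)
    probs = model.f_monitor_best(inp_r)[0]
    # weight each rating level 1..rating_category by its probability
    rate_score = np.arange(1, rating_category + 1, dtype=np.float32)
    return (probs * rate_score[np.newaxis, np.newaxis, :]).sum(axis=2)
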
    def __init__(self, initial_output=0, **kwargs):
        super(SoftmaxEmitter, self).__init__(**kwargs)
        self.initial_output = initial_output
        self.softmax = NDimensionalSoftmax()
        self.children = [self.softmax]
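
A minimal sketch (not one of the collected examples) of the recurring `NDimensionalSoftmax().apply(..., extra_ndim=1)` pattern used above: the brick flattens the leading `extra_ndim` axes, applies a softmax over the last axis, and restores the original shape, so every (batch, item) row becomes a probability distribution.

import numpy as np
import theano
import theano.tensor as T
from blocks.bricks import NDimensionalSoftmax

x = T.tensor3('x')  # (batch, items, categories)
y = NDimensionalSoftmax().apply(x, extra_ndim=1)
f = theano.function([x], y)
probs = f(np.random.randn(2, 4, 5).astype(theano.config.floatX))
assert np.allclose(probs.sum(axis=2), 1.0)  # one distribution per (batch, item)
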
Example #30
0
# NOTE: the opening of this snippet is truncated in the source; the head below is a
# plausible reconstruction (assuming toolz.interleave and Blocks' Convolutional brick).
conv_layers = list(interleave([
        (Convolutional(filter_size=filter_size, num_filters=num_filter,
                       name='conv_{}'.format(i))
         for i, (filter_size, num_filter) in enumerate(conv_parameters)),
        (BatchNormalization(name='batch_{}'.format(i))
         for i, _ in enumerate(conv_parameters)),
        (Rectifier() for i, (f_size, num_f) in enumerate(conv_parameters)),
        (MaxPooling(size, name='pool_{}'.format(i))
         for i, size in enumerate(pooling_sizes))
    ]))

# Create the convolutional sequence
conv_sequence = ConvolutionalSequence(conv_layers,
                                      num_channels,
                                      image_size=image_shape,
                                      use_bias=False)
# Flatten the convolutional output and apply the softmax
out = Flattener().apply(conv_sequence.apply(x))
predict = NDimensionalSoftmax().apply(out)

# Get the test stream
from fuel.datasets.dogs_vs_cats import DogsVsCats
from fuel.streams import DataStream, ServerDataStream
from fuel.schemes import ShuffledScheme, SequentialExampleScheme
from fuel.transformers.image import RandomFixedSizeCrop, MinimumImageDimensions, MaximumImageDimensions, Random2DRotation
from fuel.transformers import Flatten, Cast, ScaleAndShift
size = (128, 128)
cats = DogsVsCats(('test', ))
stream = DataStream.default_stream(cats,
                                   iteration_scheme=SequentialExampleScheme(
                                       cats.num_examples))
stream_upscale = MaximumImageDimensions(stream,
                                        size,
                                        which_sources=('image_features', ))