Example #1
    def get_updates(self, params, cost):
        grads = self.get_grads(params, cost)

        updates = OrderedDict()
        t_prev = shared(np.asarray(0., dtype=get_dtype()))
        # Using theano constant to prevent upcasting of float32
        one = tensor.constant(1)
        t = t_prev + 1
        # bias-corrected step size: lr * sqrt(1 - beta2^t) / (1 - beta1^t)
        a_t = self.learning_rate * tensor.sqrt(one - self.beta2**t) / (
            one - self.beta1**t)

        for param, g_t in zip(params, grads):
            value = param.get_value(borrow=True)
            m_prev = shared(np.zeros(value.shape, dtype=value.dtype),
                            broadcastable=param.broadcastable)
            v_prev = shared(np.zeros(value.shape, dtype=value.dtype),
                            broadcastable=param.broadcastable)

            m_t = self.beta1 * m_prev + (one - self.beta1) * g_t  # biased first moment
            v_t = self.beta2 * v_prev + (one - self.beta2) * g_t**2  # biased second moment
            step = a_t * m_t / (tensor.sqrt(v_t) + self.epsilon)

            updates[m_prev] = m_t
            updates[v_prev] = v_t
            updates[param] = param - step

        updates[t_prev] = t

        updates.update(self.get_lr_updates())
        return updates
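
The graph above implements the standard Adam update: exponentially decaying first- and second-moment estimates, with the bias correction folded into the step size. A minimal NumPy sketch of the same rule, with illustrative hyperparameter defaults:

import numpy as np

def adam_step(param, grad, m, v, t, lr=0.001, beta1=0.9, beta2=0.999, eps=1e-8):
    # one Adam update in plain NumPy, mirroring the symbolic graph above
    t += 1
    m = beta1 * m + (1 - beta1) * grad     # biased first moment
    v = beta2 * v + (1 - beta2) * grad**2  # biased second moment
    a_t = lr * np.sqrt(1 - beta2**t) / (1 - beta1**t)  # bias-corrected step size
    param = param - a_t * m / (np.sqrt(v) + eps)
    return param, m, v, t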
Example #2
    def get_updates(self, params, cost):
        grads = self.get_grads(params, cost)

        t_prev = shared(np.asarray(0., dtype=get_dtype()))
        updates = OrderedDict()

        # Using theano constant to prevent upcasting of float32
        one = tensor.constant(1)

        t = t_prev + 1
        # in Adamax only the first moment needs bias correction
        a_t = self.learning_rate / (one - self.beta1**t)

        for param, g_t in zip(params, grads):
            value = param.get_value(borrow=True)
            m_prev = shared(np.zeros(value.shape, dtype=value.dtype),
                            broadcastable=param.broadcastable)
            u_prev = shared(np.zeros(value.shape, dtype=value.dtype),
                            broadcastable=param.broadcastable)

            m_t = self.beta1 * m_prev + (one - self.beta1) * g_t  # first moment, as in Adam
            u_t = tensor.maximum(self.beta2 * u_prev, abs(g_t))  # infinity-norm accumulator
            step = a_t * m_t / (u_t + self.epsilon)

            updates[m_prev] = m_t
            updates[u_prev] = u_t
            updates[param] = param - step

        updates[t_prev] = t

        updates.update(self.get_lr_updates())
        return updates
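
Example #2 is Adamax: the second-moment estimate of Adam is replaced by an exponentially weighted infinity norm, so only the first moment needs bias correction. The same step as a NumPy sketch (defaults are illustrative):

import numpy as np

def adamax_step(param, grad, m, u, t, lr=0.002, beta1=0.9, beta2=0.999, eps=1e-8):
    t += 1
    m = beta1 * m + (1 - beta1) * grad       # first moment, as in Adam
    u = np.maximum(beta2 * u, np.abs(grad))  # infinity-norm accumulator
    a_t = lr / (1 - beta1**t)                # bias-correct the first moment only
    param = param - a_t * m / (u + eps)
    return param, m, u, t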
Example #3
    def sample(self, size):
        # flatten trailing dimensions, then orthogonalize a Gaussian matrix via SVD
        flat_shape = (size[0], np.prod(size[1:]))
        a = get_rng().normal(loc=0., scale=1., size=flat_shape)
        u, _, v = np.linalg.svd(a, full_matrices=False)
        # pick whichever factor matches the flattened target shape
        q = u if u.shape == flat_shape else v
        value = q.reshape(size).astype(get_dtype())
        return value
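
The SVD factor is orthonormal, which is what makes this an orthogonal initializer; a quick self-contained check (the (8, 4) shape is arbitrary):

import numpy as np

rng = np.random.default_rng(0)
a = rng.normal(size=(8, 4))
u, _, v = np.linalg.svd(a, full_matrices=False)
q = u if u.shape == (8, 4) else v
assert np.allclose(q.T @ q, np.eye(4))  # columns are orthonormal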
Example #4
    def __init__(self,
                 learning_rate=0.001,
                 decay=0.,
                 clip_norm=0.,
                 max_norm=0.):
        # keep the learning rate in a shared variable so it can be decayed in place
        self.learning_rate = shared(np.cast[get_dtype()](learning_rate))
        self.decay = decay
        self.clip_norm = clip_norm
        self.max_norm = max_norm
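
Storing the learning rate in a shared variable, rather than a plain float, is what lets get_lr_updates() adjust it inside the compiled graph; it can also be changed from Python between calls. A hypothetical usage sketch, where `optimizer` is an assumed instance and not defined above:

lr = optimizer.learning_rate  # a theano shared variable
print(lr.get_value())         # read the current value as a numpy scalar
lr.set_value(np.cast[get_dtype()](lr.get_value() * 0.5))  # halve it in place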
Example #5
    def sample(self, size):
        value = get_rng().normal(loc=0.0, scale=self.scale, size=size)
        value = value.astype(get_dtype())
        return value
Example #6
    def sample(self, size):
        value = get_rng().uniform(-self.scale, self.scale, size=size)
        value = value.astype(get_dtype())
        return value
Example #7
    def sample(self, size):
        value = np.ones(size, dtype=get_dtype())
        return value
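
Examples #5 through #7 differ only in the distribution they draw from. All three patterns side by side in plain NumPy (scale and shape chosen arbitrarily, float32 standing in for get_dtype()):

import numpy as np

rng = np.random.default_rng(0)
scale, size = 0.05, (3, 4)
w_normal = rng.normal(loc=0.0, scale=scale, size=size).astype('float32')
w_uniform = rng.uniform(-scale, scale, size=size).astype('float32')
w_ones = np.ones(size, dtype='float32')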
Example #8
    def build(self):
        # load vocabulary
        with open(self.vocab_path, 'rb') as fin:
            vocabs_freqs = pickle.load(fin)
        threshold = 1 if self.threshold is None else self.threshold

        sorted_vocab = [word for word, freq in sorted(vocabs_freqs.items()) if freq >= threshold]
        sorted_vocab.append(_UNKNOWN)
        sorted_vocab.append(_ZERO)
        self.idx_to_vocab = {i: vocab for i, vocab in enumerate(sorted_vocab)}
        self.vocab_to_idx = {vocab: i for i, vocab in enumerate(sorted_vocab)}

        pkl_path = "./f_data/prefix-{}-thre-{}-valid-{}-test-{}-total-{}.pkl".format(
            self.prefix, self.threshold, self.valid_split, self.test_split, self.total_len)
        print("Building data ...")

        if os.path.exists(pkl_path):
            with open(pkl_path, 'rb') as fin:
                self.all_xs, self.all_ys, \
                self._train_start, self._train_end, \
                self._valid_start, self._valid_end, \
                self._test_start, self._test_end = pickle.load(fin)
        else:
            # load xs in index
            all_xs = []
            remove_idxs = []
            with open(os.path.join(os.getcwd(), self.xs_path), encoding='utf-8') as fin:
                i = 0
                for line in fin:
                    sentences = [sent.strip().split() for sent in line.strip().split("\t")]
                    if len(sentences) != 2:
                        print("Not A Pair: {}".format(line))
                        remove_idxs.append(i)
                    else:
                        all_xs.append(sentences)
                        i += 1
                        if i == self.total_len:
                            break
            self.all_xs = all_xs

            # load ys
            with open(os.path.join(os.getcwd(), self.ys_path), 'rb') as fin:
                idx_all_ys = pickle.load(fin)
                if self.total_len > 0:
                    idx_all_ys = idx_all_ys[:self.total_len]
                # each index was recorded against the already-filtered count, so
                # popping in ascending order stays aligned as the list shrinks
                for i in remove_idxs:
                    idx_all_ys.pop(i)
                idx_all_ys = np.asarray(idx_all_ys, dtype='int32')

            # one-hot encode the binary labels (column i is 1 where the label is i)
            self.all_ys = np.zeros((idx_all_ys.shape[0], 2), dtype=get_dtype())
            for i in range(2):
                self.all_ys[idx_all_ys == i, i] = 1

            if self.total_len == -1:
                self.total_len = len(all_xs)

            # shuffle data
            self.shuffle_data(self.all_xs, self.all_ys)

            # the start and end of the valid and test splits
            valid_len = int(self.total_len * self.valid_split)
            test_len = int(self.total_len * self.test_split)
            train_len = int(self.total_len * (1 - self.valid_split - self.test_split))
            self._train_start, self._train_end = 0, train_len
            self._valid_start, self._valid_end = train_len, train_len + valid_len
            self._test_start, self._test_end = train_len + valid_len, train_len + valid_len + test_len

            # pickle
            with open(pkl_path, 'wb') as fout:
                dump_contents = [self.all_xs, self.all_ys,
                                 self._train_start, self._train_end,
                                 self._valid_start, self._valid_end,
                                 self._test_start, self._test_end]
                pickle.dump(dump_contents, fout)

        assert len(self.all_xs) == len(self.all_ys)
        self.index_to_tag = self.get_index_to_tag()
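
The label handling in the middle of build() is a manual one-hot encoding of the two classes; isolated on toy data (float32 standing in for get_dtype()):

import numpy as np

idx_all_ys = np.asarray([0, 1, 1, 0], dtype='int32')
all_ys = np.zeros((idx_all_ys.shape[0], 2), dtype='float32')
for i in range(2):
    all_ys[idx_all_ys == i, i] = 1
# all_ys is now [[1, 0], [0, 1], [0, 1], [1, 0]]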
Example #9
    def build(self, **kwargs):
        assert self.comp_objective is not None
        assert self.comp_optimizer is not None

        # random seed
        if self.seed:
            set_seed(self.seed)

        # forward
        train_prob_ys, train_ys, train_loss = self._forward(True)
        if self.train_test_split:
            predict_prob_ys, predict_ys, predict_loss = self._forward(False)
        else:
            predict_prob_ys, predict_ys, predict_loss = train_prob_ys, train_ys, train_loss

        # regularizers
        regularizers = []
        for layer in self.comp_layers:
            regularizers.extend(layer.regularizers)
        regularizer_loss = tensor.cast(tensor.sum(regularizers), get_dtype())

        # total loss
        total_train_losses = regularizer_loss + train_loss

        # params
        params = []
        for layer in self.comp_layers:
            params += layer.params

        # layer updates
        layer_updates = OrderedDict()
        for layer in self.comp_layers:
            layer_updates.update(layer.updates)

        # model updates
        updates = self.comp_optimizer(params, total_train_losses)
        updates.update(layer_updates)

        # inputs
        if is_iterable(self.input_tensor):
            inputs = list(self.input_tensor) + [self.output_tensor]
        else:
            inputs = [self.input_tensor, self.output_tensor]
        train_outputs = [train_ys]

        # train functions
        for metric in self.train_metrics:
            if isinstance(metric, metrics.Regularizer):
                train_outputs.append(regularizer_loss)
            elif isinstance(metric, metrics.Loss):
                train_outputs.append(train_loss)
            elif isinstance(metric, metrics.TotalLoss):
                train_outputs.append(total_train_losses)
            else:
                train_outputs.append(metric(train_prob_ys, self.output_tensor))
        self.train_func_for_eval = function(inputs=inputs,
                                            outputs=train_outputs,
                                            updates=updates)

        # test functions
        test_outputs = [predict_ys]
        for metric in self.predict_metrics:
            if isinstance(metric, metrics.Loss):
                test_outputs.append(predict_loss)
            else:
                test_outputs.append(metric(predict_prob_ys,
                                           self.output_tensor))
        self.predict_func_for_eval = function(inputs=inputs,
                                              outputs=test_outputs)
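
Once build() has run, both compiled functions take the same inputs (the input batch plus the target batch) and return the predictions followed by one value per configured metric. A hypothetical driver loop; model, iterate_minibatches, and the data arrays are assumptions, not part of the class above:

# sketch only: iterate_minibatches and the arrays are assumed helpers
for x_batch, y_batch in iterate_minibatches(train_x, train_y, batch_size=32):
    train_ys, *metric_values = model.train_func_for_eval(x_batch, y_batch)
predict_ys, *test_metrics = model.predict_func_for_eval(test_x, test_y)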