Ejemplo n.º 1
0
    def fit(self,
            im,
            om,
            generator,
            cell=LSTMCell,
            n_hidden=128,
            n_history=0,
            squeeze=None,
            activation=None,
            lr=0.01,
            epoch=10,
            n_iter=128,
            batch_size=64,
            optimizer="Adam",
            eps=1e-8,
            verbose=1):
        """Build the recurrent graph and train it on batches from ``generator``.

        :param im         : forwarded to ``self._define_input``, stored as ``self._im``
        :param om         : forwarded to ``self._define_input``, stored as ``self._om``
        :param generator  : object whose ``gen(batch_size)`` returns ``(x_batch, y_batch)``
        :param cell       : callable that builds the RNN cell from ``n_hidden``
        :param n_hidden   : argument handed to ``cell``
        :param n_history  : forwarded to ``self._get_output``
        :param squeeze    : when truthy, ``self._squeeze`` is set to True
        :param activation : stored as ``self._activation`` when it is callable
        :param lr         : learning rate handed to the optimizer factory
        :param epoch      : number of epochs
        :param n_iter     : number of training steps per epoch
        :param batch_size : batch size requested from the generator
        :param optimizer  : optimizer name resolved through ``OptFactory``
        :param eps        : forwarded to ``self._get_loss``
        :param verbose    : >= 1 calls ``self._verbose()`` after every epoch,
                            >= 2 additionally drives the progress bars
        """
        if squeeze:
            self._squeeze = True
        if callable(activation):
            self._activation = activation
        self._generator = generator
        self._im, self._om = im, om
        self._optimizer = OptFactory().get_optimizer_by_name(optimizer, lr)
        self._define_input(im, om)

        # Graph construction: cell -> zero state sized by the batch -> unrolled RNN.
        rnn_cell = cell(n_hidden)
        batch_dim = tf.shape(self._input)[0]
        zero_state = rnn_cell.zero_state(batch_dim, tf.float32)
        outputs, final_state = tf.nn.dynamic_rnn(
            rnn_cell, self._input, initial_state=zero_state)
        self._get_output(outputs, final_state, n_history)
        loss = self._get_loss(eps)
        train_step = self._optimizer.minimize(loss)

        self._log["iter_err"] = []
        self._log["epoch_err"] = []
        self._sess.run(tf.global_variables_initializer())

        show_bars = verbose >= 2
        epoch_bar = ProgressBar(max_value=epoch, name="Epoch", start=False)
        if show_bars:
            epoch_bar.start()
        for _epoch_idx in range(epoch):
            running_err = 0
            iter_bar = ProgressBar(max_value=n_iter, name="Iter", start=False)
            if show_bars:
                iter_bar.start()
            for _iter_idx in range(n_iter):
                x_batch, y_batch = self._generator.gen(batch_size)
                feed = {self._tfx: x_batch, self._tfy: y_batch}
                # Only the loss value is kept; the train op result is discarded.
                batch_err = self._sess.run([loss, train_step], feed)[0]
                self._log["iter_err"].append(batch_err)
                running_err += batch_err
                if show_bars:
                    iter_bar.update()
            self._log["epoch_err"].append(running_err / n_iter)
            if verbose >= 1:
                self._verbose()
            if show_bars:
                epoch_bar.update()
Ejemplo n.º 2
0
 def fit(self, x, n_clusters=None, epoch=None, norm=None, animation_params=None):
     """Run Lloyd-style k-means iterations on ``x`` and record animation frames.

     :param x                : samples; promoted to at least 2-D
     :param n_clusters       : number of centres; default ``self._params["n_clusters"]``
     :param epoch            : maximum number of iterations; default ``self._params["epoch"]``
     :param norm             : when given, stored in ``self._params["norm"]``
     :param animation_params : forwarded to ``self._get_animation_params``

     The centres end up in ``self._centers`` and the number of completed
     update steps in ``self._counter``.
     """
     if n_clusters is None:
         n_clusters = self._params["n_clusters"]
     if epoch is None:
         epoch = self._params["epoch"]
     if norm is not None:
         self._params["norm"] = norm
     *animation_properties, animation_params = self._get_animation_params(animation_params)
     x = np.atleast_2d(x)
     # Column vector of cluster ids; ``labels == arange`` then yields one
     # boolean membership mask per cluster.
     arange = np.arange(n_clusters)[..., None]
     x_high_dim, labels_cache, counter = x[:, None, ...], None, 0
     # Initial centres are randomly chosen samples.
     self._centers = x[np.random.permutation(len(x))[:n_clusters]]
     bar = ProgressBar(max_value=epoch, name="KMeans")
     ims = []
     for i in range(epoch):
         labels = self.predict(x_high_dim, high_dim=True)
         # Converged once the assignment no longer changes between iterations.
         if labels_cache is not None and np.all(labels_cache == labels):
             bar.update(epoch)
             break
         labels_cache = labels
         for j, members in enumerate(labels == arange):
             # The mean of an empty cluster is NaN, which would corrupt the
             # centre for good; keep the previous centre in that case.
             if members.any():
                 self._centers[j] = np.average(x[members], axis=0)
         counter += 1
         animation_params["extra"] = self._centers
         self._handle_animation(i, x, labels, ims, animation_params, *animation_properties)
         bar.update()
     self._counter = counter
     self._handle_mp4(ims, animation_properties)
Ejemplo n.º 3
0
 def fit(self, x, y, sample_weight=None, c=None, lr=None, epoch=None, tol=None):
     """Train the linear SVM by repeatedly correcting the worst-violating sample.

     :param x             : samples; promoted to at least 2-D
     :param y             : targets, multiplied with the raw predictions
     :param sample_weight : per-sample weights; default ``self._params["sw"]``,
                            uniform ones when that is None as well
     :param c             : scale of the correction step; default ``self._params["c"]``
     :param lr            : learning rate; default ``self._params["lr"]``
     :param epoch         : maximum number of updates; default ``self._params["epoch"]``
     :param tol           : stop once the largest weighted error is <= tol;
                            default ``self._params["tol"]``
     """
     sample_weight = self._params["sw"] if sample_weight is None else sample_weight
     c = self._params["c"] if c is None else c
     lr = self._params["lr"] if lr is None else lr
     epoch = self._params["epoch"] if epoch is None else epoch
     tol = self._params["tol"] if tol is None else tol

     x, y = np.atleast_2d(x), np.asarray(y)
     n_samples = len(y)
     if sample_weight is None:
         sample_weight = np.ones(n_samples)
     else:
         sample_weight = np.asarray(sample_weight) * n_samples

     self._w = np.zeros(x.shape[1])
     self._b = 0
     bar = ProgressBar(max_value=epoch, name="LinearSVM")
     for _step in range(epoch):
         weighted_err = (1 - self.predict(x, get_raw_results=True) * y) * sample_weight
         # Shuffle before argmax so ties are broken at random.
         order = np.random.permutation(n_samples)
         worst = order[np.argmax(weighted_err[order])]
         if weighted_err[worst] <= tol:
             bar.update(epoch)
             break
         step = lr * c * y[worst] * sample_weight[worst]
         self._w *= 1 - lr
         self._w += step * x[worst]
         self._b += step
         bar.update()
Ejemplo n.º 4
0
 def _get_prediction(self, x, name=None, batch_size=1e6, verbose=None):
     """Predict ``x`` in chunks so that no chunk exceeds ``batch_size`` elements.

     :param x          : array of samples, sliced along its first axis
     :param name       : optional label appended to the progress-bar title
     :param batch_size : upper bound on elements (samples * per-sample size) per chunk
     :param verbose    : verbosity level; default ``self.verbose``
     :return           : the last entry of ``self._get_activations`` for a
                         single chunk, otherwise the chunk results stacked
                         with ``np.vstack``
     """
     if verbose is None:
         verbose = self.verbose
     # Samples per chunk; always at least one so progress is guaranteed.
     single_batch = max(1, int(batch_size / np.prod(x.shape[1:])))
     n_samples = len(x)
     if single_batch >= n_samples:
         return self._get_activations(x, predict=True).pop()
     # Ceiling division: the number of chunks the loop below really runs.
     epoch = -(-n_samples // single_batch)
     name = "Prediction" if name is None else "Prediction ({})".format(name)
     sub_bar = ProgressBar(min_value=0, max_value=epoch, name=name)
     show_bar = verbose >= NNVerbose.METRICS
     if show_bar:
         sub_bar.start()
     rs = []
     for start in range(0, n_samples, single_batch):
         chunk = x[start:start + single_batch]
         rs.append(self._get_activations(chunk, predict=True).pop())
         if show_bar:
             sub_bar.update()
     return np.vstack(rs)
Ejemplo n.º 5
0
 def opt(self, epoch=None, eps=None):
     """
     Main procedure of opt
     :param epoch : Maximum iteration ; default: self._params["epoch"]
     :param eps   : Tolerance         ; default: self._params["eps"]
     :return      : x*, f*, n_iter, feva
     """
     if epoch is None:
         epoch = self._params["epoch"]
     if eps is None:
         eps = self._params["eps"]
     self._func.refresh_cache(self._x)
     self._loss_cache, self._grad_cache = self.func(0), self.func(1)
     bar = ProgressBar(max_value=epoch, name="Opt")
     bar.start()
     for _ in range(epoch):
         self.iter += 1
         with warnings.catch_warnings():
             # Promote warnings to exceptions so that numerical trouble
             # raised as RuntimeWarning stops the iteration.
             warnings.filterwarnings("error")
             try:
                 # A truthy result from ``_core`` ends the iteration.
                 if self._core(eps):
                     break
                 self.log.append(self._loss_cache)
             # ``np.linalg.LinAlgError`` is the public name; going through
             # ``np.linalg.linalg`` is deprecated and would itself warn
             # (hence raise) under the "error" filter above.
             except (RuntimeWarning, np.linalg.LinAlgError) as err:
                 print("\n", err, "\n")
                 break
         bar.update()
     bar.update()
     bar.terminate()
     return self._x, self._loss_cache, self.iter, self.feva
Ejemplo n.º 6
0
 def fit(self,
         x,
         y,
         sample_weight=None,
         tree=None,
         epoch=None,
         feature_bound=None,
         **kwargs):
     """Grow ``epoch`` trees, each on a bootstrap resample of ``(x, y)``.

     :param x             : samples; promoted to at least 2-D
     :param y             : targets
     :param sample_weight : optional per-sample weights; default ``self._params["sw"]``
     :param tree          : key into ``RandomForest._cvd_trees``; default ``self._params["tree"]``
     :param epoch         : number of trees; default ``self._params["epoch"]``
     :param feature_bound : forwarded to every tree's ``fit``;
                            default ``self._params["feature_bound"]``
     :param kwargs        : forwarded to the tree constructor
     """
     if sample_weight is None:
         sample_weight = self._params["sw"]
     if tree is None:
         tree = self._params["tree"]
     if epoch is None:
         epoch = self._params["epoch"]
     if feature_bound is None:
         feature_bound = self._params["feature_bound"]
     x, y = np.atleast_2d(x), np.asarray(y)
     if sample_weight is not None:
         # Index-array selection needs an ndarray, and the in-place
         # normalisation below needs a floating dtype.
         sample_weight = np.asarray(sample_weight, dtype=float)
     n_sample = len(y)
     self._tree = tree
     bar = ProgressBar(max_value=epoch, name="RF")
     for _ in range(epoch):
         tmp_tree = RandomForest._cvd_trees[tree](**kwargs)
         # Bootstrap: draw n_sample indices with replacement.
         _indices = np.random.randint(n_sample, size=n_sample)
         if sample_weight is None:
             _local_weight = None
         else:
             # Fancy indexing copies, so normalising in place leaves the
             # original weights untouched.
             _local_weight = sample_weight[_indices]
             _local_weight /= _local_weight.sum()
         tmp_tree.fit(x[_indices],
                      y[_indices],
                      sample_weight=_local_weight,
                      feature_bound=feature_bound)
         self._trees.append(deepcopy(tmp_tree))
         bar.update()
Ejemplo n.º 7
0
def main(clf):
    """Run one shuffled 10-fold cross-validation of ``clf`` on the text dataset.

    :param clf : classifier exposing ``fit``, ``predict`` and ``acc``
    :return    : ``(accuracies, results)`` where ``accuracies`` holds one
                 score per fold and ``results`` one ``[y_test, y_pred]``
                 pair per fold
    """
    dat_path = os.path.join("_Data", "dataset.dat")
    gen_dataset(dat_path)
    # NOTE(review): pickle.load can execute arbitrary code; this is only
    # acceptable if dataset.dat is always produced locally (presumably by
    # gen_dataset above) -- confirm.
    with open(dat_path, "rb") as _file:
        x, y = pickle.load(_file)
    x = [" ".join(sentence) for sentence in x]
    _indices = np.random.permutation(len(x))
    x = list(np.array(x)[_indices])
    y = list(np.array(y)[_indices])
    data_len = len(x)
    batch_size = math.ceil(data_len * 0.1)
    _acc_lst, y_results = [], []
    bar = ProgressBar(max_value=10, name=str(clf))
    bar.start()
    for i in range(10):
        fold_start = i * batch_size
        # The last fold absorbs whatever is left of the data.
        fold_stop = (i + 1) * batch_size if i != 9 else data_len
        x_train = x[:fold_start] + x[(i + 1) * batch_size:]
        y_train = y[:fold_start] + y[(i + 1) * batch_size:]
        x_test, y_test = x[fold_start:fold_stop], y[fold_start:fold_stop]
        count_vec = CountVectorizer()
        tfidf_transformer = TfidfTransformer()
        x_train = tfidf_transformer.fit_transform(
            count_vec.fit_transform(x_train))
        # The test fold must pass through the same fitted count -> tf-idf
        # pipeline as the training fold; otherwise the classifier is
        # trained on tf-idf features but evaluated on raw counts.
        x_test = tfidf_transformer.transform(count_vec.transform(x_test))
        clf.fit(x_train, y_train)
        y_pred = clf.predict(x_test)
        _acc_lst.append(clf.acc(y_test, y_pred))
        y_results.append([y_test, y_pred])
        bar.update()
    return _acc_lst, y_results
Ejemplo n.º 8
0
 def fit(self, x, y, sample_weight=None, lr=None, epoch=None):
     """Train the perceptron by correcting one misclassified sample per step.

     :param x             : samples; promoted to at least 2-D
     :param y             : targets compared against ``self.predict(x)``
     :param sample_weight : per-sample weights; default ``self._params["sw"]``,
                            uniform ones when that is None as well
     :param lr            : learning rate; default ``self._params["lr"]``
     :param epoch         : maximum number of updates; default ``self._params["epoch"]``
     """
     sample_weight = self._params["sw"] if sample_weight is None else sample_weight
     lr = self._params["lr"] if lr is None else lr
     epoch = self._params["epoch"] if epoch is None else epoch

     x, y = np.atleast_2d(x), np.asarray(y)
     n_samples = len(y)
     if sample_weight is None:
         sample_weight = np.ones(n_samples)
     else:
         sample_weight = np.asarray(sample_weight) * n_samples

     self._w = np.zeros(x.shape[1])
     self._b = 0
     bar = ProgressBar(max_value=epoch, name="Perceptron")
     bar.start()
     for _step in range(epoch):
         y_pred = self.predict(x)
         weighted_miss = (y_pred != y) * sample_weight
         # Shuffle before argmax so ties are broken at random.
         order = np.random.permutation(n_samples)
         pick = order[np.argmax(weighted_miss[order])]
         # If even the heaviest candidate is classified correctly, stop.
         if y_pred[pick] == y[pick]:
             bar.update(epoch)
             break
         step = lr * y[pick] * sample_weight[pick]
         self._w += step * x[pick]
         self._b += step
         bar.update()
Ejemplo n.º 9
0
def run(clf):
    """Repeat ``main`` ten times for the classifier named ``clf`` and report.

    :param clf : "Naive Bayes", "Non-linear SVM", or anything else for the
                 linear SVM

    Shows a box plot of the accuracies (in percent), prints the
    classification report of the single best fold, and prints the mean and
    variance of the run with the highest average accuracy.
    """
    def _new_classifier():
        # A fresh classifier is built for every repetition.
        if clf == "Naive Bayes":
            return SKMultinomialNB(alpha=0.1)
        if clf == "Non-linear SVM":
            return SKSVM()
        return SKLinearSVM()

    acc_records, y_records = [], []
    bar = ProgressBar(max_value=10, name="Main")
    bar.start()
    for _repeat in range(10):
        outcome = main(_new_classifier())
        acc_records.append(outcome[0])
        y_records.extend(outcome[1])
        bar.update()
    acc_records = np.array(acc_records) * 100

    plt.figure()
    plt.boxplot(acc_records, vert=False, showmeans=True)
    plt.show()

    from Util.DataToolkit import DataToolkit
    # Flat argmax over all accuracies; used directly as an index into the
    # flat list of per-fold results.
    idx = np.argmax(acc_records)  # type: int
    best_true = y_records[idx][0]
    best_pred = y_records[idx][1]
    label_names = np.load(os.path.join("_Data", "LABEL_DIC.npy"))
    report = metrics.classification_report(best_true,
                                           best_pred,
                                           target_names=label_names)
    print(report)

    best_run = np.argmax(np.average(acc_records, axis=1))
    toolkit = DataToolkit(acc_records[best_run])
    print("Acc Mean     : {:8.6}".format(toolkit.mean))
    print("Acc Variance : {:8.6}".format(toolkit.variance))
    print("Done")
Ejemplo n.º 10
0
    def _get_prediction(self, x, name=None, batch_size=1e6, verbose=None):
        """Predict ``x`` in chunks so that no chunk exceeds ``batch_size`` elements.

        :param x          : array of samples, sliced along its first axis
        :param name       : optional label appended to the progress-bar title
        :param batch_size : upper bound on elements (samples * per-sample size) per chunk
        :param verbose    : verbosity level; default ``self.verbose``
        :return           : the last entry of ``self._get_activations`` for a
                            single chunk, otherwise the chunk results stacked
                            with ``np.vstack``
        """
        if verbose is None:
            verbose = self.verbose
        # np.prod collapses the per-sample shape into an element count,
        # e.g. (2,) -> 2.
        single_batch = max(1, int(batch_size / np.prod(x.shape[1:])))
        n_samples = len(x)
        if single_batch >= n_samples:
            # Everything fits in one chunk: return the last activation.
            return self._get_activations(x).pop()
        # Ceiling division: the number of chunks the loop below really runs.
        epoch = -(-n_samples // single_batch)
        name = "Prediction" if name is None else "Prediction ({})".format(name)
        sub_bar = ProgressBar(max_value=epoch, name=name, start=False)
        show_bar = verbose >= NNVerbose.METRICS
        if show_bar:
            sub_bar.start()
        rs = []
        # Walk over x in steps of single_batch; the final chunk simply holds
        # whatever is left. Each chunk goes straight to _get_activations:
        # recursing into _get_prediction would return an already-popped
        # result, on which a second .pop() is invalid.
        for start in range(0, n_samples, single_batch):
            chunk = x[start:start + single_batch]
            rs.append(self._get_activations(chunk).pop())
            if show_bar:
                sub_bar.update()
        return np.vstack(rs)
Ejemplo n.º 11
0
 def fit(self, x, y, sample_weight=None, c=None, lr=None, epoch=None, tol=None):
     """Train the TensorFlow linear SVM on the full data set with Adam.

     :param x             : samples, turned into a float32 constant
     :param y             : targets, turned into a float32 constant
     :param sample_weight : per-sample weights; default ``self._params["sw"]``,
                            uniform ones when that is None as well
     :param c             : factor on the L2 penalty of the weights;
                            default ``self._params["c"]``
     :param lr            : Adam learning rate; default ``self._params["lr"]``
     :param epoch         : maximum number of steps; default ``self._params["epoch"]``
     :param tol           : stop once the cost drops below tol;
                            default ``self._params["tol"]``
     """
     sample_weight = self._params["sw"] if sample_weight is None else sample_weight
     c = self._params["c"] if c is None else c
     lr = self._params["lr"] if lr is None else lr
     epoch = self._params["epoch"] if epoch is None else epoch
     tol = self._params["tol"] if tol is None else tol

     if sample_weight is None:
         weight_values = np.ones(len(y))
     else:
         weight_values = np.asarray(sample_weight) * len(y)
     sample_weight = tf.constant(weight_values, dtype=tf.float32, name="sample_weight")
     x = tf.constant(x, dtype=tf.float32)
     y = tf.constant(y, dtype=tf.float32)

     self._w = tf.Variable(np.zeros(x.shape[1]), dtype=tf.float32, name="w")
     self._b = tf.Variable(0., dtype=tf.float32, name="b")
     y_pred = self.predict(x, True, False)
     # Weighted hinge loss plus L2 penalty on the weights.
     hinge = tf.reduce_sum(tf.maximum(1 - y * y_pred, 0) * sample_weight)
     penalty = c * tf.nn.l2_loss(self._w)
     cost = hinge + penalty
     train_step = tf.train.AdamOptimizer(learning_rate=lr).minimize(cost)
     self._sess.run(tf.global_variables_initializer())

     bar = ProgressBar(max_value=epoch, name="TFLinearSVM")
     for _step in range(epoch):
         current_cost = self._sess.run([cost, train_step])[0]
         if current_cost < tol:
             bar.update(epoch)
             break
         bar.update()
Ejemplo n.º 12
0
 def _get_prediction(self, x, name=None, batch_size=1e6, verbose=None):
     """Evaluate ``self._y_pred`` on ``x`` in chunks of bounded size.

     :param x          : array of samples, sliced along its first axis
     :param name       : optional label appended to the progress-bar title
     :param batch_size : upper bound on elements (samples * per-sample size) per chunk
     :param verbose    : verbosity level; default ``self.verbose``
     :return           : the session result for a single chunk, otherwise
                         the chunk results stacked with ``np.vstack``
     """
     if verbose is None:
         verbose = self.verbose
     # Samples per chunk; always at least one so progress is guaranteed.
     single_batch = max(1, int(batch_size / np.prod(x.shape[1:])))
     n_samples = len(x)
     if single_batch >= n_samples:
         return self._sess.run(self._y_pred, {self._tfx: x})
     # Ceiling division: the number of chunks the loop below really runs.
     epoch = -(-n_samples // single_batch)
     name = "Prediction" if name is None else "Prediction ({})".format(name)
     sub_bar = ProgressBar(max_value=epoch, name=name, start=False)
     show_bar = verbose >= NNVerbose.METRICS
     if show_bar:
         sub_bar.start()
     rs = []
     for start in range(0, n_samples, single_batch):
         chunk = x[start:start + single_batch]
         rs.append(self._sess.run(self._y_pred, {self._tfx: chunk}))
         if show_bar:
             sub_bar.update()
     return np.vstack(rs)
Ejemplo n.º 13
0
 def fit(self, x, n_clusters=None, epoch=None, norm=None):
     """Run Lloyd-style k-means iterations on ``x``.

     :param x          : samples; promoted to at least 2-D
     :param n_clusters : number of centres; default ``self._params["n_clusters"]``
     :param epoch      : maximum number of iterations; default ``self._params["epoch"]``
     :param norm       : when given, stored in ``self._params["norm"]``

     The centres end up in ``self._centers`` and the number of completed
     update steps in ``self._counter``.
     """
     if n_clusters is None:
         n_clusters = self._params["n_clusters"]
     if epoch is None:
         epoch = self._params["epoch"]
     if norm is not None:
         self._params["norm"] = norm
     x = np.atleast_2d(x)
     # Column vector of cluster ids; ``labels == arange`` then yields one
     # boolean membership mask per cluster.
     arange = np.arange(n_clusters)[..., None]
     x_high_dim, labels_cache, counter = x[:, None, ...], None, 0
     # Initial centres are randomly chosen samples.
     self._centers = x[np.random.permutation(len(x))[:n_clusters]]
     bar = ProgressBar(max_value=epoch, name="KMeans")
     bar.start()
     for _ in range(epoch):
         labels = self.predict(x_high_dim, high_dim=True)
         # Converged once the assignment equals the previous iteration's.
         if labels_cache is not None and np.all(labels_cache == labels):
             bar.update(epoch)
             break
         # Refresh the cache every iteration; comparing against the very
         # first assignment would almost never detect convergence.
         labels_cache = labels
         for i, members in enumerate(labels == arange):
             # The mean of an empty cluster is NaN, which would corrupt the
             # centre for good; keep the previous centre in that case.
             if members.any():
                 self._centers[i] = np.average(x[members], axis=0)
         counter += 1
         bar.update()
     self._counter = counter
Ejemplo n.º 14
0
 def fit(self, x, y, sample_weight=None, clf=None, epoch=None, eps=None, **kwargs):
     """Train ``epoch`` weak classifiers with AdaBoost re-weighting.

     :param x             : samples; promoted to at least 2-D
     :param y             : targets; multiplied with the predictions in the
                            weight update
     :param sample_weight : initial sample weights; default
                            ``self._params["sample_weight"]``, uniform
                            ``1 / len(y)`` when that is None as well
     :param clf           : key into ``AdaBoost._weak_clf``; default
                            ``self._params["clf"]``, falling back to "Cart"
                            with ``max_depth=1``
     :param epoch         : number of boosting rounds; default ``self._params["epoch"]``
     :param eps           : clamp for the weighted error, kept within
                            ``[eps, 1 - eps]``; default ``self._params["eps"]``
     :param kwargs        : forwarded to the weak classifier constructor
     """
     if sample_weight is None:
         sample_weight = self._params["sample_weight"]
     if clf is None:
         clf = self._params["clf"]
     if epoch is None:
         epoch = self._params["epoch"]
     if eps is None:
         eps = self._params["eps"]
     x, y = np.atleast_2d(x), np.asarray(y)
     if clf is None:
         clf = "Cart"
         kwargs = {"max_depth": 1}
     self._kwarg_cache = kwargs
     self._clf = clf
     if sample_weight is None:
         sample_weight = np.ones(len(y)) / len(y)
     else:
         # Work on a float copy: the weights are rescaled in place every
         # round, which must not leak into the caller's array (or the
         # stored default) and requires a floating dtype.
         sample_weight = np.array(sample_weight, dtype=float)
     bar = ProgressBar(max_value=epoch, name="AdaBoost")
     for _ in range(epoch):
         tmp_clf = AdaBoost._weak_clf[clf](**kwargs)
         tmp_clf.fit(x, y, sample_weight=sample_weight)
         y_pred = tmp_clf.predict(x)
         # Weighted error rate, clamped so the logarithm below stays finite.
         em = min(max((y_pred != y).astype(np.int8).dot(sample_weight[..., None])[0], eps), 1 - eps)
         # Vote of this weak classifier.
         am = 0.5 * log(1 / em - 1)
         sample_weight *= np.exp(-am * y * y_pred)
         sample_weight /= np.sum(sample_weight)
         self._clfs.append(deepcopy(tmp_clf))
         self._clfs_weights.append(am)
         bar.update()
Ejemplo n.º 15
0
 def _get_prediction(self,
                     x,
                     name=None,
                     batch_size=1e6,
                     verbose=None,
                     out_of_sess=False,
                     idx=-1):
     """Predict ``x`` in chunks of bounded size, inside or outside the session.

     :param x           : array of samples, sliced along its first axis
     :param name        : optional label appended to the progress-bar title
     :param batch_size  : upper bound on elements (samples * per-sample size) per chunk
     :param verbose     : verbosity level; default ``self.verbose``
     :param out_of_sess : when True, results are built with ``self.get_rs``
                          and evaluated under ``self._sess.as_default()``;
                          otherwise the prediction tensor is evaluated directly
     :param idx         : -1 uses ``self._y_pred``; any other value is
                          forwarded to ``self.get_rs``
     :return            : the evaluated result for a single chunk, otherwise
                          the chunk results stacked with ``np.vstack``
     """
     if verbose is None:
         verbose = self.verbose
     # Samples per chunk; always at least one so progress is guaranteed.
     single_batch = max(1, int(batch_size / np.prod(x.shape[1:])))
     _y_pred = self._y_pred if idx == -1 else self.get_rs(self._tfx,
                                                          idx=idx)
     n_samples = len(x)
     if single_batch >= n_samples:
         if not out_of_sess:
             return _y_pred.eval(feed_dict={self._tfx: x})
         with self._sess.as_default():
             return self.get_rs(x, idx=idx).eval(feed_dict={self._tfx: x})
     # Ceiling division: the number of chunks the loop below really runs.
     epoch = -(-n_samples // single_batch)
     name = "Prediction" if name is None else "Prediction ({})".format(name)
     sub_bar = ProgressBar(min_value=0, max_value=epoch, name=name)
     show_bar = verbose >= NNVerbose.METRICS
     if show_bar:
         sub_bar.start()
     rs = []
     for start in range(0, n_samples, single_batch):
         chunk = x[start:start + single_batch]
         if out_of_sess:
             # Collected unevaluated; evaluated together after the loop.
             rs.append(self.get_rs(chunk, idx=idx))
         else:
             rs.append(_y_pred.eval(feed_dict={self._tfx: chunk}))
         if show_bar:
             sub_bar.update()
     if out_of_sess:
         with self._sess.as_default():
             rs = [_rs.eval() for _rs in rs]
     return np.vstack(rs)
Ejemplo n.º 16
0
 def gen_dicts(self, data, n_batch=None, include_label=True, predict=False, add_noises=False,
               shuffle=True, name=None, count=None):
     """Yield one feed dict per batch produced by ``self.gen_batches``.

     :param data          : data handed to ``self.gen_batches``
     :param n_batch       : batch size, cast to int; default ``self.n_batch``
     :param include_label : forwarded to ``self.get_feed_dict``
     :param predict       : forwarded to ``self.get_feed_dict``
     :param add_noises    : accepted but not used by this method
     :param shuffle       : forwarded to ``self.gen_batches``
     :param name          : when given, a progress bar with this name is
                            advanced once per batch
     :param count         : forwarded to ``self.get_feed_dict``
     """
     if n_batch is None:
         n_batch = self.n_batch
     else:
         n_batch = int(n_batch)
     show_progress = name is not None
     bar = ProgressBar(max_value=len(data) // n_batch, name=name) if show_progress else None
     for batch in self.gen_batches(data, shuffle, n_batch):
         if show_progress:
             bar.update()
         yield self.get_feed_dict(batch, include_label, predict, count=count)
Ejemplo n.º 17
0
    def fit(self,
            x,
            y,
            sample_weight=None,
            c=None,
            lr=None,
            optimizer=None,
            batch_size=None,
            epoch=None,
            tol=None,
            animation_params=None):
        """Train the linear SVM in mini-batches with a pluggable optimizer.

        :param x                : samples; promoted to at least 2-D
        :param y                : targets, cast to float32
        :param sample_weight    : per-sample weights; default
                                  ``self._params["sample_weight"]``, uniform
                                  ones when that is None as well
        :param c                : forwarded to ``self._batch_training``;
                                  default ``self._params["c"]``
        :param lr               : learning rate; default ``self._params["lr"]``
        :param optimizer        : optimizer name for ``OptFactory``;
                                  default ``self._params["optimizer"]``
        :param batch_size       : default ``self._params["batch_size"]``
        :param epoch            : maximum number of epochs; default ``self._params["epoch"]``
        :param tol              : stop once the value returned by
                                  ``self._batch_training`` is below tol;
                                  default ``self._params["tol"]``
        :param animation_params : forwarded to ``self._get_animation_params``
        """
        sample_weight = (self._params["sample_weight"]
                         if sample_weight is None else sample_weight)
        c = self._params["c"] if c is None else c
        lr = self._params["lr"] if lr is None else lr
        batch_size = self._params["batch_size"] if batch_size is None else batch_size
        epoch = self._params["epoch"] if epoch is None else epoch
        tol = self._params["tol"] if tol is None else tol
        optimizer = self._params["optimizer"] if optimizer is None else optimizer
        *animation_properties, animation_params = self._get_animation_params(
            animation_params)

        x = np.atleast_2d(x)
        y = np.asarray(y, dtype=np.float32)
        n_samples = len(y)
        if sample_weight is None:
            sample_weight = np.ones(n_samples)
        else:
            sample_weight = np.asarray(sample_weight) * n_samples

        self._w = np.zeros(x.shape[1], dtype=np.float32)
        self._b = np.zeros(1, dtype=np.float32)
        self._model_parameters = [self._w, self._b]
        opt_factory = OptFactory()
        self._optimizer = opt_factory.get_optimizer_by_name(
            optimizer, self._model_parameters, lr, epoch)

        bar = ProgressBar(max_value=epoch, name="LinearSVM")
        ims = []
        train_repeat = self._get_train_repeat(x, batch_size)
        for epoch_idx in range(epoch):
            self._optimizer.update()
            epoch_loss = self._batch_training(x, y, batch_size, train_repeat,
                                              sample_weight, c)
            if epoch_loss < tol:
                bar.terminate()
                break
            self._handle_animation(epoch_idx, x, y, ims, animation_params,
                                   *animation_properties)
            bar.update()
        self._handle_mp4(ims, animation_properties)
Ejemplo n.º 18
0
    def fit(self,
            x,
            y,
            c=None,
            lr=None,
            batch_size=None,
            epoch=None,
            tol=None,
            optimizer=None,
            animation_params=None):
        """Build the TensorFlow linear-SVM graph and train it in mini-batches.

        :param x                : samples; promoted to at least 2-D
        :param y                : targets; fed as a column through ``self._tfy``
        :param c                : factor on the L2 penalty of the weights;
                                  default ``self._params["c"]``
        :param lr               : learning rate; default ``self._params["lr"]``
        :param batch_size       : default ``self._params["batch_size"]``
        :param epoch            : maximum number of epochs; default ``self._params["epoch"]``
        :param tol              : stop once the value returned by
                                  ``self._batch_training`` is below tol;
                                  default ``self._params["tol"]``
        :param optimizer        : optimizer name for ``TFOptFac``;
                                  default ``self._params["optimizer"]``
        :param animation_params : forwarded to ``self._get_animation_params``
        """
        c = self._params["c"] if c is None else c
        lr = self._params["lr"] if lr is None else lr
        batch_size = self._params["batch_size"] if batch_size is None else batch_size
        epoch = self._params["epoch"] if epoch is None else epoch
        tol = self._params["tol"] if tol is None else tol
        optimizer = self._params["optimizer"] if optimizer is None else optimizer
        *animation_properties, animation_params = self._get_animation_params(
            animation_params)

        x = np.atleast_2d(x)
        y = np.asarray(y)
        # Column view of the targets to match the [None, 1] placeholder.
        y_2d = y[..., None]
        n_features = x.shape[1]

        self._w = tf.Variable(np.zeros([n_features, 1]),
                              dtype=tf.float32,
                              name="w")
        self._b = tf.Variable(0., dtype=tf.float32, name="b")
        self._tfx = tf.placeholder(tf.float32, [None, n_features])
        self._tfy = tf.placeholder(tf.float32, [None, 1])
        self._y_pred_raw = tf.matmul(self._tfx, self._w) + self._b
        self._y_pred = tf.sign(self._y_pred_raw)
        # Hinge loss plus L2 penalty on the weights.
        hinge = tf.reduce_sum(tf.nn.relu(1 - self._tfy * self._y_pred_raw))
        penalty = c * tf.nn.l2_loss(self._w)
        loss = hinge + penalty
        tf_optimizer = TFOptFac().get_optimizer_by_name(optimizer, lr)
        train_step = tf_optimizer.minimize(loss)
        self._sess.run(tf.global_variables_initializer())

        bar = ProgressBar(max_value=epoch, name="TFLinearSVM")
        ims = []
        train_repeat = self._get_train_repeat(x, batch_size)
        for epoch_idx in range(epoch):
            epoch_loss = self._batch_training(x, y_2d, batch_size,
                                              train_repeat, loss, train_step)
            if epoch_loss < tol:
                bar.terminate()
                break
            self._handle_animation(epoch_idx, x, y, ims, animation_params,
                                   *animation_properties)
            bar.update()
        self._handle_mp4(ims, animation_properties)
Ejemplo n.º 19
0
        def fit(self,
                x,
                y,
                c=None,
                lr=None,
                batch_size=None,
                epoch=None,
                tol=None,
                optimizer=None,
                animation_params=None):
            """Train the PyTorch linear SVM in mini-batches.

            :param x                : samples; promoted to at least 2-D
            :param y                : targets, cast to float32
            :param c                : forwarded to ``self._loss``;
                                      default ``self._params["c"]``
            :param lr               : learning rate; default ``self._params["lr"]``
            :param batch_size       : default ``self._params["batch_size"]``
            :param epoch            : maximum number of epochs;
                                      default ``self._params["epoch"]``
            :param tol              : stop once the value returned by
                                      ``self.batch_training`` is below tol;
                                      default ``self._params["tol"]``
            :param optimizer        : optimizer name for ``PyTorchOptFac``;
                                      default ``self._params["optimizer"]``
            :param animation_params : forwarded to ``self._get_animation_params``
            """
            c = self._params["c"] if c is None else c
            lr = self._params["lr"] if lr is None else lr
            batch_size = self._params["batch_size"] if batch_size is None else batch_size
            epoch = self._params["epoch"] if epoch is None else epoch
            tol = self._params["tol"] if tol is None else tol
            optimizer = self._params["optimizer"] if optimizer is None else optimizer
            *animation_properties, animation_params = self._get_animation_params(
                animation_params)

            x = np.atleast_2d(x)
            y = np.asarray(y, dtype=np.float32)
            # Column view of the targets to match the (n_features, 1) weights.
            y_2d = y[..., None]

            self._w = Variable(torch.rand([x.shape[1], 1]), requires_grad=True)
            self._b = Variable(torch.Tensor([0.]), requires_grad=True)
            self._model_parameters = [self._w, self._b]
            opt_factory = PyTorchOptFac()
            self._optimizer = opt_factory.get_optimizer_by_name(
                optimizer, self._model_parameters, lr, epoch)

            x, y, y_2d = self._arr_to_variable(False, x, y, y_2d)

            def loss_function(_y, _y_pred):
                # Binds ``c`` so the training routine only passes targets
                # and predictions.
                return self._loss(_y, _y_pred, c)

            bar = ProgressBar(max_value=epoch, name="TorchLinearSVM")
            ims = []
            train_repeat = self._get_train_repeat(x, batch_size)
            for epoch_idx in range(epoch):
                self._optimizer.update()
                epoch_loss = self.batch_training(x, y_2d, batch_size,
                                                 train_repeat, loss_function)
                if epoch_loss < tol:
                    bar.terminate()
                    break
                self._handle_animation(epoch_idx, x, y, ims, animation_params,
                                       *animation_properties)
                bar.update()
            self._handle_mp4(ims, animation_properties)
Ejemplo n.º 20
0
 def fit(self,
         x,
         y,
         sample_weight=None,
         clf=None,
         epoch=None,
         eps=None,
         **kwargs):
     """Train ``epoch`` weak classifiers with AdaBoost re-weighting.

     :param x             : samples; promoted to at least 2-D
     :param y             : targets; multiplied with the predictions in the
                            weight update
     :param sample_weight : initial sample weights (copied); default
                            ``self._params["sample_weight"]``, uniform
                            ``1 / len(y)`` when that is None as well
     :param clf           : key into ``AdaBoost._weak_clf``; default
                            ``self._params["clf"]``, falling back to "Cart"
                            with ``max_depth=1``
     :param epoch         : number of boosting rounds; default ``self._params["epoch"]``
     :param eps           : clamp for the weighted error, kept within
                            ``[eps, 1 - eps]``; default ``self._params["eps"]``
     :param kwargs        : forwarded to the weak classifier constructor
     """
     sample_weight = (self._params["sample_weight"]
                      if sample_weight is None else sample_weight)
     clf = self._params["clf"] if clf is None else clf
     epoch = self._params["epoch"] if epoch is None else epoch
     eps = self._params["eps"] if eps is None else eps
     x, y = np.atleast_2d(x), np.asarray(y)
     # Fall back to CART decision stumps as the weak classifier.
     if clf is None:
         clf, kwargs = "Cart", {"max_depth": 1}
     self._clf = clf
     self._kwarg_cache = kwargs
     if sample_weight is None:
         n_samples = len(y)
         sample_weight = np.ones(n_samples) / n_samples
     else:
         sample_weight = np.array(sample_weight)
     bar = ProgressBar(max_value=epoch, name="AdaBoost")
     weak_clf_cls = AdaBoost._weak_clf[clf]
     # Main AdaBoost loop; ``epoch`` is the number of boosting rounds.
     for _round in range(epoch):
         # Train a weak classifier on the current sample weights.
         tmp_clf = weak_clf_cls(**kwargs)
         tmp_clf.fit(x, y, sample_weight)
         # Predict with the freshly trained weak classifier.
         y_pred = tmp_clf.predict(x)
         # Weighted error rate, clamped by a small constant at both ends
         # for numerical stability.
         weighted_err = (y_pred != y).dot(sample_weight[:, None])[0]
         em = min(max(weighted_err, eps), 1 - eps)
         # Vote ("say") of this weak classifier.
         am = 0.5 * log(1 / em - 1)
         # Update the sample weights and record a deep copy of the weak
         # classifier in the list.
         sample_weight *= np.exp(-am * y * y_pred)
         sample_weight /= np.sum(sample_weight)
         self._clfs.append(deepcopy(tmp_clf))
         self._clfs_weights.append(am)
         bar.update()
     self._clfs_weights = np.array(self._clfs_weights, dtype=np.float32)
Ejemplo n.º 21
0
 def fit(self,
         x=None,
         y=None,
         x_test=None,
         y_test=None,
         n_epoch=None,
         n_batch=None,
         print_settings=True):
     """Prepare the data, build the model and run the training loop.

     :param x              : training samples, passed through ``self.prepare_data``
     :param y              : training targets, passed through ``self.prepare_data``
     :param x_test         : test samples, passed through ``self.prepare_data``
     :param y_test         : test targets, passed through ``self.prepare_data``
     :param n_epoch        : when given, overrides ``self.n_epoch``
     :param n_batch        : when given, overrides ``self.n_batch``
     :param print_settings : forwarded to ``self.build_model``
     """
     if not self.settings_inited:
         self.init_all_settings()
     if n_epoch is not None:
         self.n_epoch = n_epoch
     if n_batch is not None:
         self.n_batch = n_batch
     x, y, x_test, y_test = self.prepare_data(x, y, x_test, y_test)
     self.build_model(x, y, x_test, y_test, print_settings)
     # Global step counter across all epochs.
     count = 0
     with self._sess.as_default() as sess:
         # Prepare
         finished_epochs = 0
         writers_and_ops = self._prepare_tensorboard_verbose(sess)
         train_writer, test_writer, train_merge_op, test_merge_op = writers_and_ops
         bar = ProgressBar(max_value=self.n_epoch, name="Main")
         train_info = [train_merge_op, train_writer]
         test_info = [test_merge_op, test_writer]
         train_metric, test_metric = self._get_metrics(x, y, x_test, y_test)
         if self.tensorboard_verbose > 0:
             self._do_tensorboard_verbose(count, train_info, test_info,
                                          train_metric, test_metric)
         # Train
         while finished_epochs < self.n_epoch:
             for feed in self.gen_dicts(self.train_data, count=count):
                 count += 1
                 self._sess.run(self._train_step, feed)
                 snapshot_due = (self.snapshot_step > 0
                                 and count % self.snapshot_step == 0)
                 if snapshot_due and self.tensorboard_verbose > 0:
                     # Metrics are refreshed only at snapshot steps.
                     train_metric, test_metric = self._get_metrics(
                         x, y, x_test, y_test)
                     self._do_tensorboard_verbose(
                         count, train_info, test_info, train_metric,
                         test_metric)
             finished_epochs += 1
             # With snapshots disabled, log once per epoch using the most
             # recently computed metrics.
             if self.snapshot_step == 0 and self.tensorboard_verbose > 0:
                 self._do_tensorboard_verbose(count, train_info, test_info,
                                              train_metric, test_metric)
             if bar is not None:
                 bar.update()
Ejemplo n.º 22
0
    def fit(self,
            x,
            y,
            sample_weight=None,
            c=None,
            lr=None,
            epoch=None,
            tol=None,
            animation_params=None):
        """Train the linear SVM on the worst-violating sample per step, recording animation frames.

        :param x                : samples; promoted to at least 2-D
        :param y                : targets, multiplied with the raw predictions
        :param sample_weight    : per-sample weights; default
                                  ``self._params["sample_weight"]``, uniform
                                  ones when that is None as well
        :param c                : scale of the correction step; default ``self._params["c"]``
        :param lr               : learning rate; default ``self._params["lr"]``
        :param epoch            : maximum number of updates; default ``self._params["epoch"]``
        :param tol              : stop once the largest weighted error is
                                  <= tol; default ``self._params["tol"]``
        :param animation_params : forwarded to ``self._get_animation_params``
        """
        sample_weight = (self._params["sample_weight"]
                         if sample_weight is None else sample_weight)
        c = self._params["c"] if c is None else c
        lr = self._params["lr"] if lr is None else lr
        epoch = self._params["epoch"] if epoch is None else epoch
        tol = self._params["tol"] if tol is None else tol
        *animation_properties, animation_params = self._get_animation_params(
            animation_params)

        x, y = np.atleast_2d(x), np.asarray(y)
        n_samples = len(y)
        if sample_weight is None:
            sample_weight = np.ones(n_samples)
        else:
            sample_weight = np.asarray(sample_weight) * n_samples

        self._w = np.zeros(x.shape[1])
        self._b = 0
        ims = []
        bar = ProgressBar(max_value=epoch, name="LinearSVM")
        for step_idx in range(epoch):
            raw_pred = self.predict(x, get_raw_results=True)
            weighted_err = (1 - raw_pred * y) * sample_weight
            # Shuffle before argmax so ties are broken at random.
            order = np.random.permutation(n_samples)
            worst = order[np.argmax(weighted_err[order])]
            if weighted_err[worst] <= tol:
                bar.update(epoch)
                break
            step = lr * c * y[worst] * sample_weight[worst]
            self._w *= 1 - lr
            self._w += step * x[worst]
            self._b += step
            self._handle_animation(step_idx, x, y, ims, animation_params,
                                   *animation_properties)
            bar.update()
        self._handle_mp4(ims, animation_properties)
Ejemplo n.º 23
0
 def predict(self, x):
     """Run the stored output tensor over ``x`` in batches.

     ``x`` is converted with ``np.asarray`` and ``NNDist._transfer_x``, then
     fed to ``self._entry`` slice by slice.  The per-batch results are
     stacked vertically and returned as a ``float32`` array.

     Batches are sized so that one batch holds roughly 1e6 scalar values,
     but never fewer than one sample: without that floor a single sample
     with more than 1e6 values would give a batch size of zero and a
     ``ZeroDivisionError`` when computing the number of batches.
     """
     x = NNDist._transfer_x(np.asarray(x))
     rs = []
     batch_size = max(1, math.floor(1e6 / np.prod(x.shape[1:])))
     epoch = int(math.ceil(len(x) / batch_size))
     output = self._sess.graph.get_tensor_by_name(self._output)
     bar = ProgressBar(max_value=epoch, name="Predict")
     bar.start()
     for i in range(epoch):
         # A slice running past the end is clipped, so the final (possibly
         # shorter) batch needs no special case.
         batch = x[i * batch_size:(i + 1) * batch_size]
         rs.append(self._sess.run(output, {self._entry: batch}))
         bar.update()
     return np.vstack(rs).astype(np.float32)
Ejemplo n.º 24
0
 def predict(self, x, get_raw_results=False, **kwargs):
     """Run the stored output tensor over ``x`` in batches.

     ``x`` is converted with ``np.asarray`` and ``NNDist._transfer_x``, then
     fed to ``self._entry`` slice by slice and the results are stacked into
     one ``float32`` array.

     Returns that stacked array when ``get_raw_results`` is true, otherwise
     the ``argmax`` along axis 1.  Extra keyword arguments are accepted and
     ignored.

     Batches are sized so that one batch holds roughly 1e6 scalar values,
     but never fewer than one sample: without that floor a single sample
     with more than 1e6 values would give a batch size of zero and a
     ``ZeroDivisionError`` when computing the number of batches.
     """
     x = NNDist._transfer_x(np.asarray(x))
     rs = []
     batch_size = max(1, floor(1e6 / np.prod(x.shape[1:])))
     epoch = int(ceil(len(x) / batch_size))
     output = self._sess.graph.get_tensor_by_name(self._output)
     bar = ProgressBar(max_value=epoch, name="Predict")
     for i in range(epoch):
         # A slice running past the end is clipped, so the final (possibly
         # shorter) batch needs no special case.
         batch = x[i * batch_size:(i + 1) * batch_size]
         rs.append(self._sess.run(output, {self._entry: batch}))
         bar.update()
     y_pred = np.vstack(rs).astype(np.float32)
     return y_pred if get_raw_results else np.argmax(y_pred, axis=1)
Ejemplo n.º 25
0
 def predict(self, x):
     """Evaluate the tensor named ``self._output`` over ``x`` in batches.

     ``self._create_graph()`` is called first, then a fresh ``tf.Session``
     is opened for the duration of the call.  ``x`` is promoted to at least
     2-D ``float32`` and fed to ``self._entry`` slice by slice; the
     per-batch results are stacked and returned as a ``float32`` array.

     Batches are sized so that one batch holds roughly 1e6 scalar values,
     but never fewer than one sample: without that floor a single sample
     with more than 1e6 values would give a batch size of zero and a
     ``ZeroDivisionError`` when computing the number of batches.
     """
     self._create_graph()
     x, rs = np.atleast_2d(x).astype(np.float32), []
     with tf.Session() as sess:
         flattened_tensor = sess.graph.get_tensor_by_name(self._output)
         print("Predicting...")
         batch_size = max(1, math.floor(1e6 / np.prod(x.shape[1:])))
         epoch = math.ceil(len(x) / batch_size)  # type: int
         bar = ProgressBar(max_value=epoch, name="Predict")
         for i in range(epoch):
             # A slice running past the end is clipped, so the final
             # (possibly shorter) batch needs no special case.
             batch = x[i * batch_size:(i + 1) * batch_size]
             rs.append(sess.run(flattened_tensor, {self._entry: batch}))
             bar.update()
         return np.vstack(rs).astype(np.float32)
Ejemplo n.º 26
0
    def fit(self,
            x,
            y,
            sample_weight=None,
            lr=None,
            epoch=None,
            animation_params=None):
        """Train the perceptron by correcting one misclassified sample per step.

        ``sample_weight``, ``lr`` and ``epoch`` fall back to the entries of
        the same name in ``self._params`` when left as ``None``.

        Each iteration predicts on all of ``x``, weights the mismatches with
        ``sample_weight`` and picks the sample with the largest weighted
        mismatch (ties broken at random through a permutation).  If the
        picked sample is already predicted correctly the loop stops early;
        otherwise ``self._w`` and ``self._b`` are moved by
        ``lr * y * sample_weight`` of that sample.
        """
        sample_weight = (self._params["sample_weight"]
                         if sample_weight is None else sample_weight)
        lr = self._params["lr"] if lr is None else lr
        epoch = self._params["epoch"] if epoch is None else epoch
        *animation_properties, animation_params = self._get_animation_params(
            animation_params)

        x = np.atleast_2d(x)
        y = np.asarray(y)
        n_samples = len(y)
        # Uniform weights by default; user weights are scaled by sample count.
        sample_weight = (np.ones(n_samples) if sample_weight is None else
                         np.asarray(sample_weight) * n_samples)

        self._w = np.zeros(x.shape[1])
        self._b = 0
        ims = []
        bar = ProgressBar(max_value=epoch, name="Perceptron")
        for step in range(epoch):
            y_pred = self.predict(x)
            weighted_miss = (y_pred != y) * sample_weight
            # Searching through a random permutation randomises tie-breaking.
            order = np.random.permutation(n_samples)
            pick = order[np.argmax(weighted_miss[order])]
            if y_pred[pick] == y[pick]:
                # Even the worst candidate is correct: finish the bar and stop.
                bar.update(epoch)
                break
            step_size = lr * y[pick] * sample_weight[pick]
            self._w += step_size * x[pick]
            self._b += step_size
            self._handle_animation(step, x, y, ims, animation_params,
                                   *animation_properties)
            bar.update()
        self._handle_mp4(ims, animation_properties)
Ejemplo n.º 27
0
    def fit(self,
            x,
            y,
            sample_weight=None,
            lr=None,
            epoch=None,
            animation_params=None):
        """Train the perceptron with updates scaled by the norm of ``self._w``.

        ``sample_weight``, ``lr`` and ``epoch`` fall back to the entries of
        the same name in ``self._params`` when left as ``None``.

        ``self._w`` starts from ``np.random.random`` values and ``self._b``
        from ``0.``.  Each iteration computes ``-y * prediction *
        sample_weight`` from ``self.predict(x, True)`` and takes the sample
        where it is largest.  When even that value is negative the bar is
        terminated and training stops; otherwise the weights and bias are
        updated using that sample, with the step divided by the current norm
        of ``self._w``.
        """
        sample_weight = (self._params["sample_weight"]
                         if sample_weight is None else sample_weight)
        lr = self._params["lr"] if lr is None else lr
        epoch = self._params["epoch"] if epoch is None else epoch
        *animation_properties, animation_params = self._get_animation_params(
            animation_params)

        x = np.atleast_2d(x)
        y = np.asarray(y)
        n_samples = len(y)
        # Uniform weights by default; user weights are scaled by sample count.
        sample_weight = (np.ones(n_samples) if sample_weight is None else
                         np.asarray(sample_weight) * n_samples)

        self._w = np.random.random(x.shape[1])
        self._b = 0.
        ims = []
        bar = ProgressBar(max_value=epoch, name="Perceptron")
        for step in range(epoch):
            y_pred = self.predict(x, True)
            err = -y * y_pred * sample_weight
            worst = np.argmax(err)
            if err[worst] < 0:
                # Largest error is already negative: nothing left to correct.
                bar.terminate()
                break
            w_norm = np.linalg.norm(self._w)
            delta = lr * y[worst] * sample_weight[worst] / w_norm
            correction = y_pred[worst] * self._w / w_norm**2
            self._w += delta * (x[worst] - correction)
            self._b += delta
            self._handle_animation(step, x, y, ims, animation_params,
                                   *animation_properties)
            bar.update()
        self._handle_mp4(ims, animation_properties)
Ejemplo n.º 28
0
    def fit(self, x, y, sample_weight=None, kernel=None, epoch=None,
            x_test=None, y_test=None, metrics=None, animation_params=None, **kwargs):
        """Train the kernel model and return the per-epoch metric logs.

        ``sample_weight``, ``kernel``, ``epoch``, ``x_test``, ``y_test`` and
        ``metrics`` fall back to the entries of the same name in
        ``self._params`` when left as ``None``.

        ``kernel`` must be ``"poly"`` (degree taken from ``kwargs["p"]`` or
        ``self._params["p"]``) or ``"rbf"`` (gamma taken from
        ``kwargs["gamma"]`` or ``1 / n_features``); anything else raises
        ``NotImplementedError``.  Entries of ``kwargs`` whose key appears in
        ``self._fit_args_names`` override the matching default in
        ``self._fit_args`` when calling ``self._fit``.

        Returns a list with one entry per logged epoch, each entry being the
        list of metric values for that epoch.  The list stays empty when
        logging is disabled or ``metrics`` is ``None``.
        """
        sample_weight = (self._params["sample_weight"]
                         if sample_weight is None else sample_weight)  # type: list
        kernel = self._params["kernel"] if kernel is None else kernel
        epoch = self._params["epoch"] if epoch is None else epoch
        x_test = self._params["x_test"] if x_test is None else x_test  # type: list
        y_test = self._params["y_test"] if y_test is None else y_test  # type: list
        metrics = self._params["metrics"] if metrics is None else metrics  # type: list
        *animation_properties, animation_params = self._get_animation_params(animation_params)

        self._x = np.atleast_2d(x)
        self._y = np.asarray(y)

        # Resolve the kernel function together with its display name / parameter.
        if kernel == "poly":
            degree = kwargs.get("p", self._params["p"])
            self._kernel_name = "Polynomial"
            self._kernel_param = "degree = {}".format(degree)
            self._kernel = lambda _x, _y: KernelBase._poly(_x, _y, degree)
        elif kernel == "rbf":
            gamma = kwargs.get("gamma", 1 / self._x.shape[1])
            self._kernel_name = "RBF"
            self._kernel_param = r"$\gamma = {:8.6}$".format(gamma)
            self._kernel = lambda _x, _y: KernelBase._rbf(_x, _y, gamma)
        else:
            raise NotImplementedError("Kernel '{}' has not defined".format(kernel))

        # Uniform weights by default; user weights are scaled by sample count.
        if sample_weight is None:
            sample_weight = np.ones(len(y))
        else:
            sample_weight = np.asarray(sample_weight) * len(y)

        self._alpha = np.zeros(len(x))
        self._w = np.zeros(len(x))
        self._prediction_cache = np.zeros(len(x))
        self._gram = self._kernel(self._x, self._x)
        self._b = 0
        self._prepare(sample_weight, **kwargs)

        logs, ims = [], []
        # Per-call overrides from kwargs win over the stored default fit arguments.
        fit_args = [
            kwargs[name] if name in kwargs else default
            for name, default in zip(self._fit_args_names, self._fit_args)
        ]

        # Choose the labels (and, with test data, the train-vs-test gram) used for logging.
        y_cv = test_gram = None
        if self._do_log:
            if metrics is not None:
                self.get_metrics(metrics)
            if x_test is not None and y_test is not None:
                x_cv = np.atleast_2d(x_test)
                y_cv = np.asarray(y_test)
                test_gram = self._kernel(self._x, x_cv)
            else:
                y_cv = self._y

        if self._is_torch:
            y_cv, self._x, self._y = self._torch_transform(y_cv)

        bar = ProgressBar(max_value=epoch, name=str(self))
        for step in range(epoch):
            # A truthy result from _fit stops training early.
            if self._fit(sample_weight, *fit_args):
                bar.terminate()
                break
            if self._do_log and metrics is not None:
                epoch_logs = []
                for metric in metrics:
                    on_train = test_gram is None
                    target = self._y if on_train else y_cv
                    if self._is_torch:
                        target = target.data.numpy()
                    if on_train:
                        predicted = np.sign(self._prediction_cache)
                    else:
                        predicted = self.predict(test_gram, gram_provided=True)
                    epoch_logs.append(metric(target, predicted))
                logs.append(epoch_logs)
            self._handle_animation(step, self._x, self._y, ims, animation_params, *animation_properties)
            bar.update()
        self._handle_mp4(ims, animation_properties)
        return logs
Ejemplo n.º 29
0
    def fit(self,
            x,
            y,
            lr=None,
            epoch=None,
            batch_size=None,
            train_rate=None,
            optimizer=None,
            metrics=None,
            record_period=None,
            verbose=None,
            preview=None):
        """Build the training graph and train the network for ``epoch`` epochs.

        Every keyword argument left as ``None`` is read from ``self._params``
        under the same key.  When ``train_rate`` is set, ``x`` / ``y`` are
        shuffled and the first ``int(len(x) * train_rate)`` rows are used for
        training and the rest for testing; otherwise the full data serves as
        both.  ``y`` is indexed as a 2-D array (``y.shape[1]`` and ``argmax``
        along axis 1 are taken).

        Every ``record_period`` epochs ``self._batch_work`` is called and,
        when ``self.verbose >= NNVerbose.EPOCH``, the epoch bar advances.
        """
        lr = self._params["lr"] if lr is None else lr
        epoch = self._params["epoch"] if epoch is None else epoch
        optimizer = self._params["optimizer"] if optimizer is None else optimizer
        batch_size = self._params["batch_size"] if batch_size is None else batch_size
        train_rate = self._params["train_rate"] if train_rate is None else train_rate
        metrics = self._params["metrics"] if metrics is None else metrics
        record_period = (self._params["record_period"]
                         if record_period is None else record_period)
        verbose = self._params["verbose"] if verbose is None else verbose
        preview = self._params["preview"] if preview is None else preview

        x = NN._transfer_x(x)
        self.verbose = verbose
        self._optimizer = OptFactory().get_optimizer_by_name(optimizer, lr)
        self._tfx = tf.placeholder(tf.float32, shape=[None] + list(x.shape[1:]))
        self._tfy = tf.placeholder(tf.float32, shape=[None, y.shape[1]])

        if train_rate is None:
            # No split requested: evaluate on the training data itself.
            x_train = x_test = x
            y_train = y_test = y
        else:
            train_rate = float(train_rate)
            train_len = int(len(x) * train_rate)
            shuffle_suffix = np.random.permutation(int(len(x)))
            x, y = x[shuffle_suffix], y[shuffle_suffix]
            x_train, x_test = x[:train_len], x[train_len:]
            y_train, y_test = y[:train_len], y[train_len:]
        y_train_classes = np.argmax(y_train, axis=1)
        y_test_classes = np.argmax(y_test, axis=1)

        if metrics is None:
            metrics = []
        self._metrics = self.get_metrics(metrics)
        self._metric_names = [_m.__name__ for _m in metrics]
        # One list per metric plus one extra slot, for each of the two splits.
        n_slots = len(metrics) + 1
        self._logs = {
            name: [[] for _ in range(n_slots)]
            for name in ("Train", "Test")
        }

        bar = ProgressBar(max_value=max(1, epoch // record_period),
                          name="Epoch",
                          start=False)
        if self.verbose >= NNVerbose.EPOCH:
            bar.start()

        if preview:
            self._preview()

        # Trailing arguments for batch_training and for _batch_work respectively.
        training_args = (x_train, y_train, y_train_classes, x_test, y_test,
                         y_test_classes,
                         self.verbose >= NNVerbose.METRICS_DETAIL)
        record_args = (None, None, x_train, y_train, y_train_classes, x_test,
                       y_test, y_test_classes,
                       self.verbose >= NNVerbose.METRICS)
        train_repeat = self._get_train_repeat(x, batch_size)
        with self._sess.as_default() as sess:
            self._y_pred = self._get_rs(self._tfx)
            self._inner_y = self._get_rs(self._tfx, predict=False)
            self._loss = self._layers[-1].calculate(self._tfy, self._inner_y)
            self._train_step = self._optimizer.minimize(self._loss)
            sess.run(tf.global_variables_initializer())
            for counter in range(epoch):
                # An iteration bar is only created on the first epoch of each
                # record period, and only at ITER verbosity or above.
                sub_bar = None
                if self.verbose >= NNVerbose.ITER and counter % record_period == 0:
                    sub_bar = ProgressBar(
                        max_value=train_repeat * record_period - 1,
                        name="Iteration")
                self.batch_training(x_train, y_train, batch_size, train_repeat,
                                    self._loss, self._train_step, sub_bar,
                                    *training_args)
                if (counter + 1) % record_period == 0:
                    self._batch_work(*record_args)
                    if self.verbose >= NNVerbose.EPOCH:
                        bar.update(counter // record_period + 1)
Ejemplo n.º 30
0
    def fit(self,
            x=None,
            y=None,
            lr=0.01,
            epoch=10,
            batch_size=128,
            train_rate=None,
            verbose=0,
            metrics=None,
            record_period=100):
        """Build the training graph and train with an Adam optimizer.

        Although ``x`` and ``y`` default to ``None``, both are dereferenced
        immediately (``x.shape[1]`` / ``y.shape[1]``), so they must be
        supplied as 2-D arrays.

        When ``train_rate`` is set, the data are shuffled and the first
        ``int(len(x) * train_rate)`` rows are used for training and the rest
        for testing; otherwise the full data serves as both.

        ``metrics`` defaults to ``["acc"]``; string entries are replaced in
        place with the matching entry of ``self._available_metrics``, which
        mutates a list passed in by the caller.

        Every ``record_period`` epochs the train / test logs are appended
        and, depending on ``self.verbose``, printed.
        """

        self.verbose = verbose
        self._optimizer = Adam(lr)
        self._tfx = tf.placeholder(tf.float32, shape=[None, x.shape[1]])
        self._tfy = tf.placeholder(tf.float32, shape=[None, y.shape[1]])

        # Optional shuffled train / test split.
        if train_rate is not None:
            train_rate = float(train_rate)
            train_len = int(len(x) * train_rate)
            shuffle_suffix = np.random.permutation(int(len(x)))
            x, y = x[shuffle_suffix], y[shuffle_suffix]
            x_train, y_train = x[:train_len], y[:train_len]
            x_test, y_test = x[train_len:], y[train_len:]
        else:
            x_train = x_test = x
            y_train = y_test = y

        train_len = len(x_train)
        batch_size = min(batch_size, train_len)
        # NOTE(review): batch_size was just clamped to at most train_len, so
        # this flag is always True and the full-batch branch in the loop
        # below is never taken.
        do_random_batch = train_len >= batch_size
        train_repeat = int(train_len / batch_size) + 1

        # Resolve metric names to callables (in place) and prepare the logs:
        # one list per metric plus one extra slot, for each of the two splits.
        self._metrics = ["acc"] if metrics is None else metrics
        for i, metric in enumerate(self._metrics):
            if isinstance(metric, str):
                self._metrics[i] = self._available_metrics[metric]
        self._metric_names = [_m.__name__ for _m in self._metrics]
        self._logs = {
            name: [[] for _ in range(len(self._metrics) + 1)]
            for name in ("train", "test")
        }

        bar = ProgressBar(max_value=max(1, epoch // record_period),
                          name="Epoch",
                          start=False)
        if self.verbose >= NNVerbose.EPOCH:
            bar.start()

        with self._sess.as_default() as sess:

            # Build the cost, prediction and training ops, then initialise
            # all variables.
            self._cost = self.get_rs(self._tfx, self._tfy)
            self._y_pred = self.get_rs(self._tfx)
            self._train_step = self._optimizer.minimize(self._cost)
            sess.run(tf.global_variables_initializer())

            # Train
            sub_bar = ProgressBar(max_value=train_repeat * record_period - 1,
                                  name="Iteration",
                                  start=False)
            for counter in range(epoch):
                # The iteration bar is (re)started on the first epoch of each
                # record period.
                if self.verbose >= NNVerbose.EPOCH and counter % record_period == 0:
                    sub_bar.start()
                for _i in range(train_repeat):
                    if do_random_batch:
                        # Draw batch_size random row indices for this step.
                        batch = np.random.choice(train_len, batch_size)
                        x_batch, y_batch = x_train[batch], y_train[batch]
                    else:
                        x_batch, y_batch = x_train, y_train
                    self._train_step.run(feed_dict={
                        self._tfx: x_batch,
                        self._tfy: y_batch
                    })
                    if self.verbose >= NNVerbose.EPOCH:
                        # Detailed metrics are logged only when update()
                        # returns a truthy value -- presumably when the bar
                        # completes; verify against ProgressBar.
                        if sub_bar.update(
                        ) and self.verbose >= NNVerbose.METRICS_DETAIL:
                            self._append_log(x_train, y_train, "train")
                            self._append_log(x_test, y_test, "test")
                            self._print_metric_logs("train")
                            self._print_metric_logs("test")
                # NOTE(review): the bar is advanced once more per epoch, on
                # top of the per-iteration updates above.
                if self.verbose >= NNVerbose.EPOCH:
                    sub_bar.update()
                if (counter + 1) % record_period == 0:
                    self._append_log(x_train, y_train, "train")
                    self._append_log(x_test, y_test, "test")
                    if self.verbose >= NNVerbose.METRICS:
                        self._print_metric_logs("train")
                        self._print_metric_logs("test")
                    if self.verbose >= NNVerbose.EPOCH:
                        bar.update(counter // record_period + 1)
                        # Fresh iteration bar for the next record period.
                        sub_bar = ProgressBar(
                            max_value=train_repeat * record_period - 1,
                            name="Iteration",
                            start=False)