def fit(self, im, om, generator, cell=LSTMCell, n_hidden=128, n_history=0, squeeze=None,
        activation=None, lr=0.01, epoch=10, n_iter=128, batch_size=64, optimizer="Adam",
        eps=1e-8, verbose=1):
    """
    Build the recurrent graph and train it on batches drawn from `generator`.

    :param im, om     : stored on the instance and forwarded to `self._define_input`
    :param generator  : object whose `gen(batch_size)` returns an `(x_batch, y_batch)` pair
    :param cell       : cell factory, called as `cell(n_hidden)`
    :param n_hidden   : argument handed to the cell factory
    :param n_history  : forwarded to `self._get_output`
    :param squeeze    : when truthy, `self._squeeze` is set to True
    :param activation : stored as `self._activation` when it is callable
    :param lr         : forwarded to the optimizer factory
    :param epoch      : number of outer loops
    :param n_iter     : number of batches per outer loop
    :param batch_size : argument handed to `generator.gen`
    :param optimizer  : optimizer name resolved through `OptFactory`
    :param eps        : forwarded to `self._get_loss`
    :param verbose    : >= 1 calls `self._verbose()` after each epoch,
                        >= 2 additionally drives the progress bars
    """
    if squeeze:
        self._squeeze = True
    if callable(activation):
        self._activation = activation
    self._generator = generator
    self._im, self._om = im, om
    self._optimizer = OptFactory().get_optimizer_by_name(optimizer, lr)
    self._define_input(im, om)

    # Graph construction: cell -> dynamic_rnn -> output -> loss -> train op.
    rnn_cell = cell(n_hidden)
    n_rows = tf.shape(self._input)[0]
    zero_state = rnn_cell.zero_state(n_rows, tf.float32)
    rnn_outputs, rnn_final_state = tf.nn.dynamic_rnn(
        rnn_cell, self._input, initial_state=zero_state)
    self._get_output(rnn_outputs, rnn_final_state, n_history)
    loss = self._get_loss(eps)
    train_step = self._optimizer.minimize(loss)

    self._log["iter_err"] = []
    self._log["epoch_err"] = []
    self._sess.run(tf.global_variables_initializer())

    show_bars = verbose >= 2
    bar = ProgressBar(max_value=epoch, name="Epoch", start=False)
    if show_bars:
        bar.start()
    for _ in range(epoch):
        sub_bar = ProgressBar(max_value=n_iter, name="Iter", start=False)
        if show_bars:
            sub_bar.start()
        epoch_err = 0
        for __ in range(n_iter):
            x_batch, y_batch = self._generator.gen(batch_size)
            feed = {self._tfx: x_batch, self._tfy: y_batch}
            # Only the loss value is kept; the train op is run for its side effect.
            iter_err = self._sess.run([loss, train_step], feed)[0]
            self._log["iter_err"].append(iter_err)
            epoch_err += iter_err
            if show_bars:
                sub_bar.update()
        self._log["epoch_err"].append(epoch_err / n_iter)
        if verbose >= 1:
            self._verbose()
        if show_bars:
            bar.update()
def fit(self, x, n_clusters=None, epoch=None, norm=None, animation_params=None):
    """
    Cluster `x` by alternating label assignment and center averaging.

    :param x                : samples; promoted with `np.atleast_2d`
    :param n_clusters       : number of centers; defaults to `self._params["n_clusters"]`
    :param epoch            : maximum number of iterations; defaults to `self._params["epoch"]`
    :param norm             : when given, overwrites `self._params["norm"]`
    :param animation_params : forwarded to `self._get_animation_params`

    The loop stops early once two consecutive label assignments are equal.
    The number of completed update steps is stored in `self._counter`.
    """
    n_clusters = self._params["n_clusters"] if n_clusters is None else n_clusters
    epoch = self._params["epoch"] if epoch is None else epoch
    if norm is not None:
        self._params["norm"] = norm
    *animation_properties, animation_params = self._get_animation_params(animation_params)

    x = np.atleast_2d(x)
    # Column vector of cluster ids, so `labels == cluster_ids` yields one mask row per cluster.
    cluster_ids = np.arange(n_clusters)[..., None]
    x_high_dim = x[:, None, ...]
    # Initial centers are randomly chosen samples.
    self._centers = x[np.random.permutation(len(x))[:n_clusters]]

    bar = ProgressBar(max_value=epoch, name="KMeans")
    ims = []
    previous_labels = None
    counter = 0
    for i in range(epoch):
        labels = self.predict(x_high_dim, high_dim=True)
        if previous_labels is not None and np.all(previous_labels == labels):
            bar.update(epoch)
            break
        previous_labels = labels
        for j, mask in enumerate(labels == cluster_ids):
            self._centers[j] = np.average(x[mask], axis=0)
        counter += 1
        animation_params["extra"] = self._centers
        self._handle_animation(i, x, labels, ims, animation_params, *animation_properties)
        bar.update()
    self._counter = counter
    self._handle_mp4(ims, animation_properties)
def fit(self, x, y, sample_weight=None, c=None, lr=None, epoch=None, tol=None):
    """
    Train the linear SVM by repeatedly correcting the single worst sample.

    :param x             : samples; promoted with `np.atleast_2d`
    :param y             : targets; converted with `np.asarray`
    :param sample_weight : per-sample weights; defaults to `self._params["sw"]`,
                           and to all ones when that is None as well
    :param c             : penalty factor; defaults to `self._params["c"]`
    :param lr            : learning rate; defaults to `self._params["lr"]`
    :param epoch         : maximum number of updates; defaults to `self._params["epoch"]`
    :param tol           : stop once the largest weighted error is <= tol;
                           defaults to `self._params["tol"]`
    """
    sample_weight = self._params["sw"] if sample_weight is None else sample_weight
    c = self._params["c"] if c is None else c
    lr = self._params["lr"] if lr is None else lr
    epoch = self._params["epoch"] if epoch is None else epoch
    tol = self._params["tol"] if tol is None else tol

    x, y = np.atleast_2d(x), np.asarray(y)
    n_samples = len(y)
    if sample_weight is None:
        sample_weight = np.ones(n_samples)
    else:
        sample_weight = np.asarray(sample_weight) * n_samples

    self._w = np.zeros(x.shape[1])
    self._b = 0
    bar = ProgressBar(max_value=epoch, name="LinearSVM")
    for _ in range(epoch):
        errors = (1 - self.predict(x, get_raw_results=True) * y) * sample_weight
        # Shuffle before argmax so that ties are broken at random.
        order = np.random.permutation(n_samples)
        worst = order[np.argmax(errors[order])]
        if errors[worst] <= tol:
            bar.update(epoch)
            return
        step = lr * c * y[worst] * sample_weight[worst]
        self._w *= 1 - lr
        self._w += step * x[worst]
        self._b += step
        bar.update()
def _get_prediction(self, x, name=None, batch_size=1e6, verbose=None):
    """
    Evaluate the network on `x` chunk by chunk and stack the chunk outputs.

    The chunk length is `batch_size` divided by the number of elements in a
    single sample (never less than 1). When `x` fits into one chunk it is
    evaluated directly and no progress bar is created.

    :param x          : input array; `len(x)` and `x.shape[1:]` are inspected here
    :param name       : optional tag added to the progress-bar title
    :param batch_size : element budget for one chunk
    :param verbose    : verbosity level; falls back to `self.verbose` when None
    :return           : the last entry of `self._get_activations(x, predict=True)`,
                        or the `np.vstack` of those entries over all chunks
    """
    if verbose is None:
        verbose = self.verbose
    single_batch = int(batch_size / np.prod(x.shape[1:])) or 1
    n_samples = len(x)
    if single_batch >= n_samples:
        return self._get_activations(x, predict=True).pop()

    # Number of chunks is ceil(n_samples / single_batch): the extra chunk is
    # needed only when there is a remainder (the old test was inverted, which
    # made the progress bar one step too long or too short).
    epoch, remainder = divmod(n_samples, single_batch)
    if remainder:
        epoch += 1

    name = "Prediction" if name is None else "Prediction ({})".format(name)
    sub_bar = ProgressBar(min_value=0, max_value=epoch, name=name)
    show_progress = verbose >= NNVerbose.METRICS
    if show_progress:
        sub_bar.start()

    rs = []
    for start in range(0, n_samples, single_batch):
        # Slicing past the end is safe, so the last chunk needs no special case.
        chunk = x[start:start + single_batch]
        rs.append(self._get_activations(chunk, predict=True).pop())
        if show_progress:
            sub_bar.update()
    return np.vstack(rs)
def opt(self, epoch=None, eps=None):
    """
    Main procedure of opt
    :param epoch : Maximum iteration ; defaults to self._params["epoch"]
    :param eps   : Tolerance         ; defaults to self._params["eps"]
    :return      : x*, f*, n_iter, feva

    The loop ends early when `self._core(eps)` returns a truthy value, or when
    a step raises a RuntimeWarning (warnings are promoted to errors inside the
    loop) or a LinAlgError; in the latter two cases the error is printed.
    """
    if epoch is None:
        epoch = self._params["epoch"]
    if eps is None:
        eps = self._params["eps"]
    self._func.refresh_cache(self._x)
    self._loss_cache, self._grad_cache = self.func(0), self.func(1)
    bar = ProgressBar(max_value=epoch, name="Opt")
    bar.start()
    for _ in range(epoch):
        self.iter += 1
        with warnings.catch_warnings():
            # Turn numerical warnings (overflow, invalid value, ...) into
            # exceptions so that a diverging step aborts the optimisation.
            warnings.filterwarnings("error")
            try:
                if self._core(eps):
                    break
                self.log.append(self._loss_cache)
            # Use the public `np.linalg.LinAlgError`: the private
            # `np.linalg.linalg` path is deprecated in recent NumPy, and
            # touching it here would itself be raised as an error because
            # warnings are promoted above.
            except (RuntimeWarning, np.linalg.LinAlgError) as err:
                print("\n", err, "\n")
                break
        bar.update()
    bar.update()
    bar.terminate()
    return self._x, self._loss_cache, self.iter, self.feva
def fit(self, x, y, sample_weight=None, tree=None, epoch=None, feature_bound=None, **kwargs):
    """
    Grow `epoch` trees, each on a bootstrap resample of `(x, y)`.

    :param x             : samples; promoted with `np.atleast_2d`
    :param y             : targets; converted with `np.asarray`
    :param sample_weight : optional per-sample weights; defaults to `self._params["sw"]`.
                           Any sequence is accepted and converted to a float array.
    :param tree          : key into `RandomForest._cvd_trees`; defaults to `self._params["tree"]`
    :param epoch         : number of trees; defaults to `self._params["epoch"]`
    :param feature_bound : forwarded to each tree's `fit`;
                           defaults to `self._params["feature_bound"]`
    :param kwargs        : forwarded to the tree constructor
    """
    if sample_weight is None:
        sample_weight = self._params["sw"]
    if tree is None:
        tree = self._params["tree"]
    if epoch is None:
        epoch = self._params["epoch"]
    if feature_bound is None:
        feature_bound = self._params["feature_bound"]

    x, y = np.atleast_2d(x), np.asarray(y)
    if sample_weight is not None:
        # A plain list cannot be indexed with an index array and an integer
        # array cannot be normalised by true division, so convert once up front.
        sample_weight = np.asarray(sample_weight, dtype=float)
    n_sample = len(y)
    self._tree = tree
    bar = ProgressBar(max_value=epoch, name="RF")
    for _ in range(epoch):
        tmp_tree = RandomForest._cvd_trees[tree](**kwargs)
        # Bootstrap: draw n_sample indices with replacement.
        _indices = np.random.randint(n_sample, size=n_sample)
        if sample_weight is None:
            _local_weight = None
        else:
            _local_weight = sample_weight[_indices]
            _local_weight = _local_weight / _local_weight.sum()
        tmp_tree.fit(x[_indices], y[_indices],
                     sample_weight=_local_weight, feature_bound=feature_bound)
        self._trees.append(deepcopy(tmp_tree))
        bar.update()
def main(clf):
    """
    Run one shuffled 10-fold cross validation of `clf` on the pickled text dataset.

    Each fold fits a `CountVectorizer` and a `TfidfTransformer` on the training
    part only and applies both to the held-out part, so the classifier is
    trained and evaluated on the same kind of features.

    :param clf : classifier exposing `fit`, `predict` and `acc`
    :return    : (list of per-fold accuracies, list of `[y_test, y_pred]` pairs)
    """
    n_folds = 10
    dat_path = os.path.join("_Data", "dataset.dat")
    gen_dataset(dat_path)
    # NOTE(review): pickle must never be used on untrusted data; this file is
    # presumably the one written by `gen_dataset` just above -- confirm.
    with open(dat_path, "rb") as _file:
        x, y = pickle.load(_file)
    x = [" ".join(sentence) for sentence in x]
    _indices = np.random.permutation(len(x))
    x = list(np.array(x)[_indices])
    y = list(np.array(y)[_indices])
    data_len = len(x)
    batch_size = math.ceil(data_len * 0.1)
    _acc_lst, y_results = [], []
    bar = ProgressBar(max_value=n_folds, name=str(clf))
    bar.start()
    for i in range(n_folds):
        start = i * batch_size
        # The last fold takes whatever is left.
        stop = (i + 1) * batch_size if i != n_folds - 1 else data_len
        x_train = x[:start] + x[stop:]
        y_train = y[:start] + y[stop:]
        x_test, y_test = x[start:stop], y[start:stop]
        count_vec = CountVectorizer()
        tfidf_transformer = TfidfTransformer()
        x_train = tfidf_transformer.fit_transform(count_vec.fit_transform(x_train))
        # The held-out fold must go through the same fitted TF-IDF transform;
        # previously it was left as raw counts while training used TF-IDF.
        x_test = tfidf_transformer.transform(count_vec.transform(x_test))
        clf.fit(x_train, y_train)
        y_pred = clf.predict(x_test)
        _acc_lst.append(clf.acc(y_test, y_pred))
        y_results.append([y_test, y_pred])
        del x_train, y_train, x_test, y_test, y_pred
        bar.update()
    return _acc_lst, y_results
def fit(self, x, y, sample_weight=None, lr=None, epoch=None):
    """
    Train the perceptron by correcting one misclassified sample per step.

    :param x             : samples; promoted with `np.atleast_2d`
    :param y             : targets; converted with `np.asarray`
    :param sample_weight : per-sample weights; defaults to `self._params["sw"]`,
                           and to all ones when that is None as well
    :param lr            : learning rate; defaults to `self._params["lr"]`
    :param epoch         : maximum number of updates; defaults to `self._params["epoch"]`

    Returns early as soon as the selected sample is classified correctly.
    """
    sample_weight = self._params["sw"] if sample_weight is None else sample_weight
    lr = self._params["lr"] if lr is None else lr
    epoch = self._params["epoch"] if epoch is None else epoch

    x, y = np.atleast_2d(x), np.asarray(y)
    n_samples = len(y)
    if sample_weight is None:
        sample_weight = np.ones(n_samples)
    else:
        sample_weight = np.asarray(sample_weight) * n_samples

    self._w = np.zeros(x.shape[1])
    self._b = 0
    bar = ProgressBar(max_value=epoch, name="Perceptron")
    bar.start()
    for _ in range(epoch):
        y_pred = self.predict(x)
        weighted_miss = (y_pred != y) * sample_weight
        # Shuffle before argmax so that ties are broken at random.
        order = np.random.permutation(n_samples)
        pick = order[np.argmax(weighted_miss[order])]
        if y_pred[pick] == y[pick]:
            bar.update(epoch)
            return
        step = lr * y[pick] * sample_weight[pick]
        self._w += step * x[pick]
        self._b += step
        bar.update()
def run(clf):
    """
    Repeat `main` ten times with a fresh classifier and report the results.

    :param clf : "Naive Bayes", "Non-linear SVM", or anything else for the linear SVM

    Shows a box plot of the accuracies (in percent), prints a classification
    report for the best single fold, then the mean and variance of the run
    with the highest average accuracy.
    """
    def _build():
        # A new classifier is created for every repetition.
        if clf == "Naive Bayes":
            return SKMultinomialNB(alpha=0.1)
        if clf == "Non-linear SVM":
            return SKSVM()
        return SKLinearSVM()

    n_runs = 10
    acc_records, y_records = [], []
    bar = ProgressBar(max_value=n_runs, name="Main")
    bar.start()
    for _ in range(n_runs):
        rs = main(_build())
        acc_records.append(rs[0])
        y_records.extend(rs[1])
        bar.update()

    acc_records = np.array(acc_records) * 100
    plt.figure()
    plt.boxplot(acc_records, vert=False, showmeans=True)
    plt.show()

    from Util.DataToolkit import DataToolkit
    # Flat argmax over (run, fold) lines up with the flat `y_records` list.
    idx = np.argmax(acc_records)  # type: int
    y_true, y_pred = y_records[idx][0], y_records[idx][1]
    label_names = np.load(os.path.join("_Data", "LABEL_DIC.npy"))
    print(metrics.classification_report(y_true, y_pred, target_names=label_names))

    best_run = np.argmax(np.average(acc_records, axis=1))
    toolkit = DataToolkit(acc_records[best_run])
    print("Acc Mean : {:8.6}".format(toolkit.mean))
    print("Acc Variance : {:8.6}".format(toolkit.variance))
    print("Done")
def _get_prediction(self, x, name=None, batch_size=1e6, verbose=None):
    """
    Evaluate the network on `x` chunk by chunk and stack the chunk outputs.

    The chunk length is `batch_size` divided by the number of elements in a
    single sample (never less than 1). When `x` fits into one chunk it is
    evaluated directly and no progress bar is created.

    :param x          : input array; `len(x)` and `x.shape[1:]` are inspected here
    :param name       : optional tag added to the progress-bar title
    :param batch_size : element budget for one chunk
    :param verbose    : verbosity level; falls back to `self.verbose` when None
    :return           : the last entry of `self._get_activations(x)`, or the
                        `np.vstack` of those entries over all chunks
    """
    if verbose is None:
        verbose = self.verbose
    # np.prod collapses the per-sample shape into a scalar, e.g. (2,) -> 2.
    single_batch = int(batch_size / np.prod(x.shape[1:])) or 1
    n_samples = len(x)
    if single_batch >= n_samples:
        # The last activation is the prediction.
        return self._get_activations(x).pop()

    # Number of chunks is ceil(n_samples / single_batch): the extra chunk is
    # needed only when there is a remainder (the old test was inverted).
    epoch, remainder = divmod(n_samples, single_batch)
    if remainder:
        epoch += 1

    name = "Prediction" if name is None else "Prediction ({})".format(name)
    sub_bar = ProgressBar(max_value=epoch, name=name, start=False)
    show_progress = verbose >= NNVerbose.METRICS
    if show_progress:
        sub_bar.start()

    rs = []
    # Walk over x in steps of single_batch; the final chunk simply holds the
    # remaining samples. Each chunk is evaluated exactly like the single-chunk
    # case above. The previous code recursed into `_get_prediction` and then
    # called `.pop()` a second time on the already-popped result.
    for start in range(0, n_samples, single_batch):
        chunk = x[start:start + single_batch]
        rs.append(self._get_activations(chunk).pop())
        if show_progress:
            sub_bar.update()
    return np.vstack(rs)
def fit(self, x, y, sample_weight=None, c=None, lr=None, epoch=None, tol=None):
    """
    Train the linear SVM on the full data set with TensorFlow's Adam optimizer.

    :param x             : samples, turned into a float32 constant
    :param y             : targets, turned into a float32 constant
    :param sample_weight : per-sample weights; defaults to `self._params["sw"]`,
                           and to all ones when that is None as well
    :param c             : factor on the L2 term; defaults to `self._params["c"]`
    :param lr            : learning rate; defaults to `self._params["lr"]`
    :param epoch         : maximum number of steps; defaults to `self._params["epoch"]`
    :param tol           : stop once the cost drops below tol;
                           defaults to `self._params["tol"]`
    """
    sample_weight = self._params["sw"] if sample_weight is None else sample_weight
    c = self._params["c"] if c is None else c
    lr = self._params["lr"] if lr is None else lr
    epoch = self._params["epoch"] if epoch is None else epoch
    tol = self._params["tol"] if tol is None else tol

    if sample_weight is None:
        raw_weight = np.ones(len(y))
    else:
        raw_weight = np.asarray(sample_weight) * len(y)
    sample_weight = tf.constant(raw_weight, dtype=tf.float32, name="sample_weight")

    x, y = tf.constant(x, dtype=tf.float32), tf.constant(y, dtype=tf.float32)
    self._w = tf.Variable(np.zeros(x.shape[1]), dtype=tf.float32, name="w")
    self._b = tf.Variable(0., dtype=tf.float32, name="b")

    y_pred = self.predict(x, True, False)
    # Weighted hinge loss plus the L2 term on the weights.
    hinge = tf.reduce_sum(tf.maximum(1 - y * y_pred, 0) * sample_weight)
    cost = hinge + c * tf.nn.l2_loss(self._w)
    train_step = tf.train.AdamOptimizer(learning_rate=lr).minimize(cost)
    self._sess.run(tf.global_variables_initializer())

    bar = ProgressBar(max_value=epoch, name="TFLinearSVM")
    for _ in range(epoch):
        current_cost = self._sess.run([cost, train_step])[0]
        if current_cost < tol:
            bar.update(epoch)
            break
        bar.update()
def _get_prediction(self, x, name=None, batch_size=1e6, verbose=None):
    """
    Run `self._y_pred` on `x` chunk by chunk and stack the chunk outputs.

    The chunk length is `batch_size` divided by the number of elements in a
    single sample (never less than 1). When `x` fits into one chunk it is
    evaluated directly and no progress bar is created.

    :param x          : input array fed to `self._tfx`
    :param name       : optional tag added to the progress-bar title
    :param batch_size : element budget for one chunk
    :param verbose    : verbosity level; falls back to `self.verbose` when None
    :return           : the session result for `self._y_pred`, stacked with
                        `np.vstack` when several chunks are needed
    """
    if verbose is None:
        verbose = self.verbose
    single_batch = int(batch_size / np.prod(x.shape[1:])) or 1
    n_samples = len(x)
    if single_batch >= n_samples:
        return self._sess.run(self._y_pred, {self._tfx: x})

    # Number of chunks is ceil(n_samples / single_batch): the extra chunk is
    # needed only when there is a remainder (the old test was inverted, which
    # made the progress bar one step too long or too short).
    epoch, remainder = divmod(n_samples, single_batch)
    if remainder:
        epoch += 1

    name = "Prediction" if name is None else "Prediction ({})".format(name)
    sub_bar = ProgressBar(max_value=epoch, name=name, start=False)
    show_progress = verbose >= NNVerbose.METRICS
    if show_progress:
        sub_bar.start()

    rs = []
    for start in range(0, n_samples, single_batch):
        # Slicing past the end is safe, so the last chunk needs no special case.
        chunk = x[start:start + single_batch]
        rs.append(self._sess.run(self._y_pred, {self._tfx: chunk}))
        if show_progress:
            sub_bar.update()
    return np.vstack(rs)
def fit(self, x, n_clusters=None, epoch=None, norm=None):
    """
    Cluster `x` by alternating label assignment and center averaging.

    :param x          : samples; promoted with `np.atleast_2d`
    :param n_clusters : number of centers; defaults to `self._params["n_clusters"]`
    :param epoch      : maximum number of iterations; defaults to `self._params["epoch"]`
    :param norm       : when given, overwrites `self._params["norm"]`

    The loop stops early once two consecutive label assignments are equal.
    The number of completed update steps is stored in `self._counter`.
    """
    if n_clusters is None:
        n_clusters = self._params["n_clusters"]
    if epoch is None:
        epoch = self._params["epoch"]
    if norm is not None:
        self._params["norm"] = norm

    x = np.atleast_2d(x)
    # Column vector of cluster ids, so `labels == arange` yields one mask row per cluster.
    arange = np.arange(n_clusters)[..., None]
    x_high_dim, labels_cache, counter = x[:, None, ...], None, 0
    # Initial centers are randomly chosen samples.
    self._centers = x[np.random.permutation(len(x))[:n_clusters]]
    bar = ProgressBar(max_value=epoch, name="KMeans")
    bar.start()
    for _ in range(epoch):
        labels = self.predict(x_high_dim, high_dim=True)
        if labels_cache is not None and np.all(labels_cache == labels):
            bar.update(epoch)
            break
        # Always remember the latest assignment. Previously the cache kept the
        # very first labelling forever, so convergence was tested against the
        # initial assignment instead of the previous iteration.
        labels_cache = labels
        for i, indices in enumerate(labels == arange):
            self._centers[i] = np.average(x[indices], axis=0)
        counter += 1
        bar.update()
    self._counter = counter
def fit(self, x, y, sample_weight=None, clf=None, epoch=None, eps=None, **kwargs):
    """
    Train `epoch` weak classifiers with AdaBoost re-weighting.

    :param x             : samples; promoted with `np.atleast_2d`
    :param y             : targets; converted with `np.asarray`
    :param sample_weight : initial per-sample weights; defaults to
                           `self._params["sample_weight"]`, and to uniform weights
                           when that is None as well. The input is copied and
                           never modified.
    :param clf           : key into `AdaBoost._weak_clf`; defaults to `self._params["clf"]`
                           and finally to "Cart" with `max_depth=1`
    :param epoch         : number of boosting rounds; defaults to `self._params["epoch"]`
    :param eps           : the weighted error is clipped to [eps, 1 - eps];
                           defaults to `self._params["eps"]`
    :param kwargs        : forwarded to the weak classifier's constructor
    """
    if sample_weight is None:
        sample_weight = self._params["sample_weight"]
    if clf is None:
        clf = self._params["clf"]
    if epoch is None:
        epoch = self._params["epoch"]
    if eps is None:
        eps = self._params["eps"]

    x, y = np.atleast_2d(x), np.asarray(y)
    if clf is None:
        clf = "Cart"
        kwargs = {"max_depth": 1}
    self._kwarg_cache = kwargs
    self._clf = clf

    if sample_weight is None:
        sample_weight = np.ones(len(y)) / len(y)
    else:
        # Copy (as float): the weights are updated in place below and must not
        # leak back into the caller's array or into the stored default.
        sample_weight = np.array(sample_weight, dtype=float)

    bar = ProgressBar(max_value=epoch, name="AdaBoost")
    for _ in range(epoch):
        tmp_clf = AdaBoost._weak_clf[clf](**kwargs)
        tmp_clf.fit(x, y, sample_weight=sample_weight)
        y_pred = tmp_clf.predict(x)
        # Weighted error rate, clipped away from 0 and 1 so the log stays finite.
        em = min(max((y_pred != y).astype(np.int8).dot(sample_weight[..., None])[0], eps),
                 1 - eps)
        am = 0.5 * log(1 / em - 1)
        sample_weight *= np.exp(-am * y * y_pred)
        sample_weight /= np.sum(sample_weight)
        self._clfs.append(deepcopy(tmp_clf))
        self._clfs_weights.append(am)
        bar.update()
def _get_prediction(self, x, name=None, batch_size=1e6, verbose=None, out_of_sess=False, idx=-1):
    """
    Evaluate the network on `x` chunk by chunk and stack the chunk outputs.

    The chunk length is `batch_size` divided by the number of elements in a
    single sample (never less than 1). When `x` fits into one chunk it is
    evaluated directly and no progress bar is created.

    :param x           : input array
    :param name        : optional tag added to the progress-bar title
    :param batch_size  : element budget for one chunk
    :param verbose     : verbosity level; falls back to `self.verbose` when None
    :param out_of_sess : when truthy, tensors are built with `self.get_rs(chunk, idx=idx)`
                         and evaluated under `self._sess.as_default()`; otherwise
                         the prediction tensor is evaluated with `self._tfx` fed
    :param idx         : -1 selects `self._y_pred`, anything else selects
                         `self.get_rs(self._tfx, idx=idx)`
    :return            : the evaluated output, stacked with `np.vstack` when
                         several chunks are needed
    """
    if verbose is None:
        verbose = self.verbose
    single_batch = int(batch_size / np.prod(x.shape[1:])) or 1
    _y_pred = self._y_pred if idx == -1 else self.get_rs(self._tfx, idx=idx)
    n_samples = len(x)
    if single_batch >= n_samples:
        if not out_of_sess:
            return _y_pred.eval(feed_dict={self._tfx: x})
        with self._sess.as_default():
            return self.get_rs(x, idx=idx).eval(feed_dict={self._tfx: x})

    # Number of chunks is ceil(n_samples / single_batch): the extra chunk is
    # needed only when there is a remainder (the old test was inverted, which
    # made the progress bar one step too long or too short).
    epoch, remainder = divmod(n_samples, single_batch)
    if remainder:
        epoch += 1

    name = "Prediction" if name is None else "Prediction ({})".format(name)
    sub_bar = ProgressBar(min_value=0, max_value=epoch, name=name)
    show_progress = verbose >= NNVerbose.METRICS
    if show_progress:
        sub_bar.start()

    rs = []
    for start in range(0, n_samples, single_batch):
        # Slicing past the end is safe, so the last chunk needs no special case.
        chunk = x[start:start + single_batch]
        if out_of_sess:
            # Only build the tensor here; it is evaluated after the loop.
            rs.append(self.get_rs(chunk, idx=idx))
        else:
            rs.append(_y_pred.eval(feed_dict={self._tfx: chunk}))
        if show_progress:
            sub_bar.update()

    if out_of_sess:
        with self._sess.as_default():
            rs = [_rs.eval() for _rs in rs]
    return np.vstack(rs)
def gen_dicts(self, data, n_batch=None, include_label=True, predict=False, add_noises=False,
              shuffle=True, name=None, count=None):
    """
    Lazily yield one feed dict per batch of `data`.

    :param data          : passed to `self.gen_batches`
    :param n_batch       : batch size; defaults to `self.n_batch`, otherwise cast to int
    :param include_label : forwarded to `self.get_feed_dict`
    :param predict       : forwarded to `self.get_feed_dict`
    :param add_noises    : not used by this method
    :param shuffle       : forwarded to `self.gen_batches`
    :param name          : when given, a progress bar with this name is advanced
                           once per batch
    :param count         : forwarded to `self.get_feed_dict` as keyword `count`
    :return              : generator over the results of `self.get_feed_dict`
    """
    if n_batch is None:
        n_batch = self.n_batch
    else:
        n_batch = int(n_batch)

    bar = None if name is None else ProgressBar(max_value=len(data) // n_batch, name=name)
    show_progress = bar is not None
    for batch in self.gen_batches(data, shuffle, n_batch):
        if show_progress:
            bar.update()
        yield self.get_feed_dict(batch, include_label, predict, count=count)
def fit(self, x, y, sample_weight=None, c=None, lr=None, optimizer=None, batch_size=None,
        epoch=None, tol=None, animation_params=None):
    """
    Train the linear SVM in mini-batches with a pluggable optimizer.

    :param x                : samples; promoted with `np.atleast_2d`
    :param y                : targets; converted to a float32 array
    :param sample_weight    : per-sample weights; defaults to `self._params["sample_weight"]`,
                              and to all ones when that is None as well
    :param c, lr, optimizer, batch_size, epoch, tol
                            : each defaults to the entry of the same name in `self._params`
    :param animation_params : forwarded to `self._get_animation_params`

    Training stops as soon as `self._batch_training` returns a value below `tol`.
    """
    params = self._params
    sample_weight = params["sample_weight"] if sample_weight is None else sample_weight
    c = params["c"] if c is None else c
    lr = params["lr"] if lr is None else lr
    batch_size = params["batch_size"] if batch_size is None else batch_size
    epoch = params["epoch"] if epoch is None else epoch
    tol = params["tol"] if tol is None else tol
    optimizer = params["optimizer"] if optimizer is None else optimizer
    *animation_properties, animation_params = self._get_animation_params(
        animation_params)

    x, y = np.atleast_2d(x), np.asarray(y, dtype=np.float32)
    n_samples = len(y)
    if sample_weight is None:
        sample_weight = np.ones(n_samples)
    else:
        sample_weight = np.asarray(sample_weight) * n_samples

    self._w = np.zeros(x.shape[1], dtype=np.float32)
    self._b = np.zeros(1, dtype=np.float32)
    self._model_parameters = [self._w, self._b]
    self._optimizer = OptFactory().get_optimizer_by_name(
        optimizer, self._model_parameters, lr, epoch)

    bar = ProgressBar(max_value=epoch, name="LinearSVM")
    ims = []
    train_repeat = self._get_train_repeat(x, batch_size)
    for i in range(epoch):
        self._optimizer.update()
        loss = self._batch_training(x, y, batch_size, train_repeat, sample_weight, c)
        if loss < tol:
            bar.terminate()
            break
        self._handle_animation(i, x, y, ims, animation_params, *animation_properties)
        bar.update()
    self._handle_mp4(ims, animation_properties)
def fit(self, x, y, c=None, lr=None, batch_size=None, epoch=None, tol=None, optimizer=None,
        animation_params=None):
    """
    Build the TensorFlow graph of a linear SVM and train it in mini-batches.

    :param x                : samples; promoted with `np.atleast_2d`
    :param y                : targets; converted with `np.asarray`, a trailing axis
                              is added before they are handed to `self._batch_training`
    :param c, lr, batch_size, epoch, tol, optimizer
                            : each defaults to the entry of the same name in `self._params`
    :param animation_params : forwarded to `self._get_animation_params`

    Training stops as soon as `self._batch_training` returns a value below `tol`.
    """
    params = self._params
    c = params["c"] if c is None else c
    lr = params["lr"] if lr is None else lr
    batch_size = params["batch_size"] if batch_size is None else batch_size
    epoch = params["epoch"] if epoch is None else epoch
    tol = params["tol"] if tol is None else tol
    optimizer = params["optimizer"] if optimizer is None else optimizer
    *animation_properties, animation_params = self._get_animation_params(
        animation_params)

    x, y = np.atleast_2d(x), np.asarray(y)
    y_2d = y[..., None]
    n_features = x.shape[1]

    self._w = tf.Variable(np.zeros([n_features, 1]), dtype=tf.float32, name="w")
    self._b = tf.Variable(0., dtype=tf.float32, name="b")
    self._tfx = tf.placeholder(tf.float32, [None, n_features])
    self._tfy = tf.placeholder(tf.float32, [None, 1])
    self._y_pred_raw = tf.matmul(self._tfx, self._w) + self._b
    self._y_pred = tf.sign(self._y_pred_raw)

    # Hinge loss summed over the batch, plus the L2 term on the weights.
    hinge = tf.reduce_sum(tf.nn.relu(1 - self._tfy * self._y_pred_raw))
    loss = hinge + c * tf.nn.l2_loss(self._w)
    train_step = TFOptFac().get_optimizer_by_name(optimizer, lr).minimize(loss)
    self._sess.run(tf.global_variables_initializer())

    bar = ProgressBar(max_value=epoch, name="TFLinearSVM")
    ims = []
    train_repeat = self._get_train_repeat(x, batch_size)
    for i in range(epoch):
        current = self._batch_training(x, y_2d, batch_size, train_repeat, loss, train_step)
        if current < tol:
            bar.terminate()
            break
        self._handle_animation(i, x, y, ims, animation_params, *animation_properties)
        bar.update()
    self._handle_mp4(ims, animation_properties)
def fit(self, x, y, c=None, lr=None, batch_size=None, epoch=None, tol=None, optimizer=None,
        animation_params=None):
    """
    Train a linear SVM whose parameters are PyTorch variables, in mini-batches.

    :param x                : samples; promoted with `np.atleast_2d`
    :param y                : targets; converted to a float32 array, a trailing axis
                              is added before they are handed to `self.batch_training`
    :param c, lr, batch_size, epoch, tol, optimizer
                            : each defaults to the entry of the same name in `self._params`
    :param animation_params : forwarded to `self._get_animation_params`

    Training stops as soon as `self.batch_training` returns a value below `tol`.
    """
    params = self._params
    c = params["c"] if c is None else c
    lr = params["lr"] if lr is None else lr
    batch_size = params["batch_size"] if batch_size is None else batch_size
    epoch = params["epoch"] if epoch is None else epoch
    tol = params["tol"] if tol is None else tol
    optimizer = params["optimizer"] if optimizer is None else optimizer
    *animation_properties, animation_params = self._get_animation_params(
        animation_params)

    x, y = np.atleast_2d(x), np.asarray(y, dtype=np.float32)
    y_2d = y[..., None]

    # Weights start from uniform random values, the bias from zero.
    self._w = Variable(torch.rand([x.shape[1], 1]), requires_grad=True)
    self._b = Variable(torch.Tensor([0.]), requires_grad=True)
    self._model_parameters = [self._w, self._b]
    self._optimizer = PyTorchOptFac().get_optimizer_by_name(
        optimizer, self._model_parameters, lr, epoch)

    x, y, y_2d = self._arr_to_variable(False, x, y, y_2d)
    # Bind the penalty factor so the trainer only has to pass (y, y_pred).
    loss_function = lambda _y, _y_pred: self._loss(_y, _y_pred, c)

    bar = ProgressBar(max_value=epoch, name="TorchLinearSVM")
    ims = []
    train_repeat = self._get_train_repeat(x, batch_size)
    for i in range(epoch):
        self._optimizer.update()
        current = self.batch_training(x, y_2d, batch_size, train_repeat, loss_function)
        if current < tol:
            bar.terminate()
            break
        self._handle_animation(i, x, y, ims, animation_params, *animation_properties)
        bar.update()
    self._handle_mp4(ims, animation_properties)
def fit(self, x, y, sample_weight=None, clf=None, epoch=None, eps=None, **kwargs):
    """
    Train `epoch` weak classifiers with AdaBoost re-weighting.

    :param x             : samples; promoted with `np.atleast_2d`
    :param y             : targets; converted with `np.asarray`
    :param sample_weight : initial per-sample weights (copied); defaults to
                           `self._params["sample_weight"]`, and to uniform weights
                           when that is None as well
    :param clf           : key into `AdaBoost._weak_clf`; defaults to `self._params["clf"]`
                           and finally to "Cart" with `max_depth=1`
    :param epoch         : number of boosting rounds; defaults to `self._params["epoch"]`
    :param eps           : the weighted error is clipped to [eps, 1 - eps];
                           defaults to `self._params["eps"]`
    :param kwargs        : forwarded to the weak classifier's constructor

    After the loop `self._clfs_weights` is replaced by a float32 array.
    """
    params = self._params
    sample_weight = params["sample_weight"] if sample_weight is None else sample_weight
    clf = params["clf"] if clf is None else clf
    epoch = params["epoch"] if epoch is None else epoch
    eps = params["eps"] if eps is None else eps

    x, y = np.atleast_2d(x), np.asarray(y)
    # Default weak learner: CART decision stumps (max_depth=1).
    if clf is None:
        clf = "Cart"
        kwargs = {"max_depth": 1}
    self._clf = clf
    self._kwarg_cache = kwargs

    if sample_weight is None:
        sample_weight = np.ones(len(y)) / len(y)
    else:
        sample_weight = np.array(sample_weight)

    bar = ProgressBar(max_value=epoch, name="AdaBoost")
    # Main AdaBoost loop; `epoch` is the number of rounds.
    for _ in range(epoch):
        # Train a weak classifier on the current sample weights.
        weak = AdaBoost._weak_clf[clf](**kwargs)
        weak.fit(x, y, sample_weight)
        # Predict with the weak classifier.
        y_pred = weak.predict(x)
        # Weighted error rate; a small constant guards the boundary cases
        # for numerical stability.
        weighted_err = (y_pred != y).dot(sample_weight[:, None])[0]
        em = min(max(weighted_err, eps), 1 - eps)
        # Voting weight of this weak classifier.
        am = 0.5 * log(1 / em - 1)
        # Update the sample weights, then store a deep copy of the weak
        # classifier together with its voting weight.
        sample_weight *= np.exp(-am * y * y_pred)
        sample_weight /= np.sum(sample_weight)
        self._clfs.append(deepcopy(weak))
        self._clfs_weights.append(am)
        bar.update()
    self._clfs_weights = np.array(self._clfs_weights, dtype=np.float32)
def fit(self, x=None, y=None, x_test=None, y_test=None, n_epoch=None, n_batch=None,
        print_settings=True):
    """
    Prepare the data, build the model and run the training loop.

    :param x, y, x_test, y_test : passed through `self.prepare_data`
    :param n_epoch              : when given, overwrites `self.n_epoch`
    :param n_batch              : when given, overwrites `self.n_batch`
    :param print_settings       : forwarded to `self.build_model`

    TensorBoard summaries are written only when `self.tensorboard_verbose > 0`:
    every `self.snapshot_step` training steps when that is positive, or once
    per epoch when it is 0.
    """
    if not self.settings_inited:
        self.init_all_settings()
    if n_epoch is not None:
        self.n_epoch = n_epoch
    if n_batch is not None:
        self.n_batch = n_batch

    x, y, x_test, y_test = self.prepare_data(x, y, x_test, y_test)
    self.build_model(x, y, x_test, y_test, print_settings)

    count = 0
    with self._sess.as_default() as sess:
        # Prepare
        writers_and_ops = self._prepare_tensorboard_verbose(sess)
        train_writer, test_writer, train_merge_op, test_merge_op = writers_and_ops
        bar = ProgressBar(max_value=self.n_epoch, name="Main")
        train_info = [train_merge_op, train_writer]
        test_info = [test_merge_op, test_writer]
        train_metric, test_metric = self._get_metrics(x, y, x_test, y_test)
        if self.tensorboard_verbose > 0:
            self._do_tensorboard_verbose(
                count, train_info, test_info, train_metric, test_metric)

        # Train
        i = 0
        while i < self.n_epoch:
            for local_dict in self.gen_dicts(self.train_data, count=count):
                count += 1
                self._sess.run(self._train_step, local_dict)
                at_snapshot = self.snapshot_step > 0 and count % self.snapshot_step == 0
                if at_snapshot and self.tensorboard_verbose > 0:
                    train_metric, test_metric = self._get_metrics(x, y, x_test, y_test)
                    self._do_tensorboard_verbose(
                        count, train_info, test_info, train_metric, test_metric)
            i += 1
            if self.snapshot_step == 0 and self.tensorboard_verbose > 0:
                self._do_tensorboard_verbose(
                    count, train_info, test_info, train_metric, test_metric)
            if bar is not None:
                bar.update()
def fit(self, x, y, sample_weight=None, c=None, lr=None, epoch=None, tol=None,
        animation_params=None):
    """
    Train the linear SVM by repeatedly correcting the single worst sample.

    :param x                : samples; promoted with `np.atleast_2d`
    :param y                : targets; converted with `np.asarray`
    :param sample_weight    : per-sample weights; defaults to `self._params["sample_weight"]`,
                              and to all ones when that is None as well
    :param c, lr, epoch, tol: each defaults to the entry of the same name in `self._params`
    :param animation_params : forwarded to `self._get_animation_params`

    The loop stops once the largest weighted error is <= `tol`.
    """
    params = self._params
    sample_weight = params["sample_weight"] if sample_weight is None else sample_weight
    c = params["c"] if c is None else c
    lr = params["lr"] if lr is None else lr
    epoch = params["epoch"] if epoch is None else epoch
    tol = params["tol"] if tol is None else tol
    *animation_properties, animation_params = self._get_animation_params(
        animation_params)

    x, y = np.atleast_2d(x), np.asarray(y)
    n_samples = len(y)
    if sample_weight is None:
        sample_weight = np.ones(n_samples)
    else:
        sample_weight = np.asarray(sample_weight) * n_samples

    self._w = np.zeros(x.shape[1])
    self._b = 0
    ims = []
    bar = ProgressBar(max_value=epoch, name="LinearSVM")
    for i in range(epoch):
        errors = (1 - self.predict(x, get_raw_results=True) * y) * sample_weight
        # Shuffle before argmax so that ties are broken at random.
        order = np.random.permutation(n_samples)
        worst = order[np.argmax(errors[order])]
        if errors[worst] <= tol:
            bar.update(epoch)
            break
        step = lr * c * y[worst] * sample_weight[worst]
        self._w *= 1 - lr
        self._w += step * x[worst]
        self._b += step
        self._handle_animation(i, x, y, ims, animation_params, *animation_properties)
        bar.update()
    self._handle_mp4(ims, animation_properties)
def predict(self, x):
    """
    Run the stored output tensor on `x` in chunks and stack the results.

    :param x : input samples; converted with `np.asarray` and `NNDist._transfer_x`
    :return  : float32 array built by `np.vstack` over all chunk outputs

    The chunk length is 1e6 divided by the number of elements in one sample,
    but never less than 1.
    """
    x = NNDist._transfer_x(np.asarray(x))
    # Clamp to 1: a sample with more than 1e6 elements used to yield a chunk
    # length of 0 and a ZeroDivisionError below.
    batch_size = max(1, int(1e6 / np.prod(x.shape[1:])))
    epoch = int(math.ceil(len(x) / batch_size))
    output = self._sess.graph.get_tensor_by_name(self._output)
    bar = ProgressBar(max_value=epoch, name="Predict")
    bar.start()
    rs = []
    for i in range(epoch):
        # Slicing past the end is safe, so the last chunk needs no special case.
        chunk = x[i * batch_size:(i + 1) * batch_size]
        rs.append(self._sess.run(output, {self._entry: chunk}))
        bar.update()
    return np.vstack(rs).astype(np.float32)
def predict(self, x, get_raw_results=False, **kwargs):
    """
    Run the stored output tensor on `x` in chunks and stack the results.

    :param x               : input samples; converted with `np.asarray` and
                             `NNDist._transfer_x`
    :param get_raw_results : when truthy, return the stacked float32 outputs;
                             otherwise return their argmax along axis 1
    :param kwargs          : accepted but not used here
    :return                : raw outputs or class indices, see `get_raw_results`

    The chunk length is 1e6 divided by the number of elements in one sample,
    but never less than 1.
    """
    x = NNDist._transfer_x(np.asarray(x))
    # Clamp to 1: a sample with more than 1e6 elements used to yield a chunk
    # length of 0 and a ZeroDivisionError below.
    batch_size = max(1, int(1e6 / np.prod(x.shape[1:])))
    epoch = int(ceil(len(x) / batch_size))
    output = self._sess.graph.get_tensor_by_name(self._output)
    bar = ProgressBar(max_value=epoch, name="Predict")
    rs = []
    for i in range(epoch):
        # Slicing past the end is safe, so the last chunk needs no special case.
        chunk = x[i * batch_size:(i + 1) * batch_size]
        rs.append(self._sess.run(output, {self._entry: chunk}))
        bar.update()
    y_pred = np.vstack(rs).astype(np.float32)
    return y_pred if get_raw_results else np.argmax(y_pred, axis=1)
def predict(self, x):
    """
    Create the graph, then run its output tensor on `x` in chunks.

    :param x : input samples; promoted with `np.atleast_2d` and cast to float32
    :return  : float32 array built by `np.vstack` over all chunk outputs

    The chunk length is 1e6 divided by the number of elements in one sample,
    but never less than 1. A new `tf.Session` is opened for the call.
    """
    self._create_graph()
    x, rs = np.atleast_2d(x).astype(np.float32), []
    with tf.Session() as sess:
        flattened_tensor = sess.graph.get_tensor_by_name(self._output)
        print("Predicting...")
        # Clamp to 1: a sample with more than 1e6 elements used to yield a
        # chunk length of 0 and a ZeroDivisionError below.
        batch_size = max(1, int(1e6 / np.prod(x.shape[1:])))
        epoch = int(math.ceil(len(x) / batch_size))
        bar = ProgressBar(max_value=epoch, name="Predict")
        for i in range(epoch):
            # Slicing past the end is safe, so the last chunk needs no special case.
            chunk = x[i * batch_size:(i + 1) * batch_size]
            rs.append(sess.run(flattened_tensor, {self._entry: chunk}))
            bar.update()
        return np.vstack(rs).astype(np.float32)
def fit(self, x, y, sample_weight=None, lr=None, epoch=None, animation_params=None):
    """
    Train the perceptron by correcting one misclassified sample per step.

    :param x                : samples; promoted with `np.atleast_2d`
    :param y                : targets; converted with `np.asarray`
    :param sample_weight    : per-sample weights; defaults to `self._params["sample_weight"]`,
                              and to all ones when that is None as well
    :param lr               : learning rate; defaults to `self._params["lr"]`
    :param epoch            : maximum number of updates; defaults to `self._params["epoch"]`
    :param animation_params : forwarded to `self._get_animation_params`

    The loop stops as soon as the selected sample is classified correctly.
    """
    params = self._params
    sample_weight = params["sample_weight"] if sample_weight is None else sample_weight
    lr = params["lr"] if lr is None else lr
    epoch = params["epoch"] if epoch is None else epoch
    *animation_properties, animation_params = self._get_animation_params(
        animation_params)

    x, y = np.atleast_2d(x), np.asarray(y)
    n_samples = len(y)
    if sample_weight is None:
        sample_weight = np.ones(n_samples)
    else:
        sample_weight = np.asarray(sample_weight) * n_samples

    self._w = np.zeros(x.shape[1])
    self._b = 0
    ims = []
    bar = ProgressBar(max_value=epoch, name="Perceptron")
    for i in range(epoch):
        y_pred = self.predict(x)
        weighted_miss = (y_pred != y) * sample_weight
        # Shuffle before argmax so that ties are broken at random.
        order = np.random.permutation(n_samples)
        pick = order[np.argmax(weighted_miss[order])]
        if y_pred[pick] == y[pick]:
            bar.update(epoch)
            break
        step = lr * y[pick] * sample_weight[pick]
        self._w += step * x[pick]
        self._b += step
        self._handle_animation(i, x, y, ims, animation_params, *animation_properties)
        bar.update()
    self._handle_mp4(ims, animation_properties)
def fit(self, x, y, sample_weight=None, lr=None, epoch=None, animation_params=None):
    """
    Train the perceptron with steps scaled by the norm of the weight vector.

    :param x                : samples; promoted with `np.atleast_2d`
    :param y                : targets; converted with `np.asarray`
    :param sample_weight    : per-sample weights; defaults to `self._params["sample_weight"]`,
                              and to all ones when that is None as well
    :param lr               : learning rate; defaults to `self._params["lr"]`
    :param epoch            : maximum number of updates; defaults to `self._params["epoch"]`
    :param animation_params : forwarded to `self._get_animation_params`

    Weights start from uniform random values. The loop stops once the largest
    weighted error `-y * y_pred * sample_weight` is negative.
    """
    params = self._params
    sample_weight = params["sample_weight"] if sample_weight is None else sample_weight
    lr = params["lr"] if lr is None else lr
    epoch = params["epoch"] if epoch is None else epoch
    *animation_properties, animation_params = self._get_animation_params(
        animation_params)

    x, y = np.atleast_2d(x), np.asarray(y)
    n_samples = len(y)
    if sample_weight is None:
        sample_weight = np.ones(n_samples)
    else:
        sample_weight = np.asarray(sample_weight) * n_samples

    self._w = np.random.random(x.shape[1])
    self._b = 0.
    ims = []
    bar = ProgressBar(max_value=epoch, name="Perceptron")
    for i in range(epoch):
        y_pred = self.predict(x, True)
        err = -y * y_pred * sample_weight
        worst = np.argmax(err)
        if err[worst] < 0:
            bar.terminate()
            break
        w_norm = np.linalg.norm(self._w)
        delta = lr * y[worst] * sample_weight[worst] / w_norm
        self._w += delta * (x[worst] - y_pred[worst] * self._w / w_norm**2)
        self._b += delta
        self._handle_animation(i, x, y, ims, animation_params, *animation_properties)
        bar.update()
    self._handle_mp4(ims, animation_properties)
def fit(self, x, y, sample_weight=None, kernel=None, epoch=None, x_test=None, y_test=None,
        metrics=None, animation_params=None, **kwargs):
    """
    Set up the kernel and the Gram matrix, then iterate `self._fit`.

    :param x, y             : training data; stored as `self._x` (2-D) and `self._y`
    :param sample_weight    : per-sample weights; defaults to `self._params["sample_weight"]`,
                              and to all ones when that is None as well
    :param kernel           : "poly" or "rbf"; defaults to `self._params["kernel"]`
    :param epoch            : maximum number of iterations; defaults to `self._params["epoch"]`
    :param x_test, y_test   : optional evaluation data; default to the entries of the
                              same name in `self._params`
    :param metrics          : optional list of metric callables; defaults to
                              `self._params["metrics"]`
    :param animation_params : forwarded to `self._get_animation_params`
    :param kwargs           : may carry "p" (poly), "gamma" (rbf) and overrides for the
                              names listed in `self._fit_args_names`; also forwarded
                              to `self._prepare`
    :return                 : list with one list of metric values per completed
                              iteration (empty when nothing is logged)
    :raises NotImplementedError : for any other kernel name
    """
    params = self._params
    if sample_weight is None:
        sample_weight = params["sample_weight"]  # type: list
    if kernel is None:
        kernel = params["kernel"]
    if epoch is None:
        epoch = params["epoch"]
    if x_test is None:
        x_test = params["x_test"]  # type: list
    if y_test is None:
        y_test = params["y_test"]  # type: list
    if metrics is None:
        metrics = params["metrics"]  # type: list
    *animation_properties, animation_params = self._get_animation_params(animation_params)

    self._x, self._y = np.atleast_2d(x), np.asarray(y)
    if kernel == "poly":
        _p = kwargs.get("p", self._params["p"])
        self._kernel_name = "Polynomial"
        self._kernel_param = "degree = {}".format(_p)
        self._kernel = lambda _x, _y: KernelBase._poly(_x, _y, _p)
    elif kernel == "rbf":
        _gamma = kwargs.get("gamma", 1 / self._x.shape[1])
        self._kernel_name = "RBF"
        self._kernel_param = r"$\gamma = {:8.6}$".format(_gamma)
        self._kernel = lambda _x, _y: KernelBase._rbf(_x, _y, _gamma)
    else:
        raise NotImplementedError("Kernel '{}' has not defined".format(kernel))

    if sample_weight is None:
        sample_weight = np.ones(len(y))
    else:
        sample_weight = np.asarray(sample_weight) * len(y)

    n_samples = len(x)
    self._alpha = np.zeros(n_samples)
    self._w = np.zeros(n_samples)
    self._prediction_cache = np.zeros(n_samples)
    self._gram = self._kernel(self._x, self._x)
    self._b = 0
    self._prepare(sample_weight, **kwargs)

    # Positional arguments for `self._fit`: stored defaults, overridden by kwargs.
    fit_args, logs, ims = [], [], []
    for arg_name, default in zip(self._fit_args_names, self._fit_args):
        fit_args.append(kwargs[arg_name] if arg_name in kwargs else default)

    y_cv = test_gram = None
    if self._do_log:
        if metrics is not None:
            self.get_metrics(metrics)
        if x_test is not None and y_test is not None:
            y_cv = np.asarray(y_test)
            test_gram = self._kernel(self._x, np.atleast_2d(x_test))
        else:
            y_cv = self._y
    if self._is_torch:
        y_cv, self._x, self._y = self._torch_transform(y_cv)

    bar = ProgressBar(max_value=epoch, name=str(self))
    for i in range(epoch):
        if self._fit(sample_weight, *fit_args):
            bar.terminate()
            break
        if self._do_log and metrics is not None:
            local_logs = []
            for metric in metrics:
                if test_gram is None:
                    # No evaluation set: score the cached training predictions.
                    local_y = self._y.data.numpy() if self._is_torch else self._y
                    y_hat = np.sign(self._prediction_cache)
                else:
                    local_y = y_cv.data.numpy() if self._is_torch else y_cv
                    y_hat = self.predict(test_gram, gram_provided=True)
                local_logs.append(metric(local_y, y_hat))
            logs.append(local_logs)
        self._handle_animation(i, self._x, self._y, ims, animation_params,
                               *animation_properties)
        bar.update()
    self._handle_mp4(ims, animation_properties)
    return logs
def fit(self, x, y, lr=None, epoch=None, batch_size=None, train_rate=None, optimizer=None,
        metrics=None, record_period=None, verbose=None, preview=None):
    """Build the TensorFlow training graph and train the network on ``(x, y)``.

    Every keyword left as ``None`` falls back to the entry of the same name
    in ``self._params``.

    Args:
        x: Input samples; passed through ``NN._transfer_x`` before use.
        y: Targets. Must be 2-D: ``y.shape[1]`` sizes the target placeholder
            and ``np.argmax(y, axis=1)`` is handed to the training helpers
            (presumably as class indices -- confirm against callers).
        lr: Learning rate given to the optimizer factory.
        epoch: Number of passes of the outer training loop.
        batch_size: Forwarded to ``self._get_train_repeat`` and
            ``self.batch_training``.
        train_rate: Fraction of the (shuffled) data used for training; the
            rest becomes the test split. ``None`` uses all data for both.
        optimizer: Optimizer name resolved through ``OptFactory``.
        metrics: Metrics passed to ``self.get_metrics``; ``None`` is treated
            as an empty list.
        record_period: ``self._batch_work`` runs and the epoch bar advances
            every ``record_period`` epochs.
        verbose: Verbosity level, compared against ``NNVerbose`` constants.
        preview: When truthy, ``self._preview()`` is called before training.
    """
    if lr is None:
        lr = self._params["lr"]
    if epoch is None:
        epoch = self._params["epoch"]
    if optimizer is None:
        optimizer = self._params["optimizer"]
    if batch_size is None:
        batch_size = self._params["batch_size"]
    if train_rate is None:
        train_rate = self._params["train_rate"]
    if metrics is None:
        metrics = self._params["metrics"]
    if record_period is None:
        record_period = self._params["record_period"]
    if verbose is None:
        verbose = self._params["verbose"]
    if preview is None:
        preview = self._params["preview"]

    x = NN._transfer_x(x)
    self.verbose = verbose
    self._optimizer = OptFactory().get_optimizer_by_name(optimizer, lr)
    self._tfx = tf.placeholder(tf.float32, shape=[None, *x.shape[1:]])
    self._tfy = tf.placeholder(tf.float32, shape=[None, y.shape[1]])

    if train_rate is not None:
        train_rate = float(train_rate)
        train_len = int(len(x) * train_rate)
        shuffle_suffix = np.random.permutation(int(len(x)))
        x, y = x[shuffle_suffix], y[shuffle_suffix]
        x_train, y_train = x[:train_len], y[:train_len]
        x_test, y_test = x[train_len:], y[train_len:]
    else:
        x_train = x_test = x
        y_train = y_test = y
    y_train_classes = np.argmax(y_train, axis=1)
    y_test_classes = np.argmax(y_test, axis=1)

    if metrics is None:
        metrics = []
    self._metrics = self.get_metrics(metrics)
    # Derive names and log slots from the *resolved* metrics so this does
    # not depend on ``get_metrics`` rewriting its argument in place (string
    # entries have no ``__name__``).
    self._metric_names = [_m.__name__ for _m in self._metrics]
    # One list per metric plus one extra slot (not filled in this method).
    self._logs = {
        name: [[] for _ in range(len(self._metrics) + 1)] for name in ("Train", "Test")
    }

    bar = ProgressBar(max_value=max(1, epoch // record_period), name="Epoch", start=False)
    if self.verbose >= NNVerbose.EPOCH:
        bar.start()
    if preview:
        self._preview()

    # Trailing arguments for ``batch_training`` and the full argument list
    # for ``_batch_work``; the last element of each is a verbosity flag.
    args = (
        (x_train, y_train, y_train_classes, x_test, y_test, y_test_classes,
         self.verbose >= NNVerbose.METRICS_DETAIL),
        (None, None, x_train, y_train, y_train_classes, x_test, y_test, y_test_classes,
         self.verbose >= NNVerbose.METRICS),
    )
    # Batches are drawn from the training split, so the number of batches
    # per epoch must be derived from it, not from the full dataset.
    train_repeat = self._get_train_repeat(x_train, batch_size)

    with self._sess.as_default() as sess:
        self._y_pred = self._get_rs(self._tfx)
        self._inner_y = self._get_rs(self._tfx, predict=False)
        self._loss = self._layers[-1].calculate(self._tfy, self._inner_y)
        self._train_step = self._optimizer.minimize(self._loss)
        sess.run(tf.global_variables_initializer())
        for counter in range(epoch):
            # An iteration bar is only created on the first epoch of each
            # record period, and only at ITER verbosity or above.
            if self.verbose >= NNVerbose.ITER and counter % record_period == 0:
                sub_bar = ProgressBar(
                    max_value=train_repeat * record_period - 1, name="Iteration")
            else:
                sub_bar = None
            self.batch_training(x_train, y_train, batch_size, train_repeat,
                                self._loss, self._train_step, sub_bar, *args[0])
            if (counter + 1) % record_period == 0:
                self._batch_work(*args[1])
                if self.verbose >= NNVerbose.EPOCH:
                    bar.update(counter // record_period + 1)
def fit(self, x=None, y=None, lr=0.01, epoch=10, batch_size=128, train_rate=None, verbose=0,
        metrics=None, record_period=100):
    """Build the TensorFlow training graph and train the network on ``(x, y)``.

    Args:
        x: Input samples, 2-D (``x.shape[1]`` sizes the input placeholder).
            Required despite the ``None`` default.
        y: Targets, 2-D (``y.shape[1]`` sizes the target placeholder).
            Required despite the ``None`` default.
        lr: Learning rate passed to ``Adam``.
        epoch: Number of passes of the outer training loop.
        batch_size: Mini-batch size. When the training split has no more
            than ``batch_size`` samples, the whole split is fed on every
            step instead of a random sample.
        train_rate: Fraction of the (shuffled) data used for training; the
            rest becomes the test split. ``None`` uses all data for both.
        verbose: Verbosity level, compared against ``NNVerbose`` constants.
        metrics: Callables or names looked up in ``self._available_metrics``.
            Defaults to ``["acc"]``. The caller's sequence is not modified.
        record_period: Logs are appended and the epoch bar advances every
            ``record_period`` epochs.

    Raises:
        ValueError: If ``x`` or ``y`` is ``None`` or the training split is
            empty.
    """
    if x is None or y is None:
        raise ValueError("fit() requires both x and y")

    self.verbose = verbose
    self._optimizer = Adam(lr)
    self._tfx = tf.placeholder(tf.float32, shape=[None, x.shape[1]])
    self._tfy = tf.placeholder(tf.float32, shape=[None, y.shape[1]])

    if train_rate is not None:
        train_rate = float(train_rate)
        train_len = int(len(x) * train_rate)
        shuffle_suffix = np.random.permutation(int(len(x)))
        x, y = x[shuffle_suffix], y[shuffle_suffix]
        x_train, y_train = x[:train_len], y[:train_len]
        x_test, y_test = x[train_len:], y[train_len:]
    else:
        x_train = x_test = x
        y_train = y_test = y

    train_len = len(x_train)
    if train_len == 0:
        raise ValueError("fit() got an empty training set")
    # Decide on random mini-batches *before* clamping the batch size;
    # comparing afterwards is always true and would make the full-batch
    # branch below unreachable.
    do_random_batch = train_len > batch_size
    batch_size = min(batch_size, train_len)
    train_repeat = int(train_len / batch_size) + 1

    # Resolve metric names into a fresh list so the caller's argument is
    # left untouched (and tuples are accepted).
    requested_metrics = ["acc"] if metrics is None else metrics
    self._metrics = [
        self._available_metrics[metric] if isinstance(metric, str) else metric
        for metric in requested_metrics
    ]
    self._metric_names = [_m.__name__ for _m in self._metrics]
    # One list per metric plus one extra slot (not filled in this method).
    self._logs = {
        name: [[] for _ in range(len(self._metrics) + 1)] for name in ("train", "test")
    }

    bar = ProgressBar(max_value=max(1, epoch // record_period), name="Epoch", start=False)
    if self.verbose >= NNVerbose.EPOCH:
        bar.start()

    with self._sess.as_default() as sess:
        # Define session
        self._cost = self.get_rs(self._tfx, self._tfy)
        self._y_pred = self.get_rs(self._tfx)
        self._train_step = self._optimizer.minimize(self._cost)
        sess.run(tf.global_variables_initializer())

        # Train
        sub_bar = ProgressBar(
            max_value=train_repeat * record_period - 1, name="Iteration", start=False)
        for counter in range(epoch):
            if self.verbose >= NNVerbose.EPOCH and counter % record_period == 0:
                sub_bar.start()
            for _i in range(train_repeat):
                if do_random_batch:
                    # Draws indices with numpy's default (with replacement).
                    batch = np.random.choice(train_len, batch_size)
                    x_batch, y_batch = x_train[batch], y_train[batch]
                else:
                    x_batch, y_batch = x_train, y_train
                self._train_step.run(feed_dict={
                    self._tfx: x_batch, self._tfy: y_batch
                })
                if self.verbose >= NNVerbose.EPOCH:
                    # A truthy ``update()`` additionally triggers detailed
                    # metric logging at METRICS_DETAIL verbosity.
                    if sub_bar.update() and self.verbose >= NNVerbose.METRICS_DETAIL:
                        self._append_log(x_train, y_train, "train")
                        self._append_log(x_test, y_test, "test")
                        self._print_metric_logs("train")
                        self._print_metric_logs("test")
            if self.verbose >= NNVerbose.EPOCH:
                sub_bar.update()
            if (counter + 1) % record_period == 0:
                self._append_log(x_train, y_train, "train")
                self._append_log(x_test, y_test, "test")
                if self.verbose >= NNVerbose.METRICS:
                    self._print_metric_logs("train")
                    self._print_metric_logs("test")
                if self.verbose >= NNVerbose.EPOCH:
                    bar.update(counter // record_period + 1)
                    # Fresh iteration bar for the next record period.
                    sub_bar = ProgressBar(
                        max_value=train_repeat * record_period - 1, name="Iteration",
                        start=False)