Beispiel #1
0
def get_optimizer(optimizer):
    """Resolve an optimizer specification into an optimizer instance.

    Arguments:
        optimizer: str or classicML.optimizers.Optimizer instance,
            a known optimizer alias or a ready-made optimizer.

    Returns:
        A newly constructed optimizer for a known alias, or the given
        object unchanged when it already is an ``optimizers.Optimizer``.

    Raises:
        AttributeError: the alias is unknown or the argument has an
            unsupported type.
    """
    # Alias groups paired with the name of the optimizers attribute to
    # instantiate. Names are resolved lazily, so only the selected class
    # is looked up. Note that 'SMO' is matched in upper case only.
    alias_table = (
        (('gd', 'gradient_descent'), 'GradientDescent'),
        (('newton', 'newton_method'), 'NewtonMethod'),
        (('sgd', 'stochastic_gradient_descent'),
         'StochasticGradientDescent'),
        (('adam',), 'Adam'),
        (('rbf',), 'RadialBasisFunctionOptimizer'),
        (('SMO',), 'SequentialMinimalOptimization'),
    )

    if isinstance(optimizer, str):
        for aliases, class_name in alias_table:
            if optimizer in aliases:
                return getattr(optimizers, class_name)()
    elif isinstance(optimizer, optimizers.Optimizer):
        return optimizer

    # Unknown alias or unsupported type: both end up here.
    CLASSICML_LOGGER.error('优化器调用错误')
    raise AttributeError
    def predict(self, x):
        """Predict class labels with the support vector classifier.

        Arguments:
            x: numpy.ndarray, array-like, feature data; samples are read
                along the first axis.

        Returns:
            An integer numpy array with one entry per sample: 1 where the
            decision value is non-negative, -1 otherwise.

        Raises:
            ValueError: the model has neither been trained nor loaded.
        """
        if self.is_trained is False and self.is_loaded is False:
            CLASSICML_LOGGER.error('模型没有训练')
            raise ValueError('你必须先进行训练')

        sample_count = x.shape[0]
        # Every sample starts as +1; only a negative decision value flips it.
        labels = np.ones((sample_count, ), dtype=int)

        for index in range(sample_count):
            kernel_values = self.kernel(x[index], self.support_vector)
            # Decision value: (alpha * y)^T . kappa^T + b.
            weights = (self.support_alpha.reshape(-1, 1) * self.support_y).T
            decision = np.matmul(weights, kernel_values.T) + self.b
            if decision < 0:
                labels[index] = -1

        return labels
    def save_weights(self, filepath):
        """Save the model weights to an HDF5 file.

        Arguments:
            filepath: str, path the weight file is written to.

        Raises:
            TypeError: a value could not be stored in the weight file.

        Notes:
            Optimizer hyper-parameters are not saved.
        """
        # Obtain the parameter container for this model type in write mode.
        weights_file = io.initialize_weights_file(
            filepath=filepath,
            mode='w',
            model_name='AveragedOneDependentEstimator')

        try:
            compile_section = weights_file['compile']
            weights_section = weights_file['weights']

            # Hyper-parameters are stored on the 'compile' entry.
            hyper = compile_section.attrs
            hyper['smoothing'] = self.smoothing
            hyper['m'] = self.m

            # The attribute list is serialized and wrapped in np.void so it
            # is stored as an opaque binary blob.
            serialized = dumps(self._attribute_list)
            weights_section.attrs['_attribute_list'] = np.void(serialized)
        except TypeError:
            CLASSICML_LOGGER.error('模型权重保存失败, 请检查文件是否损坏')
            raise TypeError('模型权重保存失败')
def plot_support_vector_classifier(svc, x, y, x_label=None, y_label=None):
    """Draw a two-dimensional illustration of a support vector classifier.

    Arguments:
        svc: classicML.models.SVC, support vector classifier instance.
        x: numpy.ndarray, array-like, feature data.
        y: numpy.ndarray, array-like, labels.
        x_label: str, default=None,
            label of the horizontal axis.
        y_label: str, default=None,
            label of the vertical axis.

    Raises:
        ValueError: the model has neither been trained nor loaded.
    """
    model_unavailable = svc.is_trained is False and svc.is_loaded is False
    if model_unavailable:
        CLASSICML_LOGGER.error('模型没有训练')
        raise ValueError('你必须先进行训练')

    axes = plt.subplot()
    _set_svc_axis(axes)

    # Scatter the samples together with the support vectors.
    _plot_sample_and_support_scatter(x, y, svc.support)

    # Draw the decision boundary and hand the result to the plot
    # configuration helper along with the kernel name and C.
    boundary = _plot_decision_boundary(svc)
    _svc_plot_config(boundary, svc.kernel.name, svc.C, x_label, y_label)

    plt.show()
Beispiel #5
0
def get_kernel(kernel, gamma):
    """Resolve a kernel specification into a kernel instance.

    Arguments:
        kernel: str or classicML.kernels.Kernel instance,
            a known kernel alias or a ready-made kernel.
        gamma: float, kernel coefficient; passed to every kernel built
            from an alias except 'linear'.

    Returns:
        A newly constructed kernel for a known alias, or the given object
        unchanged when it already is a ``kernels.Kernel``.

    Raises:
        AttributeError: the alias is unknown or the argument has an
            unsupported type.
    """
    # Aliases whose kernel class is constructed with the gamma coefficient.
    gamma_kernels = (
        ('rbf', 'RBF'),
        ('gaussian', 'Gaussian'),
        ('poly', 'Polynomial'),
        ('sigmoid', 'Sigmoid'),
    )

    if isinstance(kernel, str):
        if kernel == 'linear':
            return kernels.Linear()
        for alias, class_name in gamma_kernels:
            if kernel == alias:
                return getattr(kernels, class_name)(gamma=gamma)
    elif isinstance(kernel, kernels.Kernel):
        return kernel

    # Unknown alias or unsupported type: both end up here.
    CLASSICML_LOGGER.error('核函数调用错误')
    raise AttributeError
    def load_weights(self, filepath):
        """Load the model parameters from a weight file.

        Arguments:
            filepath: str, path the weight file is read from.

        Raises:
            KeyError: an expected entry is missing from the weight file.

        Notes:
            Optimizer hyper-parameters are not loaded.
        """
        # Obtain the parameter container for this model type in read mode.
        weights_file = io.initialize_weights_file(
            filepath=filepath, mode='r', model_name='SupportVectorClassifier')

        try:
            compile_section = weights_file['compile']
            weights_section = weights_file['weights']

            hyper = compile_section.attrs
            self.C = hyper['C']
            self.gamma = hyper['gamma']
            # The kernel is rebuilt from its stored name and the gamma above.
            self.kernel = get_kernel(hyper['kernel'], self.gamma)
            self.tol = hyper['tol']

            # Each weight is stored under the same name as its attribute.
            for name in ('support', 'support_vector', 'support_alpha',
                         'support_y', 'b'):
                setattr(self, name, weights_section.attrs[name])

            # Mark the model as loaded.
            self.is_loaded = True
        except KeyError:
            CLASSICML_LOGGER.error('模型权重加载失败, 请检查文件是否损坏')
            raise KeyError('模型权重加载失败')
Beispiel #7
0
def plot_bayes(bayes, x, y):
    """Draw a two-dimensional illustration of a Bayes-type classifier.

    Arguments:
        bayes: classicML.models.NB or classicML.models.SPODE,
            naive Bayes classifier or super-parent one-dependent
            estimator instance.
        x: numpy.ndarray, array-like, feature data.
        y: numpy.ndarray, array-like, labels.

    Raises:
        ValueError: the model has neither been trained nor loaded.
    """
    model_unavailable = bayes.is_trained is False and bayes.is_loaded is False
    if model_unavailable:
        CLASSICML_LOGGER.error('模型没有训练')
        raise ValueError('你必须先进行训练')

    axes = plt.subplot()
    _set_bayes_axis(axes)

    # Draw the background decision regions first, then the samples on top.
    _plot_background(bayes, x)
    axis_labels = _plot_scatter(x, y)

    # The scatter helper returns the two axis labels used for configuration.
    horizontal_label, vertical_label = axis_labels
    _bayes_plot_config(horizontal_label, vertical_label)

    plt.show()
    def load_weights(self, filepath):
        """Load the model parameters from a weight file.

        Arguments:
            filepath: str, path the weight file is read from.

        Raises:
            KeyError: an expected entry is missing from the weight file.

        Notes:
            Optimizer hyper-parameters are not loaded.
        """
        # Obtain the parameter container for this model type in read mode.
        weights_file = io.initialize_weights_file(
            filepath=filepath, mode='r', model_name='NaiveBayesClassifier')

        try:
            compile_section = weights_file['compile']
            weights_section = weights_file['weights']

            self.smoothing = compile_section.attrs['smoothing']

            stored = weights_section.attrs
            # p_0 and p_1 are stored directly as attributes.
            for name in ('p_0', 'p_1'):
                setattr(self, name, stored[name])
            # pi_0 and pi_1 are stored as binary blobs and deserialized.
            # NOTE(review): `loads` is presumably pickle.loads - only load
            # weight files from trusted sources.
            for name in ('pi_0', 'pi_1'):
                setattr(self, name, loads(stored[name].tobytes()))

            # Mark the model as loaded.
            self.is_loaded = True
        except KeyError:
            CLASSICML_LOGGER.error('模型权重加载失败, 请检查文件是否损坏')
            raise KeyError('模型权重加载失败')
    def save_weights(self, filepath):
        """Save the model weights to an HDF5 file.

        Arguments:
            filepath: str, path the weight file is written to.

        Raises:
            TypeError: a value could not be stored in the weight file.

        Notes:
            Optimizer hyper-parameters are not saved.
        """
        # Obtain the parameter container for this model type in write mode.
        weights_file = io.initialize_weights_file(
            filepath=filepath, mode='w', model_name='NaiveBayesClassifier')

        try:
            compile_section = weights_file['compile']
            weights_section = weights_file['weights']

            compile_section.attrs['smoothing'] = self.smoothing

            # p_0 and p_1 are written as they are.
            for name in ('p_0', 'p_1'):
                weights_section.attrs[name] = getattr(self, name)
            # pi_0 and pi_1 are serialized and wrapped in np.void so they
            # are stored as opaque binary blobs.
            for name in ('pi_0', 'pi_1'):
                weights_section.attrs[name] = np.void(
                    dumps(getattr(self, name)))
        except TypeError:
            CLASSICML_LOGGER.error('模型权重保存失败, 请检查文件是否损坏')
            raise TypeError('模型权重保存失败')
    def predict(self, x, probability=False):
        """Predict with the naive Bayes classifier.

        Arguments:
            x: numpy.ndarray or pandas.DataFrame, array-like, feature data.
                A one-dimensional input is treated as a single sample;
                otherwise every row along the first axis is one sample.
            probability: bool, default=False,
                whether to use the normalized probability form; forwarded
                unchanged to ``self._predict``.

        Returns:
            A list with one ``self._predict`` result per sample. Per the
            original documentation these are 0/1 labels, or the
            negative/positive class probabilities when ``probability`` is
            set.

        Raises:
            ValueError: the model has neither been trained nor loaded.
        """
        if self.is_trained is False and self.is_loaded is False:
            CLASSICML_LOGGER.error('模型没有训练')
            raise ValueError('你必须先进行训练')

        # Work with logarithms of the priors to avoid underflow.
        p_0 = np.log(self.p_0)
        p_1 = np.log(self.p_1)

        if len(x.shape) == 1:
            return [self._predict(x, p_0, p_1, probability)]

        # DataFrames are indexed positionally through .iloc; plain arrays
        # have no .iloc but accept the same [i, :] indexing directly.
        rows = getattr(x, 'iloc', x)
        return [self._predict(rows[i, :], p_0, p_1, probability)
                for i in range(x.shape[0])]
Beispiel #11
0
def get_initializer(initializer, seed):
    """Resolve an initializer specification into an initializer instance.

    Arguments:
        initializer: str or classicML.initializers.Initializer instance,
            a known initializer alias, a ready-made initializer, or None
            to get a RandomNormal initializer.
        seed: int, random seed passed to every initializer built here.

    Returns:
        A newly constructed initializer for a known alias or for None, or
        the given object unchanged when it already is an
        ``initializers.Initializer``.

    Raises:
        AttributeError: the alias is unknown or the argument has an
            unsupported type.
    """
    # Alias groups paired with the initializers attribute to instantiate.
    alias_table = (
        (('random_normal',), 'RandomNormal'),
        (('he_normal',), 'HeNormal'),
        (('xavier_normal', 'glorot_normal'), 'XavierNormal'),
        (('rbf_normal',), 'RBFNormal'),
    )

    if isinstance(initializer, str):
        for aliases, class_name in alias_table:
            if initializer in aliases:
                return getattr(initializers, class_name)(seed=seed)
    elif isinstance(initializer, initializers.Initializer):
        return initializer
    elif initializer is None:
        # No initializer given: fall back to RandomNormal.
        return initializers.RandomNormal(seed=seed)

    # Unknown alias or unsupported type: both end up here.
    CLASSICML_LOGGER.error('初始化器调用错误')
    raise AttributeError
    def save_weights(self, filepath):
        """Save the model weights to an HDF5 file.

        Arguments:
            filepath: str, path the weight file is written to.

        Raises:
            TypeError: a value could not be stored in the weight file.

        Notes:
            Optimizer hyper-parameters are not saved.
        """
        # Obtain the parameter container for this model type in write mode.
        weights_file = io.initialize_weights_file(
            filepath=filepath, mode='w', model_name='SupportVectorClassifier')

        try:
            compile_section = weights_file['compile']
            weights_section = weights_file['weights']

            hyper = compile_section.attrs
            hyper['C'] = self.C
            # Only the kernel's name is stored, not the kernel object.
            hyper['kernel'] = self.kernel.name
            hyper['gamma'] = self.gamma
            hyper['tol'] = self.tol

            stored = weights_section.attrs
            for name in ('support', 'support_vector', 'support_alpha'):
                stored[name] = getattr(self, name)
            # support_y and b are converted to float64 before being stored.
            stored['support_y'] = np.asarray(self.support_y, dtype=np.float64)
            stored['b'] = np.float64(self.b)
        except TypeError:
            CLASSICML_LOGGER.error('模型权重保存失败, 请检查文件是否损坏')
            raise TypeError('模型权重保存失败')
    def save_weights(self, filepath):
        """Save the model weights to an HDF5 file.

        Arguments:
            filepath: str, path the weight file is written to.

        Raises:
            TypeError: a value could not be stored in the weight file.

        References:
            - [How to store raw binary data](https://docs.h5py.org/en/2.3/strings.html)

        Notes:
            Optimizer hyper-parameters are not saved.
        """
        # Obtain the parameter container for this model type in write mode.
        weights_file = io.initialize_weights_file(
            filepath=filepath, mode='w', model_name='DecisionTreeClassifier')

        try:
            compile_section = weights_file['compile']
            weights_section = weights_file['weights']

            compile_section.attrs['criterion'] = self.criterion
            # The 'pruning' entry is written only when a pruner is set.
            pruner = self.pruner
            if pruner is not None:
                compile_section.attrs['pruning'] = pruner.name

            # The tree is serialized and wrapped in np.void so it is stored
            # as an opaque binary blob.
            serialized_tree = dumps(self.tree)
            weights_section.attrs['tree'] = np.void(serialized_tree)
        except TypeError:
            CLASSICML_LOGGER.error('模型权重保存失败, 请检查文件是否损坏')
            raise TypeError('模型权重保存失败')
    def predict(self, x):
        """Predict with the decision tree classifier.

        Arguments:
            x: numpy.ndarray or pandas.DataFrame, array-like,
                feature data. A list or pandas.Series is treated as one
                single sample.

        Returns:
            A list with one ``self._predict`` result per sample.

        Raises:
            ValueError: the model has neither been trained nor loaded.
        """
        if self.is_trained is False and self.is_loaded is False:
            CLASSICML_LOGGER.error('模型没有训练')
            raise ValueError('你必须先进行训练')

        # Normalize the input into something iterable sample by sample.
        if isinstance(x, list):
            samples = np.expand_dims(x, axis=0)
        elif isinstance(x, pd.DataFrame):
            samples = x.values
        elif isinstance(x, pd.Series):
            samples = np.expand_dims(x.values, axis=0)
        else:
            samples = x

        return [self._predict(sample, self.tree) for sample in samples]
    def load_weights(self, filepath):
        """Load the model parameters from a weight file.

        Arguments:
            filepath: str, path the weight file is read from.

        Raises:
            KeyError: an expected entry is missing from the weight file.

        Notes:
            Optimizer hyper-parameters are not loaded.
        """
        # Obtain the parameter container for this model type in read mode.
        weights_file = io.initialize_weights_file(
            filepath=filepath, mode='r', model_name='DecisionTreeClassifier')

        try:
            compile_section = weights_file['compile']
            weights_section = weights_file['weights']

            self.criterion = compile_section.attrs['criterion']
            # NOTE(review): the pruner is restored only when this instance
            # already has one; a file saved with pruning is otherwise
            # loaded without it - confirm this is intended.
            if self.pruner is not None:
                pruning_name = compile_section.attrs['pruning']
                self.pruner = get_pruner(pruning_name)

            # NOTE(review): `loads` rebuilds the tree from raw bytes
            # (presumably pickle) - only load files from trusted sources.
            tree_bytes = weights_section.attrs['tree'].tobytes()
            self.tree = loads(tree_bytes)

            # Mark the model as loaded.
            self.is_loaded = True
        except KeyError:
            CLASSICML_LOGGER.error('模型权重加载失败, 请检查文件是否损坏')
            raise KeyError('模型权重加载失败')
Beispiel #16
0
    def load_weights(self, filepath):
        """Load the model parameters from a weight file.

        Arguments:
            filepath: str, path the weight file is read from.

        Raises:
            KeyError: an expected entry is missing from the weight file.

        Notes:
            Optimizer hyper-parameters are not loaded.
        """
        # Obtain the parameter container for this model type in read mode.
        weights_file = io.initialize_weights_file(
            filepath=filepath, mode='r', model_name='LogisticRegression')

        try:
            compile_section = weights_file['compile']
            weights_section = weights_file['weights']

            # Optimizer, loss and metric are rebuilt from their stored
            # identifiers through the corresponding getter functions.
            hyper = compile_section.attrs
            self.optimizer = get_optimizer(hyper['optimizer'])
            self.loss = get_loss(hyper['loss'])
            self.metric = get_metric(hyper['metric'])

            self.beta = weights_section.attrs['beta']

            # Mark the model as loaded.
            self.is_loaded = True
        except KeyError:
            CLASSICML_LOGGER.error('模型权重加载失败, 请检查文件是否损坏')
            raise KeyError('模型权重加载失败')
Beispiel #17
0
    def save_weights(self, filepath):
        """Save the model weights to an HDF5 file.

        Arguments:
            filepath: str, path the weight file is written to.

        Raises:
            TypeError: a value could not be stored in the weight file.

        Notes:
            Optimizer hyper-parameters are not saved.
        """
        # Obtain the parameter container for this model type in write mode.
        weights_file = io.initialize_weights_file(
            filepath=filepath, mode='w', model_name='LogisticRegression')

        try:
            compile_section = weights_file['compile']
            weights_section = weights_file['weights']

            # Only the names of optimizer, loss and metric are stored.
            hyper = compile_section.attrs
            hyper['optimizer'] = self.optimizer.name
            hyper['loss'] = self.loss.name
            hyper['metric'] = self.metric.name

            weights_section.attrs['beta'] = self.beta
        except TypeError:
            CLASSICML_LOGGER.error('模型权重保存失败, 请检查文件是否损坏')
            raise TypeError('模型权重保存失败')
    def predict(self, x, **kwargs):
        """Predict with the averaged one-dependent estimator.

        Arguments:
            x: numpy.ndarray or pandas.DataFrame, array-like, feature data.
                A one-dimensional input is treated as a single sample;
                otherwise every row along the first axis is one sample.
            **kwargs: accepted for interface compatibility and ignored.

        Returns:
            A list with one ``self._predict`` result per sample.

        Raises:
            ValueError: the model has neither been trained nor loaded.
        """
        if self.is_trained is False and self.is_loaded is False:
            CLASSICML_LOGGER.error('模型没有训练')
            raise ValueError('你必须先进行训练')

        if len(x.shape) == 1:
            return [self._predict(x)]

        # DataFrames are indexed positionally through .iloc; plain arrays
        # have no .iloc but accept the same [i, :] indexing directly.
        rows = getattr(x, 'iloc', x)
        return [self._predict(rows[i, :]) for i in range(x.shape[0])]
    def predict(self, x):
        """Predict labels by comparing projected distances to class centres.

        Arguments:
            x: numpy.ndarray, array-like, feature data.

        Returns:
            Integer label array: 1 where the projection of a sample lies
            strictly closer to the projected ``mu_1`` than to the projected
            ``mu_0``, 0 otherwise.

        Raises:
            ValueError: the model has neither been trained nor loaded.
        """
        if self.is_trained is False and self.is_loaded is False:
            CLASSICML_LOGGER.error('模型没有权重')
            raise ValueError('你必须先进行训练')

        # Project the samples and both class means with w.
        projected_x = np.dot(x, self.w.T)
        projected_mu_0 = np.dot(self.w, self.mu_0.T)
        projected_mu_1 = np.dot(self.w, self.mu_1.T)

        distance_to_0 = np.abs(projected_x - projected_mu_0)
        distance_to_1 = np.abs(projected_x - projected_mu_1)

        # Farther from centre 0 than from centre 1 means label 1.
        closer_to_1 = distance_to_0 > distance_to_1
        return np.squeeze(closer_to_1).astype(int)
    def load_weights(self, filepath):
        """Load the model parameters from a weight file.

        Arguments:
            filepath: str, path the weight file is read from.

        Raises:
            KeyError: an expected entry is missing from the weight file.

        Notes:
            Optimizer hyper-parameters are not loaded.
        """
        # Obtain the parameter container for this model type in read mode.
        weights_file = io.initialize_weights_file(
            filepath=filepath,
            mode='r',
            model_name='AveragedOneDependentEstimator')

        try:
            compile_section = weights_file['compile']
            weights_section = weights_file['weights']

            hyper = compile_section.attrs
            self.smoothing = hyper['smoothing']
            self.m = hyper['m']

            # The attribute list is stored as a binary blob.
            # NOTE(review): `loads` is presumably pickle.loads - only load
            # weight files from trusted sources.
            attribute_bytes = weights_section.attrs['_attribute_list'].tobytes()
            self._attribute_list = loads(attribute_bytes)

            # Mark the model as loaded.
            self.is_loaded = True
        except KeyError:
            CLASSICML_LOGGER.error('模型权重加载失败, 请检查文件是否损坏')
            raise KeyError('模型权重加载失败')
    def save_weights(self, filepath):
        """Save the model weights to an HDF5 file.

        Arguments:
            filepath: str, path the weight file is written to.

        Raises:
            TypeError: a value could not be stored in the weight file.

        Notes:
            Optimizer hyper-parameters are not saved.
        """
        # Obtain the parameter container for this model type in write mode.
        weights_file = io.initialize_weights_file(
            filepath=filepath,
            mode='w',
            model_name='RadialBasisFunctionNetwork')

        try:
            compile_section = weights_file['compile']
            weights_section = weights_file['weights']

            hyper = compile_section.attrs
            hyper['hidden_units'] = self.hidden_units
            # Only the names of optimizer, loss and metric are stored.
            hyper['optimizer'] = self.optimizer.name
            hyper['loss'] = self.loss.name
            hyper['metric'] = self.metric.name

            # Every entry of self.parameters is stored under its own key.
            for key in self.parameters:
                weights_section.attrs[key] = self.parameters[key]
        except TypeError:
            CLASSICML_LOGGER.error('模型权重保存失败, 请检查文件是否损坏')
            raise TypeError('模型权重保存失败')
    def load_weights(self, filepath):
        """Load the model parameters from a weight file.

        Arguments:
            filepath: str, path the weight file is read from.

        Raises:
            KeyError: an expected entry is missing from the weight file.

        Notes:
            Optimizer hyper-parameters are not loaded.
        """
        # Obtain the parameter container for this model type in read mode.
        weights_file = io.initialize_weights_file(
            filepath=filepath,
            mode='r',
            model_name='RadialBasisFunctionNetwork')

        try:
            compile_section = weights_file['compile']
            weights_section = weights_file['weights']

            hyper = compile_section.attrs
            self.hidden_units = hyper['hidden_units']
            # Optimizer, loss and metric are rebuilt from their stored
            # identifiers through the corresponding getter functions.
            self.optimizer = get_optimizer(hyper['optimizer'])
            self.loss = get_loss(hyper['loss'])
            self.metric = get_metric(hyper['metric'])

            # Copy every stored weight attribute into self.parameters.
            for key in weights_section.attrs:
                self.parameters[key] = weights_section.attrs[key]

            # Mark the model as loaded.
            self.is_loaded = True
        except KeyError:
            CLASSICML_LOGGER.error('模型权重加载失败, 请检查文件是否损坏')
            raise KeyError('模型权重加载失败')
    def fit(self, x, y, x_validation=None, y_validation=None):
        """Train the decision tree classifier.

        Arguments:
            x: numpy.ndarray or pandas.DataFrame, array-like,
                feature data.
            y: numpy.ndarray or pandas.DataFrame, array-like,
                labels.
            x_validation: numpy.ndarray or pandas.DataFrame, array-like,
                validation feature data used for pruning.
            y_validation: numpy.ndarray or pandas.DataFrame, array-like,
                validation labels used for pruning.

        Returns:
            This DecisionTreeClassifier instance.

        Raises:
            AttributeError: a pruner is configured but the validation set
                is missing.
        """
        if isinstance(x, np.ndarray) and self.attribute_name is None:
            # Logger.warn is a deprecated alias of Logger.warning; the
            # message now names the attribute that is actually read.
            CLASSICML_LOGGER.warning(
                "属性名称缺失, 请使用pandas.DataFrame; 或检查 self.attribute_name")

        validation_missing = x_validation is None or y_validation is None
        if self.pruner is not None and validation_missing:
            CLASSICML_LOGGER.error("没有验证集, 无法对决策树进行剪枝")
            raise AttributeError('没有验证集')

        # Attach the attribute names to the feature data and use a clean
        # 0..n-1 index so features and labels line up positionally.
        x = pd.DataFrame(x, columns=self.attribute_name)
        x.reset_index(drop=True, inplace=True)
        self.generator._x = x

        y = pd.Series(y)
        y.reset_index(drop=True, inplace=True)

        # Give the validation data the same treatment.
        if x_validation is not None:
            x_validation = pd.DataFrame(x_validation,
                                        columns=self.attribute_name)
            x_validation.reset_index(drop=True, inplace=True)

            y_validation = pd.Series(y_validation)
            y_validation.reset_index(drop=True, inplace=True)

        # Build a tree only when none was loaded from a weight file.
        if self.is_loaded is False:
            self.tree = self.generator(x, y)

        # Prune the tree when a pruner is configured.
        if self.pruner:
            self.tree = self.pruner(x, y, x_validation, y_validation,
                                    self.tree)

        # Mark the model as trained.
        self.is_trained = True

        return self
Beispiel #24
0
def get_criterion(criterion):
    """Resolve the name of a split criterion into a criterion instance.

    Arguments:
        criterion: str,
            split selection method used when learning the decision tree;
            one of 'gini', 'gain' or 'entropy'.

    Returns:
        A newly constructed criterion object from ``tree.criteria``.

    Raises:
        AttributeError: the name is not a known criterion.
    """
    # Known names paired with the tree.criteria attribute to instantiate.
    known_criteria = (
        ('gini', 'Gini'),
        ('gain', 'Gain'),
        ('entropy', 'Entropy'),
    )

    for alias, class_name in known_criteria:
        if criterion == alias:
            return getattr(tree.criteria, class_name)()

    CLASSICML_LOGGER.error('选择错误')
    raise AttributeError
    def predict(self, x):
        """Predict with the radial basis function network.

        Arguments:
            x: numpy.ndarray, array-like, feature data.

        Returns:
            The first of the two values returned by the optimizer's
            forward pass (the predicted probabilities, per the original
            documentation).

        Raises:
            ValueError: the model has neither been trained nor loaded.
        """
        if self.is_trained is False and self.is_loaded is False:
            CLASSICML_LOGGER.error('模型没有训练')
            raise ValueError('你必须先进行训练')

        # The forward pass yields a pair; only the prediction is needed.
        prediction, _unused = self.optimizer.forward(x, self.parameters)

        return prediction
    def predict(self, x, probability=False):
        """Predict with the super-parent one-dependent estimator.

        Arguments:
            x: numpy.ndarray or pandas.DataFrame, array-like, feature data.
                A one-dimensional input is treated as a single sample;
                otherwise every row along the first axis is one sample.
            probability: bool, default=False,
                whether to return normalized probabilities instead of
                labels.

        Returns:
            A list with one entry per sample: the label 0 or 1 (0 only
            when the first score is strictly larger), or, when
            ``probability`` is set, the pair
            ``[p_0 / (p_0 + p_1), p_1 / (p_0 + p_1)]``.

        Raises:
            ValueError: the model has neither been trained nor loaded.
        """
        if self.is_trained is False and self.is_loaded is False:
            CLASSICML_LOGGER.error('模型没有训练')
            raise ValueError('你必须先进行训练')

        if len(x.shape) == 1:
            samples = [x]
        else:
            # DataFrames are indexed positionally through .iloc; plain
            # arrays have no .iloc but accept the same [i, :] indexing.
            rows = getattr(x, 'iloc', x)
            samples = [rows[i, :] for i in range(x.shape[0])]

        y_pred = []
        for sample in samples:
            p_0, p_1 = self._predict(sample)
            if probability:
                y_pred.append([p_0 / (p_0 + p_1), p_1 / (p_0 + p_1)])
            else:
                y_pred.append(0 if p_0 > p_1 else 1)

        return y_pred
Beispiel #27
0
def get_pruner(pruning):
    """Resolve the name of a pruning strategy into a pruner instance.

    Arguments:
        pruning: str,
            pruning method of the decision tree: 'post', 'pre', or None
            for no pruning.

    Returns:
        A newly constructed pruner from ``tree.pruners``, or None when
        ``pruning`` is None.

    Raises:
        AttributeError: the name is not a known pruning method.
    """
    # Known names paired with the tree.pruners attribute to instantiate.
    known_pruners = (
        ('post', 'PostPruner'),
        ('pre', 'PrePruner'),
    )

    for alias, class_name in known_pruners:
        if pruning == alias:
            return getattr(tree.pruners, class_name)()

    if pruning is None:
        return None

    CLASSICML_LOGGER.error('选择错误')
    raise AttributeError
Beispiel #28
0
def _parse(model_name, fp):
    """Parse the description group and verify the weight file.

    Arguments:
        model_name: str, name of the model the file must belong to.
        fp: h5py._hl.files.File, file handle.

    Raises:
        ValueError: the file was written by a version older than the
            minimum supported one, or it belongs to a different model.

    Notes:
        ``min_cml_version`` and ``min__version__`` are module-level
        minimums defined outside this function; they are assumed to be
        strings containing a dotted version number.
    """
    def _version_key(version_text):
        # Turn the last dotted number run of the text into a tuple of
        # ints, so that '0.10' sorts after '0.9'. Comparing the raw
        # strings orders them character by character and gets this wrong.
        dotted = re.findall(r'\d+(?:\.\d+)*', version_text)[-1]
        return tuple(int(part) for part in dotted.split('.'))

    description_gp = fp['description']
    version_text = description_gp.attrs['version']

    # The first x.y.z match is the library version; the last x.y match is
    # the backend io version.
    file_cml_version = re.findall(r'\d+\.\d+\.\d+', version_text)[0]
    file_backend_version = re.findall(r'\d+\.\d+', version_text)[-1]

    cml_too_old = (
        _version_key(file_cml_version) < _version_key(min_cml_version))
    if cml_too_old or (_version_key(file_backend_version)
                       < _version_key(min__version__)):
        CLASSICML_LOGGER.error('文件核验失败, 模型版本过低')
        raise ValueError('文件核验失败')
    if description_gp.attrs['model_name'] != model_name:
        CLASSICML_LOGGER.error('文件核验失败, 模型不匹配')
        raise ValueError('文件核验失败')
Beispiel #29
0
    def predict(self, x):
        """Predict with the logistic regression model.

        Arguments:
            x: numpy.ndarray, array-like, feature data.

        Returns:
            The first of the two values returned by the optimizer's
            forward pass, with length-one axes squeezed away (the
            predicted probabilities, per the original documentation).

        Raises:
            ValueError: the model has neither been trained nor loaded.
        """
        if self.is_trained is False and self.is_loaded is False:
            CLASSICML_LOGGER.error('模型没有训练')
            raise ValueError('你必须先进行训练')

        # The forward pass yields a pair; only the prediction is needed.
        raw_prediction, _unused = self.optimizer.forward(x, self.beta)

        return np.squeeze(raw_prediction)
def plot_logistic_regression(logistic_regression,
                             x,
                             y,
                             x_label=None,
                             y_label=None):
    """Draw a two-dimensional illustration of a logistic regression model.

    Arguments:
        logistic_regression: classicML.models.LogisticRegression,
            logistic regression instance.
        x: numpy.ndarray, array-like, feature data; must have exactly two
            columns.
        y: numpy.ndarray, array-like, labels.
        x_label: str, default=None,
            label of the horizontal axis.
        y_label: str, default=None,
            label of the vertical axis.

    Raises:
        ValueError: the model has neither been trained nor loaded, or the
            feature data does not have exactly two columns.
    """
    model_unavailable = (logistic_regression.is_trained is False
                         and logistic_regression.is_loaded is False)
    if model_unavailable:
        CLASSICML_LOGGER.error('模型没有训练')
        raise ValueError('你必须先进行训练')

    features = np.asarray(x)
    labels = np.asarray(y)
    if features.shape[1] != 2:
        CLASSICML_LOGGER.error('x的维度异常')
        raise ValueError('无法可视化')

    # Scatter the samples.
    _plot_sample_scatter(features, labels)

    # Draw the regression line over the interval [0, 1] by solving
    # 0 = x1 * beta[0] + x2 * beta[1] + beta[2] for x2.
    beta = logistic_regression.beta
    x_coord = np.linspace(0, 1)
    y_coord = -(beta[0] * x_coord + beta[2]) / beta[1]
    plt.plot(x_coord, y_coord, c='orange', label='回归方程-logistic regression')

    _logistic_regression_plot_config(x_label, y_label)
    plt.show()