コード例 #1
0
ファイル: training.py プロジェクト: dongshiwen1998/classicML
def get_loss(loss):
    """Resolve a loss specification into a loss-function instance.

    Arguments:
        loss: str or classicML.losses.Loss instance,
            the loss function, given by name or as a ready-made instance.

    Returns:
        The loss instance to use. A losses.Loss instance is handed back
        unchanged. An unknown name, or a value that is neither a str nor
        a losses.Loss, logs a warning and yields losses.Crossentropy().
    """
    if isinstance(loss, str):
        if loss == 'mse' or loss == 'mean_squared_error':
            return losses.MeanSquaredError()
        if loss == 'log_likelihood':
            return losses.LogLikelihood()
        if loss == 'binary_crossentropy':
            return losses.BinaryCrossentropy()
        if loss == 'categorical_crossentropy':
            return losses.CategoricalCrossentropy()
        if loss == 'crossentropy':
            return losses.Crossentropy()
    elif isinstance(loss, losses.Loss):
        return loss

    # Shared fallback for unknown names and unsupported argument types.
    CLASSICML_LOGGER.warn('你没有输入损失函数或者输入的损失函数不正确, 将使用默认的损失函数')
    return losses.Crossentropy()
コード例 #2
0
ファイル: training.py プロジェクト: dongshiwen1998/classicML
def get_kernel(kernel, gamma):
    """Resolve a kernel specification into a kernel-function instance.

    Arguments:
        kernel: str or classicML.kernels.Kernel instance,
            the kernel function, given by name or as a ready-made instance.
        gamma: float, kernel coefficient; passed to every named kernel
            except 'linear'.

    Returns:
        The kernel instance to use. A kernels.Kernel instance is handed
        back unchanged.

    Raises:
        AttributeError: the name is unknown or the argument has an
            unsupported type.
    """
    if isinstance(kernel, str):
        if kernel == 'linear':
            return kernels.Linear()
        if kernel == 'rbf':
            return kernels.RBF(gamma=gamma)
        if kernel == 'gaussian':
            return kernels.Gaussian(gamma=gamma)
        if kernel == 'poly':
            return kernels.Polynomial(gamma=gamma)
        if kernel == 'sigmoid':
            return kernels.Sigmoid(gamma=gamma)
    elif isinstance(kernel, kernels.Kernel):
        return kernel

    # Shared failure path for unknown names and unsupported argument types.
    CLASSICML_LOGGER.error('核函数调用错误')
    raise AttributeError
コード例 #3
0
def plot_support_vector_classifier(svc, x, y, x_label=None, y_label=None):
    """Draw a two-dimensional illustration of a support vector classifier.

    Arguments:
        svc: classicML.models.SVC, support vector classifier instance.
        x: numpy.ndarray, array-like, feature data.
        y: numpy.ndarray, array-like, labels.
        x_label: str, default=None,
            label of the horizontal axis.
        y_label: str, default=None,
            label of the vertical axis.

    Raises:
        ValueError: the model has been neither trained nor loaded.
    """
    model_unavailable = svc.is_trained is False and svc.is_loaded is False
    if model_unavailable:
        CLASSICML_LOGGER.error('模型没有训练')
        raise ValueError('你必须先进行训练')

    _set_svc_axis(plt.subplot())

    # Scatter the samples together with the support vectors.
    _plot_sample_and_support_scatter(x, y, svc.support)

    # Draw the decision boundary; its result feeds the final plot setup.
    boundary = _plot_decision_boundary(svc)
    _svc_plot_config(boundary, svc.kernel.name, svc.C, x_label, y_label)

    plt.show()
コード例 #4
0
ファイル: training.py プロジェクト: dongshiwen1998/classicML
def get_initializer(initializer, seed):
    """Resolve an initializer specification into an initializer instance.

    Arguments:
        initializer: str or classicML.initializers.Initializer instance,
            the initializer, given by name or as a ready-made instance.
            None selects initializers.RandomNormal.
        seed: int, random seed handed to every initializer built here.

    Returns:
        The initializer instance to use. An initializers.Initializer
        instance is handed back unchanged.

    Raises:
        AttributeError: the name is unknown or the argument has an
            unsupported type.
    """
    if isinstance(initializer, str):
        if initializer == 'random_normal':
            return initializers.RandomNormal(seed=seed)
        if initializer == 'he_normal':
            return initializers.HeNormal(seed=seed)
        if initializer == 'xavier_normal' or initializer == 'glorot_normal':
            return initializers.XavierNormal(seed=seed)
        if initializer == 'rbf_normal':
            return initializers.RBFNormal(seed=seed)
    elif isinstance(initializer, initializers.Initializer):
        return initializer
    elif initializer is None:
        return initializers.RandomNormal(seed=seed)

    # Shared failure path for unknown names and unsupported argument types.
    CLASSICML_LOGGER.error('初始化器调用错误')
    raise AttributeError
コード例 #5
0
    def save_weights(self, filepath):
        """Save the model weights to an HDF5 file.

        Arguments:
            filepath: str, path the weights file is written to.

        Raises:
            TypeError: saving the model weights failed.

        Notes:
            Hyperparameters of the optimizer are not saved.
        """
        # Initialize the weights file.
        parameters_gp = io.initialize_weights_file(
            filepath=filepath,
            mode='w',
            model_name='RadialBasisFunctionNetwork')

        # Store the compile settings and every entry of self.parameters.
        try:
            compile_attrs = parameters_gp['compile'].attrs
            weights_attrs = parameters_gp['weights'].attrs

            compile_attrs['hidden_units'] = self.hidden_units
            compile_attrs['optimizer'] = self.optimizer.name
            compile_attrs['loss'] = self.loss.name
            compile_attrs['metric'] = self.metric.name
            for key in self.parameters:
                weights_attrs[key] = self.parameters[key]
        except TypeError:
            CLASSICML_LOGGER.error('模型权重保存失败, 请检查文件是否损坏')
            raise TypeError('模型权重保存失败')
コード例 #6
0
    def save_weights(self, filepath):
        """Save the model weights to an HDF5 file.

        Arguments:
            filepath: str, path the weights file is written to.

        Raises:
            TypeError: saving the model weights failed.

        Notes:
            Hyperparameters of the optimizer are not saved.
        """
        # Initialize the weights file.
        parameters_gp = io.initialize_weights_file(
            filepath=filepath,
            mode='w',
            model_name='AveragedOneDependentEstimator')

        # Store the model parameters.
        try:
            compile_attrs = parameters_gp['compile'].attrs
            weights_attrs = parameters_gp['weights'].attrs

            compile_attrs['smoothing'] = self.smoothing
            compile_attrs['m'] = self.m
            # The attribute list is serialized with dumps() and wrapped
            # in np.void before it is stored.
            serialized = dumps(self._attribute_list)
            weights_attrs['_attribute_list'] = np.void(serialized)
        except TypeError:
            CLASSICML_LOGGER.error('模型权重保存失败, 请检查文件是否损坏')
            raise TypeError('模型权重保存失败')
コード例 #7
0
    def save_weights(self, filepath):
        """Save the model weights to an HDF5 file.

        Arguments:
            filepath: str, path the weights file is written to.

        Raises:
            TypeError: saving the model weights failed.

        Notes:
            Hyperparameters of the optimizer are not saved.
        """
        # Initialize the weights file.
        parameters_gp = io.initialize_weights_file(
            filepath=filepath, mode='w', model_name='SupportVectorClassifier')

        # Store the model parameters.
        try:
            compile_attrs = parameters_gp['compile'].attrs
            weights_attrs = parameters_gp['weights'].attrs

            # Compile-time settings.
            compile_attrs['C'] = self.C
            compile_attrs['kernel'] = self.kernel.name
            compile_attrs['gamma'] = self.gamma
            compile_attrs['tol'] = self.tol

            # Support-vector data; support_y and b are converted to
            # float64 before being written.
            weights_attrs['support'] = self.support
            weights_attrs['support_vector'] = self.support_vector
            weights_attrs['support_alpha'] = self.support_alpha
            support_labels = np.asarray(self.support_y, dtype=np.float64)
            weights_attrs['support_y'] = support_labels
            weights_attrs['b'] = np.float64(self.b)
        except TypeError:
            CLASSICML_LOGGER.error('模型权重保存失败, 请检查文件是否损坏')
            raise TypeError('模型权重保存失败')
コード例 #8
0
ファイル: bayes.py プロジェクト: dongshiwen1998/classicML
def plot_bayes(bayes, x, y):
    """Draw a two-dimensional illustration of a Bayes classifier.

    Arguments:
        bayes: classicML.models.NB or classicML.models.SPODE,
            naive Bayes classifier or super-parent one-dependent
            estimator instance.
        x: numpy.ndarray, array-like, feature data.
        y: numpy.ndarray, array-like, labels.

    Raises:
        ValueError: the model has been neither trained nor loaded.
    """
    model_unavailable = (bayes.is_trained is False
                         and bayes.is_loaded is False)
    if model_unavailable:
        CLASSICML_LOGGER.error('模型没有训练')
        raise ValueError('你必须先进行训练')

    _set_bayes_axis(plt.subplot())

    # Draw the background that shows the decision regions.
    _plot_background(bayes, x)

    # Scatter the samples; the helper also returns the two axis labels.
    axis_labels = _plot_scatter(x, y)
    x_label, y_label = axis_labels
    _bayes_plot_config(x_label, y_label)

    plt.show()
コード例 #9
0
    def predict(self, x):
        """Predict with the decision tree classifier.

        Arguments:
            x: numpy.ndarray or pandas.DataFrame, array-like,
                feature data.

        Returns:
            A list with one prediction per row of x.

        Raises:
            ValueError: the model has been neither trained nor loaded.
        """
        model_unavailable = (self.is_trained is False
                             and self.is_loaded is False)
        if model_unavailable:
            CLASSICML_LOGGER.error('模型没有训练')
            raise ValueError('你必须先进行训练')

        # Normalize the input container: a list or a Series is treated as
        # one sample and gets a leading axis, a DataFrame is unwrapped to
        # its underlying values.
        if isinstance(x, list):
            x = np.expand_dims(x, axis=0)
        elif isinstance(x, pd.DataFrame):
            x = x.values
        elif isinstance(x, pd.Series):
            x = np.expand_dims(x.values, axis=0)

        return [self._predict(feature, self.tree) for feature in x]
コード例 #10
0
    def load_weights(self, filepath):
        """Load the model parameters from a weights file.

        Arguments:
            filepath: str, path the weights file is read from.

        Raises:
            KeyError: loading the model weights failed.

        Notes:
            Hyperparameters of the optimizer are not loaded.
        """
        # Initialize the weights file.
        parameters_gp = io.initialize_weights_file(
            filepath=filepath, mode='r', model_name='DecisionTreeClassifier')

        # Restore the model parameters.
        try:
            compile_attrs = parameters_gp['compile'].attrs
            weights_attrs = parameters_gp['weights'].attrs

            self.criterion = compile_attrs['criterion']
            # The stored pruner is only read when this instance already
            # has a pruner configured.
            if self.pruner is not None:
                self.pruner = get_pruner(compile_attrs['pruning'])

            # NOTE(review): loads() looks like pickle-style deserialization;
            # if so, only load weight files from trusted sources - confirm.
            tree_bytes = weights_attrs['tree'].tobytes()
            self.tree = loads(tree_bytes)

            # Mark loading as complete.
            self.is_loaded = True
        except KeyError:
            CLASSICML_LOGGER.error('模型权重加载失败, 请检查文件是否损坏')
            raise KeyError('模型权重加载失败')
コード例 #11
0
    def predict(self, x):
        """Predict labels with the model.

        Arguments:
            x: numpy.ndarray, array-like, feature data.

        Returns:
            Integer label array: 1 where the projection of a sample lies
            strictly closer to the projected mu_1 than to the projected
            mu_0, otherwise 0.

        Raises:
            ValueError: the model has been neither trained nor loaded.
        """
        model_unavailable = (self.is_trained is False
                             and self.is_loaded is False)
        if model_unavailable:
            CLASSICML_LOGGER.error('模型没有权重')
            raise ValueError('你必须先进行训练')

        # Project the samples and both class means with self.w.
        projection = np.dot(x, self.w.T)
        distance_to_0 = np.abs(projection - np.dot(self.w, self.mu_0.T))
        distance_to_1 = np.abs(projection - np.dot(self.w, self.mu_1.T))

        closer_to_1 = np.squeeze(distance_to_0 > distance_to_1)

        return closer_to_1.astype(int)
コード例 #12
0
    def save_weights(self, filepath):
        """Save the model weights to an HDF5 file.

        Arguments:
            filepath: str, path the weights file is written to.

        Raises:
            TypeError: saving the model weights failed.

        Notes:
            Hyperparameters of the optimizer are not saved.
        """
        # Initialize the weights file.
        parameters_gp = io.initialize_weights_file(
            filepath=filepath, mode='w', model_name='LogisticRegression')

        # Store the names of the compile settings and the beta weights.
        try:
            compile_attrs = parameters_gp['compile'].attrs
            weights_attrs = parameters_gp['weights'].attrs

            compile_attrs['optimizer'] = self.optimizer.name
            compile_attrs['loss'] = self.loss.name
            compile_attrs['metric'] = self.metric.name
            weights_attrs['beta'] = self.beta
        except TypeError:
            CLASSICML_LOGGER.error('模型权重保存失败, 请检查文件是否损坏')
            raise TypeError('模型权重保存失败')
コード例 #13
0
    def load_weights(self, filepath):
        """Load the model parameters from a weights file.

        Arguments:
            filepath: str, path the weights file is read from.

        Raises:
            KeyError: loading the model weights failed.

        Notes:
            Hyperparameters of the optimizer are not loaded.
        """
        # Initialize the weights file.
        parameters_gp = io.initialize_weights_file(
            filepath=filepath, mode='r', model_name='SupportVectorClassifier')

        # Restore the model parameters.
        try:
            compile_attrs = parameters_gp['compile'].attrs
            weights_attrs = parameters_gp['weights'].attrs

            self.C = compile_attrs['C']
            # gamma is restored first because the kernel is rebuilt from it.
            self.gamma = compile_attrs['gamma']
            self.kernel = get_kernel(compile_attrs['kernel'], self.gamma)
            self.tol = compile_attrs['tol']

            self.support = weights_attrs['support']
            self.support_vector = weights_attrs['support_vector']
            self.support_alpha = weights_attrs['support_alpha']
            self.support_y = weights_attrs['support_y']
            self.b = weights_attrs['b']

            # Mark loading as complete.
            self.is_loaded = True
        except KeyError:
            CLASSICML_LOGGER.error('模型权重加载失败, 请检查文件是否损坏')
            raise KeyError('模型权重加载失败')
コード例 #14
0
    def wrapper(*args, **kwargs):
        # Time the wrapped call and log the elapsed seconds.
        # Note: time spent in time.sleep() is included in the measurement.
        started_at = time.perf_counter()
        result = function(*args, **kwargs)
        elapsed = time.perf_counter() - started_at
        CLASSICML_LOGGER.info('耗时 {:.5f} s'.format(elapsed))

        return result
コード例 #15
0
ファイル: training.py プロジェクト: dongshiwen1998/classicML
def get_optimizer(optimizer):
    """Resolve an optimizer specification into an optimizer instance.

    Arguments:
        optimizer: str or classicML.optimizers.Optimizer instance,
            the optimizer, given by name or as a ready-made instance.

    Returns:
        The optimizer instance to use. An optimizers.Optimizer instance
        is handed back unchanged.

    Raises:
        AttributeError: the name is unknown or the argument has an
            unsupported type.
    """
    if isinstance(optimizer, str):
        if optimizer == 'gd' or optimizer == 'gradient_descent':
            return optimizers.GradientDescent()
        if optimizer == 'newton' or optimizer == 'newton_method':
            return optimizers.NewtonMethod()
        if optimizer == 'sgd' or optimizer == 'stochastic_gradient_descent':
            return optimizers.StochasticGradientDescent()
        if optimizer == 'adam':
            return optimizers.Adam()
        if optimizer == 'rbf':
            return optimizers.RadialBasisFunctionOptimizer()
        # Unlike the other names, this one is matched in upper case.
        if optimizer == 'SMO':
            return optimizers.SequentialMinimalOptimization()
    elif isinstance(optimizer, optimizers.Optimizer):
        return optimizer

    # Shared failure path for unknown names and unsupported argument types.
    CLASSICML_LOGGER.error('优化器调用错误')
    raise AttributeError
コード例 #16
0
    def predict(self, x, **kwargs):
        """Predict with the averaged one-dependent estimator.

        Arguments:
            x: numpy.ndarray or pandas.DataFrame, array-like, feature data.
                A one-dimensional x is treated as a single sample; rows of
                a two-dimensional x are read through x.iloc.

        Returns:
            A list with one prediction per sample.

        Raises:
            ValueError: the model has been neither trained nor loaded.
        """
        model_unavailable = (self.is_trained is False
                             and self.is_loaded is False)
        if model_unavailable:
            CLASSICML_LOGGER.error('模型没有训练')
            raise ValueError('你必须先进行训练')

        # Single sample.
        if len(x.shape) == 1:
            return [self._predict(x)]

        # One prediction per row.
        return [self._predict(x.iloc[row, :]) for row in range(x.shape[0])]
コード例 #17
0
    def load_weights(self, filepath):
        """Load the model parameters from a weights file.

        Arguments:
            filepath: str, path the weights file is read from.

        Raises:
            KeyError: loading the model weights failed.

        Notes:
            Hyperparameters of the optimizer are not loaded.
        """
        # Initialize the weights file.
        parameters_gp = io.initialize_weights_file(
            filepath=filepath,
            mode='r',
            model_name='AveragedOneDependentEstimator')

        # Restore the model parameters.
        try:
            compile_attrs = parameters_gp['compile'].attrs
            weights_attrs = parameters_gp['weights'].attrs

            self.smoothing = compile_attrs['smoothing']
            self.m = compile_attrs['m']

            # NOTE(review): loads() looks like pickle-style deserialization;
            # if so, only load weight files from trusted sources - confirm.
            attribute_bytes = weights_attrs['_attribute_list'].tobytes()
            self._attribute_list = loads(attribute_bytes)

            # Mark loading as complete.
            self.is_loaded = True
        except KeyError:
            CLASSICML_LOGGER.error('模型权重加载失败, 请检查文件是否损坏')
            raise KeyError('模型权重加载失败')
コード例 #18
0
    def load_weights(self, filepath):
        """Load the model parameters from a weights file.

        Arguments:
            filepath: str, path the weights file is read from.

        Raises:
            KeyError: loading the model weights failed.

        Notes:
            Hyperparameters of the optimizer are not loaded.
        """
        # Initialize the weights file.
        parameters_gp = io.initialize_weights_file(
            filepath=filepath,
            mode='r',
            model_name='RadialBasisFunctionNetwork')

        # Restore the model parameters.
        try:
            compile_attrs = parameters_gp['compile'].attrs
            weights_attrs = parameters_gp['weights'].attrs

            # The optimizer, loss and metric are rebuilt from stored names.
            self.hidden_units = compile_attrs['hidden_units']
            self.optimizer = get_optimizer(compile_attrs['optimizer'])
            self.loss = get_loss(compile_attrs['loss'])
            self.metric = get_metric(compile_attrs['metric'])

            # Copy every stored weight attribute into self.parameters.
            for key in weights_attrs:
                self.parameters[key] = weights_attrs[key]

            # Mark loading as complete.
            self.is_loaded = True
        except KeyError:
            CLASSICML_LOGGER.error('模型权重加载失败, 请检查文件是否损坏')
            raise KeyError('模型权重加载失败')
コード例 #19
0
    def save_weights(self, filepath):
        """Save the model weights to an HDF5 file.

        Arguments:
            filepath: str, path the weights file is written to.

        Raises:
            TypeError: saving the model weights failed.

        Notes:
            Hyperparameters of the optimizer are not saved.
        """
        # Initialize the weights file.
        parameters_gp = io.initialize_weights_file(
            filepath=filepath, mode='w', model_name='NaiveBayesClassifier')

        # Store the model parameters.
        try:
            compile_attrs = parameters_gp['compile'].attrs
            weights_attrs = parameters_gp['weights'].attrs

            compile_attrs['smoothing'] = self.smoothing
            weights_attrs['p_0'] = self.p_0
            weights_attrs['p_1'] = self.p_1
            # pi_0 and pi_1 are serialized with dumps() and wrapped in
            # np.void before they are stored.
            weights_attrs['pi_0'] = np.void(dumps(self.pi_0))
            weights_attrs['pi_1'] = np.void(dumps(self.pi_1))
        except TypeError:
            CLASSICML_LOGGER.error('模型权重保存失败, 请检查文件是否损坏')
            raise TypeError('模型权重保存失败')
コード例 #20
0
    def load_weights(self, filepath):
        """Load the model parameters from a weights file.

        Arguments:
            filepath: str, path the weights file is read from.

        Raises:
            KeyError: loading the model weights failed.

        Notes:
            Hyperparameters of the optimizer are not loaded.
        """
        # Initialize the weights file.
        parameters_gp = io.initialize_weights_file(
            filepath=filepath, mode='r', model_name='NaiveBayesClassifier')

        # Restore the model parameters.
        try:
            compile_attrs = parameters_gp['compile'].attrs
            weights_attrs = parameters_gp['weights'].attrs

            self.smoothing = compile_attrs['smoothing']
            self.p_0 = weights_attrs['p_0']
            self.p_1 = weights_attrs['p_1']

            # NOTE(review): loads() looks like pickle-style deserialization;
            # if so, only load weight files from trusted sources - confirm.
            self.pi_0 = loads(weights_attrs['pi_0'].tobytes())
            self.pi_1 = loads(weights_attrs['pi_1'].tobytes())

            # Mark loading as complete.
            self.is_loaded = True
        except KeyError:
            CLASSICML_LOGGER.error('模型权重加载失败, 请检查文件是否损坏')
            raise KeyError('模型权重加载失败')
コード例 #21
0
    def predict(self, x, probability=False):
        """Predict with the naive Bayes classifier.

        Arguments:
            x: numpy.ndarray or pandas.DataFrame, array-like, feature data.
                A one-dimensional x is treated as a single sample; rows of
                a two-dimensional x are read through x.iloc.
            probability: bool, default=False,
                whether to use the normalized probability form.

        Returns:
            A list with one prediction per sample. Without the probability
            form the entries are 0/1 labels; with it they are the
            probabilities of the negative and positive class.

        Raises:
            ValueError: the model has been neither trained nor loaded.
        """
        model_unavailable = (self.is_trained is False
                             and self.is_loaded is False)
        if model_unavailable:
            CLASSICML_LOGGER.error('模型没有训练')
            raise ValueError('你必须先进行训练')

        # Work with logarithms of the priors to avoid underflow.
        log_p_0 = np.log(self.p_0)
        log_p_1 = np.log(self.p_1)

        # Single sample.
        if len(x.shape) == 1:
            return [self._predict(x, log_p_0, log_p_1, probability)]

        # One prediction per row.
        return [
            self._predict(x.iloc[row, :], log_p_0, log_p_1, probability)
            for row in range(x.shape[0])
        ]
コード例 #22
0
    def save_weights(self, filepath):
        """Save the model weights to an HDF5 file.

        Arguments:
            filepath: str, path the weights file is written to.

        Raises:
            TypeError: saving the model weights failed.

        References:
            - [How to store raw binary data](https://docs.h5py.org/en/2.3/strings.html)

        Notes:
            Hyperparameters of the optimizer are not saved.
        """
        # Initialize the weights file.
        parameters_gp = io.initialize_weights_file(
            filepath=filepath, mode='w', model_name='DecisionTreeClassifier')

        # Store the model parameters.
        try:
            compile_attrs = parameters_gp['compile'].attrs
            weights_attrs = parameters_gp['weights'].attrs

            compile_attrs['criterion'] = self.criterion
            # The pruner name is only written when a pruner is configured.
            if self.pruner is not None:
                compile_attrs['pruning'] = self.pruner.name

            # The tree is serialized with dumps() and stored as np.void
            # (see the reference in the docstring).
            serialized_tree = dumps(self.tree)
            weights_attrs['tree'] = np.void(serialized_tree)
        except TypeError:
            CLASSICML_LOGGER.error('模型权重保存失败, 请检查文件是否损坏')
            raise TypeError('模型权重保存失败')
コード例 #23
0
    def predict(self, x):
        """Predict with the classifier.

        Arguments:
            x: numpy.ndarray, array-like, feature data.

        Returns:
            Integer array with one label per sample: -1 where the decision
            value is negative, otherwise 1.

        Raises:
            ValueError: the model has been neither trained nor loaded.
        """
        model_unavailable = (self.is_trained is False
                             and self.is_loaded is False)
        if model_unavailable:
            CLASSICML_LOGGER.error('模型没有训练')
            raise ValueError('你必须先进行训练')

        total = x.shape[0]
        # Every sample starts as +1 and is flipped when its decision value
        # turns out negative.
        labels = np.ones((total, ), dtype=int)

        for index in range(total):
            kernel_row = self.kernel(x[index], self.support_vector)
            weights = (self.support_alpha.reshape(-1, 1) * self.support_y).T
            decision = np.matmul(weights, kernel_row.T) + self.b
            if decision < 0:
                labels[index] = -1

        return labels
コード例 #24
0
    def load_weights(self, filepath):
        """Load the model parameters from a weights file.

        Arguments:
            filepath: str, path the weights file is read from.

        Raises:
            KeyError: loading the model weights failed.

        Notes:
            Hyperparameters of the optimizer are not loaded.
        """
        # Initialize the weights file.
        parameters_gp = io.initialize_weights_file(
            filepath=filepath, mode='r', model_name='LogisticRegression')

        # Restore the model parameters.
        try:
            compile_attrs = parameters_gp['compile'].attrs
            weights_attrs = parameters_gp['weights'].attrs

            # The optimizer, loss and metric are rebuilt from stored names.
            self.optimizer = get_optimizer(compile_attrs['optimizer'])
            self.loss = get_loss(compile_attrs['loss'])
            self.metric = get_metric(compile_attrs['metric'])
            self.beta = weights_attrs['beta']

            # Mark loading as complete.
            self.is_loaded = True
        except KeyError:
            CLASSICML_LOGGER.error('模型权重加载失败, 请检查文件是否损坏')
            raise KeyError('模型权重加载失败')
コード例 #25
0
    def fit(self, x, y, x_validation=None, y_validation=None):
        """Train the decision tree classifier.

        Arguments:
            x: numpy.ndarray or pandas.DataFrame, array-like,
                feature data.
            y: numpy.ndarray or pandas.DataFrame, array-like,
                labels.
            x_validation: numpy.ndarray or pandas.DataFrame, array-like,
                validation feature data used for pruning.
            y_validation: numpy.ndarray or pandas.DataFrame, array-like,
                validation labels used for pruning.

        Returns:
            The DecisionTreeClassifier instance itself.

        Raises:
            AttributeError: a pruner is configured but no validation set
                was supplied.
        """
        if self.attribute_name is None and isinstance(x, np.ndarray):
            CLASSICML_LOGGER.warn(
                "属性名称缺失, 请使用pandas.DataFrame; 或检查 self.attributes_name")

        validation_missing = x_validation is None or y_validation is None
        if self.pruner is not None and validation_missing:
            CLASSICML_LOGGER.error("没有验证集, 无法对决策树进行剪枝")
            raise AttributeError('没有验证集')

        # Attach the attribute names to the features and renumber the rows
        # from zero; the generator keeps a reference to the frame.
        features = pd.DataFrame(x, columns=self.attribute_name)
        features.reset_index(drop=True, inplace=True)
        self.generator._x = features

        labels = pd.Series(y)
        labels.reset_index(drop=True, inplace=True)

        # Give the validation data the same treatment when it is present.
        if x_validation is not None:
            x_validation = pd.DataFrame(x_validation,
                                        columns=self.attribute_name)
            x_validation.reset_index(drop=True, inplace=True)

            y_validation = pd.Series(y_validation)
            y_validation.reset_index(drop=True, inplace=True)

        # Grow a tree unless one was already restored from a weights file.
        if self.is_loaded is False:
            self.tree = self.generator(features, labels)

        # Prune the tree.
        if self.pruner:
            self.tree = self.pruner(features, labels, x_validation,
                                    y_validation, self.tree)

        # Mark training as complete.
        self.is_trained = True

        return self
コード例 #26
0
    def wrapper(*args, **kwargs):
        # Run the wrapped call first, then log the memory of this process.
        result = function(*args, **kwargs)

        memory_info = psutil.Process(os.getpid()).memory_full_info()
        # uss is divided by 1024 twice to express the value in MB.
        used_mb = memory_info.uss / 1024 / 1024
        CLASSICML_LOGGER.info('占用内存 {:.5f} MB'.format(used_mb))

        return result
コード例 #27
0
    def fit(self, x, y):
        """Train the averaged one-dependent estimator.

        Arguments:
            x: numpy.ndarray or pandas.DataFrame, array-like, feature data.
            y: numpy.ndarray or pandas.DataFrame, array-like, labels.

        Returns:
            The AveragedOneDependentEstimator instance itself.
        """
        if isinstance(x, np.ndarray) and self.attribute_name is None:
            CLASSICML_LOGGER.warn(
                "属性名称缺失, 请使用pandas.DataFrame; 或检查 self.attributes_name")

        # TODO(Steve R. Sun, tag:code): no sound theoretical basis for
        # resuming an interrupted training run has been found yet, so every
        # call starts from an empty list.
        self._attribute_list = list()

        # Attach the attribute names to the features and renumber the rows
        # from zero.
        x = pd.DataFrame(x, columns=self.attribute_name)
        x.reset_index(drop=True, inplace=True)
        y = pd.Series(y)
        y.reset_index(drop=True, inplace=True)

        number_of_attributes = x.shape[1]

        # Collect all values of the discrete attributes. A column qualifies
        # when it is not continuous and every distinct value occurs more
        # than self.m times.
        discrete_unique_values = dict()
        for attribute in range(number_of_attributes):
            xi = x.iloc[:, attribute]
            # Series.value_counts replaces the deprecated top-level
            # pandas.value_counts and yields the same counts.
            if (type_of_target(xi.values) != 'continuous') and (
                    xi.value_counts().values > self.m).all():
                discrete_unique_values.update(
                    {x.columns[attribute]: xi.unique()})

        # Build one SPODE per qualifying attribute, using that attribute
        # as the super parent, and keep what each parent fit produced.
        for key in discrete_unique_values:
            self.super_parent_name = key
            super(AveragedOneDependentEstimator, self).fit(x, y)
            current_attribute_list = self._list_of_p_c

            self._attribute_list.append(current_attribute_list)

        self.is_trained = True

        return self
コード例 #28
0
ファイル: io.py プロジェクト: dongshiwen1998/classicML
def _parse(model_name, fp):
    """Parse the description group and validate the weights file.

    Arguments:
        model_name: str, name of the model the caller expects.
        fp: h5py._hl.files.File, file handle.

    Raises:
        ValueError: the file was written by a version older than the
            minimum supported one, or it belongs to a different model.
    """
    def _version_numbers(version):
        # Versions must be ordered numerically: as plain strings '0.10.0'
        # would sort below '0.5.0'. Only the digit groups are compared, so
        # a textual prefix such as 'backend.io' does not take part.
        return tuple(int(number) for number in re.findall('\\d+', version))

    description_gp = fp['description']
    version = description_gp.attrs['version']

    # The first x.y.z match is the classicML version, the last x.y match
    # is the backend version.
    file_cml_version = re.findall('\\d+\\.\\d+\\.\\d+', version)[0]
    file_backend_version = 'backend.io' + re.findall('\\d+\\.\\d+', version)[-1]

    cml_too_old = (_version_numbers(file_cml_version)
                   < _version_numbers(min_cml_version))
    backend_too_old = (_version_numbers(file_backend_version)
                       < _version_numbers(min__version__))

    if cml_too_old or backend_too_old:
        CLASSICML_LOGGER.error('文件核验失败, 模型版本过低')
        raise ValueError('文件核验失败')
    if description_gp.attrs['model_name'] != model_name:
        CLASSICML_LOGGER.error('文件核验失败, 模型不匹配')
        raise ValueError('文件核验失败')
コード例 #29
0
    def predict(self, x):
        """Predict with the radial basis function network.

        Arguments:
            x: numpy.ndarray, array-like, feature data.

        Returns:
            The first of the two values produced by the optimizer's
            forward pass, i.e. the predicted probabilities.

        Raises:
            ValueError: the model has been neither trained nor loaded.
        """
        model_unavailable = (self.is_trained is False
                             and self.is_loaded is False)
        if model_unavailable:
            CLASSICML_LOGGER.error('模型没有训练')
            raise ValueError('你必须先进行训练')

        # The forward pass returns a pair; only the prediction is needed.
        forward_result = self.optimizer.forward(x, self.parameters)
        y_pred, _ = forward_result

        return y_pred
コード例 #30
0
    def predict(self, x, probability=False):
        """Predict with the super-parent one-dependent estimator.

        Arguments:
            x: numpy.ndarray or pandas.DataFrame, array-like, feature data.
                A one-dimensional x is treated as a single sample; rows of
                a two-dimensional x are read through x.iloc.
            probability: bool, default=False,
                whether to use the normalized probability form.

        Returns:
            A list with one entry per sample. Without the probability form
            an entry is the label 0 or 1; with it an entry is the pair of
            normalized negative and positive class probabilities.

        Raises:
            ValueError: the model has been neither trained nor loaded.
        """
        model_unavailable = (self.is_trained is False
                             and self.is_loaded is False)
        if model_unavailable:
            CLASSICML_LOGGER.error('模型没有训练')
            raise ValueError('你必须先进行训练')

        def _as_output(p_0, p_1):
            # Turn the two class scores into the requested output form;
            # label 0 is chosen only when p_0 is strictly larger.
            if probability:
                return [p_0 / (p_0 + p_1), p_1 / (p_0 + p_1)]
            return 0 if p_0 > p_1 else 1

        if len(x.shape) == 1:
            samples = [x]
        else:
            samples = (x.iloc[row, :] for row in range(x.shape[0]))

        y_pred = list()
        for sample in samples:
            p_0, p_1 = self._predict(sample)
            y_pred.append(_as_output(p_0, p_1))

        return y_pred