Code example #1
File: ml_train.py  Project: hborkows/INZ
def train_model(model: ClassifierMixin, data_time_range: List[str],
                output_path: str):
    es_connection = ESConnection(es_host='http://localhost:9200')

    dataset = ml_utils.get_data(start_time=data_time_range[0],
                                end_time=data_time_range[1],
                                es_host=es_connection)
    # Cache the dataset to disk, then reload it from the pickle.
    dataset.to_pickle('data/dataset.pkl')
    dataset = pd.read_pickle('data/dataset.pkl')
    print(len(dataset.columns))

    y = dataset['target']
    X = dataset.drop(columns=['target'])

    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.33,
                                                        random_state=17)
    print('Training model')
    model = model.fit(X_train, y_train)
    print('Finished training')
    prediction = model.predict(X_test)
    print(confusion_matrix(y_test, prediction))

    dump(model, output_path + '/' + type(model).__name__ + '.joblib')
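For reference, a hedged usage sketch of train_model follows; the model choice, time range, and output directory are illustrative assumptions, while ESConnection and ml_utils come from the snippet above.

# Hypothetical usage sketch for train_model (all values illustrative).
from sklearn.ensemble import RandomForestClassifier

clf = RandomForestClassifier(n_estimators=100, random_state=17)
train_model(model=clf,
            data_time_range=['2020-01-01', '2020-02-01'],
            output_path='models')
# The fitted model would be saved as models/RandomForestClassifier.joblib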
Code example #2
def get_score(dataset: np.ndarray, answers: np.ndarray, parametrs: int,
              model: base.ClassifierMixin, score_func) \
        -> Tuple[float, float, float, float, float]:
    selecter = feature_selection.SelectKBest(score_func=score_func,
                                             k=parametrs)
    selecter.fit(dataset, answers)
    transformed_dataset = selecter.transform(dataset)
    x_train, x_test, y_train, y_test = model_selection.train_test_split(
        transformed_dataset, answers, random_state=0, stratify=answers)

    model.fit(x_train, y_train)
    prediction = model.predict(x_test)
    simple_score = metrics.f1_score(y_test, prediction, average='weighted')

    buffer_test = preprocessing.minmax_scale(transformed_dataset,
                                             feature_range=(0, 1),
                                             axis=0)
    nptraining = np.array(buffer_test, 'float32')
    nptarget = np.array(answers, 'float32')
    print('simple_score is done')
    k5_score = kfold_cv(5, nptraining, nptarget, model)
    print('k5_score is done')
    k10_score = kfold_cv(10, nptraining, nptarget, model)
    print('k10_score is done')
    k20_score = kfold_cv(20, nptraining, nptarget, model)
    print('k20_score is done')
    random_score = random_sampling_cv(nptraining, nptarget, model)
    return simple_score, k5_score, k10_score, k20_score, random_score
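The helper kfold_cv is called here (and again in example #20, where it takes an extra flag) but is never shown. Below is a minimal sketch, assuming it mirrors the weighted-F1 evaluation used above; the signature and scoring are inferred, not taken from the source.

# Hypothetical sketch of the kfold_cv helper referenced above (assumed behavior).
def kfold_cv(n_splits: int, dataset: np.ndarray, answers: np.ndarray,
             model: base.ClassifierMixin) -> float:
    scores = []
    for train_idx, test_idx in model_selection.KFold(n_splits=n_splits).split(dataset):
        model.fit(dataset[train_idx], answers[train_idx])
        prediction = model.predict(dataset[test_idx])
        scores.append(metrics.f1_score(answers[test_idx], prediction,
                                       average='weighted'))
    return float(np.mean(scores))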
Code example #3
    def __init__(self,
                 estimator=None,
                 max_depth=20,
                 min_samples_split=2,
                 min_samples_leaf=2,
                 min_weight_fraction_leaf=0.0,
                 fit_improve_algo='auto',
                 p1p2=0.09,
                 gamma=1.,
                 verbose=0):
        "constructor"
        ClassifierMixin.__init__(self)
        BaseEstimator.__init__(self)
        # logistic regression
        if estimator is None:
            self.estimator = LogisticRegression()
        else:
            self.estimator = estimator
        if max_depth is None:
            raise ValueError("'max_depth' cannot be None.")
        if max_depth > 1024:
            raise ValueError("'max_depth' must be <= 1024.")
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.min_samples_leaf = min_samples_leaf
        self.min_weight_fraction_leaf = min_weight_fraction_leaf
        self.fit_improve_algo = fit_improve_algo
        self.p1p2 = p1p2
        self.gamma = gamma
        self.verbose = verbose

        if self.fit_improve_algo not in DecisionTreeLogisticRegression._fit_improve_algo_values:
            raise ValueError("fit_improve_algo='{}' not in {}".format(
                self.fit_improve_algo,
                DecisionTreeLogisticRegression._fit_improve_algo_values))
Code example #4
 def __init__(self, estimator=None, clus=None, **kwargs):
     """
     @param  estimator   :epkg:`sklearn:linear_model:LogisticRegression`
                         by default
     @param  clus        clustering applied on each class,
                         by default k-means with two classes
     @param  kwargs      sent to :meth:`set_params
                         <mlinsights.mlmodel.classification_kmeans.
                         ClassifierAfterKMeans.set_params>`,
                         see its documentation to understand how to
                         specify parameters
     """
     ClassifierMixin.__init__(self)
     BaseEstimator.__init__(self)
     if estimator is None:
         estimator = LogisticRegression()
     if clus is None:
         clus = KMeans(n_clusters=2)
     self.estimator = estimator
     self.clus = clus
     if not hasattr(clus, "transform"):
         raise AttributeError(  # pragma: no cover
             "clus does not have a transform method.")
     if kwargs:
         self.set_params(**kwargs)
Code example #5
 def __init__(self, estimator=None, threshold=0.75):
     ClassifierMixin.__init__(self)
     BaseEstimator.__init__(self)
     if estimator is None:
         estimator = LogisticRegression(solver='liblinear')
     self.estimator = estimator
     self.threshold = threshold
Code example #6
    def __init__(self,
                 ixname='ix',
                 source_suffix='source',
                 target_suffix='target',
                 **kwargs):
        ClassifierMixin.__init__(self)
        self.ixname = ixname
        self.source_suffix = source_suffix
        self.target_suffix = target_suffix
        self.ixnamesource, self.ixnametarget, self.ixnamepairs = concatixnames(
            ixname=self.ixname,
            source_suffix=self.source_suffix,
            target_suffix=self.target_suffix)
        # clusters
        self.clusters = None

        # number of unique clusters
        self.n_clusters = None

        # clusters where no match has been found
        self.nomatch = None

        # clusters where all elements are positive matches
        self.allmatch = None

        # clusters containing both positive and negative values (match and non-match)
        self.mixedmatch = None

        # clusters not found (added to nomatch)
        self.notfound = None

        self.fitted = False
Code example #7
File: pipesbsclf.py  Project: ogierpaul/suricate
    def __init__(self,
                 transformer,
                 classifier,
                 ixname='ix',
                 source_suffix='source',
                 target_suffix='target',
                 **kwargs):
        """

        Args:
            transformer (TransformerMixin):
            classifier (ClassifierMixin):
            ixname (str):
            source_suffix (str):
            target_suffix (str):
            n_jobs (int):
            pruning_ths (float): return only the pairs whose score is greater than pruning_ths
        """
        ClassifierMixin.__init__(self)
        self.ixname = ixname
        self.source_suffix = source_suffix
        self.target_suffix = target_suffix
        self.ixnamesource, self.ixnametarget, self.ixnamepairs = concatixnames(
            ixname=self.ixname,
            source_suffix=self.source_suffix,
            target_suffix=self.target_suffix)
        self.fitted = False
        self.transformer = transformer
        self.classifier = classifier
Code example #8
    def __init__(self, binner=None, estimator=None, n_jobs=None,
                 random_state=None, verbose=False):
        """
        @param      binner              transformer or predictor which creates the buckets
        @param      estimator           predictor trained on every bucket
        @param      n_jobs              number of parallel jobs (for training and predicting)
        @param      random_state        to pick up random examples when buckets do not
                                        contain enough examples of each class
        @param      verbose             boolean or use ``'tqdm'`` to use :epkg:`tqdm`
                                        to fit the estimators

        *binner* allows the following values:

        - ``'tree'``: the model is :epkg:`sklearn:tree:DecisionTreeClassifier`
        - ``'bins'``: the model is :epkg:`sklearn:preprocessing:KBinsDiscretizer`
        - any instantiated model

        *estimator* allows the following values:

        - ``None``: the model is :epkg:`sklearn:linear_model:LogisticRegression`
        - any instantiated model
        """
        if estimator is None:
            estimator = LogisticRegression()
        if binner in ('tree', None):
            binner = DecisionTreeClassifier(min_samples_leaf=5)
        ClassifierMixin.__init__(self)
        PiecewiseEstimator.__init__(
            self, binner=binner, estimator=estimator,
            n_jobs=n_jobs, verbose=verbose)
        self.random_state = random_state
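A hedged usage sketch of the piecewise classifier above; the enclosing class name is not shown in the snippet, so PiecewiseClassifier is an assumption, as are the data names.

# Hypothetical usage sketch (PiecewiseClassifier is an assumed class name).
from sklearn.linear_model import LogisticRegression

model = PiecewiseClassifier(binner='bins', estimator=LogisticRegression(),
                            n_jobs=1, verbose=False)
model.fit(X_train, y_train)          # fits one estimator per bucket
predictions = model.predict(X_test)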
Code example #9
File: partialclf.py  Project: ogierpaul/suricate
 def __init__(self,
              classifier,
              ixname='ix',
              source_suffix='source',
              target_suffix='target',
              **kwargs):
     """
     This is a wrapper around a classifier that allows it to train on partial data
     where X and y do not have the same index (because of pruning steps, ...).
     It will train (fit) the classifier on the common index.
     Args:
         classifier (ClassifierMixin): Classifier to use. Should be the output of the pipeline
         ixname (str):
         source_suffix (str):
         target_suffix (str):
     """
     ClassifierMixin.__init__(self)
     self.ixname = ixname
     self.source_suffix = source_suffix
     self.target_suffix = target_suffix
     self.ixnamesource, self.ixnametarget, self.ixnamepairs = concatixnames(
         ixname=self.ixname,
         source_suffix=self.source_suffix,
         target_suffix=self.target_suffix)
     self.fitted = False
     self.classifier = classifier
Code example #10
File: pruningpipe.py  Project: ogierpaul/suricate
    def __init__(self,
                 connector,
                 pruningclf,
                 sbsmodel,
                 classifier,
                 ixname='ix',
                 source_suffix='source',
                 target_suffix='target',
                 **kwargs):
        """

        Args:
            connector (ConnectorMixin): Connector (Scorer) used to do the calculation
            pruningclf (Explorer): Classifier used to do the pruning (0 = no match, 1 = potential match, 2 = sure match)
            sbsmodel (TransformerMixin): Side-by-side scorer; can be FeatureUnion, Pipeline, ...
            classifier (ClassifierMixin): Classifier used to do the prediction
            ixname (str): 'ix'
            source_suffix (str): 'left'
            target_suffix (str): 'right'
        """
        ClassifierMixin.__init__(self)
        self.ixname = ixname
        self.source_suffix = source_suffix
        self.target_suffix = target_suffix
        self.ixnamesource, self.ixnametarget, self.ixnamepairs = concatixnames(
            ixname=self.ixname,
            source_suffix=self.source_suffix,
            target_suffix=self.target_suffix)
        self.fitted = False
        self.connector = connector
        self.pruningclf = pruningclf
        self.sbsmodel = sbsmodel
        self.classifier = classifier
Code example #11
def get_preds_probas(est: ClassifierMixin, X_test: DataFrame, y_test: Series,
                     mapper_dict: Dict) -> DataFrame:
    """
    Get prediction probabilities (if available) or return true and predicted
    labels
    """
    df_preds = DataFrame(est.predict(X_test), index=X_test.index)
    # est is expected to be a fitted Pipeline whose final step is named "clf"
    if hasattr(est.named_steps["clf"], "predict_proba"):
        # Get prediction probabilities (if available)
        df_probas = DataFrame(est.predict_proba(X_test), index=X_test.index)

        # Append prediction and prediction probabilities
        df_summ = concat([df_preds, df_probas], axis=1)
        df_summ.columns = ["predicted_label"] + [
            f"probability_of_{i}" for i in range(len(np.unique(y_test)))
        ]

        # Get label (class) with maximum prediction probability for each row
        df_summ["max_class_number_manually"] = df_probas.idxmax(axis=1)
        df_summ["probability_of_max_class"] = df_probas.max(axis=1)

        # Compare .predict_proba() output with the manually extracted
        # prediction probability
        lhs = df_summ["max_class_number_manually"]
        rhs = df_summ["predicted_label"].replace(mapper_dict)
        assert (lhs == rhs).all()
    else:
        df_summ = df_preds.copy()
    # Get true label
    df_summ.insert(0, "true_label", y_test)
    return df_summ
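A hedged usage sketch for get_preds_probas: it expects a fitted Pipeline whose final step is named "clf" and a mapper_dict from class labels to probability-column indices. The pipeline composition and data names below are illustrative assumptions.

# Hypothetical usage sketch (pipeline steps and data names assumed).
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

pipe = Pipeline([("scale", StandardScaler()),
                 ("clf", LogisticRegression())])
pipe.fit(X_train, y_train)
# Map each class label to its predict_proba column index
mapper_dict = {label: i for i, label in enumerate(pipe.named_steps["clf"].classes_)}
df_summary = get_preds_probas(pipe, X_test, y_test, mapper_dict)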
Code example #12
def random_sampling_cv(dataset: np.ndarray, answers: np.ndarray,
                       model: base.ClassifierMixin) -> float:
    x_train, x_test, y_train, y_test = model_selection.train_test_split(
        dataset, answers, shuffle=True, stratify=answers)
    model.fit(x_train, y_train)
    prediction = model.predict(x_test)

    f1_score = metrics.f1_score(y_test, prediction, average='weighted')
    return f1_score
Code example #13
File: main.py  Project: matbur/um
def get_score(
    model: ClassifierMixin,
    X_train: pd.DataFrame,
    y_train: pd.Series,
    X_test: pd.DataFrame,
    y_test: pd.Series,
) -> float:
    model.fit(X_train, y_train)
    score = model.score(X_test, y_test)
    return score
Code example #14
File: test_classification.py  Project: simai-ml/MAPIE
def test_classifier_without_classes_attribute(
        estimator: ClassifierMixin) -> None:
    """
    Test that prefitted classifier without 'classes_ 'attribute raises error.
    """
    estimator.fit(X_toy, y_toy)
    if isinstance(estimator, Pipeline):
        delattr(estimator[-1], "classes_")
    else:
        delattr(estimator, "classes_")
    mapie = MapieClassifier(estimator=estimator, cv="prefit")
    with pytest.raises(AttributeError,
                       match=r".*does not contain 'classes_'.*"):
        mapie.fit(X_toy, y_toy)
Code example #15
File: classification.py  Project: simai-ml/MAPIE
    def _predict_oof_model(
        self,
        estimator: ClassifierMixin,
        X: ArrayLike,
    ) -> NDArray:
        """
        Predict probabilities of a test set from a fitted estimator.

        Parameters
        ----------
        estimator : ClassifierMixin
            Fitted estimator.
        X : ArrayLike
            Test set.

        Returns
        -------
        ArrayLike
            Predicted probabilities.
        """
        y_pred_proba = estimator.predict_proba(X)
        # we enforce y_pred_proba to contain all labels included in y
        if len(estimator.classes_) != self.n_classes_:
            y_pred_proba = self._fix_number_of_classes(
                estimator.classes_,
                y_pred_proba
            )
        return y_pred_proba
Code example #16
File: article_learner.py  Project: bmassman/fake_news
def test_probabilities(model: ClassifierMixin,
                       X: np.array,
                       y: pd.Series,
                       bins: int = 10,
                       threshold: float = 0.5):
    """Print confusion matrix based on class probability."""
    probs = [p[1] for p in model.predict_proba(X)]
    print('\tProbabilities')
    df = pd.DataFrame({'prob': probs, 'label': y})
    step = 1 / bins
    cut_labels = [round(step * f, 1) for f in range(bins)]
    by_prob = (df.groupby(pd.cut(df['prob'], bins,
                                 labels=cut_labels)).agg(['sum',
                                                          'count'])['label'])
    print('\t\tprobs\t1\t0\tacc')
    for index, row in by_prob.iloc[::-1].iterrows():
        ones = row['sum']
        if math.isnan(ones):
            ones = 0
        else:
            ones = int(ones)
        count = row['count']
        zeros = int(count) - ones
        if count > 0:
            acc = zeros / count if index < threshold else ones / count
        else:
            acc = 0.0
        print(f'\t\t{index}\t{ones}\t{zeros}\t{acc:.3f}')
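A hedged usage sketch for test_probabilities, assuming a fitted binary classifier that supports predict_proba; all names below are illustrative.

# Hypothetical usage sketch (fitted model and held-out data assumed).
from sklearn.linear_model import LogisticRegression

model = LogisticRegression().fit(X_train, y_train)
# Prints, per probability bucket, counts of 1s and 0s and bucket accuracy
test_probabilities(model, X_test, y_test, bins=10, threshold=0.5)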
Code example #17
def ml_cross_val_score(
        classifier: ClassifierMixin,
        X: pd.DataFrame,
        y: pd.Series,
        cv_gen: BaseCrossValidator,
        sample_weight_train: np.ndarray = None,
        sample_weight_score: np.ndarray = None,
        scoring: Callable[[np.ndarray, np.ndarray], float] = log_loss):
    # pylint: disable=invalid-name
    # pylint: disable=comparison-with-callable
    """
    Advances in Financial Machine Learning, Snippet 7.4, page 110.

    Using the PurgedKFold Class.

    Function to run a cross-validation evaluation of the classifier, using sample weights and a custom CV generator.

    Note: This function differs from the book in that it requires the user to pass in a CV object. The book
    accepts a None value as a default and then resorts to using PurgedCV, which also meant that extra arguments had
    to be passed to the function. To correct this we have removed the default and require the user to pass a CV
    object to the function.

    Example:

    .. code-block:: python

        cv_gen = PurgedKFold(n_splits=n_splits, samples_info_sets=samples_info_sets, pct_embargo=pct_embargo)
        scores_array = ml_cross_val_score(classifier, X, y, cv_gen, sample_weight_train=sample_train,
                                          sample_weight_score=sample_score, scoring=accuracy_score)

    :param classifier: (ClassifierMixin) A sk-learn Classifier object instance.
    :param X: (pd.DataFrame) The dataset of records to evaluate.
    :param y: (pd.Series) The labels corresponding to the X dataset.
    :param cv_gen: (BaseCrossValidator) Cross Validation generator object instance.
    :param sample_weight_train: (np.array) Sample weights used to train the model for each record in the dataset.
    :param sample_weight_score: (np.array) Sample weights used to evaluate the model quality.
    :param scoring: (Callable) A metric scoring, can be custom sklearn metric.
    :return: (np.array) The computed score.
    """

    # If no sample_weight then broadcast a value of 1 to all samples (full weight).
    if sample_weight_train is None:
        sample_weight_train = np.ones((X.shape[0],))

    if sample_weight_score is None:
        sample_weight_score = np.ones((X.shape[0],))

    # Score model on KFolds
    ret_scores = []
    for train, test in cv_gen.split(X=X, y=y):
        fit = classifier.fit(X=X.iloc[train, :], y=y.iloc[train], sample_weight=sample_weight_train[train])
        if scoring == log_loss:
            prob = fit.predict_proba(X.iloc[test, :])
            score = -1 * scoring(y.iloc[test], prob, sample_weight=sample_weight_score[test], labels=classifier.classes_)
        else:
            pred = fit.predict(X.iloc[test, :])
            score = scoring(y.iloc[test], pred, sample_weight=sample_weight_score[test])
        ret_scores.append(score)
    return np.array(ret_scores)
Code example #18
File: chapter_4.py  Project: aliabbasjaffri/zenml
def sklearn_evaluator(
    X_test: np.ndarray,
    y_test: np.ndarray,
    model: ClassifierMixin,
) -> float:
    """Calculate accuracy score with classifier."""

    test_acc = model.score(X_test.reshape((X_test.shape[0], -1)), y_test)
    return test_acc
Code example #19
def ml_cross_val_score(
        classifier: ClassifierMixin,
        X: pd.DataFrame,
        y: pd.Series,
        cv_gen: BaseCrossValidator,
        sample_weight: np.ndarray = None,
        scoring: str = 'neg_log_loss'):
    # pylint: disable=invalid-name
    """
    Snippet 7.4, page 110, Using the PurgedKFold Class.
    Function to run a cross-validation evaluation of the classifier, using sample weights and a custom CV generator.

    Note: This function differs from the book in that it requires the user to pass in a CV object. The book
    accepts a None value as a default and then resorts to using PurgedCV, which also meant that extra arguments had
    to be passed to the function. To correct this we have removed the default and require the user to pass a CV
    object to the function.

    Example:

    .. code-block:: python

        cv_gen = PurgedKFold(n_splits=n_splits, samples_info_sets=samples_info_sets, pct_embargo=pct_embargo)
        scores_array = ml_cross_val_score(classifier, X, y, cv_gen, sample_weight=None, scoring='neg_log_loss')

    :param classifier: A sk-learn Classifier object instance.
    :param X: The dataset of records to evaluate.
    :param y: The labels corresponding to the X dataset.
    :param cv_gen: Cross Validation generator object instance.
    :param sample_weight: A numpy array of weights for each record in the dataset.
    :param scoring: A metric name to use for scoring; currently supports `neg_log_loss`, `accuracy`, `f1`, `precision`,
        `recall`, and `roc_auc`.
    :return: The computed score as a numpy array.
    """
    # Define scoring metrics
    scoring_func_dict = {'neg_log_loss': log_loss, 'accuracy': accuracy_score, 'f1': f1_score,
                         'precision': precision_score, 'recall': recall_score, 'roc_auc': roc_auc_score}
    try:
        scoring_func = scoring_func_dict[scoring]
    except KeyError:
        raise ValueError('Wrong scoring method. Select from: neg_log_loss, accuracy, f1, precision, recall, roc_auc')

    # If no sample_weight then broadcast a value of 1 to all samples (full weight).
    if sample_weight is None:
        sample_weight = np.ones((X.shape[0],))

    # Score model on KFolds
    ret_scores = []
    for train, test in cv_gen.split(X=X, y=y):
        fit = classifier.fit(X=X.iloc[train, :], y=y.iloc[train], sample_weight=sample_weight[train])
        if scoring == 'neg_log_loss':
            prob = fit.predict_proba(X.iloc[test, :])
            score = -1 * scoring_func(y.iloc[test], prob, sample_weight=sample_weight[test], labels=classifier.classes_)
        else:
            pred = fit.predict(X.iloc[test, :])
            score = scoring_func(y.iloc[test], pred, sample_weight=sample_weight[test])
        ret_scores.append(score)
    return np.array(ret_scores)
Code example #20
def get_score(dataset: np.ndarray, answers: np.ndarray, parametrs: int,
              model: base.ClassifierMixin, score_func) \
        -> Tuple[float, float]:
    selecter = feature_selection.SelectKBest(score_func=score_func,
                                             k=parametrs)
    selecter.fit(dataset, answers)
    transformed_dataset = selecter.transform(dataset)
    x_train, x_test, y_train, y_test = model_selection.train_test_split(
        transformed_dataset, answers, test_size=0.25, random_state=0)

    model.fit(x_train, y_train)
    prediction = model.predict(x_test)
    simple_score = metrics.f1_score(y_test, prediction, average='binary')

    buffer_test = preprocessing.minmax_scale(dataset,
                                             feature_range=(0, 1),
                                             axis=0)
    nptraining = np.array(buffer_test, 'float32')
    nptarget = np.array(answers, 'float32')
    k5_score = kfold_cv(5, nptraining, nptarget, model, True)
    return simple_score, k5_score
Code example #21
def evaluate_on_datasets(predictor: ClassifierMixin, datasets):
    y_preds = []
    mean_kappa = []
    for x, y_true in datasets:
        y_pred = predictor.predict(x)
        y_preds.append(y_pred)

        kappa_hold = cohen_kappa_score(y_true, y_pred, weights='quadratic')
        mean_kappa.append(kappa_hold)

    print(np.mean(mean_kappa), mean_kappa)
    return y_preds
Code example #22
def _train(train_data: DataFrame, classifier: ClassifierMixin,
           clusterer: Clustering) -> dict:
    models = dict()

    train_data = clusterer.cluster_data(train_data)

    for cluster in range(clusterer.n_clusters):
        cluster_train_df = train_data[cluster]
        if not cluster_train_df.empty:
            cluster_targets_df = DataFrame(cluster_train_df['label'])
            try:
                classifier.fit(cluster_train_df.drop(columns='label'),
                               cluster_targets_df.values.ravel())
            except (NotImplementedError, KeyError):
                # Fall back to incremental training when fit() is unavailable
                classifier.partial_fit(
                    cluster_train_df.drop(columns='label').values,
                    cluster_targets_df.values.ravel())

            models[cluster] = classifier
            try:
                classifier = clone(classifier)
            except TypeError:
                classifier = clone(classifier, safe=False)
                classifier.reset()

    return {
        ModelType.CLUSTERER.value: clusterer,
        ModelType.CLASSIFIER.value: models
    }
Code example #23
def cross_validation(dataset: np.ndarray, answers: np.ndarray,
                     model: base.ClassifierMixin,
                     cross_validator: model_selection.BaseCrossValidator,
                     save_worst_data: bool) -> float:
    iteration_counter: int = 0
    f1_score_value = 0
    worst_f1_score_value = 1.0
    worst_predicted = None
    worst_actual = None

    for train_index, test_index in cross_validator.split(dataset, answers):
        train_x, test_x = dataset[train_index], dataset[test_index]
        train_y, test_y = answers[train_index], answers[test_index]
        iteration_counter += 1

        # Train
        model.fit(train_x, train_y)

        # Test
        predicted = model.predict(test_x)

        # Evaluate
        f1_iteration_score_value = metrics.f1_score(test_y,
                                                    predicted,
                                                    average='weighted')
        if f1_iteration_score_value <= worst_f1_score_value:
            worst_f1_score_value = f1_iteration_score_value
            worst_predicted = predicted
            worst_actual = test_y

        f1_score_value += f1_iteration_score_value

    if save_worst_data:
        np.savetxt(RESULT_FILENAME + 'predicted.txt', worst_predicted)
        np.savetxt(RESULT_FILENAME + 'actual.txt', worst_actual)

    return f1_score_value / iteration_counter
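A minimal usage sketch for cross_validation, assuming a stratified splitter and illustrative data arrays; note RESULT_FILENAME must be defined before save_worst_data=True is used.

# Hypothetical usage sketch (dataset and answers are illustrative arrays).
from sklearn import model_selection
from sklearn.tree import DecisionTreeClassifier

cv = model_selection.StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
mean_f1 = cross_validation(dataset, answers, DecisionTreeClassifier(), cv,
                           save_worst_data=False)
print(f'mean weighted F1: {mean_f1:.3f}')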
Code example #24
def train_and_save(classifier: ClassifierMixin,
                   dataset: str,
                   transforms: List[str],
                   bundled: bool,
                   test_proportion: float = 0.1) -> None:
    """
    Trains on the given dataset and saves model.
    :param classifier: The classifier to train.
    :param dataset: The dataset to train on.
    :param transforms: The transforms to apply to the data.
    :param bundled: Whether to bundle chart classes together.
    :param test_proportion: The proportion of the dataset to use for testing.
    :return: None.
    """
    if not make_data(dataset, transforms, bundled):
        raise FileNotFoundError
    images, labels = np.load(f"{dataset}/X.npy"), np.load(f"{dataset}/Y.npy")
    X_train, X_test, Y_train, Y_test = \
        train_test_split(images, labels, test_size=test_proportion)
    classifier.fit(X_train, Y_train)
    pred = classifier.predict(X_test)
    print(classification_report(Y_test, pred))
    print(pd.DataFrame(confusion_matrix(Y_test, pred)))
    joblib.dump(classifier, f"{dataset}/model.joblib")
Code example #25
File: shared.py  Project: Jiaqi-beep/cs451-practicals
def bootstrap_accuracy(
    f: ClassifierMixin,
    X,  # numpy array
    y,  # numpy array
    num_samples: int = 100,
    random_state: int = random.randint(0, 2 ** 32 - 1),
) -> List[float]:
    """
    Take the classifier ``f`` and compute its bootstrapped accuracy over the dataset ``X``, ``y``.
    Generate ``num_samples`` samples, and seed the resampler with ``random_state``.
    """
    return bootstrap_measure(
        f,
        X,
        y,
        num_samples=num_samples,
        random_state=random_state,
        predict=lambda f, X: f.predict(X),
        measure=accuracy_score,
    )
Code example #26
def bootstrap_accuracy(
        f: ClassifierMixin,
        X,  # numpy array
        y,  # numpy array
        num_samples: int = 100,
        random_state: int = random.randint(0, 2**32 - 1),
) -> List[float]:
    """
    Take the classifier ``f`` and compute its bootstrapped accuracy over the dataset ``X``, ``y``.
    Generate ``num_samples`` samples, and seed the resampler with ``random_state``.
    """
    dist: List[float] = []
    y_pred = f.predict(X)  # type:ignore (predict not on ClassifierMixin)
    # do the bootstrap:
    for trial in range(num_samples):
        sample_pred, sample_truth = resample(
            y_pred, y, random_state=trial + random_state)  # type:ignore
        score = accuracy_score(y_true=sample_truth,
                               y_pred=sample_pred)  # type:ignore
        dist.append(score)
    return dist
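A hedged usage sketch: the bootstrap distribution can be summarized into a rough confidence interval; fitted_model, X_test, and y_test are assumed names.

# Hypothetical usage sketch: summarize the bootstrap distribution.
import numpy as np

acc_dist = bootstrap_accuracy(fitted_model, X_test, y_test, num_samples=200)
low, high = np.percentile(acc_dist, [2.5, 97.5])
print(f"accuracy 95% bootstrap interval: [{low:.3f}, {high:.3f}]")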
Code example #27
File: src.py  Project: fcanay/MachineLearning
	def __init__(self):
		ClassifierMixin.__init__(self)
		self.clasificadores = [RandomForest(), Boosting(), Gradient(), SVM(), SVM2()]  # , Bagging()]
Code example #28
 def __init__(self):
     ClassifierMixin.__init__(self)
     BaseEstimator.__init__(self)
Code example #29
 def score(self,
           X: np.ndarray,
           y: np.ndarray,
           sample_weight: Optional[np.ndarray] = None) -> float:
     X, y = self._validate_input(X, y)
     return ClassifierMixin.score(self, X, y, sample_weight)
Code example #30
 def __init__(self, base_estimator):
     ClassifierMixin.__init__(self)
     BaseEstimator.__init__(self)
     self.base_estimator = base_estimator
Code example #31
 def __init__(self, penalty="l1"):
     BaseEstimator.__init__(self)
     ClassifierMixin.__init__(self)
     self.penalty = penalty
     self.estimator = LogisticRegression(penalty=self.penalty,
                                         solver="liblinear")