def train_model(model: ClassifierMixin, data_time_range: List[str], output_path: str):
    """Fetch a dataset from Elasticsearch, train ``model`` on it and save the result.

    :param model: unfitted sklearn-style classifier; fit here and dumped to disk.
    :param data_time_range: two-element ``[start_time, end_time]`` window used to
        query the training data.
    :param output_path: directory into which the trained model is written as
        ``<ClassName>.joblib``.
    """
    import os

    es_host = ESConnection(es_host='http://localhost:9200')
    dataset = ml_utils.get_data(start_time=data_time_range[0],
                                end_time=data_time_range[1],
                                es_host=es_host)
    # Snapshot the raw dataset, then reload it so training uses exactly the
    # bytes that were cached on disk.
    dataset.to_pickle('data/dataset.pkl')
    dataset = pd.read_pickle('data/dataset.pkl')
    print(len(dataset.columns))

    y = dataset['target']
    X = dataset.drop(columns=['target'])
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.33, random_state=17)

    print('Training model')
    model = model.fit(X_train, y_train)
    print('Finished training')

    prediction = model.predict(X_test)
    print(confusion_matrix(y_test, prediction))

    # os.path.join is robust to a trailing separator in output_path, unlike
    # the previous manual "output_path + '/'" concatenation.
    dump(model, os.path.join(output_path, type(model).__name__ + '.joblib'))
def get_score(dataset: np.array, answers: np.array, parametrs: int,
              model: base.ClassifierMixin, score_func) \
        -> (float, float, float, float, float):
    """Score ``model`` on the ``parametrs`` best features of ``dataset``.

    Pipeline: select the k=``parametrs`` best features with SelectKBest and
    ``score_func``, fit/score on one stratified split, then rescale the
    selected features to [0, 1] and run 5/10/20-fold and random-sampling
    cross-validation via the module's ``kfold_cv`` / ``random_sampling_cv``.

    Returns a 5-tuple:
    (single-split weighted F1, 5-fold score, 10-fold score, 20-fold score,
    random-sampling score).
    """
    selecter = feature_selection.SelectKBest(score_func=score_func, k=parametrs)
    selecter.fit(dataset, answers)
    transformed_dataset = selecter.transform(dataset)
    # Single stratified hold-out split for the "simple" score.
    x_train, x_test, y_train, y_test = model_selection.train_test_split(
        transformed_dataset, answers, random_state=0, stratify=answers)
    model.fit(x_train, y_train)
    prediction = model.predict(x_test)
    simple_score = metrics.f1_score(y_test, prediction, average='weighted')
    # The cross-validation runs use min-max-scaled copies of the selected
    # features, cast to float32.
    buffer_test = preprocessing.minmax_scale(transformed_dataset,
                                             feature_range=(0, 1), axis=0)
    nptraining = np.array(buffer_test, 'float32')
    nptarget = np.array(answers, 'float32')
    print('sample_score is done')
    k5_score = kfold_cv(5, nptraining, nptarget, model)
    print('k5_score is done')
    k10_score = kfold_cv(10, nptraining, nptarget, model)
    print('k10_score is done')
    k20_score = kfold_cv(20, nptraining, nptarget, model)
    print('k20_score is done')
    random_score = random_sampling_cv(nptraining, nptarget, model)
    return simple_score, k5_score, k10_score, k20_score, random_score
def __init__(self, estimator=None, max_depth=20, min_samples_split=2,
             min_samples_leaf=2, min_weight_fraction_leaf=0.0,
             fit_improve_algo='auto', p1p2=0.09, gamma=1., verbose=0):
    """Initialise the decision-tree-of-logistic-regressions classifier.

    :param estimator: node estimator; a plain ``LogisticRegression`` when None
    :param max_depth: maximum tree depth, must not be None and must be <= 1024
    :param min_samples_split: minimum samples required to split a node
    :param min_samples_leaf: minimum samples required in a leaf
    :param min_weight_fraction_leaf: minimum weighted fraction in a leaf
    :param fit_improve_algo: one of
        ``DecisionTreeLogisticRegression._fit_improve_algo_values``
    :param p1p2: threshold parameter used by the fit-improve step
    :param gamma: weighting coefficient
    :param verbose: verbosity level
    """
    ClassifierMixin.__init__(self)
    BaseEstimator.__init__(self)
    # logistic regression is the default node model
    if estimator is None:
        self.estimator = LogisticRegression()
    else:
        self.estimator = estimator
    # Validate depth before storing any hyper-parameter.
    if max_depth is None:
        raise ValueError("'max_depth' cannot be None.")
    if max_depth > 1024:
        raise ValueError("'max_depth' must be <= 1024.")
    self.max_depth = max_depth
    self.min_samples_split = min_samples_split
    self.min_samples_leaf = min_samples_leaf
    self.min_weight_fraction_leaf = min_weight_fraction_leaf
    self.fit_improve_algo = fit_improve_algo
    self.p1p2 = p1p2
    self.gamma = gamma
    self.verbose = verbose
    # Reject unknown fit-improve strategies early, at construction time.
    if self.fit_improve_algo not in DecisionTreeLogisticRegression._fit_improve_algo_values:
        raise ValueError("fit_improve_algo='{}' not in {}".format(
            self.fit_improve_algo,
            DecisionTreeLogisticRegression._fit_improve_algo_values))
def __init__(self, estimator=None, clus=None, **kwargs):
    """
    @param      estimator   :epkg:`sklearn:linear_model:LogisticRegression`
                            by default
    @param      clus        clustering applied on each class,
                            by default k-means with two classes
    @param      kwargs      sent to :meth:`set_params
                            <mlinsights.mlmodel.classification_kmeans.
                            ClassifierAfterKMeans.set_params>`, see its
                            documentation to understand how to specify
                            parameters

    ``clus`` must expose a ``transform`` method (checked below); k-means does.
    """
    ClassifierMixin.__init__(self)
    BaseEstimator.__init__(self)
    # Fall back to the documented defaults when nothing is supplied.
    if estimator is None:
        estimator = LogisticRegression()
    if clus is None:
        clus = KMeans(n_clusters=2)
    self.estimator = estimator
    self.clus = clus
    if not hasattr(clus, "transform"):
        raise AttributeError(  # pragma: no cover
            "clus does not have a transform method.")
    # Remaining keyword arguments are treated as estimator/cluster parameters.
    if kwargs:
        self.set_params(**kwargs)
def __init__(self, estimator=None, threshold=0.75):
    """Store the underlying estimator and the decision threshold.

    @param estimator    base binary classifier; when None, a liblinear
                        ``LogisticRegression`` is used
    @param threshold    probability threshold kept for later use
    """
    ClassifierMixin.__init__(self)
    BaseEstimator.__init__(self)
    if estimator is None:
        chosen = LogisticRegression(solver='liblinear')
    else:
        chosen = estimator
    self.estimator = chosen
    self.threshold = threshold
def __init__(self, ixname='ix', source_suffix='source', target_suffix='target', **kwargs):
    """Initialise the cluster-based explorer.

    Args:
        ixname (str): name of the index column
        source_suffix (str): suffix identifying source-side columns
        target_suffix (str): suffix identifying target-side columns
        **kwargs: accepted for signature compatibility; not used here
    """
    ClassifierMixin.__init__(self)
    self.ixname = ixname
    self.source_suffix = source_suffix
    self.target_suffix = target_suffix
    self.ixnamesource, self.ixnametarget, self.ixnamepairs = concatixnames(
        ixname=self.ixname,
        source_suffix=self.source_suffix,
        target_suffix=self.target_suffix)
    # Cluster bookkeeping, all populated later (by fit):
    self.clusters = None      # cluster assignments
    self.n_clusters = None    # number of unique clusters
    self.nomatch = None       # clusters where no match has been found
    self.allmatch = None      # clusters where all elements are positive matches
    self.mixedmatch = None    # clusters mixing matches and non-matches
    self.notfound = None      # clusters not found (added to no-match)
    self.fitted = False
    # NOTE: removed a dead trailing `pass` statement.
def __init__(self, transformer, classifier, ixname='ix', source_suffix='source',
             target_suffix='target', **kwargs):
    """Pair classifier: ``transformer`` builds features, ``classifier`` scores them.

    Args:
        transformer (TransformerMixin): feature builder applied to the pairs
        classifier (ClassifierMixin): estimator trained on transformed pairs
        ixname (str): name of the index column
        source_suffix (str): suffix identifying source-side columns
        target_suffix (str): suffix identifying target-side columns
        **kwargs: accepted for signature compatibility; not used here.
            (The previous docstring documented ``n_jobs`` and ``pruning_ths``,
            which are not parameters of this method.)
    """
    ClassifierMixin.__init__(self)
    self.ixname = ixname
    self.source_suffix = source_suffix
    self.target_suffix = target_suffix
    self.ixnamesource, self.ixnametarget, self.ixnamepairs = concatixnames(
        ixname=self.ixname,
        source_suffix=self.source_suffix,
        target_suffix=self.target_suffix)
    self.fitted = False
    self.transformer = transformer
    self.classifier = classifier
    # NOTE: removed a dead trailing `pass` statement.
def __init__(self, binner=None, estimator=None, n_jobs=None,
             random_state=None, verbose=False):
    """
    @param      binner          transformer or predictor which creates the buckets
    @param      estimator       predictor trained on every bucket
    @param      n_jobs          number of parallel jobs (for training and predicting)
    @param      random_state    to pick up random examples when buckets do not
                                contain enough examples of each class
    @param      verbose         boolean or use ``'tqdm'`` to use :epkg:`tqdm`
                                to fit the estimators

    *binner* allows the following values:

    - ``tree``: the model is :epkg:`sklearn:tree:DecisionTreeClassifier`
    - ``'bins'``: the model :epkg:`sklearn:preprocessing:KBinsDiscretizer`
    - any instantiated model

    *estimator* allows the following values:

    - ``None``: the model is :epkg:`sklearn:linear_model:LogisticRegression`
    - any instantiated model
    """
    # Resolve the documented defaults before delegating to the base class.
    if estimator is None:
        estimator = LogisticRegression()
    # Both the 'tree' keyword and None map to a decision-tree binner.
    if binner in ('tree', None):
        binner = DecisionTreeClassifier(min_samples_leaf=5)
    ClassifierMixin.__init__(self)
    PiecewiseEstimator.__init__(
        self, binner=binner, estimator=estimator, n_jobs=n_jobs, verbose=verbose)
    self.random_state = random_state
def __init__(self, classifier, ixname='ix', source_suffix='source',
             target_suffix='target', **kwargs):
    """Wrapper that lets a classifier train on partial data.

    X and y may not share the same index (because of pruning steps, ...);
    the wrapped classifier is fit on the common index only.

    Args:
        classifier (ClassifierMixin): classifier to use; should be the
            output of the pipeline
        ixname (str): name of the index column
        source_suffix (str): suffix identifying source-side columns
        target_suffix (str): suffix identifying target-side columns
        **kwargs: accepted for signature compatibility; not used here
    """
    ClassifierMixin.__init__(self)
    self.ixname = ixname
    self.source_suffix = source_suffix
    self.target_suffix = target_suffix
    self.ixnamesource, self.ixnametarget, self.ixnamepairs = concatixnames(
        ixname=self.ixname,
        source_suffix=self.source_suffix,
        target_suffix=self.target_suffix)
    self.fitted = False
    self.classifier = classifier
    # NOTE: removed a dead trailing `pass` statement.
def __init__(self, connector, pruningclf, sbsmodel, classifier, ixname='ix',
             source_suffix='source', target_suffix='target', **kwargs):
    """End-to-end matching pipeline: connect, prune, score side-by-side, classify.

    Args:
        connector (ConnectorMixin): connector (scorer) used to do the calculation
        pruningclf (Explorer): classifier used to do the pruning
            (0 = no match, 1 = potential match, 2 = sure match)
        sbsmodel (TransformerMixin): side-by-side scorer; can be a
            FeatureUnion, Pipeline, ...
        classifier (ClassifierMixin): classifier used to do the prediction
        ixname (str): name of the index column, e.g. 'ix'
        source_suffix (str): e.g. 'left'
        target_suffix (str): e.g. 'right'
        **kwargs: accepted for signature compatibility; not used here
    """
    ClassifierMixin.__init__(self)
    self.ixname = ixname
    self.source_suffix = source_suffix
    self.target_suffix = target_suffix
    self.ixnamesource, self.ixnametarget, self.ixnamepairs = concatixnames(
        ixname=self.ixname,
        source_suffix=self.source_suffix,
        target_suffix=self.target_suffix)
    self.fitted = False
    self.connector = connector
    self.pruningclf = pruningclf
    self.sbsmodel = sbsmodel
    self.classifier = classifier
    # NOTE: removed a dead trailing `pass` statement.
def get_preds_probas(est: ClassifierMixin, X_test: DataFrame, y_test: Series,
                     mapper_dict: Dict) -> DataFrame:
    """
    Get prediction probabilities (if available) or return true and predicted
    labels.

    ``est`` is expected to be a fitted Pipeline whose final step is named
    "clf" (see the ``named_steps["clf"]`` access below). ``mapper_dict`` maps
    predicted labels to the column positions used by ``predict_proba``
    — presumably label -> class index; verify against the caller.
    """
    df_preds = DataFrame(est.predict(X_test), index=X_test.index)
    if hasattr(est.named_steps["clf"], "predict_proba"):
        # Get prediction probabilities (if available)
        df_probas = DataFrame(est.predict_proba(X_test), index=X_test.index)
        # Append prediction and prediction probabilities
        df_summ = concat([df_preds, df_probas], axis=1)
        # One probability column per class observed in y_test.
        df_summ.columns = ["predicted_label"] + [
            f"probability_of_{i}" for i in range(0, len(np.unique(y_test)))
        ]
        # Get label (class) with maximum prediction probability for each row
        df_summ["max_class_number_manually"] = df_probas.idxmax(axis=1)
        df_summ["probability_of_max_class"] = df_probas.max(axis=1)
        # Sanity check: the argmax of predict_proba must agree with predict()
        # once labels are mapped to column numbers via mapper_dict.
        lhs = df_summ["max_class_number_manually"]
        rhs = df_summ["predicted_label"].replace(mapper_dict)
        assert (lhs == rhs).eq(True).all()
    else:
        df_summ = df_preds.copy()
    # Get true label as the first column of the summary.
    df_summ.insert(0, "true_label", y_test)
    return df_summ
def random_sampling_cv(dataset: np.ndarray, answers: np.ndarray,
                       model: base.ClassifierMixin) -> float:
    """Score ``model`` on a single shuffled, stratified train/test split.

    Returns the weighted F1 score on the held-out portion.
    """
    train_x, test_x, train_y, test_y = model_selection.train_test_split(
        dataset, answers, shuffle=True, stratify=answers)
    model.fit(train_x, train_y)
    predicted = model.predict(test_x)
    return metrics.f1_score(test_y, predicted, average='weighted')
def get_score(
        model: ClassifierMixin,
        X_train: pd.DataFrame,
        y_train: pd.Series,
        X_test: pd.DataFrame,
        y_test: pd.Series,
) -> float:
    """Fit ``model`` on the training split and return its score on the test split.

    The return annotation was ``int`` but ``score`` returns the estimator's
    default metric as a float (mean accuracy for sklearn classifiers).

    :param model: unfitted classifier; fit in place.
    :param X_train: training features.
    :param y_train: training labels.
    :param X_test: test features.
    :param y_test: test labels.
    :return: the test-set score as a float.
    """
    model.fit(X_train, y_train)
    score = model.score(X_test, y_test)
    return score
def test_classifier_without_classes_attribute(
        estimator: ClassifierMixin) -> None:
    """
    Test that prefitted classifier without 'classes_ 'attribute raises error.
    """
    estimator.fit(X_toy, y_toy)
    # Strip the attribute sklearn sets during fit; for a Pipeline it lives on
    # the final step, not on the pipeline object itself.
    if isinstance(estimator, Pipeline):
        delattr(estimator[-1], "classes_")
    else:
        delattr(estimator, "classes_")
    # cv="prefit" makes MapieClassifier use the (now broken) estimator as-is,
    # so its fit must detect the missing attribute and raise.
    mapie = MapieClassifier(estimator=estimator, cv="prefit")
    with pytest.raises(AttributeError,
                       match=r".*does not contain 'classes_'.*"):
        mapie.fit(X_toy, y_toy)
def _predict_oof_model(
    self,
    estimator: ClassifierMixin,
    X: ArrayLike,
) -> NDArray:
    """
    Predict probabilities of a test set from a fitted estimator.

    Parameters
    ----------
    estimator : ClassifierMixin
        Fitted estimator.
    X : ArrayLike
        Test set.

    Returns
    -------
    ArrayLike
        Predicted probabilities, with one column per label seen in y.
    """
    probabilities = estimator.predict_proba(X)
    # The fold estimator may have seen fewer classes than the full dataset;
    # in that case pad the probability matrix to cover every label.
    if len(estimator.classes_) != self.n_classes_:
        probabilities = self._fix_number_of_classes(
            estimator.classes_, probabilities
        )
    return probabilities
def test_probabilities(model: ClassifierMixin, X: np.array, y: pd.Series,
                       bins: int = 10, threshold: float = 0.5):
    """Print confusion matrix based on class probability.

    Buckets the positive-class probabilities into ``bins`` equal-width bins
    and prints, per bucket, the number of positive/negative labels and the
    bucket accuracy (below ``threshold`` a 0-label counts as correct,
    at/above it a 1-label does).
    """
    # Probability of the positive class (column 1 of predict_proba).
    probs = [p[1] for p in model.predict_proba(X)]
    print('\tProbabilities')
    df = pd.DataFrame({'prob': probs, 'label': y})
    step = 1 / bins
    # BUG FIX: labels were generated with range(10) regardless of ``bins``,
    # so pd.cut raised for any bins != 10. Generate one label per bucket,
    # rounded to 3 decimals so labels remain unique for finer binnings
    # (values identical to the old output for the default bins=10).
    cut_labels = [round(step * f, 3) for f in range(bins)]
    by_prob = (df.groupby(pd.cut(df['prob'], bins, labels=cut_labels))
               .agg(['sum', 'count'])['label'])
    print('\t\tprobs\t1\t0\tacc')
    for index, row in by_prob.iloc[::-1].iterrows():
        ones = row['sum']
        # Empty buckets aggregate to NaN; treat them as zero positives.
        if math.isnan(ones):
            ones = 0
        else:
            ones = int(ones)
        count = row['count']
        zeros = int(count) - ones
        if count > 0:
            acc = zeros / count if index < threshold else ones / count
        else:
            acc = 0.0
        print(f'\t\t{index}\t{ones}\t{zeros}\t{acc:.3f}')
def ml_cross_val_score(
        classifier: ClassifierMixin,
        X: pd.DataFrame,
        y: pd.Series,
        cv_gen: BaseCrossValidator,
        sample_weight_train: np.ndarray = None,
        sample_weight_score: np.ndarray = None,
        scoring: Callable[[np.array, np.array], float] = log_loss):
    # pylint: disable=invalid-name
    # pylint: disable=comparison-with-callable
    """
    Advances in Financial Machine Learning, Snippet 7.4, page 110.

    Using the PurgedKFold Class.

    Function to run a cross-validation evaluation of the using sample weights and a custom CV generator.

    Note: This function is different to the book in that it requires the user to pass through a CV object. The book
    will accept a None value as a default and then resort to using PurgedCV, this also meant that extra arguments had to
    be passed to the function. To correct this we have removed the default and require the user to pass a CV object to
    the function.

    Example:

    .. code-block:: python

        cv_gen = PurgedKFold(n_splits=n_splits, samples_info_sets=samples_info_sets, pct_embargo=pct_embargo)
        scores_array = ml_cross_val_score(classifier, X, y, cv_gen, sample_weight_train=sample_train,
                                          sample_weight_score=sample_score, scoring=accuracy_score)

    :param classifier: (ClassifierMixin) A sk-learn Classifier object instance.
    :param X: (pd.DataFrame) The dataset of records to evaluate.
    :param y: (pd.Series) The labels corresponding to the X dataset.
    :param cv_gen: (BaseCrossValidator) Cross Validation generator object instance.
    :param sample_weight_train: (np.array) Sample weights used to train the model for each record in the dataset.
    :param sample_weight_score: (np.array) Sample weights used to evaluate the model quality.
    :param scoring: (Callable) A metric scoring, can be custom sklearn metric.
    :return: (np.array) The computed score, one entry per fold.
    """
    # If no sample_weight then broadcast a value of 1 to all samples (full weight).
    if sample_weight_train is None:
        sample_weight_train = np.ones((X.shape[0],))

    if sample_weight_score is None:
        sample_weight_score = np.ones((X.shape[0],))

    # Score model on KFolds
    ret_scores = []
    for train, test in cv_gen.split(X=X, y=y):
        # Refit on this fold's training slice with the training weights.
        fit = classifier.fit(X=X.iloc[train, :], y=y.iloc[train],
                             sample_weight=sample_weight_train[train])
        if scoring == log_loss:
            # log_loss needs probabilities; negate so that higher is better.
            prob = fit.predict_proba(X.iloc[test, :])
            score = -1 * scoring(y.iloc[test], prob,
                                 sample_weight=sample_weight_score[test],
                                 labels=classifier.classes_)
        else:
            pred = fit.predict(X.iloc[test, :])
            score = scoring(y.iloc[test], pred,
                            sample_weight=sample_weight_score[test])
        ret_scores.append(score)
    return np.array(ret_scores)
def sklearn_evaluator(
    X_test: np.ndarray,
    y_test: np.ndarray,
    model: ClassifierMixin,
) -> float:
    """Calculate accuracy score with classifier.

    The test inputs are flattened to 2-D (one row per sample) before being
    handed to ``model.score``.
    """
    n_samples = X_test.shape[0]
    flat_features = X_test.reshape((n_samples, -1))
    return model.score(flat_features, y_test)
def ml_cross_val_score(
        classifier: ClassifierMixin,
        X: pd.DataFrame,
        y: pd.Series,
        cv_gen: BaseCrossValidator,
        sample_weight: np.ndarray = None,
        scoring: str = 'neg_log_loss'):
    # pylint: disable=invalid-name
    """
    Snippet 7.4, page 110, Using the PurgedKFold Class.

    Function to run a cross-validation evaluation of the using sample weights and a custom CV generator.

    Note: This function is different to the book in that it requires the user to pass through a CV object. The book
    will accept a None value as a default and then resort to using PurgedCV, this also meant that extra arguments had to
    be passed to the function. To correct this we have removed the default and require the user to pass a CV object to
    the function.

    Example:

    .. code-block:: python

        cv_gen = PurgedKFold(n_splits=n_splits, samples_info_sets=samples_info_sets, pct_embargo=pct_embargo)
        scores_array = ml_cross_val_score(classifier, X, y, cv_gen, sample_weight=None, scoring='neg_log_loss')

    :param classifier: A sk-learn Classifier object instance.
    :param X: The dataset of records to evaluate.
    :param y: The labels corresponding to the X dataset.
    :param cv_gen: Cross Validation generator object instance.
    :param sample_weight: A numpy array of weights for each record in the dataset.
    :param scoring: A metric name to use for scoring; currently supports `neg_log_loss`, `accuracy`, `f1`, `precision`,
        `recall`, and `roc_auc`.
    :return: The computed score as a numpy array, one entry per fold.
    """
    # Define scoring metrics: map the metric name to its sklearn implementation.
    scoring_func_dict = {'neg_log_loss': log_loss,
                         'accuracy': accuracy_score,
                         'f1': f1_score,
                         'precision': precision_score,
                         'recall': recall_score,
                         'roc_auc': roc_auc_score}
    try:
        scoring_func = scoring_func_dict[scoring]
    except KeyError:
        raise ValueError('Wrong scoring method. Select from: neg_log_loss, accuracy, f1, precision, recall, roc_auc')

    # If no sample_weight then broadcast a value of 1 to all samples (full weight).
    if sample_weight is None:
        sample_weight = np.ones((X.shape[0],))

    # Score model on KFolds
    ret_scores = []
    for train, test in cv_gen.split(X=X, y=y):
        # Refit on this fold's training slice, weighting each sample.
        fit = classifier.fit(X=X.iloc[train, :], y=y.iloc[train],
                             sample_weight=sample_weight[train])
        if scoring == 'neg_log_loss':
            # log_loss needs probabilities; negate so that higher is better.
            prob = fit.predict_proba(X.iloc[test, :])
            score = -1 * scoring_func(y.iloc[test], prob,
                                      sample_weight=sample_weight[test],
                                      labels=classifier.classes_)
        else:
            pred = fit.predict(X.iloc[test, :])
            score = scoring_func(y.iloc[test], pred,
                                 sample_weight=sample_weight[test])
        ret_scores.append(score)
    return np.array(ret_scores)
def get_score(dataset: np.array, answers: np.array, parametrs: int,
              model: base.ClassifierMixin, score_func) \
        -> (float, float):
    """Score ``model`` on the ``parametrs`` best features of ``dataset``.

    Selects k=``parametrs`` features with SelectKBest and ``score_func``,
    scores one 75/25 split with binary F1, then runs 5-fold cross-validation
    on a min-max-scaled float32 copy of the data via ``kfold_cv``.

    The return annotation previously said ``(int, int)``; both values are
    floats (an F1 score and a cross-validation score).

    :return: (single-split binary F1 score, 5-fold CV score)
    """
    selecter = feature_selection.SelectKBest(score_func=score_func, k=parametrs)
    selecter.fit(dataset, answers)
    transformed_dataset = selecter.transform(dataset)
    x_train, x_test, y_train, y_test = model_selection.train_test_split(
        transformed_dataset, answers, test_size=0.25, random_state=0)
    model.fit(x_train, y_train)
    prediction = model.predict(x_test)
    simple_score = metrics.f1_score(y_test, prediction, average='binary')
    # NOTE(review): the 5-tuple variant of get_score scales the *selected*
    # features; here the full dataset is scaled for the k-fold run — confirm
    # whether that difference is intentional.
    buffer_test = preprocessing.minmax_scale(dataset, feature_range=(0, 1), axis=0)
    nptraining = np.array(buffer_test, 'float32')
    nptarget = np.array(answers, 'float32')
    k5_score = kfold_cv(5, nptraining, nptarget, model, True)
    return simple_score, k5_score
def evaluate_on_datasets(predictor: ClassifierMixin, datasets):
    """Predict on each ``(x, y_true)`` dataset and report quadratic-weighted kappa.

    Prints the mean kappa followed by the per-dataset values.

    :param predictor: fitted classifier exposing ``predict``.
    :param datasets: iterable of ``(x, y_true)`` pairs.
    :return: list of prediction arrays, one per dataset.
    """
    y_preds = []
    mean_kappa = []
    # The enumerate index was unused; iterate the pairs directly.
    for x, y_true in datasets:
        y_pred = predictor.predict(x)
        y_preds.append(y_pred)
        kappa_hold = cohen_kappa_score(y_true, y_pred, weights='quadratic')
        mean_kappa.append(kappa_hold)
    print(np.mean(mean_kappa), mean_kappa)
    return y_preds
def _train(train_data: DataFrame, classifier: ClassifierMixin, clusterer: Clustering) -> dict:
    """Fit one classifier per cluster of the training data.

    :param train_data: labelled training frame (must contain a 'label' column).
    :param classifier: template classifier, cloned between clusters.
    :param clusterer: clustering model used to split ``train_data``.
    :return: dict mapping the clusterer model-type key to ``clusterer`` and
        the classifier model-type key to the per-cluster classifier dict.
    """
    models = dict()
    train_data = clusterer.cluster_data(train_data)
    for cluster in range(clusterer.n_clusters):
        cluster_train_df = train_data[cluster]
        if not cluster_train_df.empty:
            cluster_targets_df = DataFrame(cluster_train_df['label'])
            try:
                # drop(columns=...) replaces the deprecated positional-axis
                # form drop('label', 1), which was removed in pandas 2.0.
                classifier.fit(
                    cluster_train_df.drop(columns='label'),
                    cluster_targets_df.values.ravel())
            except (NotImplementedError, KeyError):
                # Some incremental learners only expose partial_fit.
                classifier.partial_fit(
                    cluster_train_df.drop(columns='label').values,
                    cluster_targets_df.values.ravel())
            # (Removed a redundant `except Exception as e: raise e` clause —
            # other exceptions now propagate with their original traceback.)
            models[cluster] = classifier
            try:
                classifier = clone(classifier)
            except TypeError:
                # safe=False copies fitted state, so reset before reuse.
                classifier = clone(classifier, safe=False)
                classifier.reset()
    return {
        ModelType.CLUSTERER.value: clusterer,
        ModelType.CLASSIFIER.value: models
    }
def cross_validation(dataset: np.ndarray, answers: np.ndarray, model: base.ClassifierMixin,
                     cross_validator: model_selection.BaseCrossValidator,
                     save_worst_data: bool) -> float:
    """Average weighted-F1 score of ``model`` over the folds of ``cross_validator``.

    Tracks the weakest fold along the way; when ``save_worst_data`` is set,
    its predictions and true labels are written next to ``RESULT_FILENAME``.
    """
    fold_scores = []
    # (score, predicted, actual) of the weakest fold seen so far.
    worst_score = 1.0
    worst_pred = None
    worst_truth = None
    for fold_train, fold_test in cross_validator.split(dataset, answers):
        model.fit(dataset[fold_train], answers[fold_train])
        fold_pred = model.predict(dataset[fold_test])
        fold_truth = answers[fold_test]
        fold_score = metrics.f1_score(fold_truth, fold_pred, average='weighted')
        if fold_score <= worst_score:
            worst_score = fold_score
            worst_pred = fold_pred
            worst_truth = fold_truth
        fold_scores.append(fold_score)
    if save_worst_data:
        np.savetxt(RESULT_FILENAME + 'predicted.txt', worst_pred)
        np.savetxt(RESULT_FILENAME + 'actual.txt', worst_truth)
    return sum(fold_scores) / len(fold_scores)
def train_and_save(classifier: ClassifierMixin, dataset: str, transforms: List[str],
                   bundled: bool, test_proportion: float = 0.1) -> None:
    """
    Trains on the given dataset and saves model.

    :param classifier: The classifier to train.
    :param dataset: The dataset to train on.
    :param transforms: The transforms to apply to the data.
    :param bundled: Whether to bundle chart classes together.
    :param test_proportion: Proportion (0-1) of the dataset held out for
        testing. (Annotation fixed: the default 0.1 is a float, not an int.)
    :return: None.
    """
    if not make_data(dataset, transforms, bundled):
        # Carry the dataset name so the failure is diagnosable.
        raise FileNotFoundError(f"could not prepare data for dataset '{dataset}'")
    images, labels = np.load(f"{dataset}/X.npy"), np.load(f"{dataset}/Y.npy")
    X_train, X_test, Y_train, Y_test = \
        train_test_split(images, labels, test_size=test_proportion)
    classifier.fit(X_train, Y_train)
    pred = classifier.predict(X_test)
    print(classification_report(Y_test, pred))
    print(pd.DataFrame(confusion_matrix(Y_test, pred)))
    joblib.dump(classifier, f"{dataset}/model.joblib")
def bootstrap_accuracy(
    f: ClassifierMixin,
    X,  # numpy array
    y,  # numpy array
    num_samples: int = 100,
    random_state: "int | None" = None,
) -> List[float]:
    """
    Take the classifier ``f``, and compute it's bootstrapped accuracy
    over the dataset ``X``,``y``.

    Generate ``num_samples`` samples; and seed the resampler with
    ``random_state`` (a fresh random seed when None).
    """
    # BUG FIX: the old default ``random.randint(...)`` in the signature was
    # evaluated once at import time, so every call silently shared the same
    # "random" seed. Draw it here instead, per call.
    if random_state is None:
        random_state = random.randint(0, 2 ** 32 - 1)
    return bootstrap_measure(
        f,
        X,
        y,
        num_samples=num_samples,
        random_state=random_state,
        predict=lambda f, X: f.predict(X),
        measure=accuracy_score,
    )
def bootstrap_accuracy(
    f: ClassifierMixin,
    X,  # numpy array
    y,  # numpy array
    num_samples: int = 100,
    random_state: "int | None" = None,
) -> List[float]:
    """
    Take the classifier ``f``, and compute it's bootstrapped accuracy
    over the dataset ``X``,``y``.

    Generate ``num_samples`` samples; and seed the resampler with
    ``random_state`` (a fresh random seed when None).
    """
    # BUG FIX: the old default ``random.randint(...)`` in the signature was
    # evaluated once at import time, so every call silently shared the same
    # "random" seed. Draw it here instead, per call.
    if random_state is None:
        random_state = random.randint(0, 2**32 - 1)
    dist: List[float] = []
    # Predict once; only the resampling varies across trials.
    y_pred = f.predict(X)  # type:ignore (predict not on ClassifierMixin)
    # do the bootstrap:
    for trial in range(num_samples):
        sample_pred, sample_truth = resample(
            y_pred, y, random_state=trial + random_state)  # type:ignore
        score = accuracy_score(y_true=sample_truth, y_pred=sample_pred)  # type:ignore
        dist.append(score)
    return dist
def __init__(self):
    """Build the ensemble from the project's individual classifier wrappers."""
    ClassifierMixin.__init__(self)
    # Bagging is deliberately left out of the pool (kept commented);
    # confirm the reason before re-enabling it.
    self.clasificadores = [RandomForest(), Boosting(), Gradient(), SVM(), SVM2()]  # ,Bagging()]
def __init__(self):
    """Initialise the sklearn mixin and base classes; no parameters of its own."""
    ClassifierMixin.__init__(self)
    BaseEstimator.__init__(self)
def score(self, X: np.ndarray, y: np.ndarray,
          sample_weight: Optional[np.ndarray] = None) -> float:
    """Return the classifier score on ``(X, y)`` after validating the inputs.

    Delegates to ``ClassifierMixin.score`` (sklearn's default classifier
    metric, mean accuracy), called explicitly as an unbound method so the
    validated ``X``/``y`` are used.
    """
    X, y = self._validate_input(X, y)
    return ClassifierMixin.score(self, X, y, sample_weight)
def __init__(self, base_estimator):
    """Wrap ``base_estimator`` as the underlying classifier.

    :param base_estimator: estimator stored as-is; not cloned or fitted here.
    """
    ClassifierMixin.__init__(self)
    BaseEstimator.__init__(self)
    self.base_estimator = base_estimator
def __init__(self, penalty="l1"):
    """Initialise a liblinear logistic regression with the given penalty.

    :param penalty: regularisation penalty forwarded to LogisticRegression
        (default "l1"; liblinear supports "l1" and "l2").

    NOTE(review): the estimator is constructed eagerly here; sklearn
    convention is to defer object creation to ``fit`` so ``set_params``
    on ``penalty`` also updates the estimator — confirm whether callers
    rely on the current behaviour.
    """
    BaseEstimator.__init__(self)
    ClassifierMixin.__init__(self)
    self.penalty = penalty
    self.estimator = LogisticRegression(penalty=self.penalty, solver="liblinear")