예제 #1
0
def load_binary_clf_tracks() -> typing.List[Track]:
    """Build the list of benchmark tracks for binary classification."""

    # Every binary track is scored on accuracy together with F1.
    tracks = []
    for track_name, dataset in (
        ("Phishing", datasets.Phishing()),
        ("Bananas", datasets.Bananas()),
    ):
        tracks.append(
            Track(
                name=track_name,
                dataset=dataset,
                metric=metrics.Accuracy() + metrics.F1(),
            )
        )
    return tracks
예제 #2
0
 # NOTE(review): this signature is truncated in this excerpt — the closing
 # parenthesis and the method body are not visible here.
 def __init__(
     self,
     n_models: int = 10,
     max_features: typing.Union[bool, str, int] = "sqrt",
     lambda_value: int = 6,
     # NOTE(review): the metric and ADWIN defaults are single shared instances
     # created at definition time — confirm the receiving class clones them
     # before mutating, or state will leak across instances.
     metric: metrics.MultiClassMetric = metrics.Accuracy(),
     disable_weighted_vote=False,
     drift_detector: typing.Union[base.DriftDetector,
                                  None] = ADWIN(delta=0.001),
     warning_detector: typing.Union[base.DriftDetector,
                                    None] = ADWIN(delta=0.01),
     # Tree parameters
     grace_period: int = 50,
     max_depth: typing.Optional[int] = None,
     split_criterion: str = "info_gain",
     split_confidence: float = 0.01,
     tie_threshold: float = 0.05,
     leaf_prediction: str = "nba",
     nb_threshold: int = 0,
     nominal_attributes: typing.Optional[list] = None,
     splitter: typing.Optional[Splitter] = None,
     binary_split: bool = False,
     max_size: int = 32,
     memory_estimate_period: int = 2_000_000,
     stop_mem_management: bool = False,
예제 #3
0
 def __init__(self):
     """Build a standard-scaled logistic-regression pipeline trained by SGD."""
     sgd = optim.SGD(0.1)
     scaler = preprocessing.StandardScaler()
     classifier = linear_model.LogisticRegression(sgd)
     self.model = compose.Pipeline(scaler, classifier)
     self.metric = metrics.Accuracy()
     self.count = 0
예제 #4
0
def test_compose():
    """Metrics of the same family compose; mixing families raises ValueError."""

    # Two regression metrics compose, as do two classification metrics.
    _ = metrics.MAE() + metrics.MSE()
    _ = metrics.Accuracy() + metrics.LogLoss()

    # A regression metric may not be combined with a classification one.
    with pytest.raises(ValueError):
        _ = metrics.MSE() + metrics.LogLoss()

    # The same restriction applies anywhere in a longer chain.
    with pytest.raises(ValueError):
        _ = metrics.MSE() + metrics.MAE() + metrics.LogLoss()
예제 #5
0
    def __init__(
        self,
        n_models: int = 10,
        max_features: typing.Union[bool, str, int] = "sqrt",
        lambda_value: int = 6,
        # NOTE(review): the metric and ADWIN defaults are single shared
        # instances created at definition time — confirm the base class clones
        # them before mutating, or state will leak across instances.
        metric: metrics.MultiClassMetric = metrics.Accuracy(),
        disable_weighted_vote=False,
        drift_detector: typing.Union[base.DriftDetector,
                                     None] = ADWIN(delta=0.001),
        warning_detector: typing.Union[base.DriftDetector,
                                       None] = ADWIN(delta=0.01),
        # Tree parameters
        grace_period: int = 50,
        max_depth: typing.Optional[int] = None,
        split_criterion: str = "info_gain",
        split_confidence: float = 0.01,
        tie_threshold: float = 0.05,
        leaf_prediction: str = "nba",
        nb_threshold: int = 0,
        nominal_attributes: typing.Optional[list] = None,
        attr_obs: str = "gaussian",
        attr_obs_params: typing.Optional[dict] = None,
        max_size: int = 32,
        memory_estimate_period: int = 2000000,
        seed: typing.Optional[int] = None,
        **kwargs,
    ):
        """Configure the forest classifier.

        Ensemble-level arguments (``n_models`` through ``seed``) are forwarded
        to the parent constructor; the remaining arguments configure the
        member trees and are stored verbatim on the instance.
        """
        super().__init__(
            n_models=n_models,
            max_features=max_features,
            lambda_value=lambda_value,
            metric=metric,
            disable_weighted_vote=disable_weighted_vote,
            drift_detector=drift_detector,
            warning_detector=warning_detector,
            seed=seed,
        )

        # Internal bookkeeping: count of samples seen so far, and the class
        # presumably used to instantiate ensemble members — confirm usage.
        self._n_samples_seen = 0
        self._base_member_class = ForestMemberClassifier

        # Tree parameters, stored unchanged for later member construction.
        self.grace_period = grace_period
        self.max_depth = max_depth
        self.split_criterion = split_criterion
        self.split_confidence = split_confidence
        self.tie_threshold = tie_threshold
        self.leaf_prediction = leaf_prediction
        self.nb_threshold = nb_threshold
        self.nominal_attributes = nominal_attributes
        self.attr_obs = attr_obs
        self.attr_obs_params = attr_obs_params
        self.max_size = max_size
        self.memory_estimate_period = memory_estimate_period
        # Extra keyword arguments are kept as-is; their consumer is not
        # visible in this excerpt.
        self.kwargs = kwargs
예제 #6
0
    def __init__(self):
        """Load the persisted model if one exists, otherwise create and save one."""

        self.file_path = 'models/decision_tree.joblib'
        self.include_hunger = False
        self.accuracy_metric_float = 0.0
        self.metrics = metrics.Accuracy()

        if not path.exists(self.file_path):
            # First run: start a fresh Hoeffding tree and persist it.
            self.model = tree.HoeffdingTreeClassifier(grace_period=20)
            self.save_model()
        else:
            self.model = load(self.file_path)
예제 #7
0
    def __init__(self, cm: "metrics.ConfusionMatrix" = None):
        """Wire every classification metric to one shared confusion matrix."""

        if cm is None:
            cm = metrics.ConfusionMatrix()
        self.cm = cm

        # Each metric reads from the shared matrix, so a single update to
        # ``self.cm`` keeps every metric below in sync.
        metric_types = {
            "accuracy": metrics.Accuracy,
            "kappa": metrics.CohenKappa,
            "kappa_m": metrics.KappaM,
            "kappa_t": metrics.KappaT,
            "recall": metrics.Recall,
            "micro_recall": metrics.MicroRecall,
            "macro_recall": metrics.MacroRecall,
            "precision": metrics.Precision,
            "micro_precision": metrics.MicroPrecision,
            "macro_precision": metrics.MacroPrecision,
            "f1": metrics.F1,
            "micro_f1": metrics.MicroF1,
            "macro_f1": metrics.MacroF1,
            "geometric_mean": metrics.GeometricMean,
        }
        for attr_name, metric_cls in metric_types.items():
            setattr(self, attr_name, metric_cls(cm=self.cm))
예제 #8
0
 def __init__(self, step, name):
     """Set up the learning pipeline, its tracked metrics, and the step size."""
     self.name = name
     self.optimizer = SynchronousSGD(0.01, name, None)
     # Standardise features before the logistic regression.
     scaler = preprocessing.StandardScaler()
     learner = linear_model.LogisticRegression(self.optimizer)
     self.model = compose.Pipeline(scaler, learner)
     # Metrics updated as the stream is consumed.
     self.metrics = [
         metrics.Accuracy(),
         metrics.MAE(),
         metrics.RMSE(),
         metrics.Precision(),
         metrics.Recall(),
     ]
     self.count = 0
     # Default reporting interval is 50 when no step was supplied.
     self.step = 50 if step is None else int(step)
예제 #9
0
  def __init__(self):
    """Load the persisted classifier if available, else build and persist one."""

    self.file_path = 'models/decision_tree.joblib'
    self.include_hunger = False
    self.accuracy_metric_float = 0.0
    self.accuracy = metrics.Accuracy()

    if not path.exists(self.file_path):
      # First run: wrap a scikit-learn Bernoulli naive Bayes for use with
      # river and persist it. (An MLPClassifier-based variant was previously
      # considered here.)
      self.model = compat.convert_sklearn_to_river(
          estimator=BNB(binarize=.1),
          classes=[0, 1]
      )
      self.save_model()
    else:
      self.model = load(self.file_path)
예제 #10
0
from river import compose
from river import datasets
from river import evaluate
from river import metrics
from river import optim
from river import preprocessing
from river import synth  # was missing: synth is used below but never imported
from river import tree

# Two RBF generators with different seeds form a stream whose source is
# influenced by the model's predictions.
X_y = synth.PredictionInfluenceStream(stream=[
    synth.RandomRBF(seed_model=42,
                    seed_sample=42,
                    n_classes=2,
                    n_features=4,
                    n_centroids=20),
    synth.RandomRBF(seed_model=41,
                    seed_sample=49,
                    n_classes=2,
                    n_features=4,
                    n_centroids=20)
])

# Standardise features, then learn with an adaptive Hoeffding tree.
model = preprocessing.StandardScaler()
model |= tree.HoeffdingAdaptiveTreeClassifier(grace_period=100,
                                              split_confidence=1e-5,
                                              leaf_prediction='nb',
                                              nb_threshold=10,
                                              seed=0)

metric = metrics.Accuracy()

# Progressive evaluation with influence feedback, reporting every 100 samples.
evaluate.evaluate_influential(X_y, model, metric, print_every=100)
예제 #11
0
            # NOTE(review): fragment — the enclosing generator starts above
            # this excerpt; y_true, sample_weights, n and metric are bound
            # there and not visible here.
            y_pred = [
                np.random.dirichlet(np.ones(3)).tolist() for _ in range(n)
            ]
        yield y_true, y_pred, sample_weights

    # Regression metrics are exercised with plain random floats instead.
    if isinstance(metric, base.RegressionMetric):
        yield ([random.random() for _ in range(n)],
               [random.random() for _ in range(n)], sample_weights)

def partial(f, **kwargs):
    """Return ``functools.partial(f, **kwargs)`` carrying ``f``'s metadata.

    ``functools.update_wrapper`` copies attributes such as ``__name__`` and
    ``__doc__`` from the original callable onto the partial object.
    """
    wrapped = functools.partial(f, **kwargs)
    return functools.update_wrapper(wrapped, f)


# Pairs of (streaming metric, equivalent scikit-learn scorer) used to check
# that the online metrics agree with their batch counterparts.
# NOTE(review): the list is truncated in this excerpt — its closing bracket
# is not visible here.
TEST_CASES = [
    (metrics.Accuracy(), sk_metrics.accuracy_score),
    (metrics.Precision(), sk_metrics.precision_score),
    (metrics.MacroPrecision(),
     partial(sk_metrics.precision_score, average='macro')),
    (metrics.MicroPrecision(),
     partial(sk_metrics.precision_score, average='micro')),
    (metrics.WeightedPrecision(),
     partial(sk_metrics.precision_score, average='weighted')),
    (metrics.Recall(), sk_metrics.recall_score),
    (metrics.MacroRecall(), partial(sk_metrics.recall_score, average='macro')),
    (metrics.MicroRecall(), partial(sk_metrics.recall_score, average='micro')),
    (metrics.WeightedRecall(),
     partial(sk_metrics.recall_score, average='weighted')),
    (metrics.FBeta(beta=.5), partial(sk_metrics.fbeta_score, beta=.5)),
    (metrics.MacroFBeta(beta=.5),
     partial(sk_metrics.fbeta_score, beta=.5, average='macro')),
                results["Memory"] = [model._memory_usage] * model.population_size
            results["Name"] = []
            results["Model Performance"] = []
            for idx, me in enumerate(population_metrics):
                results["Name"].append(f'Individual {idx}')
                results["Model Performance"].append(me.get())
                #results["Model Performance"].append(model.population_metrics[idx].get())

            yield results
            next_checkpoint = next(checkpoints, None)

def _evo_rbf_track(name, n_centroids, change_speed, n_samples, seed):
    """Build an EvoTrack over a drifting RandomRBF stream.

    The three public track builders below differ only in the number of
    centroids and the drift speed, so the shared construction lives here.
    """
    dataset = synth.RandomRBFDrift(
        seed_model=7,
        seed_sample=seed,
        n_classes=5,
        n_features=50,
        n_centroids=n_centroids,
        change_speed=change_speed,
    ).take(n_samples)
    return EvoTrack(name, dataset, metrics.Accuracy(), n_samples)


def evo_rbf_accuracy_50_001_track(n_samples=10_000, seed=42):
    """RBF drift track: 50 centroids, drift speed 0.001."""
    return _evo_rbf_track("RBF(50,0.001)", 50, .001, n_samples, seed)


def evo_rbf_accuracy_10_0001_track(n_samples=10_000, seed=42):
    """RBF drift track: 10 centroids, drift speed 0.0001."""
    return _evo_rbf_track("RBF(10,0.0001)", 10, .0001, n_samples, seed)


def evo_rbf_accuracy_10_001_track(n_samples=10_000, seed=42):
    """RBF drift track: 10 centroids, drift speed 0.001."""
    return _evo_rbf_track("RBF(10,0.001)", 10, .001, n_samples, seed)
예제 #13
0
    def __init__(self):
        # Running accuracy kept as a plain float alongside the metric object.
        self.accuracy_metric_float = 0.0
        # Streaming accuracy metric (presumably river's — confirm import).
        self.accuracy = metrics.Accuracy()

        # NOTE(review): this excerpt ends here; the method may continue below.
        self.include_hunger = True