Beispiel #1
0
def _train(train_data: DataFrame, classifier: ClassifierMixin,
           clusterer: Clustering) -> dict:
    """Fit one classifier per non-empty cluster of the training data.

    Args:
        train_data: Training frame; must contain a ``label`` column, which is
            used as the target. All remaining columns are used as features.
        classifier: Estimator fitted on the first non-empty cluster. A copy
            of it is made for every following cluster.
        clusterer: Object exposing ``cluster_data`` (splits the frame so it
            can be indexed by cluster number) and ``n_clusters``.

    Returns:
        A dict with the clusterer under ``ModelType.CLUSTERER.value`` and a
        ``{cluster number: fitted classifier}`` dict under
        ``ModelType.CLASSIFIER.value``. Empty clusters get no entry.
    """
    models = {}

    clustered_data = clusterer.cluster_data(train_data)

    for cluster in range(clusterer.n_clusters):
        cluster_train_df = clustered_data[cluster]
        if cluster_train_df.empty:
            continue

        targets = cluster_train_df['label'].values.ravel()
        # Keyword form: the positional ``axis`` argument of ``drop`` was
        # removed in pandas 2.0.
        features = cluster_train_df.drop(columns='label')
        try:
            classifier.fit(features, targets)
        except (NotImplementedError, KeyError):
            # Fallback for estimators that only support incremental training
            # and/or cannot consume a DataFrame: pass the raw array instead.
            classifier.partial_fit(features.values, targets)

        models[cluster] = classifier
        # Prepare a copy for the next cluster so that the model stored above
        # is not trained again.
        try:
            classifier = clone(classifier)
        except TypeError:
            # Not clonable in safe mode: fall back to the non-safe clone and
            # reset it explicitly.
            classifier = clone(classifier, safe=False)
            classifier.reset()

    return {
        ModelType.CLUSTERER.value: clusterer,
        ModelType.CLASSIFIER.value: models
    }
Beispiel #2
0
def predict_proba(job: Job, data: DataFrame) -> Any:
    """Return class probabilities for ``data`` using the job's per-cluster models.

    Args:
        job: Job whose clusterer is loaded via ``Clustering.load_model`` and
            whose classifiers are loaded from
            ``job.predictive_model.model_path``.
        data: Frame to score; must contain ``trace_id`` and ``label``
            columns, both of which are removed before prediction.

    Returns:
        The output of ``predict_proba`` for the last non-empty cluster, or
        ``None`` when every cluster is empty.
    """
    # Keyword form: the positional ``axis`` argument of ``drop`` was removed
    # in pandas 2.0.
    data = data.drop(columns=['trace_id'])
    clusterer = Clustering.load_model(job)
    clustered_data = clusterer.cluster_data(data)

    classifier = joblib.load(job.predictive_model.model_path)

    result = None

    # NOTE(review): ``result`` is overwritten on every iteration, so only the
    # last non-empty cluster's probabilities are returned. Presumably callers
    # pass data that falls into a single cluster -- confirm.
    for cluster in range(clusterer.n_clusters):
        cluster_test_df = clustered_data[cluster]
        if cluster_test_df.empty:
            continue

        features = cluster_test_df.drop(columns=['label'])
        try:
            result = classifier[cluster].predict_proba(features)
        except (NotImplementedError, KeyError):
            # Some models reject the DataFrame as-is: retry with the
            # transposed frame, then with the raw array.
            try:
                result = classifier[cluster].predict_proba(features.T)
            except (KeyError, ValueError):
                result = classifier[cluster].predict_proba(features.values)

    return result
Beispiel #3
0
def _update(job: Job, data: DataFrame) -> dict:
    """Incrementally train the previous job's per-cluster classifiers.

    Args:
        job: Job whose ``incremental_train`` attribute references the job
            holding the clusterer and the stored models to update.
        data: New training frame; must contain a ``label`` column.

    Returns:
        A dict with the loaded clusterer under ``ModelType.CLUSTERER.value``
        and the updated ``{cluster number: model}`` mapping under
        ``ModelType.CLASSIFIER.value``.
    """
    previous_job = job.incremental_train

    clusterer = Clustering.load_model(previous_job)

    update_data = clusterer.cluster_data(data)

    models = joblib.load(previous_job.predictive_model.model_path)

    for cluster in range(clusterer.n_clusters):
        x = update_data[cluster]
        if x.empty:
            continue

        targets = x['label'].values.ravel()
        # Keyword form: the positional ``axis`` argument of ``drop`` was
        # removed in pandas 2.0.
        features = x.drop(columns='label')

        try:
            models[cluster].partial_fit(features, targets)
        except (NotImplementedError, KeyError):
            # Some models reject the DataFrame as-is: retry with the
            # transposed frame, then with the raw array.
            try:
                models[cluster].partial_fit(features.T, targets)
            except KeyError:
                models[cluster].partial_fit(features.values, targets)

    return {
        ModelType.CLUSTERER.value: clusterer,
        ModelType.CLASSIFIER.value: models
    }
def _update(job: Job, data: DataFrame, models) -> dict:
    """Incrementally train the given per-cluster models on new data.

    Args:
        job: Job whose ``clustering`` attribute is passed to
            ``Clustering.load_model`` to obtain the clusterer.
        data: New training frame; must contain a ``label`` column.
        models: Mapping from cluster number to a model exposing
            ``partial_fit``; the models are updated in place.

    Returns:
        ``{'clusterer': clusterer, 'classifier': models}``.
    """
    clusterer = Clustering.load_model(job.clustering)

    update_data = clusterer.cluster_data(data)

    for cluster in range(clusterer.n_clusters):
        x = update_data[cluster]
        if x.empty:
            continue

        y = x['label']
        # Keyword form: the positional ``axis`` argument of ``drop`` was
        # removed in pandas 2.0.
        models[cluster].partial_fit(x.drop(columns='label'), y.values.ravel())

    return {'clusterer': clusterer, 'classifier': models}
Beispiel #5
0
def _train(train_data: DataFrame, time_series_predictor: Any, clusterer: Clustering) -> dict:
    """Fit one time-series predictor per non-empty cluster.

    The predictor passed in is fitted on the first non-empty cluster; each
    following cluster is fitted on a non-safe ``clone`` of the previously
    fitted predictor. Empty clusters get no entry in the result.

    Returns:
        A dict with the clusterer under ``ModelType.CLUSTERER.value`` and the
        ``{cluster number: fitted predictor}`` mapping under
        ``ModelType.TIME_SERIES_PREDICTOR.value``.
    """
    fitted_by_cluster = {}
    frames_by_cluster = clusterer.cluster_data(train_data)

    predictor = time_series_predictor
    for cluster_id in range(clusterer.n_clusters):
        frame = frames_by_cluster[cluster_id]
        if frame.empty:
            continue

        predictor.fit(frame)
        fitted_by_cluster[cluster_id] = predictor
        # Hand a copy to the next cluster so the stored model stays untouched.
        predictor = clone(predictor, safe=False)

    return {
        ModelType.CLUSTERER.value: clusterer,
        ModelType.TIME_SERIES_PREDICTOR.value: fitted_by_cluster,
    }
def _train(train_data: DataFrame, time_series_predictor: Any,
           clusterer: Clustering) -> dict:
    """Train a separate time-series predictor for every non-empty cluster.

    ``time_series_predictor`` itself is fitted on the first non-empty
    cluster; afterwards a non-safe ``clone`` of the most recently fitted
    predictor is used for the next one. Clusters without rows are skipped.

    Returns:
        ``{'clusterer': clusterer, 'time_series_predictor': models}`` where
        ``models`` maps cluster numbers to fitted predictors.
    """
    per_cluster_models = {}
    partitions = clusterer.cluster_data(train_data)

    estimator = time_series_predictor
    for index in range(clusterer.n_clusters):
        partition = partitions[index]
        if partition.empty:
            continue

        estimator.fit(partition)
        per_cluster_models[index] = estimator
        # The next cluster gets its own copy of the estimator.
        estimator = clone(estimator, safe=False)

    return {
        'clusterer': clusterer,
        'time_series_predictor': per_cluster_models,
    }
Beispiel #7
0
def predict(job: Job, data: DataFrame) -> Any:
    """Predict values for ``data`` using the job's per-cluster regressors.

    Args:
        job: Job whose clusterer is loaded via ``Clustering.load_model`` and
            whose regressors are loaded from
            ``job.predictive_model.model_path``.
        data: Frame to score; must contain ``trace_id`` and ``label``
            columns, both of which are removed before prediction.

    Returns:
        The output of ``predict`` for the last non-empty cluster, or ``None``
        when every cluster is empty.
    """
    # Keyword form: the positional ``axis`` argument of ``drop`` was removed
    # in pandas 2.0.
    data = data.drop(columns=['trace_id'])
    clusterer = Clustering.load_model(job)
    test_data = clusterer.cluster_data(data)

    regressor = joblib.load(job.predictive_model.model_path)

    result = None

    # NOTE(review): ``result`` is overwritten on every iteration, so only the
    # last non-empty cluster's predictions are returned. Presumably callers
    # pass data that falls into a single cluster -- confirm.
    for cluster in range(clusterer.n_clusters):
        cluster_test_df = test_data[cluster]
        if cluster_test_df.empty:
            continue

        result = regressor[cluster].predict(
            cluster_test_df.drop(columns='label'))

    return result
Beispiel #8
0
def _train(train_data: DataFrame,
           regressor: RegressorMixin,
           clusterer: Clustering,
           do_cv=False) -> dict:
    """Fit one regressor per non-empty cluster, optionally via cross-validation.

    Args:
        train_data: Training frame; must contain a ``label`` column, which is
            used as the target. All remaining columns are used as features.
        regressor: Estimator used for the first non-empty cluster. A clone of
            the most recently stored model is used for each following one.
        clusterer: Object exposing ``cluster_data`` and ``n_clusters``.
        do_cv: When true, run 10-fold ``cross_validate`` per cluster and keep
            the fold estimator with the highest test score instead of
            fitting ``regressor`` on the whole cluster.

    Returns:
        A dict with the clusterer under ``ModelType.CLUSTERER.value`` and a
        ``{cluster number: fitted regressor}`` dict under
        ``ModelType.REGRESSOR.value``. Empty clusters get no entry.
    """
    models = {}

    clustered_data = clusterer.cluster_data(train_data)

    for cluster in range(clusterer.n_clusters):
        cluster_train_df = clustered_data[cluster]
        if cluster_train_df.empty:
            continue

        targets = cluster_train_df['label'].values.ravel()
        # Keyword form: the positional ``axis`` argument of ``drop`` was
        # removed in pandas 2.0.
        features = cluster_train_df.drop(columns='label')

        if do_cv:
            cross_validation_result = cross_validate(
                regressor,
                features,
                targets,
                return_estimator=True,
                cv=10,  # TODO (for Chiara): check whether 10 folds is wanted
            )

            validation_scores = cross_validation_result['test_score']
            # Pick the fold with the highest score; on ties the later fold
            # wins, as in the previous dict-based lookup.
            # TODO (for Chiara): check whether the max, the min or something
            # in between is wanted.
            best_fold = max(
                range(len(validation_scores)),
                key=lambda fold: (validation_scores[fold], fold),
            )
            regressor = cross_validation_result['estimator'][best_fold]
        else:
            regressor.fit(features, targets)

        models[cluster] = regressor
        # Prepare a copy for the next cluster so that the model stored above
        # is not trained again.
        try:
            regressor = clone(regressor)
        except TypeError:
            regressor = clone(regressor, safe=False)

    return {
        ModelType.CLUSTERER.value: clusterer,
        ModelType.REGRESSOR.value: models
    }
Beispiel #9
0
def _train(train_data: DataFrame, regressor: RegressorMixin, clusterer: Clustering) -> dict:
    """Fit one regressor per non-empty cluster of the training data.

    Args:
        train_data: Training frame; must contain a ``label`` column, which is
            used as the target. All remaining columns are used as features.
        regressor: Estimator fitted on the first non-empty cluster. A clone
            of the most recently fitted model is used for each following one.
        clusterer: Object exposing ``cluster_data`` and ``n_clusters``.

    Returns:
        A dict with the clusterer under ``'clusterer'`` and a
        ``{cluster number: fitted regressor}`` dict under
        ``PredictiveModels.REGRESSION.value``. Empty clusters get no entry.
    """
    models = {}

    clustered_data = clusterer.cluster_data(train_data)

    for cluster in range(clusterer.n_clusters):
        cluster_train_df = clustered_data[cluster]
        if cluster_train_df.empty:
            continue

        targets = cluster_train_df['label'].values.ravel()
        # Keyword form: the positional ``axis`` argument of ``drop`` was
        # removed in pandas 2.0.
        regressor.fit(cluster_train_df.drop(columns='label'), targets)

        models[cluster] = regressor
        # Prepare a copy for the next cluster so that the model stored above
        # is not trained again.
        try:
            regressor = clone(regressor)
        except TypeError:
            regressor = clone(regressor, safe=False)

    return {'clusterer': clusterer, PredictiveModels.REGRESSION.value: models}
Beispiel #10
0
def _update(job: Job, data: DataFrame) -> dict:
    """Update the previous job's per-cluster classifiers with new data.

    Models are only touched when ``job``'s prediction method is one of the
    whitelisted methods below; otherwise the stored models are returned
    unchanged. Random-forest models are re-fitted with ``fit``; all other
    whitelisted methods are trained incrementally with ``partial_fit``.

    Args:
        job: Job whose ``incremental_train`` attribute references the job
            holding the clusterer and the stored models to update.
        data: New training frame; must contain a ``label`` column.

    Returns:
        A dict with the loaded clusterer under ``ModelType.CLUSTERER.value``
        and the ``{cluster number: model}`` mapping under
        ``ModelType.CLASSIFIER.value``.
    """
    previous_job = job.incremental_train

    clusterer = Clustering.load_model(previous_job)

    update_data = clusterer.cluster_data(data)

    models = joblib.load(previous_job.predictive_model.model_path)
    if job.predictive_model.prediction_method in [
            ClassificationMethods.MULTINOMIAL_NAIVE_BAYES.value,
            ClassificationMethods.ADAPTIVE_TREE.value,
            ClassificationMethods.HOEFFDING_TREE.value,
            ClassificationMethods.SGDCLASSIFIER.value,
            ClassificationMethods.PERCEPTRON.value,
            ClassificationMethods.RANDOM_FOREST.value
    ]:  # TODO: workaround
        print('entered update')
        # NOTE(review): the whitelist above checks ``job`` while the
        # fit/partial_fit choice checks ``previous_job`` -- confirm that this
        # asymmetry is intended.
        refit = (previous_job.predictive_model.prediction_method ==
                 ClassificationMethods.RANDOM_FOREST.value)

        for cluster in range(clusterer.n_clusters):
            x = update_data[cluster]
            if x.empty:
                continue

            targets = x['label'].values.ravel()
            # Keyword form: the positional ``axis`` argument of ``drop`` was
            # removed in pandas 2.0.
            features = x.drop(columns='label')

            model = models[cluster]
            train = model.fit if refit else model.partial_fit
            try:
                train(features, targets)
            except (NotImplementedError, KeyError):
                # Some models reject the DataFrame: retry with the raw array.
                train(features.values, targets)

    return {
        ModelType.CLUSTERER.value: clusterer,
        ModelType.CLASSIFIER.value: models
    }
Beispiel #11
0
def _update(job: Job, data: DataFrame) -> dict:
    """Incrementally train the previous job's per-cluster classifiers.

    Args:
        job: Job whose ``incremental_train`` attribute references the job
            holding the clusterer and the stored models to update.
        data: New training frame; must contain a ``label`` column.

    Returns:
        A dict with the loaded clusterer under ``ModelType.CLUSTERER.value``
        and the updated ``{cluster number: model}`` mapping under
        ``ModelType.CLASSIFIER.value``.
    """
    previous_job = job.incremental_train

    clusterer = Clustering.load_model(previous_job)

    update_data = clusterer.cluster_data(data)

    models = joblib.load(previous_job.predictive_model.model_path)

    for cluster in range(clusterer.n_clusters):
        x = update_data[cluster]
        if x.empty:
            continue

        y = x['label']
        # Keyword form: the positional ``axis`` argument of ``drop`` was
        # removed in pandas 2.0.
        models[cluster].partial_fit(x.drop(columns='label'), y.values.ravel())

    return {
        ModelType.CLUSTERER.value: clusterer,
        ModelType.CLASSIFIER.value: models
    }
Beispiel #12
0
def _init_clusterer(clustering: Clustering, train_data: DataFrame):
    """Build a ``Clustering`` from ``clustering`` and fit it on the features.

    Args:
        clustering: Value passed to the ``Clustering`` constructor.
        train_data: Training frame; its ``trace_id`` and ``label`` columns
            are removed before fitting.

    Returns:
        The fitted ``Clustering`` instance.
    """
    clusterer = Clustering(clustering)
    # Keyword form: the positional ``axis`` argument of ``drop`` was removed
    # in pandas 2.0.
    clusterer.fit(train_data.drop(columns=['trace_id', 'label']))
    return clusterer