Exemple #1
0
def _update(job: Job, data: DataFrame) -> dict:
    """Incrementally update the per-cluster models of a previously trained job.

    Loads the clusterer and the per-cluster models stored for
    ``job.incremental_train``, partitions ``data`` with that clusterer and
    calls ``partial_fit`` on the model of every non-empty cluster.

    Args:
        job: Job whose ``incremental_train`` attribute references the
            previously trained job to update.
        data: New training rows. Every non-empty cluster partition must
            contain a ``'label'`` column, which is used as the target.

    Returns:
        A dict mapping ``ModelType.CLUSTERER.value`` to the loaded clusterer
        and ``ModelType.CLASSIFIER.value`` to the updated models.
    """
    previous_job = job.incremental_train

    clusterer = Clustering.load_model(previous_job)
    update_data = clusterer.cluster_data(data)
    models = joblib.load(previous_job.predictive_model.model_path)

    for cluster in range(clusterer.n_clusters):
        x = update_data[cluster]
        if x.empty:
            continue

        labels = x['label'].values.ravel()
        # `axis` must be a keyword: pandas 2.0 removed the positional form.
        features = x.drop('label', axis=1)
        model = models[cluster]

        try:
            model.partial_fit(features, labels)
        except (NotImplementedError, KeyError):
            # Fallbacks for models that reject the DataFrame as given:
            # first the transposed frame, then the underlying ndarray.
            # NOTE(review): only KeyError from the transposed attempt reaches
            # the ndarray fallback; any other error propagates to the caller.
            try:
                model.partial_fit(features.T, labels)
            except KeyError:
                model.partial_fit(features.values, labels)

    return {
        ModelType.CLUSTERER.value: clusterer,
        ModelType.CLASSIFIER.value: models
    }
Exemple #2
0
def predict_proba(job: Job, data: DataFrame) -> Any:
    """Return class probabilities from the per-cluster classifiers of ``job``.

    The ``'trace_id'`` column is dropped, the remaining rows are partitioned
    with the job's clusterer, and the classifier of each non-empty cluster is
    asked for ``predict_proba`` on that cluster's rows (without ``'label'``).

    Args:
        job: Job whose clusterer and stored predictive model are loaded.
        data: Rows to score; must contain ``'trace_id'`` and ``'label'``
            columns.

    Returns:
        The ``predict_proba`` output of the last non-empty cluster, or
        ``None`` when every cluster is empty.
    """
    # `axis` must be a keyword: pandas 2.0 removed the positional form.
    data = data.drop(['trace_id'], axis=1)
    clusterer = Clustering.load_model(job)
    clustered = clusterer.cluster_data(data)

    classifier = joblib.load(job.predictive_model.model_path)

    result = None

    # NOTE(review): `result` is overwritten on every non-empty cluster, so
    # only the last one is returned. Presumably callers pass rows that fall
    # into a single cluster -- confirm before relying on multi-cluster input.
    for cluster in range(clusterer.n_clusters):
        cluster_test_df = clustered[cluster]
        if cluster_test_df.empty:
            continue

        features = cluster_test_df.drop(['label'], axis=1)
        model = classifier[cluster]

        try:
            result = model.predict_proba(features)
        except (NotImplementedError, KeyError):
            # Fallbacks for models that reject the DataFrame as given:
            # first the transposed frame, then the underlying ndarray.
            try:
                result = model.predict_proba(features.T)
            except (KeyError, ValueError):
                result = model.predict_proba(features.values)

    return result
def _update(job: Job, data: DataFrame, models) -> dict:
    """Incrementally update the given per-cluster models with new data.

    Loads the clusterer from ``job.clustering``, partitions ``data`` with it
    and calls ``partial_fit`` on ``models[cluster]`` for every non-empty
    cluster. The ``models`` container is updated in place.

    Args:
        job: Job whose ``clustering`` attribute identifies the clusterer.
        data: New training rows. Every non-empty cluster partition must
            contain a ``'label'`` column, which is used as the target.
        models: Container indexable by cluster number whose items expose
            ``partial_fit``.

    Returns:
        ``{'clusterer': clusterer, 'classifier': models}``.
    """
    clusterer = Clustering.load_model(job.clustering)

    update_data = clusterer.cluster_data(data)

    for cluster in range(clusterer.n_clusters):
        x = update_data[cluster]
        if x.empty:
            continue

        labels = x['label'].values.ravel()
        # `axis` must be a keyword: pandas 2.0 removed the positional form.
        features = x.drop('label', axis=1)
        models[cluster].partial_fit(features, labels)

    return {'clusterer': clusterer, 'classifier': models}
Exemple #4
0
def predict(job: Job, data: DataFrame) -> Any:
    """Return predictions from the per-cluster regressors of ``job``.

    The ``'trace_id'`` column is dropped, the remaining rows are partitioned
    with the job's clusterer, and the regressor of each non-empty cluster is
    asked to ``predict`` on that cluster's rows (without ``'label'``).

    Args:
        job: Job whose clusterer and stored predictive model are loaded.
        data: Rows to score; must contain ``'trace_id'`` and ``'label'``
            columns.

    Returns:
        The ``predict`` output of the last non-empty cluster, or ``None``
        when every cluster is empty.
    """
    # `axis` must be a keyword: pandas 2.0 removed the positional form.
    data = data.drop(['trace_id'], axis=1)
    clusterer = Clustering.load_model(job)
    test_data = clusterer.cluster_data(data)

    regressor = joblib.load(job.predictive_model.model_path)

    result = None

    # NOTE(review): `result` is overwritten on every non-empty cluster, so
    # only the last one is returned. Presumably callers pass rows that fall
    # into a single cluster -- confirm before relying on multi-cluster input.
    for cluster in range(clusterer.n_clusters):
        cluster_test_df = test_data[cluster]
        if cluster_test_df.empty:
            continue

        features = cluster_test_df.drop('label', axis=1)
        result = regressor[cluster].predict(features)

    return result
Exemple #5
0
def _update(job: Job, data: DataFrame) -> dict:
    """Update the per-cluster classifiers of a previously trained job.

    Loads the clusterer and the per-cluster models stored for
    ``job.incremental_train`` and partitions ``data`` with that clusterer.
    The models are only touched when ``job``'s prediction method is one of
    the whitelisted methods below. In that case each non-empty cluster's
    model is refit with ``fit`` when the previous job used RANDOM_FOREST, and
    updated with ``partial_fit`` otherwise.

    Args:
        job: Job whose ``incremental_train`` attribute references the
            previously trained job to update.
        data: New training rows. Every non-empty cluster partition must
            contain a ``'label'`` column, which is used as the target.

    Returns:
        A dict mapping ``ModelType.CLUSTERER.value`` to the loaded clusterer
        and ``ModelType.CLASSIFIER.value`` to the (possibly updated) models.
    """
    previous_job = job.incremental_train

    clusterer = Clustering.load_model(previous_job)
    update_data = clusterer.cluster_data(data)
    models = joblib.load(previous_job.predictive_model.model_path)

    updatable_methods = (
        ClassificationMethods.MULTINOMIAL_NAIVE_BAYES.value,
        ClassificationMethods.ADAPTIVE_TREE.value,
        ClassificationMethods.HOEFFDING_TREE.value,
        ClassificationMethods.SGDCLASSIFIER.value,
        ClassificationMethods.PERCEPTRON.value,
        ClassificationMethods.RANDOM_FOREST.value,
    )

    # TODO: workaround
    if job.predictive_model.prediction_method in updatable_methods:
        print('entered update')  # debug output kept from the original

        # Loop-invariant: decided by the previous job, whose models we loaded.
        is_random_forest = (
            previous_job.predictive_model.prediction_method
            == ClassificationMethods.RANDOM_FOREST.value
        )

        for cluster in range(clusterer.n_clusters):
            x = update_data[cluster]
            if x.empty:
                continue

            labels = x['label'].values.ravel()
            # `axis` must be a keyword: pandas 2.0 removed the positional form.
            features = x.drop('label', axis=1)
            model = models[cluster]
            train = model.fit if is_random_forest else model.partial_fit

            try:
                train(features, labels)
            except (NotImplementedError, KeyError):
                # Fallback for models that reject the DataFrame as given:
                # retry with the underlying ndarray.
                train(features.values, labels)

    return {
        ModelType.CLUSTERER.value: clusterer,
        ModelType.CLASSIFIER.value: models
    }
Exemple #6
0
def _update(job: Job, data: DataFrame) -> dict:
    """Incrementally update the per-cluster models of a previously trained job.

    Loads the clusterer and the per-cluster models stored for
    ``job.incremental_train``, partitions ``data`` with that clusterer and
    calls ``partial_fit`` on the model of every non-empty cluster.

    Args:
        job: Job whose ``incremental_train`` attribute references the
            previously trained job to update.
        data: New training rows. Every non-empty cluster partition must
            contain a ``'label'`` column, which is used as the target.

    Returns:
        A dict mapping ``ModelType.CLUSTERER.value`` to the loaded clusterer
        and ``ModelType.CLASSIFIER.value`` to the updated models.
    """
    previous_job = job.incremental_train

    clusterer = Clustering.load_model(previous_job)
    update_data = clusterer.cluster_data(data)
    models = joblib.load(previous_job.predictive_model.model_path)

    for cluster in range(clusterer.n_clusters):
        x = update_data[cluster]
        if x.empty:
            continue

        labels = x['label'].values.ravel()
        # `axis` must be a keyword: pandas 2.0 removed the positional form.
        features = x.drop('label', axis=1)
        models[cluster].partial_fit(features, labels)

    return {
        ModelType.CLUSTERER.value: clusterer,
        ModelType.CLASSIFIER.value: models
    }