def _train(train_data: DataFrame, classifier: ClassifierMixin, clusterer: Clustering) -> dict:
    """Fit one classifier per non-empty cluster of ``train_data``.

    ``clusterer.cluster_data`` splits the training data into partitions that
    are looked up by cluster index. For every non-empty partition the current
    classifier is fitted on all columns except ``label`` and stored under the
    cluster index; a clone of it is then prepared for the next cluster.

    :param train_data: training data; each partition must have a ``label`` column
    :param classifier: estimator used for the first non-empty cluster and as
        the template for the clones used afterwards
    :param clusterer: object providing ``n_clusters`` and ``cluster_data``
    :return: dict holding the clusterer and a ``{cluster index: model}`` dict
    """
    models = dict()

    train_data = clusterer.cluster_data(train_data)

    for cluster in range(clusterer.n_clusters):
        cluster_train_df = train_data[cluster]
        if cluster_train_df.empty:
            continue

        # `axis` must be passed by keyword: pandas >= 2.0 rejects it positionally.
        features = cluster_train_df.drop('label', axis=1)
        targets = DataFrame(cluster_train_df['label']).values.ravel()
        try:
            classifier.fit(features, targets)
        except (NotImplementedError, KeyError):
            # Fallback for estimators that only train through partial_fit on
            # a plain array.
            classifier.partial_fit(features.values, targets)

        models[cluster] = classifier
        try:
            classifier = clone(classifier)
        except TypeError:
            # Objects clone() refuses in safe mode are copied with safe=False
            # and then reset before being reused for the next cluster.
            classifier = clone(classifier, safe=False)
            classifier.reset()

    return {
        ModelType.CLUSTERER.value: clusterer,
        ModelType.CLASSIFIER.value: models
    }
def predict_proba(job: Job, data: DataFrame) -> Any:
    """Return class probabilities from the per-cluster classifiers of ``job``.

    The ``trace_id`` column is dropped, the remaining data is partitioned with
    the clusterer loaded for ``job``, and each non-empty partition (minus its
    ``label`` column) is passed to the classifier stored for that cluster.

    NOTE: ``result`` is overwritten on every iteration, so the value returned
    is the output for the last non-empty cluster only, or ``None`` when every
    cluster is empty.

    :param job: job whose clusterer and ``predictive_model.model_path`` are loaded
    :param data: data to score; must have ``trace_id`` and ``label`` columns
    :return: output of ``predict_proba`` for the last non-empty cluster, or ``None``
    """
    # `axis` must be passed by keyword: pandas >= 2.0 rejects it positionally.
    data = data.drop(['trace_id'], axis=1)
    clusterer = Clustering.load_model(job)
    data = clusterer.cluster_data(data)

    # NOTE(security): joblib.load unpickles the file; model_path must point to
    # a trusted artifact.
    classifier = joblib.load(job.predictive_model.model_path)

    result = None

    for cluster in range(clusterer.n_clusters):
        cluster_test_df = data[cluster]
        if cluster_test_df.empty:
            continue

        features = cluster_test_df.drop(['label'], axis=1)
        model = classifier[cluster]
        try:
            result = model.predict_proba(features)
        except (NotImplementedError, KeyError):
            # Some models reject the DataFrame as given; retry with the
            # transposed frame and finally with the raw array.
            try:
                result = model.predict_proba(features.T)
            except (KeyError, ValueError):
                result = model.predict_proba(features.values)

    return result
def _update(job: Job, data: DataFrame) -> dict:
    """Incrementally train the per-cluster classifiers of a previous job.

    The clusterer and the models are loaded from ``job.incremental_train``.
    ``data`` is partitioned with that clusterer and every non-empty partition
    is passed to ``partial_fit`` of the model stored for its cluster.

    :param job: job whose ``incremental_train`` refers to the job to update
    :param data: new training data; each partition must have a ``label`` column
    :return: dict holding the clusterer and the ``{cluster index: model}`` dict
    """
    previous_job = job.incremental_train

    clusterer = Clustering.load_model(previous_job)
    update_data = clusterer.cluster_data(data)

    # NOTE(security): joblib.load unpickles the file; model_path must point to
    # a trusted artifact.
    models = joblib.load(previous_job.predictive_model.model_path)

    for cluster in range(clusterer.n_clusters):
        x = update_data[cluster]
        if x.empty:
            continue

        targets = x['label'].values.ravel()
        # `axis` must be passed by keyword: pandas >= 2.0 rejects it positionally.
        features = x.drop('label', axis=1)
        model = models[cluster]
        try:
            model.partial_fit(features, targets)
        except (NotImplementedError, KeyError):
            # Retry with the transposed frame, then with the raw array, for
            # models that reject the DataFrame as given.
            try:
                model.partial_fit(features.T, targets)
            except KeyError:
                model.partial_fit(features.values, targets)

    return {
        ModelType.CLUSTERER.value: clusterer,
        ModelType.CLASSIFIER.value: models
    }
def _update(job: Job, data: DataFrame, models) -> dict:
    """Call ``partial_fit`` on the given per-cluster models with new data.

    The clusterer is loaded from ``job.clustering`` and used to partition
    ``data``; each non-empty partition is passed to the model stored under the
    same cluster index.

    :param job: job whose ``clustering`` identifies the clusterer to load
    :param data: new training data; each partition must have a ``label`` column
    :param models: container of models indexed by cluster number
    :return: ``{'clusterer': clusterer, 'classifier': models}``
    """
    clusterer = Clustering.load_model(job.clustering)
    update_data = clusterer.cluster_data(data)

    for cluster in range(clusterer.n_clusters):
        x = update_data[cluster]
        if x.empty:
            continue
        y = x['label']
        # `axis` must be passed by keyword: pandas >= 2.0 rejects it positionally.
        models[cluster].partial_fit(x.drop('label', axis=1), y.values.ravel())

    return {'clusterer': clusterer, 'classifier': models}
def _train(train_data: DataFrame, time_series_predictor: Any, clusterer: Clustering) -> dict:
    """Fit one time-series predictor per non-empty cluster.

    :param train_data: data to partition with ``clusterer.cluster_data``
    :param time_series_predictor: predictor fitted on the first non-empty
        cluster; clones of it are used for the following ones
    :param clusterer: object providing ``n_clusters`` and ``cluster_data``
    :return: dict holding the clusterer and a ``{cluster index: model}`` dict
    """
    fitted_by_cluster = {}
    partitions = clusterer.cluster_data(train_data)
    current = time_series_predictor

    for index in range(clusterer.n_clusters):
        partition = partitions[index]
        if partition.empty:
            continue

        current.fit(partition)
        fitted_by_cluster[index] = current
        # Continue with a clone so the instance stored above is not the one
        # fitted for the next cluster.
        current = clone(current, safe=False)

    return {
        ModelType.CLUSTERER.value: clusterer,
        ModelType.TIME_SERIES_PREDICTOR.value: fitted_by_cluster,
    }
def _train(train_data: DataFrame, time_series_predictor: Any, clusterer: Clustering) -> dict:
    """Train a separate time-series predictor on each non-empty cluster.

    :param train_data: data handed to ``clusterer.cluster_data`` for partitioning
    :param time_series_predictor: predictor used for the first non-empty
        cluster and cloned for each subsequent one
    :param clusterer: object providing ``n_clusters`` and ``cluster_data``
    :return: ``{'clusterer': clusterer, 'time_series_predictor': models}``
        where ``models`` maps cluster index to its fitted predictor
    """
    trained = {}
    clustered = clusterer.cluster_data(train_data)
    predictor = time_series_predictor

    for cluster_id in range(clusterer.n_clusters):
        frame = clustered[cluster_id]
        has_rows = not frame.empty
        if has_rows:
            predictor.fit(frame)
            trained[cluster_id] = predictor
            # Swap in a clone before moving on to the next cluster.
            predictor = clone(predictor, safe=False)

    result = {'clusterer': clusterer}
    result['time_series_predictor'] = trained
    return result
def predict(job: Job, data: DataFrame) -> Any:
    """Return predictions from the per-cluster regressors of ``job``.

    The ``trace_id`` column is dropped, the remaining data is partitioned with
    the clusterer loaded for ``job``, and each non-empty partition (minus its
    ``label`` column) is passed to the regressor stored for that cluster.

    NOTE: ``result`` is overwritten on every iteration, so the value returned
    is the output for the last non-empty cluster only, or ``None`` when every
    cluster is empty.

    :param job: job whose clusterer and ``predictive_model.model_path`` are loaded
    :param data: data to score; must have ``trace_id`` and ``label`` columns
    :return: output of ``predict`` for the last non-empty cluster, or ``None``
    """
    # `axis` must be passed by keyword: pandas >= 2.0 rejects it positionally.
    data = data.drop(['trace_id'], axis=1)
    clusterer = Clustering.load_model(job)
    test_data = clusterer.cluster_data(data)

    # NOTE(security): joblib.load unpickles the file; model_path must point to
    # a trusted artifact.
    regressor = joblib.load(job.predictive_model.model_path)

    result = None

    for cluster in range(clusterer.n_clusters):
        cluster_test_df = test_data[cluster]
        if cluster_test_df.empty:
            continue
        result = regressor[cluster].predict(
            cluster_test_df.drop('label', axis=1))

    return result
def _train(train_data: DataFrame, regressor: RegressorMixin, clusterer: Clustering, do_cv=False) -> dict:
    """Fit one regressor per non-empty cluster, optionally via cross-validation.

    ``clusterer.cluster_data`` splits the training data into partitions that
    are looked up by cluster index. For every non-empty partition a regressor
    is trained on all columns except ``label`` and stored under the cluster
    index; a clone of it is then prepared for the next cluster.

    :param train_data: training data; each partition must have a ``label`` column
    :param regressor: estimator used for the first non-empty cluster and as
        the template for the clones used afterwards
    :param clusterer: object providing ``n_clusters`` and ``cluster_data``
    :param do_cv: when true, run 10-fold ``cross_validate`` and keep the fold
        estimator with the highest test score instead of a single ``fit``
    :return: dict holding the clusterer and a ``{cluster index: model}`` dict
    """
    models = dict()

    train_data = clusterer.cluster_data(train_data)

    for cluster in range(clusterer.n_clusters):
        cluster_train_df = train_data[cluster]
        if cluster_train_df.empty:
            continue

        targets = cluster_train_df['label'].values.ravel()
        # `axis` must be passed by keyword: pandas >= 2.0 rejects it positionally.
        features = cluster_train_df.drop('label', axis=1)

        if do_cv:
            cross_validation_result = cross_validate(
                regressor,
                features,
                targets,
                return_estimator=True,
                cv=10  # TODO(Chiara): check whether 10 folds is what you want
            )

            validation_scores = cross_validation_result['test_score']
            regressors = cross_validation_result['estimator']

            # Index of the best-scoring fold; on ties the later fold wins.
            # Selecting by index avoids a score-keyed lookup, which breaks
            # when a score is NaN.
            # TODO(Chiara): check whether you want the max, the min, or
            # something in between.
            best_index = max(
                range(len(validation_scores)),
                key=lambda fold: (validation_scores[fold], fold)
            )
            regressor = regressors[best_index]
        else:
            regressor.fit(features, targets)

        models[cluster] = regressor
        try:
            regressor = clone(regressor)
        except TypeError:
            # Objects clone() refuses in safe mode are copied with safe=False.
            regressor = clone(regressor, safe=False)

    return {
        ModelType.CLUSTERER.value: clusterer,
        ModelType.REGRESSOR.value: models
    }
def _train(train_data: DataFrame, regressor: RegressorMixin, clusterer: Clustering) -> dict:
    """Fit one regressor per non-empty cluster of ``train_data``.

    ``clusterer.cluster_data`` splits the training data into partitions that
    are looked up by cluster index. For every non-empty partition the current
    regressor is fitted on all columns except ``label`` and stored under the
    cluster index; a clone of it is then prepared for the next cluster.

    :param train_data: training data; each partition must have a ``label`` column
    :param regressor: estimator used for the first non-empty cluster and as
        the template for the clones used afterwards
    :param clusterer: object providing ``n_clusters`` and ``cluster_data``
    :return: dict holding the clusterer under ``'clusterer'`` and the
        ``{cluster index: model}`` dict under ``PredictiveModels.REGRESSION.value``
    """
    models = dict()

    train_data = clusterer.cluster_data(train_data)

    for cluster in range(clusterer.n_clusters):
        cluster_train_df = train_data[cluster]
        if cluster_train_df.empty:
            continue

        cluster_targets_df = cluster_train_df['label']
        # `axis` must be passed by keyword: pandas >= 2.0 rejects it positionally.
        regressor.fit(cluster_train_df.drop('label', axis=1),
                      cluster_targets_df.values.ravel())

        models[cluster] = regressor
        try:
            regressor = clone(regressor)
        except TypeError:
            # Objects clone() refuses in safe mode are copied with safe=False.
            regressor = clone(regressor, safe=False)

    return {'clusterer': clusterer, PredictiveModels.REGRESSION.value: models}
def _update(job: Job, data: DataFrame) -> dict:
    """Update the per-cluster classifiers of a previous job with new data.

    The clusterer and the models are loaded from ``job.incremental_train``.
    Training only happens when ``job``'s prediction method is one of the
    listed methods; otherwise the loaded models are returned untouched.
    Random-forest models are trained with ``fit``, all others with
    ``partial_fit``.

    :param job: job whose ``incremental_train`` refers to the job to update
    :param data: new training data; each partition must have a ``label`` column
    :return: dict holding the clusterer and the ``{cluster index: model}`` dict
    """
    previous_job = job.incremental_train

    clusterer = Clustering.load_model(previous_job)
    update_data = clusterer.cluster_data(data)

    # NOTE(security): joblib.load unpickles the file; model_path must point to
    # a trusted artifact.
    models = joblib.load(previous_job.predictive_model.model_path)

    updatable_methods = (
        ClassificationMethods.MULTINOMIAL_NAIVE_BAYES.value,
        ClassificationMethods.ADAPTIVE_TREE.value,
        ClassificationMethods.HOEFFDING_TREE.value,
        ClassificationMethods.SGDCLASSIFIER.value,
        ClassificationMethods.PERCEPTRON.value,
        ClassificationMethods.RANDOM_FOREST.value,
    )

    if job.predictive_model.prediction_method in updatable_methods:  # TODO: workaround
        print('entered update')

        # NOTE(review): the gate above reads `job`, while this check reads
        # `previous_job`; kept as in the original - confirm this is intended.
        use_full_fit = (previous_job.predictive_model.prediction_method
                        == ClassificationMethods.RANDOM_FOREST.value)

        for cluster in range(clusterer.n_clusters):
            x = update_data[cluster]
            if x.empty:
                continue

            targets = x['label'].values.ravel()
            # `axis` must be passed by keyword: pandas >= 2.0 rejects it
            # positionally.
            features = x.drop('label', axis=1)
            model = models[cluster]
            train = model.fit if use_full_fit else model.partial_fit
            try:
                train(features, targets)
            except (NotImplementedError, KeyError):
                # Retry with the raw array for models that reject the
                # DataFrame as given.
                train(features.values, targets)

    return {
        ModelType.CLUSTERER.value: clusterer,
        ModelType.CLASSIFIER.value: models
    }
def _update(job: Job, data: DataFrame) -> dict:
    """Call ``partial_fit`` on the per-cluster classifiers of a previous job.

    The clusterer and the models are loaded from ``job.incremental_train``.
    ``data`` is partitioned with that clusterer and every non-empty partition
    is passed to the model stored for its cluster.

    :param job: job whose ``incremental_train`` refers to the job to update
    :param data: new training data; each partition must have a ``label`` column
    :return: dict holding the clusterer and the ``{cluster index: model}`` dict
    """
    previous_job = job.incremental_train

    clusterer = Clustering.load_model(previous_job)
    update_data = clusterer.cluster_data(data)

    # NOTE(security): joblib.load unpickles the file; model_path must point to
    # a trusted artifact.
    models = joblib.load(previous_job.predictive_model.model_path)

    for cluster in range(clusterer.n_clusters):
        x = update_data[cluster]
        if x.empty:
            continue
        y = x['label']
        # `axis` must be passed by keyword: pandas >= 2.0 rejects it positionally.
        models[cluster].partial_fit(x.drop('label', axis=1), y.values.ravel())

    return {
        ModelType.CLUSTERER.value: clusterer,
        ModelType.CLASSIFIER.value: models
    }
def _init_clusterer(clustering: Clustering, train_data: DataFrame):
    """Build a ``Clustering`` from ``clustering`` and fit it on the training data.

    The ``trace_id`` and ``label`` columns are removed before fitting.

    :param clustering: value passed to the ``Clustering`` constructor
    :param train_data: training data with ``trace_id`` and ``label`` columns
    :return: the fitted ``Clustering`` instance
    """
    clusterer = Clustering(clustering)
    # `axis` must be passed by keyword: pandas >= 2.0 rejects it positionally.
    clusterer.fit(train_data.drop(['trace_id', 'label'], axis=1))
    return clusterer