def delete_model(id):
    app.logger.info(f"Removing model {id}")
    model, config, dataset = Dataset.model_from_id(id)
    model.delete_data()
    config.models = [m for m in config.models if m.id != model.id]
    dataset.save()
    return jsonify({})

def export_confusion_matrix(id):
    model, _, _ = Dataset.model_from_id(id)
    if model.status != "done":
        return {"error": "Model is not trained"}, 409
    if not model.confusion_matrix_path:
        return {"error": "No confusion matrix available"}, 404
    return send_file(model.confusion_matrix_path, as_attachment=True)

def dataset_status(id):
    model: DatasetModel
    model, _, _ = Dataset.model_from_id(id)
    reply = {"status": model.status}
    if model.log_path:
        try:
            with open(model.log_path) as f:
                reply["logs"] = f.read()
        except FileNotFoundError:
            pass
    return reply

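# A minimal client-side sketch of polling the status endpoint above until
# training finishes. The base URL and the "/models/<id>/status" route are
# assumptions made for illustration, not taken from this app's URL map.
def wait_for_training(base_url, model_id, poll_interval=5):
    import time

    import requests

    # Keep polling until the model leaves the in-progress states
    while True:
        reply = requests.get(f"{base_url}/models/{model_id}/status").json()
        if reply["status"] in ("done", "error"):
            return reply
        time.sleep(poll_interval)
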
def predict_result(id):
    dataset: Dataset
    config: DatasetConfig
    model, config, dataset = Dataset.model_from_id(id)

    # Check if model is trained
    if model.status != "done":
        return {"error": "Model is not trained"}, 409

    app.logger.info(f"Predicting for dataset {dataset.name}")
    app.logger.info(f"Found configuration {config}")

    data = request.json
    app.logger.info(f"Got data {data}")

    # Re-encode categorical values with the stored column mapping;
    # every other column is treated as numeric.
    mapping = column_mapping.decode_mapping(dataset.column_mapping)
    for line in data:
        for k in line.keys():
            if k in mapping:
                line[k] = mapping[k][line[k]]
            else:
                line[k] = float(line[k])
    app.logger.info(f"Decoded data {data}")

    # Order the features the same way they were ordered at training time,
    # keeping only the columns enabled in the config, minus the label.
    columns_order = [
        col
        for col in dataset.columns
        if col in config.columns and config.columns[col] and col != config.label
    ]
    app.logger.info(f"Columns order {columns_order}")

    data = np.array([[line[col] for col in columns_order] for line in data])
    app.logger.info(f"Sorted data {data}")

    # Load the trained pipeline and run the prediction
    with open(model.pickled_model_path, "rb") as f:
        pipeline = pickle.load(f)
    app.logger.info("Loaded pipeline")

    result = pipeline.predict(data).tolist()
    app.logger.info(f"Predicted {result}")

    # Map encoded label values back to their original representation
    if config.label in mapping:
        result = [
            column_mapping.reconvert_one_value(config.label, value, mapping)
            for value in result
        ]

    return jsonify([{config.label: value} for value in result])

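# A hedged usage sketch for the prediction endpoint above: the body is a JSON
# list of rows keyed by raw column names, with categorical values in their
# original (un-encoded) form and numeric values as numbers or numeric strings.
# The URL, port, and column names below are invented for illustration only.
def predict_example():
    import requests

    rows = [
        {"age": "42", "color": "red"},   # hypothetical feature columns
        {"age": "27", "color": "blue"},
    ]
    resp = requests.post("http://localhost:5000/models/<id>/predict", json=rows)
    # Response shape: one {label: value} dict per input row
    print(resp.json())
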
def train_model(id):
    model, config, dataset = Dataset.model_from_id(id)

    # Check if training is already done or in progress
    if model.status == "done":
        return {"error": "Model is already trained"}, 409
    if model.status not in ["not started", "error"]:
        return {"error": "Model is currently training"}, 409

    app.logger.info(f"Starting training dataset {dataset.name}")
    app.logger.info(f"config: {config.to_json()}")
    app.logger.info(f"model: {model.to_json()}")

    # Update the status before handing the work off to the Dask cluster
    model.status = "starting"
    dataset.save()

    fut = client.submit(training.train_model, id)
    fire_and_forget(fut)

    return {"status": model.status}, 202

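# The handler above relies on a module-level Dask `client` plus
# dask.distributed's fire_and_forget, which keeps the submitted future alive
# on the cluster after the HTTP request returns, so training outlives the
# request cycle. A minimal sketch of the setup this code appears to assume;
# the scheduler address is an assumption, not taken from this codebase.
def make_dask_client(scheduler_address="tcp://127.0.0.1:8786"):
    # Real dask.distributed API; fire_and_forget is imported alongside Client
    from dask.distributed import Client

    return Client(scheduler_address)
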
def train_model(model_id):
    config: DatasetConfig
    model, config, dataset = Dataset.model_from_id(model_id)

    def set_status(status):
        logger.info(f"Setting status of {model.id} to: {status}")
        model.status = status
        dataset.save()

    try:
        # Create the different asset paths
        dataset_path = Path(dataset.path)
        model_dir = dataset_path.parent / f"{dataset.name}-model-{str(model.id)}"
        model_dir.mkdir(exist_ok=True)

        log_path = model_dir / "training.log"
        pickled_model_path = model_dir / "pipeline.pickle"
        exported_model_path = model_dir / "pipeline.py"
        shap_model_path = model_dir / "save.png"
        confusion_matrix_path = model_dir / "confusion_matrix.png"

        model.log_path = str(log_path)
        set_status("started")

        # Load the dataset
        mapping = column_mapping.decode_mapping(dataset.column_mapping)
        X, y = get_dataset(dataset_path, config, mapping)
        logger.info(f"Loaded dataset: {X} {y}")
        logger.info(f"Mapping: {mapping}")

        # Keep a copy with column names before the numpy conversion
        # (SHAP and the confusion matrix need the names)
        copy_X = X
        copy_y = y

        # Convert to types TPOT understands
        X = X.to_numpy().astype(np.float64)
        y = y.to_numpy().astype(np.float64)

        # Separate training and testing data, with column names...
        _, X_test_col, _, y_test_col = train_test_split(copy_X, copy_y, test_size=0.2)
        # ...and without
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
        logger.info(config.to_json())

        # Train the model
        classifier = tpot_training(
            X_train,
            y_train,
            model.model_config,
            log_file=log_path,
            model_type=config.model_type,
        )

        # Save the best pipeline
        save_res = save_pipeline(classifier, pickled_model_path)
        # Export the best pipeline's code
        export_res = export_pipeline_code(classifier, exported_model_path)
        # Save the SHAP image
        image_res = save_shap(classifier, shap_model_path, copy_X, copy_y, mapping)
        # Compute metrics on the generated pipeline
        analysis_res = analyse_model(classifier, X_train, y_train, X_test, y_test)
        # Create the confusion matrix (classification only)
        if config.model_type == "classification":
            matrix_res = create_confusion_matrix(
                classifier, X_test_col, y_test_col, confusion_matrix_path
            )
        else:
            matrix_res = dask.delayed(None)

        # Wait for the saving, exporting and analysis tasks to finish
        _, _, analysis, *_ = dask.compute(
            save_res, export_res, analysis_res, matrix_res, image_res
        )

        # Update the model with the exported paths and set the status as done
        logger.info(
            f"Confusion matrix path: {confusion_matrix_path}, SHAP path: {shap_model_path}"
        )
        model.pickled_model_path = str(pickled_model_path)
        model.exported_model_path = str(exported_model_path)
        if config.model_type == "classification":
            model.confusion_matrix_path = str(confusion_matrix_path)
        model.shap_model_path = str(shap_model_path)
        model.analysis = analysis
        model.status = "done"
        dataset.save()
    except Exception as e:
        logger.error(f"Got error while training: {e}")
        traceback.print_exc()
        set_status("error")

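# `tpot_training`, `save_pipeline` and `export_pipeline_code` are imported
# helpers; the latter two return dask.delayed objects (the training function
# above hands them to dask.compute). Hedged sketches of what they might look
# like, using only real TPOT / pickle / dask APIs -- the actual
# implementations in this codebase may differ:
import pickle

import dask
from tpot import TPOTClassifier, TPOTRegressor


def tpot_training_sketch(X_train, y_train, model_config, log_file, model_type):
    # Assumes model_config is a dict of TPOT constructor parameters
    cls = TPOTClassifier if model_type == "classification" else TPOTRegressor
    tpot = cls(verbosity=2, log_file=str(log_file), **model_config)
    tpot.fit(X_train, y_train)
    return tpot


@dask.delayed
def save_pipeline_sketch(classifier, path):
    # TPOT exposes the best sklearn pipeline as `fitted_pipeline_`
    with open(path, "wb") as f:
        pickle.dump(classifier.fitted_pipeline_, f)


@dask.delayed
def export_pipeline_code_sketch(classifier, path):
    # TPOT's `export` writes standalone Python code for the best pipeline
    classifier.export(str(path))
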
def export_shap_value(id):
    model, _, _ = Dataset.model_from_id(id)
    if model.status != "done":
        return {"error": "Model is not trained"}, 409
    app.logger.info(f"SHAP image path: {model.shap_model_path}")
    return send_file(model.shap_model_path, as_attachment=True)

def export_pickle(id):
    model, _, _ = Dataset.model_from_id(id)
    if model.status != "done":
        return {"error": "Model is not trained"}, 409
    return send_file(model.pickled_model_path, as_attachment=True)

def get_model(id):
    model, _, _ = Dataset.model_from_id(id)
    return model.to_json()