Code example #1
def delete_model(id):
    app.logger.info(f"Removing model {id}")
    model, config, dataset = Dataset.model_from_id(id)
    model.delete_data()
    config.models = [m for m in config.models if m.id != model.id]
    dataset.save()
    return jsonify({})
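Every handler in this section relies on the `Dataset.model_from_id` helper, which is not shown. As a rough, hypothetical sketch of what it presumably does (the `Dataset.all()` accessor, the `dataset.configs` attribute, and the `abort` on a miss are assumptions, not the project's actual code):

@staticmethod
def model_from_id(id):
    # Hypothetical: scan every dataset's configs for the requested model
    # and return the (model, config, dataset) triple the handlers unpack.
    for dataset in Dataset.all():
        for config in dataset.configs:
            for model in config.models:
                if str(model.id) == str(id):
                    return model, config, dataset
    abort(404, description=f"Model {id} not found")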
Code example #2
def export_confusion_matrix(id):
    model, _, _ = Dataset.model_from_id(id)
    if model.status != "done":
        return {"error": "Model is not trained"}, 409
    if not model.confusion_matrix_path:
        return {"error": "No confusion matrix available"}, 404
    return send_file(model.confusion_matrix_path, as_attachment=True)
Code example #3
def dataset_status(id):
    model: DatasetModel
    model, _, _ = Dataset.model_from_id(id)

    reply = {"status": model.status}

    # Include the training log in the reply if one has been written.
    if model.log_path:
        try:
            with open(model.log_path) as f:
                reply["logs"] = f.read()
        except FileNotFoundError:
            pass

    return reply
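Since training runs in the background (see code example #5), a client is expected to poll this endpoint until the status settles. A minimal polling sketch with `requests`; the URL pattern is an assumption, as the route decorators are not shown:

import time
import requests

def wait_for_training(base_url, model_id, interval=5):
    # Poll until the model reaches a terminal status.
    while True:
        reply = requests.get(f"{base_url}/models/{model_id}/status").json()
        if reply["status"] in ("done", "error"):
            return reply
        time.sleep(interval)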
Code example #4
def predict_result(id):
    dataset: Dataset
    config: DatasetConfig
    model, config, dataset = Dataset.model_from_id(id)

    # Check if the model is trained
    if model.status != "done":
        return {"error": "Model is not trained"}, 409

    app.logger.info(f"predicting for dataset {dataset.name}")
    app.logger.info(f"Found configuration {config}")
    data = request.json
    app.logger.info(f"got data {data}")

    # Decode the stored column mapping and convert every incoming value:
    # mapped categorical columns become their numeric codes, the rest floats.
    mapping = column_mapping.decode_mapping(dataset.column_mapping)
    for line in data:
        for k in line.keys():
            if k in mapping:
                line[k] = mapping[k][line[k]]
            else:
                line[k] = float(line[k])
    app.logger.info(f"Decoded data {data}")

    # Rebuild the column order the pipeline was trained with,
    # skipping disabled columns and the label itself.
    columns_order = [
        col for col in dataset.columns if col in config.columns
        and config.columns[col] and col != config.label
    ]
    app.logger.info(f"columns order {columns_order}")
    data = np.array([[line[col] for col in columns_order]
                     for line in data])
    app.logger.info(f"sorted data {data}")

    with open(model.pickled_model_path, "rb") as f:
        pipeline = pickle.load(f)
    app.logger.info("loaded pipeline")
    result = pipeline.predict(data).tolist()
    app.logger.info(f"Predicted {result}")

    # Map numeric predictions back to their original labels if needed.
    if config.label in mapping:
        result = [
            column_mapping.reconvert_one_value(config.label, value,
                                               mapping) for value in result
        ]
    return jsonify([{config.label: value} for value in result])
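To make the decoding step concrete, here is a small worked example of the loop above; the column names and codes are invented for illustration:

# Hypothetical decoded mapping and request body
mapping = {"color": {"red": 0, "blue": 1}}
data = [{"color": "blue", "weight": "2.5"}]

for line in data:
    for k in line.keys():
        if k in mapping:
            line[k] = mapping[k][line[k]]  # categorical value -> numeric code
        else:
            line[k] = float(line[k])       # everything else -> float

# data is now [{"color": 1, "weight": 2.5}]: ordered into a numpy array,
# it can be fed straight to the unpickled pipeline.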
Code example #5
def train_model(id):
    model, config, dataset = Dataset.model_from_id(id)

    # Check if training is already done or in progress
    if model.status == "done":
        return {"error": "Model is already trained"}, 409
    if model.status not in ["not started", "error"]:
        return {"error": "Model is currently training"}, 409

    app.logger.info(f"Starting training dataset {dataset.name}")
    app.logger.info(f"config: {config.to_json()}")
    app.logger.info(f"model: {model.to_json()}")

    # update status
    model.status = "starting"
    dataset.save()

    # Hand the training job to the Dask cluster and return immediately.
    fut = client.submit(training.train_model, id)
    fire_and_forget(fut)
    return {"status": model.status}, 202
Code example #6
def train_model(model_id):
    config: DatasetConfig
    model, config, dataset = Dataset.model_from_id(model_id)

    def set_status(status):
        logger.info(f"Setting status of {model.id} to: {status}")
        model.status = status
        dataset.save()

    try:
        # Create the different assets path
        dataset_path = Path(dataset.path)
        model_dir = (dataset_path.parent /
                     f"{dataset.name}-model-{model.id}")
        model_dir.mkdir(exist_ok=True)
        log_path = model_dir / "training.log"
        pickled_model_path = model_dir / "pipeline.pickle"
        exported_model_path = model_dir / "pipeline.py"
        shap_model_path = model_dir / "save.png"
        confusion_matrix_path = model_dir / "confusion_matrix.png"

        model.log_path = str(log_path)
        set_status("started")

        # Load the dataset
        mapping = column_mapping.decode_mapping(dataset.column_mapping)
        X, y = get_dataset(dataset_path, config, mapping)
        logger.info(f"Loaded dataset: {X} {y}")
        logger.info(f"Mapping: {mapping}")

        # Keep the original DataFrames (with column names) for SHAP and the
        # confusion matrix; X and y are reassigned below, so plain references
        # are safe here.
        copy_X = X
        copy_y = y

        # Convert to types TPOT understands
        X = X.to_numpy().astype(np.float64)
        y = y.to_numpy().astype(np.float64)

        # Split twice: once on the named DataFrames (kept for the confusion
        # matrix) and once on the numpy arrays. A fixed random_state makes
        # both splits select the same rows.
        _, X_test_col, _, y_test_col = train_test_split(copy_X,
                                                        copy_y,
                                                        test_size=0.2,
                                                        random_state=42)

        X_train, X_test, y_train, y_test = train_test_split(X,
                                                            y,
                                                            test_size=0.2,
                                                            random_state=42)

        logger.info(config.to_json())

        # Train the model
        classifier = tpot_training(X_train,
                                   y_train,
                                   model.model_config,
                                   log_file=log_path,
                                   model_type=config.model_type)

        # Save best pipeline
        save_res = save_pipeline(classifier, pickled_model_path)

        # Export best pipeline code
        export_res = export_pipeline_code(classifier, exported_model_path)

        # Save shap image
        image_res = save_shap(classifier, shap_model_path, copy_X, copy_y,
                              mapping)

        # Create metrics on the generated pipeline
        analysis_res = analyse_model(classifier, X_train, y_train, X_test,
                                     y_test)

        # Create the confusion matrix
        if config.model_type == "classification":
            matrix_res = create_confusion_matrix(classifier, X_test_col,
                                                 y_test_col,
                                                 confusion_matrix_path)
        else:
            matrix_res = dask.delayed(None)

        # Get the results of the exportation and model saving
        _, _, analysis, *_ = dask.compute(save_res, export_res, analysis_res,
                                          matrix_res, image_res)

        # Update the model with the exported paths
        # and set the status as done
        logger.info(f"Confusion matrix path: {confusion_matrix_path}; "
                    f"SHAP image path: {shap_model_path}")
        model.pickled_model_path = str(pickled_model_path)
        model.exported_model_path = str(exported_model_path)
        if config.model_type == "classification":
            model.confusion_matrix_path = str(confusion_matrix_path)
        model.shap_model_path = str(shap_model_path)
        model.analysis = analysis
        model.status = "done"
        dataset.save()
    except Exception as e:
        logger.error(f"Got error while training: {e}")
        traceback.print_exc()
        set_status("error")
Code example #7
def export_shap_value(id):
    model, _, _ = Dataset.model_from_id(id)
    if model.status != "done":
        return {"error": "Model is not trained"}, 409
    app.logger.info(f"SHAP image path: {model.shap_model_path}")
    return send_file(model.shap_model_path, as_attachment=True)
Code example #8
def export_pickle(id):
    model, _, _ = Dataset.model_from_id(id)
    if model.status != "done":
        return {"error": "Model is not trained"}, 409
    return send_file(model.pickled_model_path, as_attachment=True)
Code example #9
def get_model(id):
    model, _, _ = Dataset.model_from_id(id)
    return model.to_json()