Ejemplo n.º 1
0
    def test_pre_process(self):
        """Check that ``pre_process`` yields the expected per-feature means.

        Builds a ``DataBot`` over ``self.df`` with ``'Survived'`` as the
        target, runs ``pre_process()``, and compares the mean of every column
        in ``dataBot.features`` with a reference value, within ``delta=0.1``.

        Only the columns present in ``dataBot.features`` are checked; a column
        with no entry in the reference table surfaces as a ``KeyError`` for
        that column's sub-test.
        """
        dataBot = DataBot(
            self.df,
            target_name='Survived',
            project_path='./tests')

        # Reference mean for each feature column after pre-processing.
        expected = {
            'Pclass': 0.46,
            'Age': 0.30,
            'SibSp': 0.05,
            'Parch': 0.05,
            'Fare': 0.05,
            'Sex_male': 0.64,
            'Sex_female': 0.35,
            'Embarked_S': 0.72,
            'Embarked_C': 0.18,
            'Embarked_Q': 0.08
        }

        dataBot.pre_process()
        features_average = dataBot.features.describe().loc['mean'].to_dict()
        for key, average in features_average.items():
            # One sub-test per feature: a failure names the offending column
            # and the remaining columns are still checked.
            with self.subTest(feature=key):
                self.assertAlmostEqual(expected[key], average, delta=0.1)
Ejemplo n.º 2
0
def create_model():
    """Create a project from the submitted form and render its summary page.

    On a POST request this view:

    1. Ensures the project directory ``PROJECTS_FOLDER + project_name``
       exists.
    2. Reads the CSV at the form's ``dataset_path`` and casts each column
       named by a ``<column>_type`` form field to the dtype given as that
       field's value.
    3. Saves the dataset to ``<project_path>/dataset.csv``.
    4. Runs ``DataBot.pre_process()`` and saves the result to
       ``<project_path>/dataset_processed.csv``.
    5. Trains models with ``Model``, saves the best one to
       ``<project_path>/model.joblib``, and persists a ``Project`` record.

    On any other request method nothing is processed and the template is
    rendered with every value set to ``None``.

    Returns:
        The result of ``render_template('model_info.html', ...)``.
    """
    if request.method != 'POST':
        # Nothing was submitted, so there are no frames to preview. Calling
        # ``.head()`` on the ``None`` placeholders would raise AttributeError.
        return render_template(
            'model_info.html',
            dataset=None,
            dataset_processed=None,
            models=None,
            scores=None)

    form = request.form
    print(form)

    # NOTE(review): ``project_name`` and ``dataset_path`` come straight from
    # the form and are used as filesystem paths without validation -- confirm
    # the inputs are trusted or sanitize them before use.
    project_name = form.get("project_name")
    project_path = f'{PROJECTS_FOLDER}{project_name}'
    # ``exist_ok`` avoids the check-then-create race of exists() + mkdir().
    os.makedirs(project_path, exist_ok=True)

    dataset = pd.read_csv(form.get('dataset_path'))

    # Fields named ``<column>_type`` carry the dtype requested for <column>.
    # Strip only the trailing suffix so that column names which themselves
    # contain underscores (e.g. ``passenger_id``) are kept whole.
    type_suffix = '_type'
    for field in form.keys():
        if not field.endswith(type_suffix):
            continue
        column = field[:-len(type_suffix)]
        dataset[column] = dataset[column].astype(form.get(field))
    dataset.to_csv(f'{project_path}/dataset.csv', index=False)

    target = form.get('target')
    # Thresholds are submitted as percentages; DataBot receives fractions.
    dataBot = DataBot(dataset=dataset,
                      project_path=project_path,
                      target_name=target,
                      null_threshold=float(form.get('null_threshold')) / 100,
                      cardinal_threshold=float(form.get('cardinal_threshold')) / 100)
    dataBot.pre_process()

    dataset_processed = dataBot.get_dataset()
    dataset_processed.to_csv(f'{project_path}/dataset_processed.csv', index=False)

    model = Model(dataset_processed, target)
    model.train_models()
    best_model = model.save_best_model(f'{project_path}/model.joblib')
    models = list(model.training_results['learner'].values)
    scores = list(model.training_results['test_score'].values)

    # Every value is wrapped in a one-element list, as in the original
    # record layout handed to ``Project``.
    project_info = {
        'project_name': [project_name],
        'project_path': [project_path],
        'model_name': [best_model.learner.__class__.__name__],
        'model_score': [best_model.test_score],
        'target': [target],
        'null_threshold': [form.get("null_threshold")],
        'cardinal_threshold': [form.get("cardinal_threshold")]
    }

    project = Project(project_info)
    project.save()

    return render_template(
        'model_info.html',
        dataset=dataset.head(3),
        dataset_processed=dataset_processed.head(),
        models=models,
        scores=scores)