Ejemplo n.º 1
0
    def _params_producer() -> tuple:
        """ Lazily produces the training inputs for every imbalance model.

        "Priority" and "Time to Resolve" each produce a single parameter set
        built from the rows of ``df`` whose value for that metric is listed
        in ``filtered_classes``; every other metric produces one parameter
        set per filtered class, taken from the whole ``df``.

        Yields:
        ----------
            Descriptions series, classes codes, SMOTE instance and model name.
        """

        def _sampler_for(labels_column):
            # A new SMOTE object is built for every model; its k_neighbors is
            # then overwritten with whatever get_k_neighbors derives from the
            # given column of the (unfiltered) ``df``.
            sampler = SMOTE(
                ratio="minority",
                random_state=0,
                kind="borderline1",
                n_jobs=4,
            )
            sampler.k_neighbors = get_k_neighbors(df[labels_column])
            return sampler

        for metric in filtered_classes.keys():
            if metric in ("Priority", "Time to Resolve"):
                codes_column = metric.lower() + "_codes"
                selected_rows = df[df[metric].isin(filtered_classes[metric])]
                # NOTE(review): k_neighbors is computed from the unfiltered
                # ``df`` while the yielded codes come from the filtered rows
                # -- confirm this is intended.
                sampler = _sampler_for(codes_column)
                codes = selected_rows[codes_column]
                name = metric.split("_")[0]

                yield selected_rows.Description_tr, codes, sampler, name
                continue

            # Any other metric: one binary target column per filtered class.
            for class_name in filtered_classes[metric]:
                if metric == "Resolution":
                    # Resolution targets live in "Resolution_"-prefixed columns.
                    target_column = "Resolution_" + class_name
                else:
                    target_column = class_name
                sampler = _sampler_for(target_column)

                yield df.Description_tr, df[target_column], sampler, class_name
Ejemplo n.º 2
0
def train_model(model_path, dataframe_path, areas_of_testing, resolution):
    """Train one imbalance-aware model per prediction target and save it.

    Models are trained, in order, for every area of testing, for priority,
    for time-to-resolve intervals and for every resolution class.  Before
    training, previously stored models are removed and the filtered classes
    are written out via ``write_classes``.

    Args:
        model_path: Directory the models are written to; created if missing.
            Model file names are appended by plain string concatenation, so
            the value is expected to end with a path separator.
        dataframe_path: Path handed to ``read_pickle`` to load the dataframe.
        areas_of_testing: Mapping from area name to a mapping whose
            ``'series_name'`` entry names the dataframe column holding that
            area's labels.  Also forwarded to ``prepare_df``.
        resolution: Forwarded unchanged to ``prepare_df``.

    Raises:
        ValueError: If ``check_bugs_count(df, 100)`` reports too little data.
    """
    # exist_ok=True replaces the separate exists() check, which could race
    # with another process creating the directory in between.
    makedirs(model_path, exist_ok=True)
    # NOTE(review): if this is pandas.read_pickle, it unpickles the file,
    # which is unsafe for untrusted input -- confirm dataframe_path is trusted.
    df = read_pickle(dataframe_path)

    if not check_bugs_count(df, 100):
        raise ValueError(
            'Oops! Too little data to analyze. Model can\'t be trained.')

    filtered_classes, dataframes = prepare_df(df, areas_of_testing, resolution)

    for _class in filtered_classes:
        check_classes_count(filtered_classes[_class])

    # model/ folder cleanup
    remove_models()

    # updated predictions parameters writing
    write_classes(filtered_classes)

    # Imported at call time rather than at module level; presumably to avoid
    # a circular import -- TODO confirm.
    from main.config_processor import load_config_to_session
    load_config_to_session(
        str(Path(__file__).parents[1]) + '/models/selected/' +
        'predictions_parameters.ini')

    # One sampler, one classifier and one scoring function are shared by all
    # trainings below; only the sampler's k_neighbors changes per target.
    smt = SMOTE(ratio='minority', random_state=0, kind='borderline1', n_jobs=4)
    svm_imb = SVC(gamma=2, C=1, probability=True, class_weight='balanced')
    chi2 = feature_selection.chi2

    def _train(descriptions, labels, model_name):
        # k_neighbors is recomputed from each target's labels before the
        # shared sampler is reused.
        smt.k_neighbors = get_k_neighbors(labels)
        # The literal 50 is passed straight through to training_imbalance;
        # presumably the feature-selection cutoff -- TODO confirm.
        training_imbalance(descriptions, labels,
                           session['tfidf'], smt, chi2, 50, svm_imb,
                           model_path + secure_filename(model_name))

    # areas of testing models training
    for area in filtered_classes['areas_of_testing_classes']:
        series_name = areas_of_testing[area]['series_name']
        _train(df['Description_tr'], df[series_name], area)

    # priority models training
    priority_df = dataframes['priority_df']
    _train(priority_df['Description_tr'],
           priority_df['Priority_codes'],
           'priority')

    # ttr models training
    ttr_df = dataframes['ttr_df']
    _train(ttr_df['Description_tr'],
           ttr_df['coded_ttr_intervals'],
           'ttr')

    # resolution models training
    for resol in filtered_classes['resolutions_classes']:
        # Looked up inside the loop so a missing 'resolution_df' only matters
        # when there is at least one resolution class to train.
        resolution_df = dataframes['resolution_df']
        _train(resolution_df['Description_tr'],
               resolution_df['Resolution_' + resol],
               resol)