Example 1
def methodology_test(html, X, y, good_features, loss_func, number_of_test):
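    # Split the feature set (the columns of X) into stratified folds; feature_marks
    # flags the known good features so each fold keeps their proportion.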
    feature_split = StratifiedKFold(5, shuffle=True)
    feature_marks = feature_mask(X.shape[1], good_features)

    estimator = SVC()
    filters = list(map(lambda measure: UnivariateFilter(measure, select_k_best(30)), GLOB_MEASURE.values()))
    param_grid = starting_dict
    delta = 0.1      

    for feature_train, feature_test in feature_split.split(X.T, feature_marks):
        
        train_mapping = {i:f for i, f in enumerate(feature_train)}
        test_mapping = {i:f for i, f in enumerate(feature_test)}
        
        sample_split = StratifiedKFold(5)
        
        for sample_train, sample_test in sample_split.split(X, y):
            print('new test number:', number_of_test)
            
            X_ftrain = X[:, feature_train]
            X_ftest = X[:, feature_test]
            good_features_test = [value for value in test_mapping.values() if value in good_features]
            good_features_train = [value for value in train_mapping.values() if value in good_features]

            score_train = partial(loss_func, good_features=good_features, mapping=train_mapping)
            score_test_rec = partial(loss_func, good_features=good_features, mapping=test_mapping)
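            # Two-phase MeLiF: tune the filter ensemble on the train feature fold with loss_func,
            # then get_score() evaluates the resulting selection on the test feature fold.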
            melif_phase = Melif2Phase(filters, score_train)
            melif_phase.fit(X_ftrain[sample_train], y[sample_train], estimator, select_k_best(select_k_number), X_ftrain[sample_test], y[sample_test], delta=delta, points=ParameterGrid(param_grid))
            melif_phase.run()
            rec_score_phase, prec_score_phase, feat_phase = melif_phase.get_score(X_ftest[sample_train], y[sample_train], X_ftest[sample_test], y[sample_test], score_test_rec)

            un_map_phase = [test_mapping[f] for f in feat_phase]
            good_phase = [f for f in un_map_phase if f in good_features]

            # Classic MeLiF, guided by classifier F1 on objects, trained on the train feature fold.
            score = f1_score
            melif = Melif(filters, score)
            melif.fit(X_ftrain[sample_train], y[sample_train], estimator, select_k_best(select_k_number), X_ftrain[sample_test], y[sample_test], delta=delta, points=ParameterGrid(param_grid))
            melif.run()
            _, feat_m = melif.get_score(X_ftest[sample_train], y[sample_train], X_ftest[sample_test], y[sample_test])
            un_map_m = [test_mapping[f] for f in feat_m]
            good_m = [f for f in un_map_m if f in good_features]
            score_m = score_test_rec(feat_m)

            # MeLiF trained directly on the test feature fold.
            score = f1_score
            melif_ontest = Melif(filters, score)
            melif_ontest.fit(X_ftest[sample_train], y[sample_train], estimator, select_k_best(select_k_number), X_ftest[sample_test], y[sample_test], delta=delta, points=ParameterGrid(param_grid))
            feat_ontest = melif_ontest.run()
            un_map_ontest = [test_mapping[f] for f in feat_ontest]
            good_ontest = [f for f in un_map_ontest if f in good_features]
            score_ontest = score_test_rec(feat_ontest)
            
            __write_row(html, number_of_test, good_features_train, good_features_test, good_phase, melif_phase.best_point, rec_score_phase, prec_score_phase, good_m, melif.best_point, score_m, good_ontest, melif_ontest.best_point, score_ontest)

            number_of_test += 1
    return number_of_test
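
Note: every example relies on project helpers that are not shown here (feature_mask, the feature-level losses loss_prec / loss_rec / loss_f1 / loss_2phase, select_k_best, GLOB_MEASURE, starting_dict, and the MeLiF variants). As a reading aid only, below is a minimal sketch of what feature_mask and a precision-style loss plausibly look like, inferred from how they are called above; the real implementations may differ.

import numpy as np
from sklearn.metrics import precision_score

def feature_mask(n_features, good_features):
    # 0/1 label per column: 1 marks a known good feature (used to stratify the feature split).
    mask = np.zeros(n_features, dtype=int)
    mask[list(good_features)] = 1
    return mask

def loss_prec(selected, good_features, mapping):
    # Map fold-local indices back to original column ids and score the
    # selection's precision against the known good features.
    selected_orig = {mapping[f] for f in selected}
    columns = sorted(mapping.values())
    y_true = [1 if c in good_features else 0 for c in columns]
    y_pred = [1 if c in selected_orig else 0 for c in columns]
    return precision_score(y_true, y_pred)
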
Example 2
def methodology_test(html, X, y, good_features, number_of_test, kernel_parameters, scores):
    feature_split = StratifiedKFold(6, shuffle=True)
    feature_marks = feature_mask(X.shape[1], good_features)
    for feature_tv, feature_test in feature_split.split(X.T, feature_marks):
        tv_mapping = {i:f for i, f in enumerate(feature_tv)}
        test_mapping = {i:f for i, f in enumerate(feature_test)}
        
        good_tv = [k for k, v in tv_mapping.items() if v in good_features]
        
        good_train = random.sample(good_tv, 20)
        good_validate = list(set(good_tv).difference(set(good_train)))
        best_score_prec = 0.0
        best_score_rec = 0.0
        best_kernel = ''
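        # Try each kernel; keep the one with the best precision on the validation
        # split of the good features, breaking ties by recall.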
        for kernel in kernel_parameters:
            fs_alg = SemiSupFS(kernel)
            fs_alg.run(X[:, feature_tv], good_train)
            features_semi = fs_alg.selected_features

            # orig_true = [tv_mapping[f] for f in good_validate]
            # orig_sel = [tv_mapping[f] for f in features_semi]
            rec = loss_no_mapping(features_semi, good_validate, len(feature_tv), recall_score)
            prec = loss_no_mapping(features_semi, good_validate, len(feature_tv), precision_score)
            if best_score_prec < prec:
                best_score_prec = prec
                best_score_rec = rec
                best_kernel = kernel
            elif best_score_prec == prec and best_score_rec < rec:
                best_score_rec = rec
                best_kernel = kernel
        if best_kernel == '':
            continue
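        # Refit with the best kernel on the full matrix and score the selection
        # against the good features of the held-out feature fold.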
        good_tv_v = [f for f in feature_tv if f in good_features]
        fs_alg = SemiSupFS(best_kernel)
        fs_alg.run(X, good_tv_v) 
        features_semi = fs_alg.selected_features
        good_test = [f for f in feature_test if f in good_features]
        score_test = (loss_no_mapping(features_semi, good_test, X.shape[1], recall_score),
                      loss_no_mapping(features_semi, good_test, X.shape[1], precision_score))
        
        # no_test_list = list(set(range(0, X.shape[1])).difference(set(good_test)))

        __write_row(html, number_of_test, good_tv_v, good_test, best_kernel, features_semi, score_test)

        scores[len(features_semi)].append(score_test)

        number_of_test += 1
    return number_of_test
Example 3
def methodology_test(X, y, good_features, number_of_test, scores_filters, k):
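    # For each object split, fit every univariate filter on the test feature fold
    # and record its recall/precision under the two-phase loss.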
    feature_split = StratifiedKFold(5, shuffle=True)
    feature_marks = feature_mask(X.shape[1], good_features)

    filters = list(
        map(lambda measure: UnivariateFilter(measure, select_k_best(k)),
            GLOB_MEASURE.values()))

    for feature_train, feature_test in feature_split.split(X.T, feature_marks):

        train_mapping = {i: f for i, f in enumerate(feature_train)}
        test_mapping = {i: f for i, f in enumerate(feature_test)}

        sample_split = StratifiedKFold(5)

        for sample_train, sample_test in sample_split.split(X, y):
            print('new test number:', number_of_test)

            X_ftrain = X[:, feature_train]
            X_ftest = X[:, feature_test]
            good_features_test = [
                value for value in test_mapping.values()
                if value in good_features
            ]
            good_features_train = [
                value for value in train_mapping.values()
                if value in good_features
            ]

            score_test_2phase = partial(loss_2phase,
                                        good_features=good_features,
                                        mapping=test_mapping)

            for filter_ in filters:
                filter_.fit(X_ftest[sample_train], y[sample_train])
                sel_feat = filter_.selected_features
                rec, prec = score_test_2phase(sel_feat)
                scores_filters.append([rec, prec, k, filter_.measure.__name__])

            number_of_test += 1
    return number_of_test
Example 4
def methodology_test(X, y, good_features, number_of_test, scores, k):
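    # Variant where the test feature fold is rebuilt to contain exactly k randomly
    # sampled good features (see feature_test_fin below) before comparing the
    # precision-guided, classic and test-trained MeLiF selections.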
    feature_split = StratifiedKFold(5, shuffle=True)
    feature_marks = feature_mask(X.shape[1], good_features)

    estimator = SVC()
    filters = list(
        map(lambda measure: UnivariateFilter(measure, select_k_best(30)),
            GLOB_MEASURE.values()))
    param_grid = starting_dict
    delta = 0.1

    for feature_train, feature_test in feature_split.split(X.T, feature_marks):

        feature_train_good = [f for f in feature_train if f in good_features]
        feature_test_good = [f for f in feature_test if f in good_features]

        # good_train_samp = random.sample(feature_train_good, k)
        good_test_samp = random.sample(feature_test_good, k)

        # print('good features train', feature_train_good)
        # print('good features train sampled', good_train_samp)

        # print('good features test', feature_test_good)
        # print('good features test sampled', good_test_samp)
        # print(sorted(feature_train))
        # print(sorted(feature_train_good))
        # feature_train_del = np.setdiff1d(feature_train, feature_train_good)
        # print('confirm deletion', set(feature_train_del).intersection(set(feature_train_good)))
        # feature_train_fin = np.append(feature_train_del, good_train_samp)
        # print('confirm append', set(feature_train_fin).intersection(set(good_train_samp)))

        feature_test_del = np.setdiff1d(feature_test, feature_test_good)
        feature_test_fin = np.append(feature_test_del, good_test_samp)

        train_mapping = {i: f for i, f in enumerate(feature_train)}
        test_mapping = {i: f for i, f in enumerate(feature_test_fin)}

        sample_split = StratifiedKFold(5)

        for sample_train, sample_test in sample_split.split(X, y):
            print('new test number:', number_of_test)

            X_ftrain = X[:, feature_train]
            X_ftest = X[:, feature_test_fin]
            # good_features_test = [value for value in test_mapping.values() if value in good_features]
            # good_features_train = [value for value in train_mapping.values() if value in good_features]

            # train and test melif on recall
            # score_train_rec = partial(loss_rec, good_features=good_features, mapping=train_mapping)
            # score_test_rec = partial(loss_rec, good_features=good_features, mapping=test_mapping)

            # melif_rec = MelifLossF(filters, score_train_rec)
            # melif_rec.fit(X_ftrain[sample_train], y[sample_train], select_k_best(24), delta=delta, points=ParameterGrid(param_grid))
            # melif_rec.run()
            # feat_rec = melif_rec.transform(X_ftest[sample_train], y[sample_train], select_k_best(k))
            # sel_rec = [test_mapping[f] for f in feat_rec]
            # good_rec = [test_mapping[f] for f in feat_rec if test_mapping[f] in good_features]

            # train and test melif on precision
            score_train_prec = partial(loss_prec,
                                       good_features=good_features,
                                       mapping=train_mapping)
            score_test_prec = partial(loss_prec,
                                      good_features=good_features,
                                      mapping=test_mapping)

            melif_prec = MelifLossF(filters, score_train_prec)
            melif_prec.fit(X_ftrain[sample_train],
                           y[sample_train],
                           select_k_best(24),
                           delta=delta,
                           points=ParameterGrid(param_grid))
            melif_prec.run()
            feat_prec = melif_prec.transform(X_ftest[sample_train],
                                             y[sample_train], select_k_best(k))
            sel_prec = [test_mapping[f] for f in feat_prec]
            good_prec = [
                test_mapping[f] for f in feat_prec
                if test_mapping[f] in good_features
            ]

            # train and test melif on f1 score
            # score_train_f1 = partial(loss_f1, good_features=good_features, mapping=train_mapping)
            # score_test_f1 = partial(loss_f1, good_features=good_features, mapping=test_mapping)

            # melif_f1 = MelifLossF(filters, score_train_f1)
            # melif_f1.fit(X_ftrain[sample_train], y[sample_train], select_k_best(24), delta=delta, points=ParameterGrid(param_grid))
            # melif_f1.run()
            # feat_f1 = melif_f1.transform(X_ftest[sample_train], y[sample_train], select_k_best(k))
            # sel_f1 = [test_mapping[f] for f in feat_f1]
            # good_f1 = [test_mapping[f] for f in feat_f1 if test_mapping[f] in good_features]

            # # train and test melif on 2phase
            # score_train_2phase = partial(loss_2phase, good_features=good_features, mapping=train_mapping)
            score_test_2phase = partial(loss_2phase,
                                        good_features=good_features,
                                        mapping=test_mapping)

            # melif_2phase = Melif2Phase(filters, score_train_2phase)
            # melif_2phase.fit(X_ftrain[sample_train], y[sample_train], select_k_best(24), delta=delta, points=ParameterGrid(param_grid))
            # melif_2phase.run()
            # feat_2phase = melif_2phase.transform(X_ftest[sample_train], y[sample_train], select_k_best(k))
            # sel_2phase = [test_mapping[f] for f in feat_2phase]
            # good_2phase = [test_mapping[f] for f in feat_2phase if test_mapping[f] in good_features]

            # train classic melif (classifier F1 on objects)
            score = f1_score
            melif = Melif(filters, score)
            melif.fit(X_ftrain[sample_train],
                      y[sample_train],
                      estimator,
                      select_k_best(24),
                      X_ftrain[sample_test],
                      y[sample_test],
                      delta=delta,
                      points=ParameterGrid(param_grid))
            melif.run()
            feat_m = melif.transform(X_ftest[sample_train], y[sample_train],
                                     select_k_best(k))
            sel_m = [test_mapping[f] for f in feat_m]
            good_m = [
                test_mapping[f] for f in feat_m
                if test_mapping[f] in good_features
            ]

            # train melif straight on test features
            score = f1_score
            melif_test = Melif(filters, score)
            melif_test.fit(X_ftest[sample_train],
                           y[sample_train],
                           estimator,
                           select_k_best(k),
                           X_ftest[sample_test],
                           y[sample_test],
                           delta=delta,
                           points=ParameterGrid(param_grid))
            feat_test = melif_test.run()
            sel_test = [test_mapping[f] for f in feat_test]
            good_test = [
                test_mapping[f] for f in feat_test
                if test_mapping[f] in good_features
            ]

            # goods = [sel_rec, sel_prec, sel_f1, sel_2phase, sel_m]
            # best_points = [melif_rec.best_point, melif_prec.best_point, melif_f1.best_point, melif_2phase.best_point, melif.best_point]
            # scores = [score_test_2phase(feat_rec), score_test_2phase(feat_prec), score_test_2phase(feat_f1), score_test_2phase(feat_2phase), score_test_2phase(feat_m)]

            # rec_rec, prec_rec = score_test_2phase(feat_rec)
            # scores.append([rec_rec, prec_rec, k, 'recall'])
            rec_prec, prec_prec = score_test_2phase(feat_prec)
            scores.append([rec_prec, prec_prec, k, 'precision'])
            # rec_f1, prec_f1 = score_test_2phase(feat_f1)
            # scores.append([rec_f1, prec_f1, k, 'f1_feature'])
            # rec_2phase, prec_2phase = score_test_2phase(feat_2phase)
            # scores.append([rec_2phase, prec_2phase, k, '2phase'])
            rec_m, prec_m = score_test_2phase(feat_m)
            scores.append([rec_m, prec_m, k, 'F1 measure on objects'])
            rec_test, prec_test = score_test_2phase(feat_test)
            scores.append([rec_test, prec_test, k, 'on test features'])

            number_of_test += 1
    return number_of_test
Example 5
def methodology_test(X, y, good_features, other_good, number_of_test):
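    # The known good features are removed from the test feature fold entirely; the
    # precision-guided selection is then scored against other_good_test to check
    # whether it recovers the remaining relevant features.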
    feature_split = StratifiedKFold(5, shuffle=True)
    feature_marks = feature_mask(X.shape[1], good_features)

    estimator = SVC()
    filters = list(
        map(lambda measure: UnivariateFilter(measure, select_k_best(30)),
            GLOB_MEASURE.values()))
    param_grid = starting_dict
    delta = 0.1

    for feature_train, feature_test in feature_split.split(X.T, feature_marks):

        other_good_train = [f for f in feature_train if f in other_good]
        other_good_test = [f for f in feature_test if f in other_good]
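        # Drop the known good features from the test fold entirely.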
        feature_test_cut = [f for f in feature_test if f not in good_features]

        train_mapping = {i: f for i, f in enumerate(feature_train)}
        test_mapping = {i: f for i, f in enumerate(feature_test_cut)}

        sample_split = StratifiedKFold(5)

        for sample_train, sample_test in sample_split.split(X, y):
            print('new test number:', number_of_test)

            X_ftrain = X[:, feature_train]
            X_ftest = X[:, feature_test_cut]
            good_features_test = [
                value for value in test_mapping.values()
                if value in good_features
            ]
            good_features_train = [
                value for value in train_mapping.values()
                if value in good_features
            ]

            score_train_prec = partial(loss_prec,
                                       good_features=good_features,
                                       mapping=train_mapping)
            score_test_prec = partial(loss_prec,
                                      good_features=good_features,
                                      mapping=test_mapping)

            melif_prec = MelifLossF(filters, score_train_prec)
            melif_prec.fit(X_ftrain[sample_train],
                           y[sample_train],
                           select_k_best(24),
                           delta=delta,
                           points=ParameterGrid(param_grid))
            melif_prec.run()
            feat_prec = melif_prec.transform(X_ftest[sample_train],
                                             y[sample_train], select_k_best(6))
            sel_prec = [test_mapping[f] for f in feat_prec]
            good_prec = [
                test_mapping[f] for f in feat_prec
                if test_mapping[f] in good_features
            ]

            score_test_2phase = partial(loss_2phase,
                                        good_features=other_good_test,
                                        mapping=test_mapping)
            print('selected features', sorted(sel_prec))
            print('other from top', sorted(other_good_test))
            print('recall and precision', score_test_2phase(feat_prec))

            number_of_test += 1
    return number_of_test
Example 6
def methodology_test(X, y, good_features, number_of_test, train_percentage,
                     test_percentage, scores):
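    # 'Stable' MeLiF variants: fitted with train_percentage and evaluated with
    # test_percentage, once per feature fold (no inner split over objects).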
    feature_split = StratifiedKFold(5, shuffle=True)
    feature_marks = feature_mask(X.shape[1], good_features)

    estimator = SVC()
    filters = list(
        map(lambda measure: UnivariateFilter(measure, select_k_best(30)),
            GLOB_MEASURE.values()))
    param_grid = starting_dict
    delta = 0.1

    for feature_train, feature_test in feature_split.split(X.T, feature_marks):
        train_mapping = {i: f for i, f in enumerate(feature_train)}
        test_mapping = {i: f for i, f in enumerate(feature_test)}

        print('new test number:', number_of_test)
        X_ftrain = X[:, feature_train]
        X_ftest = X[:, feature_test]
        good_features_test = [
            value for value in test_mapping.values() if value in good_features
        ]
        good_features_train = [
            value for value in train_mapping.values() if value in good_features
        ]

        # train and test melif on recall
        # score_train_rec = partial(loss_rec, good_features=good_features, mapping=train_mapping)
        # score_test_rec = partial(loss_rec, good_features=good_features, mapping=test_mapping)

        # melif_rec = MelifLossFMeta(filters, score_train_rec)
        # melif_rec.fit(X_ftrain[sample_train], y[sample_train], delta=delta, points=ParameterGrid(param_grid))
        # melif_rec.run()
        # feat_rec = melif_rec.transform(X_ftest[sample_train], y[sample_train])
        # sel_rec = [test_mapping[f] for f in feat_rec]
        # good_rec = [test_mapping[f] for f in feat_rec if test_mapping[f] in good_features]
        # print('recall passed')
        # train and test melif on precision
        score_train_prec = partial(loss_prec,
                                   good_features=good_features,
                                   mapping=train_mapping)
        score_test_prec = partial(loss_prec,
                                  good_features=good_features,
                                  mapping=test_mapping)

        melif_prec = MelifLossFStable(filters, score_train_prec)
        melif_prec.fit(X_ftrain,
                       y,
                       train_percentage,
                       delta=delta,
                       points=ParameterGrid(param_grid))
        melif_prec.run()
        feat_prec = melif_prec.transform(X_ftest, y, test_percentage)
        sel_prec = [test_mapping[f] for f in feat_prec]
        good_prec = [
            test_mapping[f] for f in feat_prec
            if test_mapping[f] in good_features
        ]
        print('precision passed')
        # train and test melif on f1 score
        score_train_f1 = partial(loss_f1,
                                 good_features=good_features,
                                 mapping=train_mapping)
        score_test_f1 = partial(loss_f1,
                                good_features=good_features,
                                mapping=test_mapping)

        melif_f1 = MelifLossFStable(filters, score_train_f1)
        melif_f1.fit(X_ftrain,
                     y,
                     train_percentage,
                     delta=delta,
                     points=ParameterGrid(param_grid))
        melif_f1.run()
        feat_f1 = melif_f1.transform(X_ftest, y, test_percentage)
        sel_f1 = [test_mapping[f] for f in feat_f1]
        good_f1 = [
            test_mapping[f] for f in feat_f1
            if test_mapping[f] in good_features
        ]
        print('f1 passed')
        # train and test melif on 2phase
        score_train_2phase = partial(loss_2phase,
                                     good_features=good_features,
                                     mapping=train_mapping)
        score_test_2phase = partial(loss_2phase,
                                    good_features=good_features,
                                    mapping=test_mapping)

        melif_2phase = Melif2PhaseStable(filters, score_train_2phase)
        melif_2phase.fit(X_ftrain,
                         y,
                         train_percentage,
                         delta=delta,
                         points=ParameterGrid(param_grid))
        melif_2phase.run()
        feat_2phase = melif_2phase.transform(X_ftest, y, test_percentage)
        sel_2phase = [test_mapping[f] for f in feat_2phase]
        good_2phase = [
            test_mapping[f] for f in feat_2phase
            if test_mapping[f] in good_features
        ]
        print('2phase passed')
        # train classic melif
        # score = f1_score
        # melif = Melif(filters, score)
        # melif.fit(X_ftrain[sample_train], y[sample_train], estimator, select_k_best(24), X_ftrain[sample_test], y[sample_test], delta=delta, points=ParameterGrid(param_grid))
        # melif.run()
        # feat_m = melif.transform(X_ftest[sample_train], y[sample_train], select_k_best(6))
        # sel_m = [test_mapping[f] for f in feat_m]
        # good_m = [test_mapping[f] for f in feat_m if test_mapping[f] in good_features]

        # goods = [sel_rec, sel_prec, sel_f1, sel_2phase, sel_m]
        # best_percentages = [melif_rec.best_percentage, melif_prec.best_percentage, melif_f1.best_percentage, melif_2phase.best_percentage]
        # best_points = [melif_rec.best_point, melif_prec.best_point, melif_f1.best_point, melif_2phase.best_point, melif.best_point]
        # scores_html = [score_test_2phase(feat_rec), score_test_2phase(feat_prec), score_test_2phase(feat_f1), score_test_2phase(feat_2phase), score_test_2phase(feat_m)]

        # goods = [sel_prec, sel_f1, sel_2phase]
        # best_percentages = [melif_prec.best_percentage, melif_f1.best_percentage, melif_2phase.best_percentage]
        # best_points = [melif_prec.best_point, melif_f1.best_point, melif_2phase.best_point]
        # scores_html = [score_test_2phase(feat_prec), score_test_2phase(feat_f1), score_test_2phase(feat_2phase)]

        # __write_row(html, number_of_test, good_features_train, good_features_test, goods, best_points, scores_html, best_percentages)
        # html.flush()

        # rec_rec, prec_rec = score_test_2phase(feat_rec)
        # scores.append([rec_rec, prec_rec, melif_rec.best_percentage, 'recall'])
        rec_prec, prec_prec = score_test_2phase(feat_prec)
        scores.append([rec_prec, prec_prec, test_percentage, 'precision'])
        rec_f1, prec_f1 = score_test_2phase(feat_f1)
        scores.append(
            [rec_f1, prec_f1, test_percentage, 'F1 measure on features'])
        rec_2phase, prec_2phase = score_test_2phase(feat_2phase)
        scores.append(
            [rec_2phase, prec_2phase, test_percentage, 'two-phase measure'])
        # rec_m, prec_m = score_test_2phase(feat_m)
        # scores.append([rec_m, prec_m, 'f1_object'])
        number_of_test += 1
    return number_of_test
Example 7
def methodology_test(html, X, y, good_features, number_of_test):
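    # Compares three selections per fold and writes them into one HTML row:
    # precision-guided MeLiF, classic MeLiF (classifier F1), and MeLiF trained
    # directly on the test feature fold.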
    feature_split = StratifiedKFold(5, shuffle=True)
    feature_marks = feature_mask(X.shape[1], good_features)

    estimator = SVC()
    filters = list(map(lambda measure: UnivariateFilter(measure, select_k_best(30)), GLOB_MEASURE.values()))
    param_grid = starting_dict
    delta = 0.1      

    for feature_train, feature_test in feature_split.split(X.T, feature_marks):
        
        train_mapping = {i:f for i, f in enumerate(feature_train)}
        test_mapping = {i:f for i, f in enumerate(feature_test)}
        
        sample_split = StratifiedKFold(5)
        
        for sample_train, sample_test in sample_split.split(X, y):
            print('new test number:', number_of_test)
            
            X_ftrain = X[:, feature_train]
            X_ftest = X[:, feature_test]
            good_features_test = [value for value in test_mapping.values() if value in good_features]
            good_features_train = [value for value in train_mapping.values() if value in good_features]

            # train and test melif on recall
            # score_train_rec = partial(loss_rec, good_features=good_features, mapping=train_mapping)
            # score_test_rec = partial(loss_rec, good_features=good_features, mapping=test_mapping)

            # melif_rec = MelifLossF(filters, score_train_rec)
            # melif_rec.fit(X_ftrain[sample_train], y[sample_train], select_k_best(24), delta=delta, points=ParameterGrid(param_grid))
            # melif_rec.run()
            # feat_rec = melif_rec.transform(X_ftest[sample_train], y[sample_train], select_k_best(6))
            # sel_rec = [test_mapping[f] for f in feat_rec]
            # good_rec = [test_mapping[f] for f in feat_rec if test_mapping[f] in good_features]

            # train and test melif on precision
            score_train_prec = partial(loss_prec, good_features=good_features, mapping=train_mapping)
            score_test_prec = partial(loss_prec, good_features=good_features, mapping=test_mapping)
            
            melif_prec = MelifLossF(filters, score_train_prec)
            melif_prec.fit(X_ftrain[sample_train], y[sample_train], select_k_best(24), delta=delta, points=ParameterGrid(param_grid))
            melif_prec.run()
            feat_prec = melif_prec.transform(X_ftest[sample_train], y[sample_train], select_k_best(6))
            sel_prec = [test_mapping[f] for f in feat_prec]
            good_prec = [test_mapping[f] for f in feat_prec if test_mapping[f] in good_features]

            # train and test melif on f1 score            
            # score_train_f1 = partial(loss_f1, good_features=good_features, mapping=train_mapping)
            # score_test_f1 = partial(loss_f1, good_features=good_features, mapping=test_mapping)
            
            # melif_f1 = MelifLossF(filters, score_train_f1)
            # melif_f1.fit(X_ftrain[sample_train], y[sample_train], select_k_best(24), delta=delta, points=ParameterGrid(param_grid))
            # melif_f1.run()
            # feat_f1 = melif_f1.transform(X_ftest[sample_train], y[sample_train], select_k_best(6))
            # sel_f1 = [test_mapping[f] for f in feat_f1]
            # good_f1 = [test_mapping[f] for f in feat_f1 if test_mapping[f] in good_features]

            # # train and test melif on 2phase
            # score_train_2phase = partial(loss_2phase, good_features=good_features, mapping=train_mapping)
            score_test_2phase = partial(loss_2phase, good_features=good_features, mapping=test_mapping)
            
            # melif_2phase = Melif2Phase(filters, score_train_2phase)
            # melif_2phase.fit(X_ftrain[sample_train], y[sample_train], select_k_best(24), delta=delta, points=ParameterGrid(param_grid))
            # melif_2phase.run()
            # feat_2phase = melif_2phase.transform(X_ftest[sample_train], y[sample_train], select_k_best(6))
            # sel_2phase = [test_mapping[f] for f in feat_2phase]
            # good_2phase = [test_mapping[f] for f in feat_2phase if test_mapping[f] in good_features]

            # train classic melif (classifier F1 on objects)
            score = f1_score
            melif = Melif(filters, score)
            melif.fit(X_ftrain[sample_train], y[sample_train], estimator, select_k_best(24), X_ftrain[sample_test], y[sample_test], delta=delta, points=ParameterGrid(param_grid))
            melif.run()
            feat_m = melif.transform(X_ftest[sample_train], y[sample_train], select_k_best(6))
            sel_m = [test_mapping[f] for f in feat_m]
            good_m = [test_mapping[f] for f in feat_m if test_mapping[f] in good_features]

            # melif on test
            score = f1_score
            melif_test = Melif(filters, score)
            melif_test.fit(X_ftest[sample_train], y[sample_train], estimator, select_k_best(6), X_ftest[sample_test], y[sample_test], delta=delta, points=ParameterGrid(param_grid))
            feat_test = melif_test.run()
            sel_test = [test_mapping[f] for f in feat_test]
            good_test = [test_mapping[f] for f in feat_test if test_mapping[f] in good_features]
            
            # goods = [sel_rec, sel_prec, sel_f1, sel_2phase, sel_m]
            # best_points = [melif_rec.best_point, melif_prec.best_point, melif_f1.best_point, melif_2phase.best_point, melif.best_point]
            # scores = [score_test_2phase(feat_rec), score_test_2phase(feat_prec), score_test_2phase(feat_f1), score_test_2phase(feat_2phase), score_test_2phase(feat_m)]
            goods = [sel_prec, sel_m, sel_test]
            best_points = [melif_prec.best_point, melif.best_point, melif_test.best_point]
            scores = [score_test_2phase(feat_prec), score_test_2phase(feat_m), score_test_2phase(feat_test)]
            
            __write_row(html, number_of_test, good_features_train, good_features_test, goods, best_points, scores)

            number_of_test += 1
    return number_of_test
Example 8
def methodology_test(X, y, good_features, number_of_test, scores):
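    # Broad benchmark per feature fold: stable MeLiF variants with different
    # feature-level losses, classic MeLiF, plain univariate filters, and
    # forward/backward selection wrappers.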
    feature_split = StratifiedKFold(5, shuffle=True)
    feature_marks = feature_mask(X.shape[1], good_features)

    estimator = SVC()
    filters = list(map(lambda measure: UnivariateFilter(measure, partial(select_by_percentage, percent=80)), GLOB_MEASURE.values()))
    param_grid = starting_dict
    delta = 0.1      
    for feature_train, feature_test in feature_split.split(X.T, feature_marks):
        train_mapping = {i:f for i, f in enumerate(feature_train)}
        test_mapping = {i:f for i, f in enumerate(feature_test)}
        
        
        print('new test number:', number_of_test)
        X_ftrain = X[:, feature_train]
        X_ftest = X[:, feature_test]
        good_features_test = [value for value in test_mapping.values() if value in good_features]
        good_features_train = [value for value in train_mapping.values() if value in good_features]

        score_train_prec = partial(loss_prec, good_features=good_features, mapping=train_mapping)
        score_test_prec = partial(loss_prec, good_features=good_features, mapping=test_mapping)
        
        melif_prec = MelifLossFStable(filters, score_train_prec)
        melif_prec.fit(X_ftrain, y, 80, delta=delta, points=ParameterGrid(param_grid))
        melif_prec.run()
        feat_prec = melif_prec.transform(X_ftest, y, 80)
        sel_prec = [test_mapping[f] for f in feat_prec]
        good_prec = [test_mapping[f] for f in feat_prec if test_mapping[f] in good_features]
        print('precision passed')
        # train and test melif on f1 score            
        score_train_f1 = partial(loss_f1, good_features=good_features, mapping=train_mapping)
        score_test_f1 = partial(loss_f1, good_features=good_features, mapping=test_mapping)
        
        melif_f1 = MelifLossFStable(filters, score_train_f1)
        melif_f1.fit(X_ftrain, y, 80, delta=delta, points=ParameterGrid(param_grid))
        melif_f1.run()
        feat_f1 = melif_f1.transform(X_ftest, y, 80)
        sel_f1 = [test_mapping[f] for f in feat_f1]
        good_f1 = [test_mapping[f] for f in feat_f1 if test_mapping[f] in good_features]
        print('f1 passed')
        # train and test melif on 2phase
        score_train_2phase = partial(loss_2phase, good_features=good_features, mapping=train_mapping)
        score_test_2phase = partial(loss_2phase, good_features=good_features, mapping=test_mapping)
        
        melif_2phase = Melif2PhaseStable(filters, score_train_2phase)
        melif_2phase.fit(X_ftrain, y, 80, delta=delta, points=ParameterGrid(param_grid))
        melif_2phase.run()
        feat_2phase = melif_2phase.transform(X_ftest, y, 80)
        sel_2phase = [test_mapping[f] for f in feat_2phase]
        good_2phase = [test_mapping[f] for f in feat_2phase if test_mapping[f] in good_features]
        print('2phase passed')
        
        rec_prec, prec_prec = score_test_2phase(feat_prec)
        scores.append([rec_prec, prec_prec, 'precision'])
        rec_f1, prec_f1 = score_test_2phase(feat_f1)
        scores.append([rec_f1, prec_f1, 'F1 measure on features'])
        rec_2phase, prec_2phase = score_test_2phase(feat_2phase)
        scores.append([rec_2phase, prec_2phase, 'two-phase measure'])
        
        sample_split = StratifiedKFold(5, shuffle=True)
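        # Only the first object-level split is used for the classic MeLiF runs (note the break).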
        for sample_train, sample_test in sample_split.split(X, y):
            
            melif_usual = MelifStable(filters, f1_score)
            melif_usual.fit(X_ftrain[sample_train], y[sample_train], estimator, X_ftrain[sample_test], y[sample_test], delta=delta, points=ParameterGrid(basic_dict))
            melif_usual.run()
            feat_usual = melif_usual.transform(X_ftest, y)
            rec_usual, prec_usual = score_test_2phase(feat_usual)
            scores.append([rec_usual, prec_usual, 'F1 measure on objects'])
            print('basic melif passed')
            melif_ontest = MelifStable(filters, f1_score)
            melif_ontest.fit(X_ftest[sample_train], y[sample_train], estimator, X_ftest[sample_test], y[sample_test], delta=delta, points=ParameterGrid(basic_dict))
            feat_ontest = melif_ontest.run()
            rec_ontest, prec_ontest = score_test_2phase(feat_ontest)
            scores.append([rec_ontest, prec_ontest, 'F1 measure on test features only'])
            print('ontest melif passed')
            break

        # Univariate filter baselines fitted directly on the test feature fold.
        for filter_ in filters:
            filter_.fit(X_ftest, y)
            sel_feat = filter_.selected_features
            rec, prec = score_test_2phase(sel_feat)
            scores.append([rec, prec, filter_.measure.__name__])
        print('filters passed')

        # Wrapper baselines: sequential forward and backward selection of 6 features, scored by classifier F1.
        wrapper_forw = SequentialForwardSelection(estimator, 6, make_scorer(f1_score))
        wrapper_forw.fit(X_ftest, y)
        sel_feat = wrapper_forw.selected_features
        rec, prec = score_test_2phase(sel_feat)
        scores.append([rec, prec, 'sequential forward selection'])
        print('seq wrapper passed')

        wrapper_back = BackwardSelection(estimator, 6, make_scorer(f1_score))
        wrapper_back.fit(X_ftest, y)
        sel_feat = wrapper_back.selected_features
        rec, prec = score_test_2phase(sel_feat)
        scores.append([rec, prec, 'backward selection'])

        print('back wrapper passed')
        # train casual melif
        # score = f1_score
        # melif = Melif(filters, score)
        # melif.fit(X_ftrain[sample_train], y[sample_train], estimator, select_k_best(24), X_ftrain[sample_test], y[sample_test], delta=delta, points=ParameterGrid(param_grid))
        # melif.run()
        # feat_m = melif.transform(X_ftest[sample_train], y[sample_train], select_k_best(6))
        # sel_m = [test_mapping[f] for f in feat_m]
        # good_m = [test_mapping[f] for f in feat_m if test_mapping[f] in good_features]
        
        # goods = [sel_rec, sel_prec, sel_f1, sel_2phase, sel_m]
        # best_percentages = [melif_rec.best_percentage, melif_prec.best_percentage, melif_f1.best_percentage, melif_2phase.best_percentage]
        # best_points = [melif_rec.best_point, melif_prec.best_point, melif_f1.best_point, melif_2phase.best_point, melif.best_point]
        # scores_html = [score_test_2phase(feat_rec), score_test_2phase(feat_prec), score_test_2phase(feat_f1), score_test_2phase(feat_2phase), score_test_2phase(feat_m)]

        # goods = [sel_prec, sel_f1, sel_2phase]
        # best_percentages = [melif_prec.best_percentage, melif_f1.best_percentage, melif_2phase.best_percentage]
        # best_points = [melif_prec.best_point, melif_f1.best_point, melif_2phase.best_point]
        # scores_html = [score_test_2phase(feat_prec), score_test_2phase(feat_f1), score_test_2phase(feat_2phase)]

        # __write_row(html, number_of_test, good_features_train, good_features_test, goods, best_points, scores_html, best_percentages)
        # html.flush()

        # rec_rec, prec_rec = score_test_2phase(feat_rec)
        # scores.append([rec_rec, prec_rec, melif_rec.best_percentage, 'recall'])
        # rec_m, prec_m = score_test_2phase(feat_m)
        # scores.append([rec_m, prec_m, 'f1_object'])
        number_of_test += 1
    return number_of_test