Ejemplo n.º 1
0
def run():
    """Evaluate a fixed set of classifiers on the extracted-features dataset.

    Reads ``results/processed_extracted_features.csv``, drops the ``ids``
    column, scales the features with ``StandardScaler`` and calls
    ``evaluate`` once per classifier, printing each classifier's ``BAS``
    value as soon as it is available.

    Returns:
        pandas.DataFrame: one row per classifier, rounded to 4 decimals.
        ``Classifier``, ``Params``, ``BAS``, ``BER``, ``MCC`` and
        ``Accuracy`` are the leading columns; any other key returned by
        ``evaluate`` follows them.
    """
    print(' => Reading features dataset')
    df = pd.read_csv('results/processed_extracted_features.csv').drop('ids',
                                                                      axis=1)
    y = df['Diagnosis'].values
    # NOTE(review): assumes 'Diagnosis' is the first remaining column, so
    # every column after it is a feature -- confirm against the CSV layout.
    x = df[df.columns[1:]].values
    x = StandardScaler().fit_transform(x)

    # (progress label, classifier, constructor params, fields added to row)
    experiments = [
        ('Naive Bayes', GaussianNB, None, {'Classifier': 'Naive Bayes'}),
        ('Random Forest', RandomForestClassifier, {'n_estimators': 50},
         {'Classifier': 'Random Forest', 'Params': '# Trees: 50'}),
        # The label now matches the learning rate that is actually used.
        ('Ada Boost', AdaBoostClassifier, {'learning_rate': 0.05},
         {'Classifier': 'Ada Boost', 'Params': 'Learning rate: 0.05'}),
        ('SVC', SVC, {'kernel': 'linear', 'C': 0.1},
         {'Classifier': 'SVM', 'Params': 'kernel: linear, C: 0.1'}),
        ('KNN', KNeighborsClassifier, {'n_neighbors': 7},
         {'Classifier': 'KNN', 'Params': 'K: 7'}),
    ]
    leading_columns = ['Classifier', 'Params', 'BAS', 'BER', 'MCC', 'Accuracy']

    # Rows are collected in a list and turned into a frame once:
    # DataFrame.append no longer exists in pandas >= 2.0.
    rows = []
    print(' => Testing classifiers')
    for label, classifier, params, row_fields in experiments:
        print(f' ==> {label} .. ', end='\r')
        if params is None:
            evaluation = evaluate(classifier, x, y)
        else:
            evaluation = evaluate(classifier, x, y, params=params)
        evaluation.update(row_fields)
        rows.append(dict(evaluation))
        print(f' ==> {label} .. {evaluation["BAS"]}')
    print(' => Done!')

    results = pd.DataFrame(rows)
    extra_columns = [c for c in results.columns if c not in leading_columns]
    return results.reindex(columns=leading_columns + extra_columns).round(4)
Ejemplo n.º 2
0
def ffs():
    """Run a greedy forward feature selection seeded with ``CDR``.

    Starting from the single feature ``CDR``, each round evaluates a linear
    SVC (``C=0.1``) on the already selected features plus one remaining
    candidate at a time and keeps the candidate with the highest
    ``SELECTION_CRITERIA`` value. A tie goes to the candidate with the
    larger ``FDR(df)`` entry. Rounds continue until no candidate is left.

    Candidates are taken from ``df.columns[1:]`` in column order, so repeated
    runs visit them in the same order.

    Returns:
        pandas.DataFrame: one row per selected feature, in selection order.
        ``feature`` and ``Accuracy`` are the leading columns; any other key
        returned by ``evaluate`` follows them.
    """
    stdout.write(' => Reading DF')
    df = pd.read_csv(TRAIN_URI)
    stdout.write('\r => Gettting FDR ')
    # NOTE(review): FDR(df) is only indexed by feature name below --
    # presumably a per-feature score; confirm against its definition.
    fdr = FDR(df)
    stdout.write('\r => Initializing sets ')
    selected = ['CDR']
    candidates = [col for col in df.columns[1:] if col != 'CDR']
    Y = df['Diagnosis']

    evaluation = evaluate(SVC,
                          df[selected].values,
                          Y,
                          params={'C': 0.1, 'kernel': 'linear'})
    evaluation['feature'] = 'CDR'
    # Rows are collected in a list and turned into a frame once:
    # DataFrame.append no longer exists in pandas >= 2.0.
    rows = [dict(evaluation)]

    while candidates:
        # No artificial "score 0" incumbent: the first candidate always
        # becomes the incumbent, so zero or negative scores no longer end in
        # a KeyError on the missing 'feature' key.
        best_result = None
        for f in candidates:
            values = df[selected + [f]].values
            evaluation = evaluate(SVC,
                                  values,
                                  Y,
                                  params={'C': 0.1, 'kernel': 'linear'})
            if best_result is None:
                best_result = evaluation
                best_result['feature'] = f
                continue

            score = evaluation[SELECTION_CRITERIA]
            best_score = best_result[SELECTION_CRITERIA]
            # NaN never equals itself; a NaN incumbent must not block every
            # later candidate.
            incumbent_is_nan = best_score != best_score
            wins_tie = (score == best_score
                        and fdr[f] > fdr[best_result['feature']])
            if incumbent_is_nan or score > best_score or wins_tie:
                best_result = evaluation
                best_result['feature'] = f

        rows.append(dict(best_result))
        candidates.remove(best_result['feature'])
        selected.append(best_result['feature'])
        stdout.write('\r ==> %d features selected .. %0.04f sensibility' %
                     (len(rows), best_result[SELECTION_CRITERIA]))
    stdout.write('\n')

    res = pd.DataFrame(rows)
    leading_columns = ['feature', 'Accuracy']
    extra_columns = [c for c in res.columns if c not in leading_columns]
    return res.reindex(columns=leading_columns + extra_columns)
def grid_search():
    """Compare the classifiers and print each one's best configuration.

    Naive Bayes and Random Forest (50 trees) are evaluated once. Ada Boost,
    linear SVC and KNN delegate to the ``grid_search`` function of their own
    module, and the row with the highest ``Accuracy`` is kept. Every
    intermediate table is printed, followed by a summary sorted by descending
    ``Accuracy``. Nothing is returned.
    """
    cols = ['Accuracy', 'BAS', 'BER', 'MCC', 'Sensibility', 'Specificity']

    print(' => Reading features dataset')
    df = pd.read_csv(TRAIN_URI)
    y = df['Diagnosis'].values
    # NOTE(review): assumes 'Diagnosis' is the first column, so every column
    # after it is a feature -- confirm against the CSV layout.
    x = df[df.columns[1:]].values

    # Best rows are collected in a list and turned into a frame once:
    # DataFrame.append no longer exists in pandas >= 2.0.
    best_rows = []

    def record_best(results, label):
        """Print ``results`` and keep its first row tagged with ``label``."""
        print(results)
        best_rows.append({**results.iloc[0].to_dict(), 'Classifier': label})

    print('\nNaive Bayes')
    record_best(pd.DataFrame([evaluate(GaussianNB, x, y)], columns=cols),
                'Naive Bayes')

    print('\nRandom Forest')
    record_best(
        pd.DataFrame(
            [evaluate(RandomForestClassifier, x, y,
                      params={'n_estimators': 50})],
            columns=cols),
        'Random Forest')

    print('\nAda Boost')
    record_best(
        ada_boost.grid_search(x, y).sort_values('Accuracy', ascending=False),
        'Ada Boost')

    print('\nSVC linear')
    record_best(
        linear_svm.grid_search(x, y).sort_values('Accuracy', ascending=False),
        'SVC')

    print('\nKNN')
    record_best(
        knn.grid_search(x, y).sort_values('Accuracy', ascending=False),
        'KNN')

    all_res = pd.DataFrame(best_rows)
    leading_columns = ['Classifier'] + cols
    extra_columns = [c for c in all_res.columns if c not in leading_columns]
    all_res = all_res.reindex(columns=leading_columns + extra_columns)

    print('\n', all_res.sort_values('Accuracy', ascending=False))
Ejemplo n.º 4
0
def grid_search(values, target, verbose=True):
    """Evaluate a linear SVC for several values of ``C``.

    Args:
        values: feature matrix, forwarded unchanged to ``evaluate``.
        target: labels, forwarded unchanged to ``evaluate``.
        verbose: when true, progress messages are written to ``stdout``.

    Returns:
        pandas.DataFrame: one row per tested ``C`` (powers of ten from
        ``10**-3`` to ``10**1``), rounded to 4 decimals. ``C`` and
        ``Accuracy`` are the leading columns; any other key returned by
        ``evaluate`` follows them.
    """
    if verbose:
        stdout.write(' => Best Cs for SVM\n')

    cs = [10**c for c in range(-3, 2)]
    # Rows are collected in a list and turned into a frame once:
    # DataFrame.append no longer exists in pandas >= 2.0.
    rows = []
    for step, c in enumerate(cs, start=1):
        if verbose:
            stdout.write(f'\r ==> SVC .... {step}/{len(cs)}')

        evaluation = evaluate(SVC,
                              values,
                              target,
                              params={
                                  'C': c,
                                  'kernel': 'linear'
                              })
        evaluation.update({'C': c})
        rows.append(dict(evaluation))

    results = pd.DataFrame(rows)
    leading_columns = ['C', 'Accuracy']
    extra_columns = [col for col in results.columns
                     if col not in leading_columns]
    results = results.reindex(columns=leading_columns + extra_columns).round(4)

    if verbose:
        # '\x1b[2K' (capital K) is the ANSI "erase line" sequence.
        stdout.write(
            f'\x1b[2K\r => Best SVM, {results.shape[0]} combs tested!\n')

    return results
def get_best_comb(images, target, msg='', max_distance=3):
    """Evaluate Gaussian Naive Bayes on Haralick features per distance.

    For every distance from 1 to ``max_distance`` the Haralick features are
    extracted with ``extractor.get_haralick``, scaled with
    ``StandardScaler`` and passed to ``evaluator.evaluate``.

    Args:
        images: images forwarded to ``extractor.get_haralick``.
        target: labels forwarded to ``evaluator.evaluate``.
        msg: tag shown in the progress line.
        max_distance: largest distance to test (inclusive); defaults to 3.

    Returns:
        pandas.DataFrame: one row per distance, rounded to 4 decimals,
        holding the keys returned by ``evaluator.evaluate`` plus
        ``Distance``. Picking the best row is left to the caller.
    """
    # Rows are collected in a list and turned into a frame once:
    # DataFrame.append no longer exists in pandas >= 2.0.
    rows = []
    for dist in range(1, max_distance + 1):
        print(f' ==> ({msg}) Distance {dist}/{max_distance} '
              '(all degrees + mean)', end='\r')
        haralick = extractor.get_haralick(images, dist, HARALICK_NAMES)
        values = StandardScaler().fit_transform(haralick)
        res = evaluator.evaluate(GaussianNB, values, target)
        res.update({'Distance': dist})
        rows.append(dict(res))
    return pd.DataFrame(rows).round(4)
Ejemplo n.º 6
0
def grid_search(values, target, verbose=True):
    """Evaluate KNN for several neighbour counts.

    Args:
        values: feature matrix, forwarded unchanged to ``evaluate``.
        target: labels, forwarded unchanged to ``evaluate``.
        verbose: when true, progress messages are written to ``stdout``.

    Returns:
        pandas.DataFrame: one row per tested ``K`` (odd values from 1 to
        19), rounded to 4 decimals. ``K`` and ``Accuracy`` are the leading
        columns; any other key returned by ``evaluate`` follows them.
    """
    if verbose:
        stdout.write(' => Best Ks for KNN\n')

    ks = list(range(1, 20, 2))
    # Rows are collected in a list and turned into a frame once:
    # DataFrame.append no longer exists in pandas >= 2.0.
    rows = []
    for step, k in enumerate(ks, start=1):
        if verbose:
            stdout.write(f'\r ==> KNN .... {step}/{len(ks)}')
        evaluation = evaluate(KNeighborsClassifier,
                              values,
                              target,
                              params={'n_neighbors': k})
        evaluation.update({'K': k})
        rows.append(dict(evaluation))

    results = pd.DataFrame(rows)
    leading_columns = ['K', 'Accuracy']
    extra_columns = [col for col in results.columns
                     if col not in leading_columns]
    results = results.reindex(columns=leading_columns + extra_columns).round(4)

    if verbose:
        # '\x1b[2K' (capital K) is the ANSI "erase line" sequence.
        stdout.write(f'\x1b[2K\r => KNN, {results.shape[0]} combs tested!\n')

    return results
Ejemplo n.º 7
0
def grid_search(values, target, verbose=True):
    """Evaluate AdaBoost for several learning rates.

    Args:
        values: feature matrix, forwarded unchanged to ``evaluate``.
        target: labels, forwarded unchanged to ``evaluate``.
        verbose: when true, progress messages are written to ``stdout``.

    Returns:
        pandas.DataFrame: one row per tested learning rate (0.08 to 0.12 in
        steps of 0.01), rounded to 4 decimals. ``Learning rate`` and
        ``Accuracy`` are the leading columns; any other key returned by
        ``evaluate`` follows them.
    """
    if verbose:
        stdout.write(' => Getting best LRates for AdaBoost\n')

    l_rates = [lr / 1000 for lr in range(80, 121, 10)]
    # Rows are collected in a list and turned into a frame once:
    # DataFrame.append no longer exists in pandas >= 2.0.
    rows = []
    for step, lr in enumerate(l_rates, start=1):
        if verbose:
            stdout.write(f'\r ==> AdaBoost LRates .... {step}/{len(l_rates)}')

        evaluation = evaluate(AdaBoostClassifier,
                              values,
                              target,
                              params={'learning_rate': lr})
        evaluation.update({'Learning rate': lr})
        rows.append(dict(evaluation))

    results = pd.DataFrame(rows)
    leading_columns = ['Learning rate', 'Accuracy']
    extra_columns = [col for col in results.columns
                     if col not in leading_columns]
    results = results.reindex(columns=leading_columns + extra_columns).round(4)

    if verbose:
        # '\x1b[2K' (capital K) is the ANSI "erase line" sequence.
        stdout.write(
            f'\x1b[2K\r => AdaBoost, {results.shape[0]} combs tested! \n')

    return results
Ejemplo n.º 8
0
from lib.evaluator import evaluate
from lib import rimone

# Grid of LBP parameters evaluated on the cup images of the dataset.
POINTS = [6, 7, 8, 9, 10]
RADIUS = [1, 2, 3]

ds = rimone.dataset()

Y = ds.Y

res = pd.DataFrame()
# Rows are collected in a list and the frame is rebuilt from it:
# DataFrame.append no longer exists in pandas >= 2.0.
lbp_cup_rows = []
for p in POINTS:
    for r in RADIUS:
        print(f'p: {p}, r: {r}')
        X = StandardScaler().fit_transform(get_lbp(ds.cups, radius=r, points=p))
        evaluation = evaluate(GaussianNB, X, Y)
        evaluation.update({
            'radius': r,
            'point': p
        })
        lbp_cup_rows.append(dict(evaluation))
        res = pd.DataFrame(lbp_cup_rows).sort_values('Score', ascending=False)
        # Rewritten after every combination, so the file always holds the
        # results gathered so far.
        res.to_csv('results/lbp_cups.csv', index=False)

print(res.sort_values('Score', ascending=False))

POINTS = [6, 7, 8, 9, 10]
RADIUS = [1, 2, 3]


res = pd.DataFrame()
for p in POINTS: