# Example no. 1
# 0
def _feature_names(vectorizer):
    """Return the fitted vectorizer's vocabulary terms in column order.

    Prefers ``get_feature_names_out`` and falls back to the older
    ``get_feature_names`` when the former is not available, so the code
    works across scikit-learn releases.
    """
    getter = getattr(vectorizer, 'get_feature_names_out', None)
    if getter is None:
        getter = vectorizer.get_feature_names
    return getter()


def _labelled_frame(matrix, columns, labels):
    """Build a dense feature frame with the 'Class' labels as last column."""
    features = pd.DataFrame(data=matrix.toarray(), columns=columns)
    return pd.concat([features, labels.reset_index(drop=True)], axis=1)


def _fit_and_predict(classifier, train_df, test_df):
    """Fit ``classifier`` on ``train_df`` and predict for ``test_df``.

    Two calling conventions are supported. First the classifier is given
    the whole training frame (features plus trailing 'Class' column) via
    ``fit(train_df)``. If fitting or predicting that way raises, the
    classifier is refitted with the ``fit(X, y)`` convention and asked to
    predict on the raw feature array.
    """
    test_features = test_df.iloc[:, :-1]
    try:
        classifier.fit(train_df)
        return classifier.predict(test_features)
    except Exception:
        # Deliberate best-effort fallback: any failure of the single-frame
        # convention is treated as "this classifier wants fit(X, y)".
        classifier.fit(train_df.iloc[:, :-1], train_df.iloc[:, -1])
        return classifier.predict(test_features.values)


def crossValidation(folds, classifiers, inf, sup):
    """Evaluate each classifier with cross-validation over ``folds``.

    For every fold ``i`` the remaining folds are concatenated into the
    training set, a TF-IDF vectorizer is fitted on the training 'Text'
    column only, and both sets are turned into dense frames whose last
    column is 'Class'. Every classifier is then fitted and scored on the
    held-out fold with ``Accuracy``.

    Args:
        folds: Sequence of DataFrames, each with 'Text' and 'Class'
            columns. At least two folds are needed to run an evaluation,
            because the training set is built from the other folds.
        classifiers: Sequence of classifier objects exposing ``fit`` and
            ``predict`` (see ``_fit_and_predict`` for the accepted
            calling conventions).
        inf: Passed to the vectorizer as ``min_df``.
        sup: Passed to the vectorizer as ``max_df``.

    Returns:
        A tuple ``(acc, acc_mean)``: ``acc[j]`` is the list of per-fold
        scores of ``classifiers[j]`` and ``acc_mean[j]`` is their mean.
        When no fold was evaluated the mean is NaN.
    """
    acc = [[] for _ in classifiers]
    print(acc)
    for i, test_fold in enumerate(folds):
        print('fold: ', i)
        train_fold = pd.concat(
            [df for j, df in enumerate(folds) if j != i],
            ignore_index=True)

        vectorizer = TfidfVectorizer(lowercase=True,
                                     min_df=inf,
                                     max_df=sup,
                                     tokenizer=token_stem,
                                     encoding='ISO-8859-14')
        # Fit on the training text only so the held-out fold does not
        # influence the vocabulary or the IDF weights.
        train_matrix = vectorizer.fit_transform(train_fold['Text'].values)
        test_matrix = vectorizer.transform(test_fold['Text'].values)

        columns = _feature_names(vectorizer)
        train_df = _labelled_frame(train_matrix, columns, train_fold['Class'])
        test_df = _labelled_frame(test_matrix, columns, test_fold['Class'])
        y_true = test_df['Class'].values

        for j, classifier in enumerate(classifiers):
            print('alg: ', j)
            predictions = _fit_and_predict(classifier, train_df, test_df)
            acc[j].append(Accuracy(y_pred=predictions, y_true=y_true))

    # Guard against empty score lists (no folds) instead of dividing by zero.
    acc_mean = [sum(a) / len(a) if a else float('nan') for a in acc]
    return acc, acc_mean