def rodarValidacaoCruzada(dados, classes, nProts, montecarlo, nFolds, np): global pa global k global n global treinamento global classesTreinamento if len(nProts) == 1: nProts = ones(max(classes)+1) * nProts[0] k = sum(nProts) erros = zeros(montecarlo*nFolds) rem = 0.0 pa = shape(dados)[1]/2 if not intervalar: pa = shape(dados)[1] consideradas = zeros(pa) nDados = size(classes) for i in range(montecarlo): indices = arange(nDados) random.shuffle(indices) dadosEmbaralhados = dados[indices,:] classesEmbaralhadas = classes[indices] folds = slvq.separarFolds(dadosEmbaralhados, classesEmbaralhadas, nFolds) for fold in range(nFolds): print i*nFolds + fold [treinamento, classesTreinamento, teste, classesTeste] = slvq.separarConjuntos(folds, dadosEmbaralhados, classesEmbaralhadas, fold) n = shape(treinamento)[0] preds = zeros((30, len(classesTeste))) from tqdm import tqdm for l in tqdm(arange(30)): inicializar(nProts, np) VABC(np, nProts) GBEST = PBEST[indiceGBEST] consideradas = consideradas + around(GBEST[0,:pa]).astype(float) #print sum(removidosPBEST[indiceGBEST]) rem = rem + float(sum(removidosPBEST[indiceGBEST])) preds[l] = testar(teste, classesTeste, GBEST, classesParticulas[indiceGBEST], pesosPBEST[indiceGBEST], removidosPBEST[indiceGBEST]) # [erros[i*nFolds + fold], d] = testar(teste, classesTeste, GBEST, classesParticulas[indiceGBEST], pesosPBEST[indiceGBEST], removidosPBEST[indiceGBEST]) # print erros[i*nFolds + fold] predictions = around(mean(preds, axis=0)) print_confusion_matrix(classesTeste, predictions) exit() print erros print mean(erros) print std(erros) print consideradas / (montecarlo * nFolds) print rem / (montecarlo * nFolds) return erros , "\n erro medio:" , mean(erros) , "\n desvio:" , std(erros) , "\n" , consideradas / (montecarlo * nFolds) , "\n" , rem / (montecarlo * nFolds)
def rodarValidacaoCruzada(dados, classes, nMedias, montecarlo, nFolds): erros = zeros(montecarlo * nFolds) n = size(classes) for i in range(montecarlo): indices = arange(n) random.shuffle(indices) dadosEmbaralhados = dados[indices, :] classesEmbaralhadas = classes[indices] folds = slvq.separarFolds(dadosEmbaralhados, classesEmbaralhadas, nFolds) for fold in range(nFolds): print i * nFolds + fold [treinamento, classesTreinamento, teste, classesTeste] = slvq.separarConjuntos(folds, dadosEmbaralhados, classesEmbaralhadas, fold) preds = zeros((30, len(classesTeste))) from tqdm import tqdm for l in tqdm(arange(30)): [prototipos, classesPrototipos ] = slvq.iniciarPrototiposPorSelecao(treinamento, classesTreinamento, nMedias) [prototipos, pesos, _] = old.fcm.treinar(treinamento, classesTreinamento, prototipos, classesPrototipos, 500) # print testar(teste, classesTeste, prototipos, classesPrototipos, pesos) [prototipos, pesos] = treinar(treinamento, classesTreinamento, prototipos, classesPrototipos, pesos, 0.3) preds[l] = testar(teste, classesTeste, prototipos, classesPrototipos, pesos) predictions = around(mean(preds, axis=0)) print_confusion_matrix(classesTeste, predictions) exit() # erros[i*nFolds + fold] = testar(teste, classesTeste, prototipos, classesPrototipos, pesos) # print erros[i*nFolds + fold] print erros print mean(erros) print std(erros)
# Evaluate IVABC on the first stratified fold of the named dataset:
# 30 independent fit/predict runs are averaged into an ensemble prediction
# and summarized as a confusion matrix.
#
# Relies on names defined elsewhere in the module: `dataset_name`, `n_folds`,
# `params`, `get_dataset`, `IVABC`, `print_confusion_matrix`.
filename = "dados/" + dataset_name + "-headers.csv"
dataset = get_dataset(filename)
X, y = dataset.data, dataset.target
# Interval-valued data: even columns hold lower bounds, odd columns hold
# upper bounds, so per-attribute search bounds come from those slices.
mins = dataset.data[:, ::2].min(0)
maxs = dataset.data[:, 1::2].max(0)
skf = StratifiedKFold(n_splits=n_folds, shuffle=True)
from tqdm import tqdm  # hoisted: was re-imported inside the CV loop
for i, (train_index, test_index) in enumerate(skf.split(X, y)):
    # Only the first fold is evaluated (quick single-split experiment).
    if i == 0:
        X_train, y_train = X[train_index], y[train_index]
        X_test, y_test = X[test_index], y[test_index]
        # 30 independent runs form an ensemble over the test set.
        preds = np.zeros((30, len(y_test)))
        for l in tqdm(np.arange(30)):
            ivabc = IVABC(params["n_particles"], params["n_prots"], mins,
                          maxs, params["alpha"], params["k"],
                          params["max_iter"], params["max_reps"])
            ivabc.fit(X_train, y_train)
            preds[l] = ivabc.predict(X_test)
        # Majority vote via rounded mean of the 30 runs' predictions.
        predictions = np.around(np.mean(preds, axis=0))
        print_confusion_matrix(y_test, predictions)