def test_pais_100(N, indicators):
    """Check that with the RNG rigged to 100 every indicator reaches 100%.

    Args:
        N: sample size passed to ``g08.generar_muestra_pais``.
        indicators: iterable of indicator names to verify.
    """
    g08.testing = True
    g08.riggedRandom = 100  # force every simulated roll to the maximum
    muestra = g08.generar_muestra_pais(N)
    for indicator in indicators:
        # with the roll pinned at 100, each indicator must be exactly 100%
        assert g08.show_percentages_indicator(muestra, indicator) == 1.0
def test_pais_90(N, indicators, r):
    """Check that with the RNG rigged to 90 each indicator lands near 90%.

    Args:
        N: sample size passed to ``g08.generar_muestra_pais``.
        indicators: iterable of indicator names to verify.
        r: allowed absolute deviation from the expected 0.9 ratio.
    """
    g08.testing = True
    g08.riggedRandom = 90
    muestra = g08.generar_muestra_pais(N)
    for indicator in indicators:
        pct = g08.show_percentages_indicator(muestra, indicator)
        # sampling noise keeps the observed ratio within +/- r of 0.9
        assert 0.9 - r < pct < 0.9 + r
def averge(n):
    """Average the percentage distributions over 10 generated samples.

    NOTE(review): the name keeps the original typo ("averge") so existing
    callers keep working. Depends on the module-level global ``actual``
    (a province name, or "Pais" for a country-wide sample) — TODO confirm
    it is always set before this is called.

    Args:
        n: sample size for each generated run.

    Returns:
        (keys, averages): category labels from the last run and the
        per-category values averaged across the 10 runs by ``average_aux``.
    """
    corridas_pais = []
    keys = []
    for _ in range(10):
        if actual != "Pais":
            keys, values = show_percentages(
                generar_muestra_provincia(n, actual.upper()))
        else:
            keys, values = show_percentages(generar_muestra_pais(n))
        corridas_pais.append(values)
    # collapse the 10 collected runs into a single averaged series
    corridas_pais = average_aux(corridas_pais)
    return keys, corridas_pais
def gen_dataset(n, provincia, sample_type=1):
    """Generate the working dataset for the requested region.

    Args:
        n: number of samples to generate.
        provincia: province name, or "PAIS" for a country-wide sample.
        sample_type: sampling mode forwarded to the generator.

    Returns:
        The generated sample set.
    """
    if provincia == "PAIS":
        return g08.generar_muestra_pais(n, sample_type)
    return g08.generar_muestra_provincia(n, provincia, sample_type)
def test_distribucion_por_provincia(N):
    """Verify the sample's per-province distribution tracks the census.

    Generates a country-wide sample of size N and asserts that each
    province's observed share differs from its real population share by
    less than one percentage point. Column 32 of a sample row holds the
    province name.

    Args:
        N: sample size passed to ``g08.generar_muestra_pais``.
    """
    provincias = [
        "SAN JOSE", "ALAJUELA", "CARTAGO", "HEREDIA",
        "GUANACASTE", "PUNTARENAS", "LIMON",
    ]
    population = [
        1404242,  # SAN JOSE
        848146,   # ALAJUELA
        490903,   # CARTAGO
        433677,   # HEREDIA
        326953,   # GUANACASTE
        410929,   # PUNTARENAS
        386862,   # LIMON
    ]
    total_pop = sum(population)
    # real share of each province in the national population
    distribucion_real = [p / total_pop for p in population]

    indice = {nombre: i for i, nombre in enumerate(provincias)}
    conteo = [0] * len(provincias)
    muestra = g08.generar_muestra_pais(N)
    for fila in muestra:
        conteo[indice[fila[32]]] += 1
    distribucion_muestra = [c / N for c in conteo]

    for real, observado in zip(distribucion_real, distribucion_muestra):
        # 1-point tolerance; abs(...) < 0.01 is exactly the original
        # strict-inequality band (real - 0.01 < observado < real + 0.01)
        assert abs(observado - real) < 0.01
# --- imports -------------------------------------------------------------
import numpy  # BUGFIX: numpy was used below but never imported (NameError)
import pandas

from tec.ic.ia.pc1 import g08

from keras.layers import Dense, Dropout
from keras.models import Sequential
from keras.utils import np_utils
from keras.wrappers.scikit_learn import KerasClassifier
# NOTE(review): sklearn.cross_validation was removed in modern scikit-learn;
# train_test_split now lives in sklearn.model_selection. Confirm the pinned
# scikit-learn version before changing this import.
from sklearn.cross_validation import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler, StandardScaler

# fix random seed for reproducibility
seed = 5
numpy.random.seed(seed)

# load dataset: one row per generated sample
dataframe = numpy.array(g08.generar_muestra_pais(10000, 0))

# features: drop the first column and the last two columns, then standardize
X = dataframe[:, 1:-2]
scalerX = StandardScaler()
scalerX.fit(X)
X = scalerX.transform(X)

# target: the last column
Y = dataframe[:, -1]

# encode class values as integers
encoder = LabelEncoder()
encoder.fit(Y)
encoded_Y = encoder.transform(Y)
# convert integers to dummy variables (i.e. one hot encoded)
dummy_y = np_utils.to_categorical(encoded_Y)
idxstr = 'predictions_' + str(round_number) predictions[idxstr] += prediction idxstr = 'accuracies_' + str(round_number) accuracies[idxstr].append(accuracy) print("Accuracy: ", accuracy) is_training = ['False' for i in range(len(training_dataset))] best_trees = get_best_trees(trees, accuracies) avg_train_acc = (sum(accuracies['accuracies_1']) + sum( accuracies['accuracies_2']) + sum(accuracies['accuracies_3'])) / 3 final_dict = final_tests(best_trees, test_dataset, predictions, avg_train_acc, is_training) """ print("Results") print(len(final_dict['res_1'])) print(len(final_dict['res_2'])) print(len(final_dict['res_3'])) print(len(final_dict['train_set'])) print(final_dict['err_train']) print(final_dict['err_test']) """ return final_dict if __name__ == '__main__': cross_validate(dataset=g08.generar_muestra_pais(2005, 1))
third = [g08.PARTIDOS2[int(predictions[i])] for i in range(len(predictions))] predictions = model.predict(x_test) third += [g08.PARTIDOS2[int(predictions[i])] for i in range(len(predictions))] third_acc_train = model.score(x_train,y_train.ravel()) third_acc = model.score(x_test,y_test.ravel()) #print(first) print(first_acc) print() #print(second) print(second_acc) print() #print(third) print(third_acc) finalDict = { 'res_1': first, 'res_2': second, 'res_3': third, 'err_train': (first_acc+second_acc+third_acc)/3, 'err_test': (first_acc_train+second_acc_train+third_acc_train)/3, 'train_set': [True]*len(X1)+[False]*len(Y1) } return finalDict execute_model(g08.generar_muestra_pais(10000,1),20)
predict = max(set(bestOccurrences), key=bestOccurrences.count) if predict == testPerson[-1]: accuracy += 1 finalDict[resString].append(g08.PARTIDOS[int(predict)]) accuracy = accuracy / len(dataSet) totalaccuracy += accuracy dataSetIndex += 1 print("Error ", dataSetIndex, ": ", 1 - accuracy) finalDict['err_test'] = totalaccuracy / 3 return finalDict # Returns the best trees for each round based on the precision they had def getBestTrees(trees1, trees2, trees3, accuracies1, accuracies2, accuracies3): print("Best accuracy 1", max(accuracies1)) ind1 = accuracies1.index(max(accuracies1)) print("Best accuracy 2", max(accuracies2)) ind2 = accuracies2.index(max(accuracies2)) print("Best accuracy 3", max(accuracies3)) ind3 = accuracies3.index(max(accuracies3)) return trees1[ind1], trees2[ind2], trees3[ind3] if __name__ == '__main__': cross_validate(dataset=numpy.array(g08.generar_muestra_pais(1003, 1)))