Example #1
0
def test_pais_100(N, indicators):
    """Check that a fully rigged generator yields 100% for every indicator.

    Puts g08 in testing mode with riggedRandom = 100 so the sample
    generator always produces the rigged value, then asserts each
    indicator's percentage over an N-row country sample is exactly 1.0.
    """
    g08.testing = True
    g08.riggedRandom = 100

    muestra = g08.generar_muestra_pais(N)
    # Iterate the indicators directly instead of indexing by position.
    for indicator in indicators:
        assert g08.show_percentages_indicator(muestra, indicator) == 1.0
Example #2
0
def test_pais_90(N, indicators, r):
    """Check that a 90%-rigged generator yields ~90% for every indicator.

    Puts g08 in testing mode with riggedRandom = 90 and asserts each
    indicator's percentage over an N-row country sample falls inside the
    open interval (0.9 - r, 0.9 + r), where r is the allowed tolerance.
    """
    g08.testing = True
    g08.riggedRandom = 90

    muestra = g08.generar_muestra_pais(N)
    # Iterate the indicators directly instead of indexing by position.
    for indicator in indicators:
        assert 0.9 - r < g08.show_percentages_indicator(muestra, indicator) < 0.9 + r
Example #3
0
def averge(n):
	"""Average the percentage breakdown over 10 sample runs.

	The region is chosen by the module-level ``actual``: the literal
	"Pais" selects a country-wide sample, anything else is treated as a
	province name (upper-cased for the generator).

	Returns (keys, averaged_values); keys come from the last run and
	averaged_values from average_aux over the 10 collected value lists.
	NOTE: the misspelled name is kept for caller compatibility.
	"""
	corridas_pais = []
	keys = []
	for _ in range(10):
		# "Pais" vs. a province name is an exhaustive either/or choice.
		if actual != "Pais":
			keys, values = show_percentages(
				generar_muestra_provincia(n, actual.upper()))
		else:
			keys, values = show_percentages(generar_muestra_pais(n))
		corridas_pais.append(values)
	return keys, average_aux(corridas_pais)
Example #4
0
def gen_dataset(n, provincia, sample_type=1):
    """
    Generates the working dataset.

    A province-specific sample is drawn unless provincia is the literal
    "PAIS", in which case a country-wide sample is produced instead.
    sample_type is forwarded unchanged to the g08 generator.
    """
    if provincia == "PAIS":
        return g08.generar_muestra_pais(n, sample_type)
    return g08.generar_muestra_provincia(n, provincia, sample_type)
Example #5
0
def test_distribucion_por_provincia(N):
    """Verify that an N-row country sample matches the real per-province
    population distribution within ±0.01 (one percentage point).

    Assumes each sample row stores the province name at index 32, as the
    original code reads it from g08.generar_muestra_pais output.
    """
    # Province name -> position in the population/count arrays.
    dicc = {
        "SAN JOSE": 0,
        "ALAJUELA": 1,
        "CARTAGO": 2,
        "HEREDIA": 3,
        "GUANACASTE": 4,
        "PUNTARENAS": 5,
        "LIMON": 6,
    }

    # Population per province, same order as the dicc indices.
    population = [
        1404242,  # SJO
        848146,   # ALA
        490903,   # CAR
        433677,   # HER
        326953,   # GUA
        410929,   # PUN
        386862,   # LIM
    ]
    totalPop = sum(population)

    # Count sampled rows per province; row[32] holds the province name.
    muestraPop = [0] * len(population)
    muestra1 = g08.generar_muestra_pais(N)
    for row in muestra1:
        muestraPop[dicc[row[32]]] += 1

    # Compare expected vs. observed fractions pairwise.
    for pop, sampled in zip(population, muestraPop):
        expected = pop / totalPop
        observed = sampled / N
        assert expected - 0.01 < observed < expected + 0.01
Example #6
0
import numpy
import pandas
from keras.layers import Dense, Dropout
from keras.models import Sequential
from keras.utils import np_utils
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.cross_validation import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler, StandardScaler

from tec.ic.ia.pc1 import g08
# fix random seed for reproducibility
seed = 5
numpy.random.seed(seed)
# load dataset as a numpy array: 10000 country-wide sample rows
dataframe = numpy.array(g08.generar_muestra_pais(10000, 0))

# Feature matrix: drop column 0 and the last two columns
# (presumably an id and extra label columns — TODO confirm g08 row layout)
X = dataframe[:, 1:-2]

# Standardize features to zero mean / unit variance.
scalerX = StandardScaler()
scalerX.fit(X)
X = scalerX.transform(X)
# Target vector: last column of each row.
Y = dataframe[:, -1]

# encode class values as integers
encoder = LabelEncoder()
encoder.fit(Y)
encoded_Y = encoder.transform(Y)
# convert integers to dummy variables (i.e. one hot encoded)
dummy_y = np_utils.to_categorical(encoded_Y)
Example #7
0
            idxstr = 'predictions_' + str(round_number)
            predictions[idxstr] += prediction

            idxstr = 'accuracies_' + str(round_number)
            accuracies[idxstr].append(accuracy)
            print("Accuracy: ", accuracy)
    is_training = ['False' for i in range(len(training_dataset))]

    best_trees = get_best_trees(trees, accuracies)

    avg_train_acc = (sum(accuracies['accuracies_1']) + sum(
        accuracies['accuracies_2']) + sum(accuracies['accuracies_3'])) / 3

    final_dict = final_tests(best_trees, test_dataset, predictions,
                             avg_train_acc, is_training)
    """
    print("Results")
    print(len(final_dict['res_1']))
    print(len(final_dict['res_2']))
    print(len(final_dict['res_3']))
    print(len(final_dict['train_set']))
    print(final_dict['err_train'])
    print(final_dict['err_test'])
    """

    return final_dict


if __name__ == '__main__':
    # Entry point: cross-validate over a fresh 2005-row country-wide
    # sample (second argument is the generator's sample-type selector).
    cross_validate(dataset=g08.generar_muestra_pais(2005, 1))
Example #8
0
	third = [g08.PARTIDOS2[int(predictions[i])] for i in range(len(predictions))]

	predictions = model.predict(x_test)
	third += [g08.PARTIDOS2[int(predictions[i])] for i in range(len(predictions))]

	third_acc_train = model.score(x_train,y_train.ravel())
	third_acc = model.score(x_test,y_test.ravel())


	#print(first)
	print(first_acc)
	print()
	#print(second)
	print(second_acc)
	print()
	#print(third)
	print(third_acc)

	finalDict = {
	        'res_1':        first,
	        'res_2':        second,
	        'res_3':        third,
	        'err_train':    (first_acc+second_acc+third_acc)/3,
	        'err_test':     (first_acc_train+second_acc_train+third_acc_train)/3,
	        'train_set':    [True]*len(X1)+[False]*len(Y1)
	    }
	return finalDict


# Run the models on a fresh 10000-row country-wide sample; the meaning of
# the second argument (20) is defined by execute_model, not visible here.
execute_model(g08.generar_muestra_pais(10000,1),20)
Example #9
0
            predict = max(set(bestOccurrences), key=bestOccurrences.count)
            if predict == testPerson[-1]:
                accuracy += 1
            finalDict[resString].append(g08.PARTIDOS[int(predict)])

        accuracy = accuracy / len(dataSet)
        totalaccuracy += accuracy
        dataSetIndex += 1
        print("Error ", dataSetIndex, ": ", 1 - accuracy)

    finalDict['err_test'] = totalaccuracy / 3

    return finalDict


# Returns the best trees for each round based on the precision they had
def getBestTrees(trees1, trees2, trees3, accuracies1, accuracies2,
                 accuracies3):
    """Pick, per round, the tree whose accuracy was highest.

    Prints the best accuracy of each round (same output as before) and
    returns the 3-tuple (best_tree_1, best_tree_2, best_tree_3). Ties are
    broken toward the first maximal accuracy (list.index semantics).
    """
    best = []
    for round_no, (trees, accuracies) in enumerate(
            ((trees1, accuracies1), (trees2, accuracies2),
             (trees3, accuracies3)), start=1):
        # Compute the max once and reuse it for both the print and the index,
        # instead of scanning the list twice as the original did.
        top = max(accuracies)
        print("Best accuracy", round_no, top)
        best.append(trees[accuracies.index(top)])
    return tuple(best)


if __name__ == '__main__':
    # Entry point: cross-validate over a 1003-row country-wide sample,
    # converted to a numpy array before being handed to cross_validate.
    cross_validate(dataset=numpy.array(g08.generar_muestra_pais(1003, 1)))