def desicion_tree(k, lenData, pctTest, params, threshold):
    """Cross-validate a decision tree on an election sample and dump results.

    Generates a country-wide or per-province sample (depending on
    ``params``), splits it via ``Normalizer.separate_data_2``, runs k-fold
    cross validation for three prediction rounds, then reports accuracy
    and writes a CSV.
    """
    clear_csv()
    if params[0] == "PAIS":
        samples = generar_muestra_pais(lenData)
    else:
        samples = generar_muestra_provincia(lenData, params[1])
    test_count = int(lenData * pctTest)
    normalizer = Normalizer()
    data = normalizer.separate_data_2(samples, test_count)
    tree = DecisionTree(threshold)
    # (training key, testing key, target round) for each validation pass.
    rounds = (
        ("trainingFeaturesFirst", "testingFeaturesFirst", "First"),
        ("trainingFeaturesSecond", "testingFeaturesSecond", "Second"),
        ("trainingFeaturesFirstInclude", "testingFeaturesFirstInclude",
         "Second"),
    )
    predictions = [
        cross_validation(k, tree, data, lenData, train_key, test_key, target)
        for train_key, test_key, target in rounds
    ]
    normalData = normalizer.get_normal_data()
    show_accuracy("DT", predictions)
    make_csv(k, normalData, lenData, pctTest, predictions)
def lr_classification(k, lenData, pctTest, l_regulizer=1):
    """Cross-validate logistic-regression classifiers for both election
    rounds and write the (second-round only) predictions to CSV.
    """
    clear_csv()
    samples = generar_muestra_pais(lenData)
    test_count = int(lenData * pctTest)
    normalizer = Normalizer()
    data = normalizer.prepare_data_tensor(samples, test_count)

    classifier = LogisticRegression(1, l_regulizer)
    firstRound = cross_validation(
        k, classifier, data, lenData,
        "trainingFeatures", "testingFeatures", "First")
    classifier = LogisticRegression(2, l_regulizer)
    print("Paso primero")
    secondRound = cross_validation(
        k, classifier, data, lenData,
        "trainingFeatures", "testingFeatures", "Second")
    print("Paso segundo")
    # NOTE(review): firstRound / secondWithFirst are not reported below,
    # but cross_validation appears to have side effects, so the calls are
    # kept — confirm before pruning.
    secondWithFirst = cross_validation(
        k, classifier, data, lenData,
        "trainingFeaturesFirstInclude", "testingFeaturesFirstInclude",
        "Second")
    print("Paso tercero")
    normalData = normalizer.get_normal_data()
    # predictions = [firstRound, secondRound, secondWithFirst]
    predictions = [secondRound]
    show_accuracy("LR", predictions)
    make_csv(k, normalData, lenData, pctTest, predictions)
def main(argv):
    """Parse command-line options and print a small country sample.

    Options:
      -h                         show usage and exit
      --poblacion <n>            population/sample size
      --porcentaje-pruebas <p>   test-set percentage
    """
    poblacion = 0
    porcentaje = 0
    # Help text matches the actual long option name (--porcentaje-pruebas);
    # the original message advertised a non-existent --porcentaje-prueba.
    usage = ('main.py --poblacion <cantidad> '
             '--porcentaje-pruebas <porcentaje, prueba>')
    try:
        opts, args = getopt.getopt(
            argv, "hi:o:", ["poblacion=", "porcentaje-pruebas="])
    except getopt.GetoptError:
        print(usage)
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print(usage)
            sys.exit()
        # BUG FIX: the original used `opt in ("--poblacion")`, which is a
        # substring test on a plain string (missing tuple comma), not a
        # membership test; use direct equality instead.
        elif opt == "--poblacion":
            poblacion = arg
        elif opt == "--porcentaje-pruebas":
            porcentaje = arg
    print('Poblacion', poblacion)
    print('Porcentaje', porcentaje)
    print(generar_muestra_pais(5))
def kd_tree_classification(k, lenData, pctTest, params, neightboards):
    """Cross-validate a KD-tree nearest-neighbour classifier and write the
    predictions for three rounds to CSV.
    """
    clear_csv()
    if params[0] == "PAIS":
        samples = generar_muestra_pais(lenData)
    else:
        samples = generar_muestra_provincia(lenData, params[1])
    n_test = int(lenData * pctTest)
    normalizer = Normalizer()
    data = normalizer.prepare_data(samples, n_test)
    kd_tree = Kd_Tree(neightboards)
    round_one = cross_validation(
        k, kd_tree, data, lenData,
        "trainingFeatures", "testingFeatures", "First")
    round_two = cross_validation(
        k, kd_tree, data, lenData,
        "trainingFeatures", "testingFeatures", "Second")
    round_two_with_one = cross_validation(
        k, kd_tree, data, lenData,
        "trainingFeaturesFirstInclude", "testingFeaturesFirstInclude",
        "Second")
    predictions = [round_one, round_two, round_two_with_one]
    normalData = normalizer.get_normal_data()
    show_accuracy("KD-TREE", predictions)
    make_csv(k, normalData, lenData, pctTest, predictions)
def svm_classification(
        k, lenData, pctTest, params, C=1, gamma=1, kernel="rbf"):
    """Cross-validate an SVM on a country or province sample and write the
    predictions for three rounds to CSV.
    """
    clear_csv()
    print(params)
    if params[0] == "PAIS":
        samples = generar_muestra_pais(lenData)
    else:
        samples = generar_muestra_provincia(lenData, params[1])
    n_test = int(lenData * pctTest)
    normalizer = Normalizer()
    data = normalizer.prepare_data(samples, n_test)
    svm = SVMClassifier(kernel, C, gamma)
    # (training key, testing key, target round) for each validation pass.
    rounds = (
        ("trainingFeatures", "testingFeatures", "First"),
        ("trainingFeatures", "testingFeatures", "Second"),
        ("trainingFeaturesFirstInclude", "testingFeaturesFirstInclude",
         "Second"),
    )
    predictions = [
        cross_validation(k, svm, data, lenData, train_key, test_key, target)
        for train_key, test_key, target in rounds
    ]
    normalData = normalizer.get_normal_data()
    show_accuracy("SVM", predictions)
    make_csv(k, normalData, lenData, pctTest, predictions)
def lr_classification(k, lenData, pctTest, l_regulizer=1):
    """Run logistic-regression cross validation for the first and second
    election rounds; only the second-round result is reported/saved.
    """
    clear_csv()
    samples = generar_muestra_pais(lenData)
    holdout = int(lenData * pctTest)
    normalizer = Normalizer()
    data = normalizer.prepare_data_tensor(samples, holdout)

    model = LogisticRegression(1, l_regulizer)
    firstRound = cross_validation(
        k, model, data, lenData,
        "trainingFeatures", "testingFeatures", "First")
    model = LogisticRegression(2, l_regulizer)
    print("Paso primero")
    secondRound = cross_validation(
        k, model, data, lenData,
        "trainingFeatures", "testingFeatures", "Second")
    print("Paso segundo")
    # NOTE(review): the first and combined rounds are computed but not
    # included in the report below — presumably kept for the side effects
    # of cross_validation; verify before removing.
    secondWithFirst = cross_validation(
        k, model, data, lenData,
        "trainingFeaturesFirstInclude", "testingFeaturesFirstInclude",
        "Second")
    print("Paso tercero")
    normalData = normalizer.get_normal_data()
    # predictions = [firstRound, secondRound, secondWithFirst]
    predictions = [secondRound]
    show_accuracy("LR", predictions)
    make_csv(k, normalData, lenData, pctTest, predictions)
def kd_tree_classification(k, lenData, pctTest, params, neightboards):
    """Cross-validate a KD-tree classifier over three prediction rounds
    and persist the results to CSV.
    """
    clear_csv()
    if params[0] == "PAIS":
        samples = generar_muestra_pais(lenData)
    else:
        samples = generar_muestra_provincia(lenData, params[1])
    holdout = int(lenData * pctTest)
    normalizer = Normalizer()
    data = normalizer.prepare_data(samples, holdout)
    model = Kd_Tree(neightboards)
    # (training key, testing key, target round) per validation pass.
    rounds = (
        ("trainingFeatures", "testingFeatures", "First"),
        ("trainingFeatures", "testingFeatures", "Second"),
        ("trainingFeaturesFirstInclude", "testingFeaturesFirstInclude",
         "Second"),
    )
    predictions = [
        cross_validation(k, model, data, lenData, train_key, test_key, target)
        for train_key, test_key, target in rounds
    ]
    normalData = normalizer.get_normal_data()
    show_accuracy("KD-TREE", predictions)
    make_csv(k, normalData, lenData, pctTest, predictions)
def desicion_tree(k, lenData, pctTest, params, threshold):
    """Run k-fold cross validation with a decision tree for three
    prediction rounds and write accuracy plus a CSV report.
    """
    clear_csv()
    if params[0] == "PAIS":
        samples = generar_muestra_pais(lenData)
    else:
        samples = generar_muestra_provincia(lenData, params[1])
    holdout = int(lenData * pctTest)
    normalizer = Normalizer()
    data = normalizer.separate_data_2(samples, holdout)
    model = DecisionTree(threshold)
    round_one = cross_validation(
        k, model, data, lenData,
        "trainingFeaturesFirst", "testingFeaturesFirst", "First")
    round_two = cross_validation(
        k, model, data, lenData,
        "trainingFeaturesSecond", "testingFeaturesSecond", "Second")
    round_two_with_one = cross_validation(
        k, model, data, lenData,
        "trainingFeaturesFirstInclude", "testingFeaturesFirstInclude",
        "Second")
    normalData = normalizer.get_normal_data()
    predictions = [round_one, round_two, round_two_with_one]
    show_accuracy("DT", predictions)
    make_csv(k, normalData, lenData, pctTest, predictions)
def pruebas():
    """Benchmark the sigmoid-kernel SVM: run it repeatedly on fresh
    country samples, print wall-clock time, then print the mean error
    rate for each of the three prediction rounds accumulated in the
    global ``accList``.
    """
    lenData = 2500
    pctTest = 0.2
    n_runs = 30
    print(lenData)
    print("kernel: ", "sigmoid", " C: ", 1, " G: ", 0.000000001)
    time1 = time.time()
    for _ in range(n_runs):
        # BUG FIX: svm_classification requires the positional `params`
        # argument (country/province selector); the original call omitted
        # it, raising TypeError. The original also rebuilt samples and a
        # Normalizer here without ever passing them to the call — that
        # dead work is removed (svm_classification samples internally).
        svm_classification(
            10, lenData, pctTest, ["PAIS"],
            C=1, gamma=0.000000001, kernel="sigmoid")
    time2 = time.time()
    print("ms: ", ((time2 - time1) * 1000.0))
    # accList appears to receive three entries per run (one per round);
    # average slot `offset` across runs and report the error rate.
    for offset in range(3):
        totalacc = 0.0
        for i in range(offset, len(accList), 3):
            totalacc += accList[i][1]
        print("ER: ", 1 - (totalacc / float(n_runs)))
def pruebas():
    """Timing/accuracy benchmark for the SVM classifier.

    Runs the sigmoid-kernel SVM 30 times, prints elapsed milliseconds,
    then prints the average error rate per prediction round using the
    accuracies collected in the global ``accList``.
    """
    lenData = 2500
    pctTest = 0.2
    n_runs = 30
    print(lenData)
    print("kernel: ", "sigmoid", " C: ", 1, " G: ", 0.000000001)
    time1 = time.time()
    for _ in range(n_runs):
        # BUG FIX: the required positional `params` argument was missing
        # from this call (svm_classification signature takes it before the
        # keyword options), which raised TypeError. Dead per-iteration
        # sample/Normalizer construction is also dropped — its results
        # were never passed to svm_classification.
        svm_classification(
            10, lenData, pctTest, ["PAIS"],
            C=1, gamma=0.000000001, kernel="sigmoid")
    time2 = time.time()
    print("ms: ", ((time2 - time1) * 1000.0))
    # accList appears to gain three entries per run; average each slot.
    for offset in range(3):
        totalacc = 0.0
        for i in range(offset, len(accList), 3):
            totalacc += accList[i][1]
        print("ER: ", 1 - (totalacc / float(n_runs)))
def svm_classification(k, lenData, pctTest, params, C=1, gamma=1,
                       kernel="rbf"):
    """Run k-fold cross validation with an SVM over a country/province
    sample; report accuracy and write a CSV of the predictions.
    """
    clear_csv()
    print(params)
    if params[0] == "PAIS":
        samples = generar_muestra_pais(lenData)
    else:
        samples = generar_muestra_provincia(lenData, params[1])
    holdout = int(lenData * pctTest)
    normalizer = Normalizer()
    data = normalizer.prepare_data(samples, holdout)
    model = SVMClassifier(kernel, C, gamma)
    round_one = cross_validation(
        k, model, data, lenData,
        "trainingFeatures", "testingFeatures", "First")
    round_two = cross_validation(
        k, model, data, lenData,
        "trainingFeatures", "testingFeatures", "Second")
    round_two_with_one = cross_validation(
        k, model, data, lenData,
        "trainingFeaturesFirstInclude", "testingFeaturesFirstInclude",
        "Second")
    normalData = normalizer.get_normal_data()
    predictions = [round_one, round_two, round_two_with_one]
    show_accuracy("SVM", predictions)
    make_csv(k, normalData, lenData, pctTest, predictions)
def main(argv):
    """Parse command-line options and print a small country sample.

    Options:
      -h                         show usage and exit
      --poblacion <n>            population/sample size
      --porcentaje-pruebas <p>   test-set percentage
    """
    poblacion = 0
    porcentaje = 0
    # Help text now names the real long option (--porcentaje-pruebas);
    # the original message advertised a non-existent --porcentaje-prueba.
    usage = ('main.py --poblacion <cantidad> '
             '--porcentaje-pruebas <porcentaje, prueba>')
    try:
        opts, args = getopt.getopt(
            argv, "hi:o:", ["poblacion=", "porcentaje-pruebas="])
    except getopt.GetoptError:
        print(usage)
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print(usage)
            sys.exit()
        # BUG FIX: `opt in ("--poblacion")` tested substring membership of
        # a plain string (missing tuple comma); use equality instead.
        elif opt == "--poblacion":
            poblacion = arg
        elif opt == "--porcentaje-pruebas":
            porcentaje = arg
    print('Poblacion', poblacion)
    print('Porcentaje', porcentaje)
    print(generar_muestra_pais(5))
# empieza el entrenamiento _, c = sess.run([optimizer, cost], feed_dict={self.X: train_x, self.y: train_y}) cost_in_each_epoch += c ## # you can uncomment next two lines of code for printing cost when training ## if (epoch+1) % display_step == 0: ## print("Epoch: {}".format(epoch + 1), "cost={}".format(cost_in_each_epoch)) print("Accuracy Training:", accuracy.eval({X: train_x, y: train_y})) ## sess = tf.Session() ## with sess.as_default(): ## return self.toparty(self.y.eval({self.X: test_x, self.y: test_y}).tolist()) ## samples = generar_muestra_pais(100) quantity_for_testing = int(100*0.2) normalizer = Normalizer() data = normalizer.prepare_data(samples, quantity_for_testing) classes = np.append( data["trainingClassesFirst"], data["testingClassesFirst"], axis=0 ) sample = { "trainingFeatures": data["trainingFeatures"], "trainingClasses": data["trainingClassesFirst"],"testingFeatures": data["testingFeatures"], "testingClasses": data["testingClassesFirst"]} sample2 = { "testingFeatures": data["testingFeatures"], "testingClasses": data["testingClassesFirst"]} print(sample2["testingClasses"]) prueba = logistic_regression_classifier(1,classes) prueba.train(sample) print(prueba.classify(sample2))
## if (epoch+1) % display_step == 0: ## print("Epoch: {}".format(epoch + 1), "cost={}".format(cost_in_each_epoch)) print("Accuracy Training:", accuracy.eval({ X: train_x, y: train_y })) ## sess = tf.Session() ## with sess.as_default(): ## return self.toparty(self.y.eval({self.X: test_x, self.y: test_y}).tolist()) ## samples = generar_muestra_pais(100) quantity_for_testing = int(100 * 0.2) normalizer = Normalizer() data = normalizer.prepare_data(samples, quantity_for_testing) classes = np.append(data["trainingClassesFirst"], data["testingClassesFirst"], axis=0) sample = { "trainingFeatures": data["trainingFeatures"], "trainingClasses": data["trainingClassesFirst"], "testingFeatures": data["testingFeatures"], "testingClasses": data["testingClassesFirst"] } sample2 = { "testingFeatures": data["testingFeatures"], "testingClasses": data["testingClassesFirst"]