Code example #1
def desicion_tree(k, lenData, pctTest, params, threshold):
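    # cross-validates a decision tree over k folds on country- or province-level samples,
    # for the first-round, second-round, and second-with-first targets, then prints
    # accuracies and writes a CSV summary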

    clear_csv()

    samples = []

    if (params[0] == "PAIS"):
        samples = generar_muestra_pais(lenData)
    else:
        samples = generar_muestra_provincia(lenData, params[1])
    quantity_for_testing = int(lenData * pctTest)
    normalizer = Normalizer()
    data = normalizer.separate_data_2(samples, quantity_for_testing)

    decisionTree = DecisionTree(threshold)
    firstRound = cross_validation(k, decisionTree, data, lenData,
                                  "trainingFeaturesFirst",
                                  "testingFeaturesFirst", "First")

    secondRound = cross_validation(k, decisionTree, data, lenData,
                                   "trainingFeaturesSecond",
                                   "testingFeaturesSecond", "Second")

    secondWithFirst = cross_validation(k, decisionTree, data, lenData,
                                       "trainingFeaturesFirstInclude",
                                       "testingFeaturesFirstInclude", "Second")

    normalData = normalizer.get_normal_data()
    predictions = [firstRound, secondRound, secondWithFirst]

    show_accuracy("DT", predictions)
    make_csv(k, normalData, lenData, pctTest, predictions)
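
A hypothetical invocation sketch (the values are illustrative only; it assumes k is the number of cross-validation folds and that threshold is the split/pruning threshold passed to DecisionTree):

desicion_tree(10, 2000, 0.2, ["PAIS"], 0.1)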
Code example #2
def lr_classification(k, lenData, pctTest, l_regulizer=1):
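    # cross-validates logistic regression on country-level samples; note that only the
    # second-round predictions are reported below (the full list is commented out)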

    clear_csv()

    samples = generar_muestra_pais(lenData)
    quantity_for_testing = int(lenData * pctTest)

    normalizer = Normalizer()
    data = normalizer.prepare_data_tensor(samples, quantity_for_testing)

    lrClassifier = LogisticRegression(1, l_regulizer)

    firstRound = cross_validation(k, lrClassifier, data, lenData,
                                  "trainingFeatures", "testingFeatures",
                                  "First")

    lrClassifier = LogisticRegression(2, l_regulizer)
    print("Paso primero")

    secondRound = cross_validation(k, lrClassifier, data, lenData,
                                   "trainingFeatures", "testingFeatures",
                                   "Second")
    print("Paso segundo")

    secondWithFirst = cross_validation(k, lrClassifier, data, lenData,
                                       "trainingFeaturesFirstInclude",
                                       "testingFeaturesFirstInclude", "Second")
    print("Paso tercero")

    normalData = normalizer.get_normal_data()
    # predictions = [firstRound, secondRound, secondWithFirst]
    predictions = [secondRound]
    show_accuracy("LR", predictions)
    make_csv(k, normalData, lenData, pctTest, predictions)
Code example #3
def main(argv):
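    # parses --poblacion and --porcentaje-pruebas from the command line and prints a
    # 5-row country-level sample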
    poblacion = 0
    porcentaje = 0
    prueba = 0
    try:
        opts, args = getopt.getopt(argv, "hi:o:",
                                   ["poblacion=", "porcentaje-pruebas="])
    except getopt.GetoptError:
        print(
            'main.py --poblacion <cantidad> --porcentaje-pruebas <porcentaje>'
        )
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print(
                'main.py --poblacion <cantidad> --porcentaje-pruebas <porcentaje>'
            )
            sys.exit()
        elif opt in ("--poblacion"):
            poblacion = arg

        elif opt in ("--porcentaje-pruebas"):
            porcentaje = arg
    print('Poblacion', poblacion)
    print('Porcentaje', porcentaje)
    print(generar_muestra_pais(5))
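
A minimal entry-point sketch, assuming the module is run as a script; getopt.getopt expects the argument list without the program name:

if __name__ == "__main__":
    main(sys.argv[1:])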
Code example #4
def kd_tree_classification(k, lenData, pctTest, params, neightboards):
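    # cross-validates a KD-tree nearest-neighbour classifier (neightboards neighbours)
    # on country- or province-level samples and logs accuracies and a CSV summary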

    clear_csv()

    samples = []

    if (params[0] == "PAIS"):
        samples = generar_muestra_pais(lenData)
    else:
        samples = generar_muestra_provincia(lenData, params[1])
    quantity_for_testing = int(lenData * pctTest)

    normalizer = Normalizer()
    data = normalizer.prepare_data(samples, quantity_for_testing)

    kdTree = Kd_Tree(neightboards)
    firstRound = cross_validation(k, kdTree, data, lenData, "trainingFeatures",
                                  "testingFeatures", "First")

    secondRound = cross_validation(k, kdTree, data, lenData,
                                   "trainingFeatures", "testingFeatures",
                                   "Second")

    secondWithFirst = cross_validation(k, kdTree, data, lenData,
                                       "trainingFeaturesFirstInclude",
                                       "testingFeaturesFirstInclude", "Second")

    normalData = normalizer.get_normal_data()
    predictions = [firstRound, secondRound, secondWithFirst]

    show_accuracy("KD-TREE", predictions)
    make_csv(k, normalData, lenData, pctTest, predictions)
Code example #5
def svm_classification(
        k, lenData, pctTest, params, C=1, gamma=1, kernel="rbf"):
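    # cross-validates an SVM (kernel, C and gamma as given) on country- or
    # province-level samples and logs accuracies and a CSV summary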

    clear_csv()

    samples = []

    print(params)
    if (params[0] == "PAIS"):
        samples = generar_muestra_pais(lenData)
    else:
        samples = generar_muestra_provincia(lenData, params[1])

    quantity_for_testing = int(lenData * pctTest)

    normalizer = Normalizer()
    data = normalizer.prepare_data(samples, quantity_for_testing)

    svmClassifier = SVMClassifier(kernel, C, gamma)

    firstRound = cross_validation(
        k,
        svmClassifier,
        data,
        lenData,
        "trainingFeatures",
        "testingFeatures",
        "First"
    )

    secondRound = cross_validation(
        k,
        svmClassifier,
        data,
        lenData,
        "trainingFeatures",
        "testingFeatures",
        "Second"
    )

    secondWithFirst = cross_validation(
        k,
        svmClassifier,
        data,
        lenData,
        "trainingFeaturesFirstInclude",
        "testingFeaturesFirstInclude",
        "Second"
    )

    normalData = normalizer.get_normal_data()
    predictions = [firstRound, secondRound, secondWithFirst]

    show_accuracy("SVM", predictions)
    make_csv(k, normalData, lenData, pctTest, predictions)
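
For reference, a call shaped like the benchmark in the pruebas() examples further below, with the params argument (which those examples omit) filled in with an assumed ["PAIS"] scope:

svm_classification(10, 2500, 0.2, ["PAIS"], C=1, gamma=0.000000001, kernel="sigmoid")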
Code example #6
def lr_classification(k, lenData, pctTest, l_regulizer=1):

    clear_csv()

    samples = generar_muestra_pais(lenData)
    quantity_for_testing = int(lenData * pctTest)

    normalizer = Normalizer()
    data = normalizer.prepare_data_tensor(samples, quantity_for_testing)

    lrClassifier = LogisticRegression(1, l_regulizer)

    firstRound = cross_validation(
        k,
        lrClassifier,
        data,
        lenData,
        "trainingFeatures",
        "testingFeatures",
        "First"
    )

    lrClassifier = LogisticRegression(2, l_regulizer)
    print("Paso primero")

    secondRound = cross_validation(
        k,
        lrClassifier,
        data,
        lenData,
        "trainingFeatures",
        "testingFeatures",
        "Second"
    )
    print("Paso segundo")

    secondWithFirst = cross_validation(
        k,
        lrClassifier,
        data,
        lenData,
        "trainingFeaturesFirstInclude",
        "testingFeaturesFirstInclude",
        "Second"
    )
    print("Paso tercero")

    normalData = normalizer.get_normal_data()
    # predictions = [firstRound, secondRound, secondWithFirst]
    predictions = [secondRound]
    show_accuracy("LR", predictions)
    make_csv(k, normalData, lenData, pctTest, predictions)
Code example #7
def kd_tree_classification(k, lenData, pctTest, params, neightboards):

    clear_csv()

    samples = []

    if (params[0] == "PAIS"):
        samples = generar_muestra_pais(lenData)
    else:
        samples = generar_muestra_provincia(lenData, params[1])
    quantity_for_testing = int(lenData * pctTest)

    normalizer = Normalizer()
    data = normalizer.prepare_data(samples, quantity_for_testing)

    kdTree = Kd_Tree(neightboards)
    firstRound = cross_validation(
        k,
        kdTree,
        data,
        lenData,
        "trainingFeatures",
        "testingFeatures",
        "First"
    )

    secondRound = cross_validation(
        k,
        kdTree,
        data,
        lenData,
        "trainingFeatures",
        "testingFeatures",
        "Second"
    )

    secondWithFirst = cross_validation(
        k,
        kdTree,
        data,
        lenData,
        "trainingFeaturesFirstInclude",
        "testingFeaturesFirstInclude",
        "Second"
    )

    normalData = normalizer.get_normal_data()
    predictions = [firstRound, secondRound, secondWithFirst]

    show_accuracy("KD-TREE", predictions)
    make_csv(k, normalData, lenData, pctTest, predictions)
Code example #8
def desicion_tree(k, lenData, pctTest, params, threshold):

    clear_csv()

    samples = []

    if (params[0] == "PAIS"):
        samples = generar_muestra_pais(lenData)
    else:
        samples = generar_muestra_provincia(lenData, params[1])
    quantity_for_testing = int(lenData * pctTest)
    normalizer = Normalizer()
    data = normalizer.separate_data_2(samples, quantity_for_testing)

    decisionTree = DecisionTree(threshold)
    firstRound = cross_validation(
        k,
        decisionTree,
        data,
        lenData,
        "trainingFeaturesFirst",
        "testingFeaturesFirst",
        "First"
    )

    secondRound = cross_validation(
        k,
        decisionTree,
        data,
        lenData,
        "trainingFeaturesSecond",
        "testingFeaturesSecond",
        "Second"
    )

    secondWithFirst = cross_validation(
        k,
        decisionTree,
        data,
        lenData,
        "trainingFeaturesFirstInclude",
        "testingFeaturesFirstInclude",
        "Second"
    )

    normalData = normalizer.get_normal_data()
    predictions = [firstRound, secondRound, secondWithFirst]

    show_accuracy("DT", predictions)
    make_csv(k, normalData, lenData, pctTest, predictions)
Code example #9
def pruebas():
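    # benchmark: 30 repetitions of 10-fold cross-validated SVM with a sigmoid kernel,
    # timing the total run and averaging per-round error rates from accList
    # (assumed to be a module-level accumulator filled elsewhere)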
    # svm_classification(1000, 0.2, C=10, gamma=0.00833333333, kernel="rbf")
    lenData = 2500
    print(lenData)
    print("kernel: ", "sigmoid", " C: ", 1, " G: ", 0.000000001)
    pctTest = 0.2

    # samples = generar_muestra_provincia(lenData, "SAN JOSE")
    # quantity_for_testing = int(lenData*pctTest)

    # normalizer = Normalizer()
    # data = normalizer.prepare_data(samples, quantity_for_testing)

    # svm_classification(10, lenData, pctTest, C=1, gamma=1, kernel="rbf")

    time1 = time.time()

    for i in range(0, 30):
        samples = generar_muestra_pais(lenData)
        quantity_for_testing = int(lenData * pctTest)

        normalizer = Normalizer()
        data = normalizer.prepare_data(samples, quantity_for_testing)
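        # note: the sample and data prepared here are not passed on;
        # svm_classification draws and normalises its own sample internally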
        svm_classification(10,
                           lenData,
                           pctTest,
                           C=1,
                           gamma=0.000000001,
                           kernel="sigmoid")

    time2 = time.time()

    print("ms: ", ((time2 - time1) * 1000.0))

    totalacc = 0.0
    for i in range(0, len(accList), 3):
        totalacc += accList[i][1]
    print("ER: ", 1 - (totalacc / 30.0))

    totalacc = 0.0
    for i in range(1, len(accList), 3):
        totalacc += accList[i][1]
    print("ER: ", 1 - (totalacc / 30.0))

    totalacc = 0.0
    for i in range(2, len(accList), 3):
        totalacc += accList[i][1]
    print("ER: ", 1 - (totalacc / 30.0))
Code example #10
def pruebas():
    # svm_classification(1000, 0.2, C=10, gamma=0.00833333333, kernel="rbf")
    lenData = 2500
    print(lenData)
    print("kernel: ", "sigmoid", " C: ", 1, " G: ", 0.000000001)
    pctTest = 0.2

    # samples = generar_muestra_provincia(lenData, "SAN JOSE")
    # quantity_for_testing = int(lenData*pctTest)

    # normalizer = Normalizer()
    # data = normalizer.prepare_data(samples, quantity_for_testing)

    # svm_classification(10, lenData, pctTest, C=1, gamma=1, kernel="rbf")

    time1 = time.time()

    for i in range(0, 30):
        samples = generar_muestra_pais(lenData)
        quantity_for_testing = int(lenData * pctTest)

        normalizer = Normalizer()
        data = normalizer.prepare_data(samples, quantity_for_testing)
        svm_classification(
            10, lenData, pctTest, C=1, gamma=0.000000001, kernel="sigmoid")

    time2 = time.time()

    print("ms: ", ((time2-time1)*1000.0))

    totalacc = 0.0
    for i in range(0, len(accList), 3):
        totalacc += accList[i][1]
    print("ER: ", 1-(totalacc/30.0))

    totalacc = 0.0
    for i in range(1, len(accList), 3):
        totalacc += accList[i][1]
    print("ER: ", 1-(totalacc/30.0))

    totalacc = 0.0
    for i in range(2, len(accList), 3):
        totalacc += accList[i][1]
    print("ER: ", 1-(totalacc/30.0))
Code example #11
def svm_classification(k,
                       lenData,
                       pctTest,
                       params,
                       C=1,
                       gamma=1,
                       kernel="rbf"):

    clear_csv()

    samples = []

    print(params)
    if (params[0] == "PAIS"):
        samples = generar_muestra_pais(lenData)
    else:
        samples = generar_muestra_provincia(lenData, params[1])

    quantity_for_testing = int(lenData * pctTest)

    normalizer = Normalizer()
    data = normalizer.prepare_data(samples, quantity_for_testing)

    svmClassifier = SVMClassifier(kernel, C, gamma)

    firstRound = cross_validation(k, svmClassifier, data, lenData,
                                  "trainingFeatures", "testingFeatures",
                                  "First")

    secondRound = cross_validation(k, svmClassifier, data, lenData,
                                   "trainingFeatures", "testingFeatures",
                                   "Second")

    secondWithFirst = cross_validation(k, svmClassifier, data, lenData,
                                       "trainingFeaturesFirstInclude",
                                       "testingFeaturesFirstInclude", "Second")

    normalData = normalizer.get_normal_data()
    predictions = [firstRound, secondRound, secondWithFirst]

    show_accuracy("SVM", predictions)
    make_csv(k, normalData, lenData, pctTest, predictions)
Code example #12
def main(argv):
    poblacion = 0
    porcentaje = 0
    prueba = 0
    try:
        opts, args = getopt.getopt(argv, "hi:o:",
                                   ["poblacion=", "porcentaje-pruebas="])
    except getopt.GetoptError:
        print('main.py --poblacion <cantidad> --porcentaje-pruebas <porcentaje>')
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print('main.py --poblacion <cantidad> --porcentaje-pruebas <porcentaje>')
            sys.exit()
        elif opt == "--poblacion":
            poblacion = arg

        elif opt == "--porcentaje-pruebas":
            porcentaje = arg
    print('Poblacion', poblacion)
    print('Porcentaje', porcentaje)
    print(generar_muestra_pais(5))
Code example #13
                    # empieza el entrenamiento
                    _, c = sess.run([optimizer, cost], feed_dict={self.X: train_x,
                                                                  self.y: train_y})
                    cost_in_each_epoch += c
    ##                # you can uncomment next two lines of code for printing cost when training
    ##                if (epoch+1) % display_step == 0:
    ##                    print("Epoch: {}".format(epoch + 1), "cost={}".format(cost_in_each_epoch))

                print("Accuracy Training:", accuracy.eval({X: train_x, y: train_y}))
##        sess = tf.Session()
##        with sess.as_default():
##            return self.toparty(self.y.eval({self.X: test_x, self.y: test_y}).tolist())
##            
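# smoke test: build a 100-sample country-level data set with a 20% hold-out,
# train the logistic-regression classifier on the first-round labels, and
# classify the held-out features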


samples = generar_muestra_pais(100)
quantity_for_testing = int(100 * 0.2)
normalizer = Normalizer()
data = normalizer.prepare_data(samples, quantity_for_testing)
classes = np.append(data["trainingClassesFirst"],
                    data["testingClassesFirst"],
                    axis=0)
sample = {
    "trainingFeatures": data["trainingFeatures"],
    "trainingClasses": data["trainingClassesFirst"],
    "testingFeatures": data["testingFeatures"],
    "testingClasses": data["testingClassesFirst"]
}
sample2 = {
    "testingFeatures": data["testingFeatures"],
    "testingClasses": data["testingClassesFirst"]
}
print(sample2["testingClasses"])
prueba = logistic_regression_classifier(1, classes)
prueba.train(sample)
print(prueba.classify(sample2))
Code example #14
    ##                if (epoch+1) % display_step == 0:
    ##                    print("Epoch: {}".format(epoch + 1), "cost={}".format(cost_in_each_epoch))

                print("Accuracy Training:",
                      accuracy.eval({
                          X: train_x,
                          y: train_y
                      }))


##        sess = tf.Session()
##        with sess.as_default():
##            return self.toparty(self.y.eval({self.X: test_x, self.y: test_y}).tolist())
##

samples = generar_muestra_pais(100)
quantity_for_testing = int(100 * 0.2)
normalizer = Normalizer()
data = normalizer.prepare_data(samples, quantity_for_testing)
classes = np.append(data["trainingClassesFirst"],
                    data["testingClassesFirst"],
                    axis=0)
sample = {
    "trainingFeatures": data["trainingFeatures"],
    "trainingClasses": data["trainingClassesFirst"],
    "testingFeatures": data["testingFeatures"],
    "testingClasses": data["testingClassesFirst"]
}
sample2 = {
    "testingFeatures": data["testingFeatures"],
    "testingClasses": data["testingClassesFirst"]