Esempio n. 1
0
 def test_dame_gender_name2gender_in_dataset(self):
     # Looks up a name in a given dataset file and compares the returned
     # code with the expected one.
     #
     # The previous version used assertTrue(guess, N); the second argument
     # of assertTrue is only the failure message, so the expected code was
     # never compared. assertEqual performs the intended comparison.
     g = Gender()
     cases = (
         ("David", 'files/names/names_es', 1),
         ("David", 'files/names/all.csv', 1),
         ("David", 'files/names/yob2017.csv', 1),
         ("Laura", 'files/names/names_es', 0),
         ("Laura", 'files/names/all.csv', 0),
         ("Laura", 'files/names/yob2017.csv', 0),
         ("Teppei", 'files/names/yob2017.csv', 2),
         ("Filka", 'files/names/nam_dict.txt', 0),
     )
     for name, dataset, expected in cases:
         guess = g.name2gender_in_dataset(name, dataset=dataset)
         self.assertEqual(guess, expected)
Esempio n. 2
0
 def test_dame_gender_dataset2genderlist_method_returns_correct_result(
         self):
     # The first four entries of the gender list built from each dataset
     # must match the fixed values below.
     gender = Gender()
     expected_heads = (
         ("files/names/all.csv", [1, 1, 1, 1]),
         ("files/names/yob2017.txt", [0, 0, 0, 0]),
     )
     for dataset_path, head in expected_heads:
         codes = gender.dataset2genderlist(dataset=dataset_path)
         self.assertEqual(codes[0:4], head)
Esempio n. 3
0
 def test_dame_gender_features_list_no_categorical(self):
     # Checks the first feature row produced for files/names/partial.csv.
     g = Gender()
     flnc = g.features_list_no_categorical('files/names/partial.csv')
     # list() so the comparison below also works if the row is not a plain
     # list (NOTE(review): the row type is not visible here -- confirm).
     first_row = list(flnc[0])
     self.assertGreater(len(first_row), 25)
     # The previous assertTrue(flnc[0], [...]) used the list as the failure
     # message and only checked truthiness; compare the actual values.
     self.assertEqual(first_row, [
         0, 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 2, 0, 0, 0, 0,
         0, 0, 0, 0, 3, 3
     ])
Esempio n. 4
0
 def test_dame_gender_confusion_matrix_gender(self):
     # Confusion matrices for the two sample CSV files are compared with
     # fixed expected matrices.
     gender = Gender()
     expected_by_path = (
         ("files/names/min.csv", [[1, 0, 0], [0, 5, 0], [0, 5, 0]]),
         ("files/names/partial.csv", [[3, 0, 0], [0, 16, 0], [0, 16, 0]]),
     )
     for csv_path, expected_matrix in expected_by_path:
         matrix = gender.confusion_matrix_gender(path=csv_path)
         self.assertEqual(matrix, expected_matrix)
Esempio n. 5
0
 def test_dame_gender_features(self):
     # Spot-checks a few entries of the feature mapping for "David".
     feats = Gender().features("David")
     expected_entries = (
         ('has(a)', True),
         ('count(i)', 1),
         ('count(v)', 1),
         ('last_letter', 'd'),
         ('first_letter', 'd'),
     )
     for key, value in expected_entries:
         self.assertEqual(feats[key], value)
Esempio n. 6
0
 def test_dame_gender_count_true2guess_method_returns_correct_result(self):
     # Counts returned by count_true2guess for two pairs of vectors.
     gender = Gender()
     first = [1, 0, 1, 1]
     second = [1, 1, 1, 0]
     self.assertEqual(gender.count_true2guess(first, second, 1, 1), 2)
     # The second pair deliberately has vectors of different lengths,
     # exactly as in the original test.
     longer = [1, 0, 1, 1, 1]
     shorter = [1, 1, 1, 0]
     code_pairs = (
         (1, 1, 2),  # malemale
         (0, 1, 1),  # femalemale
         (1, 0, 1),  # malefemale
     )
     for code_a, code_b, expected in code_pairs:
         counted = gender.count_true2guess(shorter, longer, code_a, code_b)
         self.assertEqual(counted, expected)
Esempio n. 7
0
 def test_dame_gender_guess_list(self):
     # guess_list on partial.csv: first as labels (binary=False), then as
     # numeric codes (binary=True).
     gender = Gender()
     csv_path = "files/names/partial.csv"
     expected_labels = [
         'male', 'male', 'male', 'male', 'unknown', 'male', 'female',
         'female', 'male', 'male', 'male', 'male', 'male', 'male', 'male',
         'male', 'male', 'male', 'female', 'male', 'male'
     ]
     labels = gender.guess_list(path=csv_path, binary=False)
     self.assertEqual(expected_labels, labels)
     expected_codes = [
         1, 1, 1, 1, 2, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1
     ]
     codes = gender.guess_list(path=csv_path, binary=True)
     self.assertEqual(expected_codes, codes)
Esempio n. 8
0
 def test_dame_gender_gender_list(self):
     # gender_list on partial.csv returns the expected codes and leaves
     # the per-gender counters set on the Gender instance.
     gender = Gender()
     codes = gender.gender_list(path="files/names/partial.csv")
     expected_codes = [
         1, 1, 1, 1, 2, 1, 0, 0, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1
     ]
     self.assertEqual(codes, expected_codes)
     self.assertEqual(len(codes), 21)
     for counter_name, count in (("females", 3), ("males", 16),
                                 ("unknown", 2)):
         self.assertEqual(getattr(gender, counter_name), count)
Esempio n. 9
0
 def test_dame_gender_guess_list_method_returns_correct_result(self):
     # guess_list on partial.csv: labels first (binary=False), then the
     # numeric codes (binary=True).
     gender = Gender()
     csv_path = "files/names/partial.csv"
     expected_labels = [
         'unknown', 'male', 'male', 'male', 'unknown', 'male', 'female',
         'female', 'male', 'male', 'male', 'male', 'male', 'male',
         'unknown', 'male', 'male', 'male', 'female', 'male', 'unknown'
     ]
     labels = gender.guess_list(path=csv_path, binary=False)
     self.assertEqual(expected_labels, labels)
     expected_codes = [
         2, 1, 1, 1, 2, 1, 0, 0, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 0, 1, 2
     ]
     codes = gender.guess_list(path=csv_path, binary=True)
     self.assertEqual(expected_codes, codes)
Esempio n. 10
0
 def test_dame_gender_gender_list_method_returns_correct_result(self):
     # gender_list called with its default arguments: expected codes plus
     # the counters it leaves on the Gender instance.
     gender = Gender()
     codes = gender.gender_list()
     expected_codes = [
         1, 1, 1, 1, 2, 1, 0, 0, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1
     ]
     self.assertEqual(codes, expected_codes)
     self.assertEqual(len(codes), 21)
     for counter_name, count in (("females", 3), ("males", 16),
                                 ("unknown", 2)):
         self.assertEqual(getattr(gender, counter_name), count)
Esempio n. 11
0
 def test_dame_gender_json2names(self):
     # json2names on the namsor sample file, without and with surnames.
     gender = Gender()
     json_path = "files/names/namsorfiles_names_min.csv.json"
     given_names = gender.json2names(jsonf=json_path)
     self.assertEqual(
         ["Pierre", "Raul", "Adriano", "Ralf", "Guillermo", "Sabina"],
         given_names)
     full_names = gender.json2names(jsonf=json_path, surnames=True)
     expected_pairs = [
         ["Pierre", "grivel"],
         ["Raul", "serapioni"],
         ["Adriano", "moura"],
         ["Ralf", "kieser"],
         ["Guillermo", "leon-de-la-barra"],
         ["Sabina", "pannek"],
     ]
     self.assertEqual(full_names, expected_pairs)
Esempio n. 12
0
 def test_dame_gender_features_int(self):
     # Checks selected integer features computed for the name "David".
     # assertEqual replaces assertTrue(a == b) so a failure reports both
     # the actual and the expected value.
     g = Gender()
     features_int = g.features_int("David")
     # NOTE(review): the first_letter* checks below were already disabled
     # in the original test; the reason is not visible here.
     #        self.assertTrue(features_int["first_letter"] == 100)
     self.assertEqual(features_int["last_letter"], 100)
     self.assertEqual(features_int["vocals"], 2)
     self.assertEqual(features_int["consonants"], 2)
     #        self.assertTrue(features_int["first_letter_vocal"] == 0)
     self.assertEqual(features_int["last_letter_vocal"], 0)
     #        self.assertTrue(features_int["first_letter_consonant"] == 1)
     self.assertEqual(features_int["last_letter_consonant"], 1)
     self.assertEqual(features_int["last_letter_a"], 0)
     self.assertEqual(len(features_int), 36)
Esempio n. 13
0
 def test_dame_gender_check_names(self):
     # For each service sample (namsor, genderize, nameapi) the JSON file
     # and the CSV file must be reported as matching by
     # json_eq_csv_in_names.
     gender = Gender()
     file_pairs = (
         ("files/names/namsorfiles_names_min.csv.json",
          "files/names/min.csv"),
         ("files/names/namsorfiles_names_partial.csv.json",
          "files/names/partial.csv"),
         ("files/names/genderizefiles_names_min.csv.json",
          "files/names/min.csv"),
         ("files/names/genderizefiles_names_partial.csv.json",
          "files/names/partial.csv"),
         ("files/names/nameapifiles_names_min.csv.json",
          "files/names/min.csv"),
         ("files/names/nameapifiles_names_partial.csv.json",
          "files/names/partial.csv"),
     )
     for json_path, csv_path in file_pairs:
         matches = gender.json_eq_csv_in_names(jsonf=json_path,
                                               path=csv_path)
         self.assertTrue(matches)
Esempio n. 14
0
 def test_dame_gender_features_int_method_returns_correct_result(self):
     # Checks the integer features computed for the name "David".
     # assertEqual/assertGreater replace assertTrue(<comparison>) so that
     # a failure reports the values involved.
     g = Gender()
     features_int = g.features_int("David")
     self.assertEqual(features_int["first_letter"], 100)
     self.assertEqual(features_int["last_letter"], 100)
     self.assertEqual(features_int["vocals"], 2)
     self.assertEqual(features_int["consonants"], 2)
     self.assertEqual(features_int["first_letter_vocal"], 0)
     self.assertEqual(features_int["last_letter_vocal"], 0)
     self.assertEqual(features_int["first_letter_consonant"], 1)
     self.assertEqual(features_int["last_letter_consonant"], 1)
     # NOTE(review): the syllables check was already disabled in the
     # original test; the reason is not visible here.
     #self.assertTrue(features_int["syllables"] == 2)
     self.assertEqual(features_int["last_letter_a"], 0)
     self.assertGreater(len(features_int), 0)
Esempio n. 15
0
 def test_dame_gender_name_frec(self):
     # name_frec is queried once per (name, dataset); the listed counters
     # of each result are compared, after int(), with fixed values.
     gender = Gender()
     checks = (
         ("INES", 'ine', (('females', 61920), ('males', 0))),
         ("BEATRIZ", 'ine', (('females', 123445),)),
         ("ALMUDENA", 'ine', (('females', 30450),)),
         ("ELIZABETH", 'us', (('females', 1581894),)),
         ("ELISABETH", 'us', (('females', 43531),)),
         ("MARIA", 'uk', (('females', 9499),)),
         ("JULIAN", 'uk', (('males', 1741),)),
         ("A", 'uk', (('males', 49),)),
     )
     for name, dataset, expected_counts in checks:
         frec = gender.name_frec(name, dataset=dataset)
         for key, count in expected_counts:
             self.assertEqual(int(frec[key]), count)
Esempio n. 16
0
 def test_dame_gender_guess(self):
     # guess() for a few names, as numeric code (binary=True) and as label
     # (binary=False). Only the first call passes an explicit dataset.
     gender = Gender()
     first = gender.guess(name="David", binary=True, dataset="ine")
     self.assertEqual(first, 1)
     remaining = (
         ("Andrea", True, 0),
         ("David", False, "male"),
         ("Laura", True, 0),
         ("Laura", False, "female"),
         ("Andrea", True, 0),
     )
     for name, binary, expected in remaining:
         result = gender.guess(name=name, binary=binary)
         self.assertEqual(result, expected)
Esempio n. 17
0
 def test_dame_gender_guess_method_returns_correct_result(self):
     # guess() with the default dataset, as numeric code (binary=True) and
     # as label (binary=False).
     gender = Gender()
     cases = (
         ("David", True, 1),
         ("Andrea", True, 2),
         ("David", False, "male"),
         ("Laura", True, 0),
         ("Laura", False, "female"),
         ("Andrea", True, 2),
     )
     for name, binary, expected in cases:
         result = gender.guess(name=name, binary=binary)
         self.assertEqual(result, expected)
Esempio n. 18
0
 def test_dame_gender_name_frec_method_returns_correct_result(self):
     # Checks name_frec against the 'ine', 'uscensus' and 'ukcensus'
     # datasets.
     g = Gender()
     frec1 = g.name_frec("INES", dataset='ine')
     self.assertEqual(int(frec1['females']), 61337)
     self.assertEqual(int(frec1['males']), 0)
     # The previous version fetched a result per name but kept asserting
     # on the BEATRIZ result (frec2), so ALMUDENA, JULIA and ELISABETH were
     # never checked. Each lookup is now asserted on its own result.
     lookups = (
         ("BEATRIZ", 'ine'),
         ("ALMUDENA", 'ine'),
         ("JULIA", 'uscensus'),
         ("ELISABETH", 'uscensus'),
         ("MARIA", 'ukcensus'),
     )
     for name, dataset in lookups:
         frec = g.name_frec(name, dataset=dataset)
         self.assertGreater(int(frec['females']), 10)
Esempio n. 19
0
 def test_dame_gender_recall(self):
     # recall() on small hand-written vector pairs with known scores.
     gender = Gender()
     long_vector = [
         1, 1, 1, 1, 2, 1, 0, 0, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1
     ]
     cases = (
         ([1, 1], [1, 1], 1),
         ([1, 1, 1, 0], [1, 1, 2, 0], 0.75),
         ([1, 1, 1, 1, 2, 1], [1, 1, 1, 1, 2, 1], 1),
         # Two independent copies, as the original passed two literals.
         (list(long_vector), list(long_vector), 1),
     )
     for left, right, expected in cases:
         self.assertEqual(gender.recall(left, right), expected)
Esempio n. 20
0
 def test_dame_gender_precision(self):
     # precision() on small hand-written vector pairs with known scores.
     gender = Gender()
     long_vector = [
         1, 1, 1, 1, 2, 1, 0, 0, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1
     ]
     cases = (
         ([1, 1], [1, 1], 1),
         ([1, 1, 1, 0], [1, 1, 2, 0], 1),
         ([1, 1, 1, 1, 2, 1], [1, 1, 1, 1, 2, 1], 1),
         # Two independent copies, as the original passed two literals.
         (list(long_vector), list(long_vector), 1),
     )
     for left, right, expected in cases:
         self.assertEqual(gender.precision(left, right), expected)
Esempio n. 21
0
 def test_dame_gender_dame_accuracy_score_dame(self):
     # accuracy_score_dame() on small hand-written vector pairs with known
     # scores.
     gender = Gender()
     long_vector = [
         1, 1, 1, 1, 2, 1, 0, 0, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1
     ]
     cases = (
         ([1, 1], [1, 1], 1),
         ([1, 1, 1, 0], [1, 1, 2, 0], 0.75),
         ([1, 1, 1, 1, 2, 1], [1, 1, 1, 1, 2, 1], 1),
         # Two independent copies, as the original passed two literals.
         (list(long_vector), list(long_vector), 1),
     )
     for left, right, expected in cases:
         score = gender.accuracy_score_dame(left, right)
         self.assertEqual(score, expected)
Esempio n. 22
0
 def test_dame_gender_dame_accuracy_score_dame_method_returns_correct_result(
         self):
     # accuracy_score_dame() on small hand-written vector pairs with known
     # scores.
     gender = Gender()
     long_vector = [
         1, 1, 1, 1, 2, 1, 0, 0, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1
     ]
     cases = (
         ([1, 1], [1, 1], 1),
         ([1, 1, 1, 0], [1, 1, 2, 0], 0.75),
         ([1, 1, 1, 1, 2, 1], [1, 1, 1, 1, 2, 1], 1),
         # Two independent copies, as the original passed two literals.
         (list(long_vector), list(long_vector), 1),
     )
     for left, right, expected in cases:
         score = gender.accuracy_score_dame(left, right)
         self.assertEqual(score, expected)
Esempio n. 23
0
 def test_dame_gender_females_list(self):
     # For each corpus, females_list must contain a sample name and have
     # the exact expected length.
     gender = Gender()
     expectations = (
         ("es", "Eva", 24818),
         ("uk", "Ana", 19060),
         ("us", "Ana", 58013),
         ("uy", "Ana", 13597),
     )
     for corpus, sample_name, size in expectations:
         names = gender.females_list(corpus=corpus)
         self.assertTrue(sample_name in names)
         self.assertEqual(len(names), size)
Esempio n. 24
0
 def test_dame_gender_males_list(self):
     # For each corpus, males_list must contain "Adrian" and have the
     # exact expected length.
     gender = Gender()
     expected_sizes = (
         ("es", 24511),
         ("uk", 15206),
         ("us", 33181),
         ("uy", 9107),
     )
     for corpus, size in expected_sizes:
         names = gender.males_list(corpus=corpus)
         self.assertTrue("Adrian" in names)
         self.assertEqual(len(names), size)
Esempio n. 25
0
 def test_dame_gender_csv2names(self):
     # csv2names on the sample CSV files: default call, explicit
     # surnames=False, and surnames=True.
     gender = Gender()
     partial_names = gender.csv2names(path='files/names/partial.csv')
     self.assertTrue(len(partial_names) > 10)

     min_csv = 'files/names/min.csv'
     given_names = [
         'Pierre', 'Raul', 'Adriano', 'Ralf', 'Guillermo', 'Sabina'
     ]
     default_result = gender.csv2names(path=min_csv)
     self.assertEqual(given_names, default_result)
     without_surnames = gender.csv2names(path=min_csv, surnames=False)
     self.assertEqual(given_names, without_surnames)

     with_surnames = gender.csv2names(path=min_csv, surnames=True)
     expected_pairs = [
         ['Pierre', 'grivel'],
         ['Raul', 'serapioni'],
         ['Adriano', 'moura'],
         ['Ralf', 'kieser'],
         ['Guillermo', 'leon-de-la-barra'],
         ['Sabina', 'pannek'],
     ]
     self.assertEqual(expected_pairs, with_surnames)
Esempio n. 26
0
 def test_dame_gender_features_list2csv(self):
     # Runs features_list2csv in its different modes and checks that the
     # expected CSV files exist afterwards.
     #
     # The previous version opened files/features_list.csv without ever
     # closing it (leaked handle) and guarded the remaining assertions
     # with an always-true `if (file)`; the open() added nothing beyond
     # the isfile check, so it is dropped. The unused return-value locals
     # are dropped as well.
     # TODO: You can write asserts to verify the first line
     g = Gender()
     g.features_list2csv(path="files/names/min.csv")
     g.features_list2csv(path="files/names/min.csv",
                         categorical="categorical")
     g.features_list2csv(path="files/names/min.csv",
                         categorical="nocategorical")
     g.features_list2csv(path="files/names/allnoundefined.csv",
                         categorical="noundefined")
     expected_files = (
         "files/features_list.csv",
         "files/features_list_cat.csv",
         "files/features_list_no_cat.csv",
         "files/features_list_no_undefined.csv",
     )
     for generated in expected_files:
         # The path doubles as failure message to show which file is missing.
         self.assertTrue(os.path.isfile(generated), generated)
Esempio n. 27
0
 def test_dame_gender_first_uneq(self):
     # first_uneq_json_and_csv_in_names is called once per assertion (as
     # in the original); element 0 and element 1 of its result are
     # compared with fixed values.
     gender = Gender()
     genderize_json = "files/names/genderizefiles_names_min.csv.json"
     nameapi_json = "files/names/nameapifiles_names_min.csv.json"
     checks = (
         ("sabina", genderize_json, "files/names/min.csv", 0),
         (5, genderize_json, "files/names/min.csv", 1),
         ("guillermo", nameapi_json, "files/names/partial.csv", 0),
         (4, nameapi_json, "files/names/partial.csv", 1),
     )
     for expected, json_path, csv_path, position in checks:
         outcome = gender.first_uneq_json_and_csv_in_names(
             jsonf=json_path, path=csv_path)
         self.assertEqual(expected, outcome[position])
Esempio n. 28
0
from pprint import pprint
from sklearn.decomposition import PCA
from app.dame_sexmachine import DameSexmachine
from app.dame_gender import Gender

## PARAMETERS
parser = argparse.ArgumentParser()
parser.add_argument(
    "--categorical",
    choices=['both', 'noletters', 'nocategorical', 'all'],
    default="both",
)
parser.add_argument("--components", type=int, default=0)
args = parser.parse_args()

if args.components > 0:
    ## LOAD DATASET
    g = Gender()
    # --categorical value -> (categorical argument, names CSV, features CSV)
    _feature_plans = {
        "both": ("both",
                 "files/names/allnoundefined.csv",
                 "files/features_list_no_undefined.csv"),
        "noletters": ("noletters",
                      "files/names/allnoundefined.csv",
                      "files/features_list_cat.csv"),
        "nocategorical": ("nocategorical",
                          "files/names/allnoundefined.csv",
                          "files/features_list_no_cat.csv"),
    }
    # Any other value (with the declared choices: 'all') exports from the
    # complete names file.
    _fallback_plan = ("both", "files/names/all.csv",
                      "files/features_list.csv")
    _mode, _names_csv, _features_csv = _feature_plans.get(
        args.categorical, _fallback_plan)
    g.features_list2csv(categorical=_mode, path=_names_csv)
    features = _features_csv
Esempio n. 29
0
from pprint import pprint
import re
import argparse

# Command-line options of the script.
parser = argparse.ArgumentParser()
parser.add_argument(
    '--ml',
    choices=[
        'nltk', 'svc', 'sgd', 'gaussianNB', 'multinomialNB', 'bernoulliNB',
        'forest', 'tree', 'mlp', 'adaboost',
    ])
parser.add_argument('--dataset', choices=['uk', 'us', 'uy'])
parser.add_argument('--nltk', action="store_true", default=False)
parser.add_argument('--allstuff', action="store_true", default=False)
args = parser.parse_args()

# With --nltk, fetch the NLTK resources before doing anything else.
if args.nltk:
    for _resource in ('names', 'punkt'):
        nltk.download(_resource)

g = Gender()

def create_file(dataset):
    uspathmales = "files/names/names_us/usmales.csv"
    uspathfemales = "files/names/names_us/usfemales.csv"
    ukpathmales = "files/names/names_uk/ukmales.csv"
    ukpathfemales = "files/names/names_uk/ukfemales.csv"
    uypathmales = "files/names/names_uy/uymasculinos.csv"
    uypathfemales = "files/names/names_uy/uyfemeninos.csv"
    if (dataset == "us"):
        pathmales = uspathmales
        pathfemales = uspathfemales
    elif (dataset == "uk"):
        pathmales = ukpathmales
        pathfemales = ukpathfemales
    elif (dataset == "uy"):
Esempio n. 30
0
 def test_dame_gender_filenamdict2list(self):
     # The first entry returned by filenamdict2list must be "Aad".
     names = Gender().filenamdict2list()
     self.assertEqual(names[0], "Aad")