def test_sexmachine_features(self):
    # Spot-check selected entries of the feature dict built for "David".
    machine = DameSexmachine()
    feats = machine.features("David")
    wanted_entries = (
        ('has(a)', True),
        ('count(i)', 1),
        ('count(v)', 1),
        ('last_letter', 'd'),
        ('first_letter', 'd'),
    )
    for key, wanted in wanted_entries:
        self.assertEqual(feats[key], wanted)
예제 #2
0
 def test_sexmachine_gender_list_method_returns_correct_result(self):
     # gender_list() with its default arguments must return the expected
     # 21 codes and leave matching females/males/unknown counters on the
     # instance.
     machine = DameSexmachine()
     codes = machine.gender_list()
     wanted_codes = [
         1, 1, 1, 1, 2, 1, 0, 0, 1, 1, 2,
         1, 1, 1, 1, 1, 1, 1, 0, 1, 1,
     ]
     self.assertEqual(codes, wanted_codes)
     self.assertEqual(len(codes), 21)
     for counter, wanted in (("females", 3), ("males", 16), ("unknown", 2)):
         self.assertEqual(getattr(machine, counter), wanted)
 def test_sexmachine_tree_load(self):
     # Load the model returned by tree_load() and check the class it
     # predicts for one fixed, hand-written feature vector.
     s = DameSexmachine()
     m = s.tree_load()
     predicted = m.predict([[
         0, 0, 1, 0, 21, 0, 0, 0, 0, 34, 2, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0,
         0, 0, 0, 2, 0, 0, 0, 34, 1, 0, 1
     ]])
     expected = np.array([0])
     # Compare with np.array_equal, as the sibling *_load tests do.
     # assertEqual evaluates `predicted == expected` in a boolean context,
     # which only works for one-element arrays and ignores shape
     # differences hidden by broadcasting.
     self.assertTrue(np.array_equal(predicted, expected))
 def test_sexmachine_sgd_load(self):
     # Load the model returned by sgd_load() and check the class it
     # predicts for one fixed, hand-written feature vector.
     s = DameSexmachine()
     m = s.sgd_load()
     predicted = m.predict([[
         0, 0, 1, 0, 21, 0, 0, 0, 0, 34, 2, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0,
         0, 0, 0, 2, 0, 0, 0, 34, 1, 0, 1
     ]])
     expected = np.array([1])
     # Compare with np.array_equal, as the sibling *_load tests do.
     # assertEqual on numpy arrays relies on the truth value of an
     # element-wise comparison, which is only defined for one-element
     # results and hides shape mismatches.
     self.assertTrue(np.array_equal(predicted, expected))
 def test_sexmachine_bernoulliNB_load(self):
     # The model returned by bernoulliNB_load() must predict class 2 for
     # this fixed feature vector.
     machine = DameSexmachine()
     model = machine.bernoulliNB_load()
     sample = [
         0, 0, 1, 0, 21, 0, 0, 0, 0, 34, 2,
         0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0,
         0, 0, 0, 2, 0, 0, 0, 34, 1, 0, 1,
     ]
     outcome = model.predict([sample])
     wanted = np.array([2])
     matches = np.array_equal(outcome, wanted)
     self.assertTrue(matches)
 def test_sexmachine_adaboost_load(self):
     # The model returned by adaboost_load() must predict class 1 for this
     # fixed feature vector.
     machine = DameSexmachine()
     model = machine.adaboost_load()
     sample = [
         0, 0, 1, 0, 21, 0, 0, 0, 0, 34, 2,
         0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0,
         0, 0, 0, 2, 0, 0, 0, 34, 1, 0, 1,
     ]
     outcome = model.predict([sample])
     wanted = np.array([1])
     matches = np.array_equal(outcome, wanted)
     self.assertTrue(matches)
예제 #7
0
 def test_dame_sexmachine_guess_list_method_returns_correct_result(self):
     # guess_list() over the partial names file: expected guesses as string
     # labels, as binary codes (default and ml="nltk"), and for ml="svc".
     ds = DameSexmachine()
     names_csv = "files/names/partial.csv"
     label_guesses = [
         'male', 'male', 'male', 'male', 'female', 'male', 'female',
         'female', 'male', 'male', 'male', 'male', 'male', 'male',
         'female', 'male', 'male', 'male', 'female', 'male', 'male',
     ]
     binary_guesses = [
         1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1,
         1, 1, 1, 0, 1, 1, 1, 0, 1, 1,
     ]
     svc_guesses = [
         1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 0, 1, 1,
     ]
     self.assertEqual(label_guesses,
                      ds.guess_list(path=names_csv, binary=False))
     self.assertEqual(binary_guesses,
                      ds.guess_list(path=names_csv, binary=True))
     self.assertEqual(binary_guesses,
                      ds.guess_list(path=names_csv, binary=True, ml="nltk"))
     # NOTE: the equivalent check for ml="sgd" (preceded by sgd_load()) was
     # already disabled in this test and stays disabled.
     # svc_load() is still called before the svc check, as before; its
     # return value is not used.
     ds.svc_load()
     self.assertEqual(svc_guesses,
                      ds.guess_list(path=names_csv, binary=True, ml="svc"))
예제 #8
0
 def test_sexmachine_multinomialNB_load_method_returns_correct_result(self):
     # The model returned by multinomialNB_load() must predict class 1 for
     # both of these fixed feature rows.
     machine = DameSexmachine()
     model = machine.multinomialNB_load()
     first_row = [
         0, 0, 1, 0, 21, 0, 0, 0, 0, 34, 2, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 5, 0, 0, 0, 0, 0, 2, 0, 0, 0, 34, 1, 0, 0,
     ]
     second_row = [
         0, 0, 0, 0, 21, 0, 0, 0, 0, 34, 0, 0, 0, 0, 0, 1, 0,
         0, 0, 0, 5, 0, 0, 1, 0, 0, 1, 0, 0, 1, 34, 0, 0, 1,
     ]
     outcome = model.predict([first_row, second_row])
     wanted = np.array([1, 1])
     self.assertTrue(np.array_equal(outcome, wanted))
 def test_sexmachine_gender_list(self):
     # gender_list() over the partial names file must return the expected
     # 21 codes and leave matching females/males/unknown counters on the
     # instance.
     machine = DameSexmachine()
     codes = machine.gender_list(path="files/names/partial.csv")
     wanted_codes = [
         1, 1, 1, 1, 2, 1, 0, 0, 1, 1, 2,
         1, 1, 1, 1, 1, 1, 1, 0, 1, 1,
     ]
     self.assertEqual(codes, wanted_codes)
     self.assertEqual(len(codes), 21)
     for counter, wanted in (("females", 3), ("males", 16), ("unknown", 2)):
         self.assertEqual(getattr(machine, counter), wanted)
 def test_dame_gender_guess_list(self):
     # guess_list() over the partial names file, once as string labels and
     # once as binary codes with ml="nltk".
     ds = DameSexmachine()
     names_csv = "files/names/partial.csv"
     wanted_labels = [
         'male', 'male', 'male', 'male', 'male', 'male', 'female',
         'female', 'male', 'male', 'male', 'male', 'male', 'male',
         'male', 'male', 'male', 'male', 'female', 'male', 'male',
     ]
     wanted_codes = [
         1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 0, 1, 1,
     ]
     labels = ds.guess_list(path=names_csv, binary=False)
     self.assertEqual(wanted_labels, labels)
     codes = ds.guess_list(path=names_csv, binary=True, ml="nltk")
     self.assertEqual(wanted_codes, codes)
 def test_dame_gender_confusion_matrix_gender(self):
     # confusion_matrix_gender() over the min names file must give the same
     # matrix with default options, with ml="nltk", and with an explicit
     # jsonf file plus ml="nltk".
     ds = DameSexmachine()
     wanted = [[1, 0, 0], [0, 5, 0], [0, 5, 0]]
     call_variants = (
         {},
         {"ml": "nltk"},
         {"jsonf": "files/names/min.csv.json", "ml": "nltk"},
     )
     for extra in call_variants:
         matrix = ds.confusion_matrix_gender(path="files/names/min.csv",
                                             **extra)
         self.assertEqual(matrix, wanted)
 def test_sexmachine_features_int(self):
     # Check the feature dict that features_int() builds for "David".
     s = DameSexmachine()
     dicc = s.features_int("David")
     # The letter features are compared after a chr() conversion.
     self.assertEqual(chr(dicc['last_letter']), 'd')
     self.assertEqual(chr(dicc['first_letter']), 'd')
     # Per-letter occurrence counts.
     expected_counts = (
         ('a', 1), ('b', 0), ('c', 0), ('d', 2), ('e', 0),
         ('f', 0), ('h', 0), ('i', 1), ('v', 1),
     )
     for letter, count in expected_counts:
         self.assertEqual(dicc['count(%s)' % letter], count)
     self.assertGreater(dicc['count(a)'], 0)
     # These two checks used assertTrue(value, N). The second argument of
     # assertTrue is the failure message, so N was never compared.
     # "David" has the vowels a, i and the consonants d, v, d.
     self.assertEqual(dicc['vocals'], 2)
     self.assertEqual(dicc['consonants'], 3)
     self.assertEqual(dicc['first_letter_vocal'], 0)
     self.assertEqual(dicc['last_letter_vocal'], 0)
     self.assertGreater(len(dicc.values()), 30)
 def test_dame_sexmachine_guess(self):
     # Drive guess() through a fixed sequence of (name, options, expected)
     # cases. The order, including the repeated "Palabra" svc case, is the
     # same as in the original sequence of assertions.
     machine = DameSexmachine()
     svc = {"binary": True, "ml": "svc"}
     sgd = {"binary": True, "ml": "sgd"}
     as_binary = {"binary": True}
     cases = (
         ("David", {}, 'male'),
         ("David", svc, 1),
         ("Laura", svc, 0),
         ("Palabra", svc, 1),
         ("Laura", sgd, 0),
         ("Palabra", svc, 1),
         ("David", {"binary": True, "ml": "gaussianNB"}, 1),
         ("David", {"binary": True, "ml": "multinomialNB"}, 1),
         ("David", {"binary": True, "ml": "bernoulliNB"}, 1),
         ("Laura", {}, 'female'),
         # With accents:
         ("Inés", {}, 'female'),
         # Without accents:
         ("Ines", {}, 'female'),
         ("Nodiccionario", {}, 'male'),
         ("Nadiccionaria", {}, 'female'),
         ("David", as_binary, 1),
         ("Laura", as_binary, 0),
         ("Nodiccionario", as_binary, 1),
         ("Nadiccionaria", as_binary, 0),
     )
     for name, options, wanted in cases:
         self.assertEqual(machine.guess(name, **options), wanted)
예제 #14
0
 def test_dame_sexmachine_guess_method_returns_correct_result(self):
     # Drive guess() through a fixed sequence of (name, options, expected)
     # cases. The order, including the repeated "Palabra" svc case, is the
     # same as in the original sequence of assertions.
     machine = DameSexmachine()
     svc = {"binary": True, "ml": "svc"}
     sgd = {"binary": True, "ml": "sgd"}
     as_binary = {"binary": True}
     cases = (
         ("David", {}, 'male'),
         ("David", svc, 1),
         ("Laura", svc, 0),
         ("Palabra", svc, 1),
         ("Inés", svc, 0),
         ("David", sgd, 1),
         ("Laura", sgd, 0),
         ("Palabra", svc, 1),
         ("David", {"binary": True, "ml": "gaussianNB"}, 1),
         ("David", {"binary": True, "ml": "multinomialNB"}, 1),
         ("David", {"binary": True, "ml": "bernoulliNB"}, 1),
         ("Laura", {}, 'female'),
         ("Inés", {}, 'female'),           # with accent
         ("Ines", {}, 'female'),           # without accent
         ("Nodiccionario", {}, 'male'),    # not in the dictionary
         ("Nadiccionaria", {}, 'female'),  # in the dictionary
         ("David", as_binary, 1),
         ("Laura", as_binary, 0),
         ("Nodiccionario", as_binary, 1),
         ("Nadiccionaria", as_binary, 0),
     )
     for name, options, wanted in cases:
         self.assertEqual(machine.guess(name, **options), wanted)
 def test_sexmachine_classifier_load(self):
     # Load the stored classifier and classify the feature dict for "David".
     s = DameSexmachine()
     m = s.classifier_load()
     n = s.features("David")
     guess = m.classify(n)
     # The previous assertion was assertTrue(1, n). It tested the constant 1
     # with n as the failure message, so it could never fail and the
     # classification result was ignored. Assert on the result instead.
     # TODO(review): pin the exact expected label once confirmed against
     # the classifier's label set.
     self.assertIsNotNone(guess)
예제 #16
0
 def test_sexmachine_confusion_matrix_method_returns_correct_result(self):
     # confusion_matrix() with default arguments must equal this 3x3 matrix.
     machine = DameSexmachine()
     matrix = machine.confusion_matrix()
     wanted = np.array([
         [3, 0, 0],
         [1, 15, 0],
         [1, 1, 0],
     ])
     matches = np.array_equal(matrix, wanted)
     self.assertTrue(matches)
 def test_dame_sexmachine_features_int(self):
     # features_int() must return a non-empty collection for "David".
     machine = DameSexmachine()
     feats = machine.features_int("David")
     size = len(feats)
     self.assertTrue(size > 0)
예제 #18
0
 def test_sexmachine_string2gender_method_returns_correct_result(self):
     # string2gender() is given longer strings that contain a given name and
     # must report that name's gender.
     s = DameSexmachine()
     gender1 = s.string2gender("Arroyo Menéndez, David")
     gender2 = s.string2gender("xxxxx Laura")
     # Use assertEqual, not assertTrue: the second argument of assertTrue is
     # only the failure message, so the old checks passed for any truthy
     # return value and never compared against 'male' / 'female'.
     self.assertEqual(gender1, 'male')
     self.assertEqual(gender2, 'female')
예제 #19
0
 def test_dame_sexmachine_features_int_method_returns_correct_result(self):
     # features_int("David") must be non-empty and carry a positive
     # 'syllables' entry.
     machine = DameSexmachine()
     feats = machine.features_int("David")
     syllable_count = feats['syllables']
     self.assertTrue(syllable_count > 0)
     size = len(feats)
     self.assertTrue(size > 0)
예제 #20
0
 def test_sexmachine_bernoulliNB_load_method_returns_correct_result(self):
     # The model returned by bernoulliNB_load() must predict class 2 for
     # this fixed feature vector.
     machine = DameSexmachine()
     model = machine.bernoulliNB_load()
     sample = [
         0, 0, 1, 0, 21, 0, 0, 0, 0, 34, 2, 0, 0, 0, 0, 0, 0,
         0, 0, 5, 0, 0, 0, 0, 0, 2, 0, 0, 0, 34, 1, 0, 1, 0,
     ]
     outcome = model.predict([sample])
     wanted = np.array([2])
     matches = np.array_equal(outcome, wanted)
     self.assertTrue(matches)
예제 #21
0
 def test_sexmachine_sgd_load_method_returns_correct_result(self):
     # Load the model returned by sgd_load() and check the class it
     # predicts for one fixed, hand-written feature vector.
     s = DameSexmachine()
     m = s.sgd_load()
     predicted = m.predict([[
         0, 0, 1, 0, 21, 0, 0, 0, 0, 34, 2, 0, 0, 0, 0, 0, 0,
         0, 0, 5, 0, 0, 0, 0, 0, 2, 0, 0, 0, 34, 1, 0, 1, 0
     ]])
     expected = np.array([2])
     # Compare with np.array_equal, as the sibling *_load tests do.
     # assertEqual on numpy arrays relies on the truth value of an
     # element-wise comparison, which is only defined for one-element
     # results and hides shape mismatches.
     self.assertTrue(np.array_equal(predicted, expected))
 def test_sexmachine_accuracy(self):
     # accuracy() over the partial names file must exceed 0.5.
     machine = DameSexmachine()
     score = machine.accuracy(path="files/names/partial.csv")
     self.assertTrue(score > 0.5)
예제 #23
0
 def test_string2array_method_returns_correct_result(self):
     # string2array() must split on whitespace and drop the empty pieces
     # produced by the run of spaces in the input.
     machine = DameSexmachine()
     text = "muchos    espacios en blanco"
     words = machine.string2array(text)
     wanted = ["muchos", "espacios", "en", "blanco"]
     self.assertEqual(wanted, words)
예제 #24
0
 def test_dame_sexmachine_guess_surname_method_returns_correct_result(self):
     # guess_surname("Smith") must return a truthy value.
     machine = DameSexmachine()
     outcome = machine.guess_surname("Smith")
     self.assertTrue(outcome)
 def test_sexmachine_classifier(self):
     # Build the classifier for locale "us" and classify the feature dict
     # for "David".
     s = DameSexmachine()
     classifier = s.classifier(locale="us")
     n = s.features("David")
     guess = classifier.classify(n)
     # The previous assertion was assertTrue(1, n). It tested the constant 1
     # with n as the failure message, so it could never fail and the
     # classification result was ignored. Assert on the result instead.
     # TODO(review): pin the exact expected label once confirmed against
     # the classifier's label set.
     self.assertIsNotNone(guess)
 def test_sexmachine_features_list(self):
     # features_list() with default arguments must yield more than 20 items.
     machine = DameSexmachine()
     rows = machine.features_list()
     size = len(rows)
     self.assertTrue(size > 20)
예제 #27
0
 def test_sexmachine_accuracy_method_returns_correct_result(self):
     # accuracy() over the partial names file must exceed 0.5.
     machine = DameSexmachine()
     score = machine.accuracy(path="files/names/partial.csv")
     self.assertTrue(score > 0.5)
 def test_sexmachine_features_list_all(self):
     # features_list() over the full names file must yield more than 1000
     # items.
     machine = DameSexmachine()
     rows = machine.features_list(path="files/names/all.csv")
     size = len(rows)
     self.assertTrue(size > 1000)
예제 #29
0
from app.dame_sexmachine import DameSexmachine
from app.dame_utils import DameUtils

import argparse

# Command-line interface: one required positional argument selecting the
# classifier, plus two optional boolean flags.
parser = argparse.ArgumentParser()
parser.add_argument('ml',
                    choices=[
                        'nltk', 'svc', 'sgd', 'gaussianNB', 'multinomialNB',
                        'bernoulliNB', 'forest', 'tree', 'mlp'
                    ])
# --noshow is parsed but not read anywhere in the visible part of the script.
parser.add_argument('--noshow', dest='noshow', action='store_true')
parser.add_argument('--verbose', default=False, action="store_true")
args = parser.parse_args()

# Build the feature matrix X and the label list y from the same CSV file,
# then split both into train/test partitions with a fixed random_state.
# NOTE(review): np, train_test_split, SVC, plot_roc_curve and
# RandomForestClassifier are not imported in the visible lines. Presumably
# they come from numpy / scikit-learn; confirm against the full file.
ds = DameSexmachine()
X = np.array(ds.features_list(path="files/names/allnoundefined.csv"))
y = ds.gender_list(path="files/names/allnoundefined.csv")
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# With --verbose, dump the full feature matrix and label list.
if (args.verbose):
    print(X)
    print(y)

# Only the 'svc' and 'forest' choices are handled in the visible lines.
if (args.ml == "svc"):
    # Fit on the training partition and plot the ROC curve on the test one.
    svc = SVC(random_state=42)
    svc.fit(X_train, y_train)
    svc_disp = plot_roc_curve(svc, X_test, y_test)

elif (args.ml == "forest"):
    # NOTE(review): the snippet ends here. rfc is constructed but not fitted
    # or plotted in the visible lines.
    rfc = RandomForestClassifier(n_estimators=10, random_state=42)
 def test_sexmachine_gender_list_all(self):
     # gender_list() over the full names file must yield more than 1000
     # items.
     machine = DameSexmachine()
     codes = machine.gender_list(path="files/names/all.csv")
     size = len(codes)
     self.assertTrue(size > 1000)