def test_sexmachine_features(self):
    """Check selected entries of the feature dict built for "David"."""
    machine = DameSexmachine()
    feats = machine.features("David")
    # (feature key, expected value) pairs, checked in this order.
    expectations = [
        ('has(a)', True),
        ('count(i)', 1),
        ('count(v)', 1),
        ('last_letter', 'd'),
        ('first_letter', 'd'),
    ]
    for key, wanted in expectations:
        self.assertEqual(feats[key], wanted)
def test_sexmachine_gender_list_method_returns_correct_result(self):
    """Check the default ``gender_list()`` output and the counters it sets.

    The expected list holds three 0s, sixteen 1s and two 2s, matching the
    ``females``, ``males`` and ``unknown`` counters asserted below.
    """
    machine = DameSexmachine()
    genders = machine.gender_list()
    wanted = [
        1, 1, 1, 1, 2, 1, 0, 0, 1, 1, 2,
        1, 1, 1, 1, 1, 1, 1, 0, 1, 1,
    ]
    self.assertEqual(genders, wanted)
    self.assertEqual(len(genders), 21)
    self.assertEqual(machine.females, 3)
    self.assertEqual(machine.males, 16)
    self.assertEqual(machine.unknown, 2)
def test_sexmachine_tree_load(self):
    """Check that the model from ``tree_load`` predicts 0 for a sample row.

    The prediction is compared with ``np.array_equal`` rather than
    ``assertEqual``: ``assertEqual`` on arrays depends on the truth value of
    an element-wise comparison, which raises ``ValueError`` as soon as the
    arrays hold more than one element.
    """
    s = DameSexmachine()
    m = s.tree_load()
    # Single feature row (33 values) fed to the loaded model.
    sample = [[
        0, 0, 1, 0, 21, 0, 0, 0, 0, 34, 2,
        0, 0, 0, 0, 0, 0, 0, 0, 5,
        0, 0, 0, 0, 0, 2, 0, 0, 0, 34,
        1, 0, 1,
    ]]
    predicted = m.predict(sample)
    expected = np.array([0])
    self.assertTrue(np.array_equal(predicted, expected))
def test_sexmachine_sgd_load(self):
    """Check that the model from ``sgd_load`` predicts 1 for a sample row.

    The prediction is compared with ``np.array_equal`` rather than
    ``assertEqual``: ``assertEqual`` on arrays depends on the truth value of
    an element-wise comparison, which raises ``ValueError`` as soon as the
    arrays hold more than one element.
    """
    s = DameSexmachine()
    m = s.sgd_load()
    # Single feature row (33 values) fed to the loaded model.
    sample = [[
        0, 0, 1, 0, 21, 0, 0, 0, 0, 34, 2,
        0, 0, 0, 0, 0, 0, 0, 0, 5,
        0, 0, 0, 0, 0, 2, 0, 0, 0, 34,
        1, 0, 1,
    ]]
    predicted = m.predict(sample)
    expected = np.array([1])
    self.assertTrue(np.array_equal(predicted, expected))
def test_sexmachine_bernoulliNB_load(self):
    """Check that the model from ``bernoulliNB_load`` predicts 2 for a sample row."""
    # Single feature row (33 values) fed to the loaded model.
    sample = [[
        0, 0, 1, 0, 21, 0, 0, 0, 0, 34, 2,
        0, 0, 0, 0, 0, 0, 0, 0, 5,
        0, 0, 0, 0, 0, 2, 0, 0, 0, 34,
        1, 0, 1,
    ]]
    model = DameSexmachine().bernoulliNB_load()
    outcome = model.predict(sample)
    self.assertTrue(np.array_equal(outcome, np.array([2])))
def test_sexmachine_adaboost_load(self):
    """Check that the model from ``adaboost_load`` predicts 1 for a sample row."""
    # Single feature row (33 values) fed to the loaded model.
    sample = [[
        0, 0, 1, 0, 21, 0, 0, 0, 0, 34, 2,
        0, 0, 0, 0, 0, 0, 0, 0, 5,
        0, 0, 0, 0, 0, 2, 0, 0, 0, 34,
        1, 0, 1,
    ]]
    model = DameSexmachine().adaboost_load()
    outcome = model.predict(sample)
    self.assertTrue(np.array_equal(outcome, np.array([1])))
def test_dame_sexmachine_guess_list_method_returns_correct_result(self):
    """Check ``guess_list`` on partial.csv in label, binary, nltk and svc modes."""
    ds = DameSexmachine()
    csv_path = "files/names/partial.csv"

    expected_labels = [
        'male', 'male', 'male', 'male', 'female', 'male', 'female',
        'female', 'male', 'male', 'male', 'male', 'male', 'male',
        'female', 'male', 'male', 'male', 'female', 'male', 'male',
    ]
    self.assertEqual(expected_labels,
                     ds.guess_list(path=csv_path, binary=False))

    # The same binary expectation is asserted for the default classifier
    # and for an explicit ml="nltk".
    expected_binary = [
        1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1,
        1, 1, 1, 0, 1, 1, 1, 0, 1, 1,
    ]
    self.assertEqual(expected_binary,
                     ds.guess_list(path=csv_path, binary=True))
    self.assertEqual(expected_binary,
                     ds.guess_list(path=csv_path, binary=True, ml="nltk"))

    # The SGD check is disabled; it was written as:
    # sgd_model = ds.sgd_load()
    # self.assertEqual([0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0], ds.guess_list(path="files/names/partial.csv",binary=True, ml="sgd"))

    # The loaded model object itself is not used by the assertion below.
    ds.svc_load()
    expected_svc = [
        1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 0, 1, 1,
    ]
    self.assertEqual(expected_svc,
                     ds.guess_list(path=csv_path, binary=True, ml="svc"))
def test_sexmachine_gender_list(self):
    """Check ``gender_list`` for partial.csv and the counters it sets.

    The expected list holds three 0s, sixteen 1s and two 2s, matching the
    ``females``, ``males`` and ``unknown`` counters asserted below.
    """
    machine = DameSexmachine()
    genders = machine.gender_list(path="files/names/partial.csv")
    wanted = [
        1, 1, 1, 1, 2, 1, 0, 0, 1, 1, 2,
        1, 1, 1, 1, 1, 1, 1, 0, 1, 1,
    ]
    self.assertEqual(genders, wanted)
    self.assertEqual(len(genders), 21)
    self.assertEqual(machine.females, 3)
    self.assertEqual(machine.males, 16)
    self.assertEqual(machine.unknown, 2)
def test_sexmachine_multinomialNB_load_method_returns_correct_result(self):
    """Check that the model from ``multinomialNB_load`` predicts 1 for both rows."""
    # Two feature rows of 34 values each.
    first_row = [
        0, 0, 1, 0, 21, 0, 0, 0, 0, 34,
        2, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        5, 0, 0, 0, 0, 0, 2, 0, 0, 0,
        34, 1, 0, 0,
    ]
    second_row = [
        0, 0, 0, 0, 21, 0, 0, 0, 0, 34,
        0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
        5, 0, 0, 1, 0, 0, 1, 0, 0, 1,
        34, 0, 0, 1,
    ]
    model = DameSexmachine().multinomialNB_load()
    outcome = model.predict([first_row, second_row])
    self.assertTrue(np.array_equal(outcome, np.array([1, 1])))
def test_dame_gender_guess_list(self):
    """Check ``guess_list`` on partial.csv as labels and as nltk binary codes."""
    ds = DameSexmachine()
    csv_path = "files/names/partial.csv"

    expected_labels = [
        'male', 'male', 'male', 'male', 'male', 'male', 'female',
        'female', 'male', 'male', 'male', 'male', 'male', 'male',
        'male', 'male', 'male', 'male', 'female', 'male', 'male',
    ]
    labels = ds.guess_list(path=csv_path, binary=False)
    self.assertEqual(expected_labels, labels)

    expected_binary = [
        1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 0, 1, 1,
    ]
    codes = ds.guess_list(path=csv_path, binary=True, ml="nltk")
    self.assertEqual(expected_binary, codes)
def test_dame_gender_confusion_matrix_gender(self):
    """Check ``confusion_matrix_gender`` on min.csv for three argument sets.

    The same 3x3 matrix is expected with default arguments, with
    ``ml="nltk"``, and with ``ml="nltk"`` plus an explicit ``jsonf`` path.
    """
    ds = DameSexmachine()
    expected = [[1, 0, 0], [0, 5, 0], [0, 5, 0]]
    call_variants = [
        {"path": "files/names/min.csv"},
        {"path": "files/names/min.csv", "ml": "nltk"},
        {"path": "files/names/min.csv",
         "jsonf": "files/names/min.csv.json",
         "ml": "nltk"},
    ]
    for kwargs in call_variants:
        matrix = ds.confusion_matrix_gender(**kwargs)
        self.assertEqual(matrix, expected)
def test_sexmachine_features_int(self):
    """Check the integer feature dict that ``features_int`` builds for "David".

    Covers first/last letter codes, per-letter counts, vowel and consonant
    totals, the first/last-letter-vowel flags and the overall dict size.
    """
    s = DameSexmachine()
    dicc = s.features_int("David")

    # Letter features are stored as integer code points.
    self.assertEqual(chr(dicc['last_letter']), 'd')
    self.assertEqual(chr(dicc['first_letter']), 'd')

    expected_counts = [
        ('count(a)', 1),
        ('count(b)', 0),
        ('count(c)', 0),
        ('count(d)', 2),
        ('count(e)', 0),
        ('count(f)', 0),
        ('count(h)', 0),
        ('count(i)', 1),
        ('count(v)', 1),
    ]
    for key, expected in expected_counts:
        self.assertEqual(dicc[key], expected)
    self.assertTrue(dicc['count(a)'] > 0)

    # assertEqual, not assertTrue: assertTrue would take the expected value
    # as the failure message and pass for any truthy count.
    self.assertEqual(dicc['vocals'], 2)
    self.assertEqual(dicc['consonants'], 3)

    self.assertEqual(dicc['first_letter_vocal'], 0)
    self.assertEqual(dicc['last_letter_vocal'], 0)
    self.assertTrue(len(dicc.values()) > 30)
def test_dame_sexmachine_features_int(self):
    """Check that ``features_int("David")`` returns a non-empty collection."""
    feats = DameSexmachine().features_int("David")
    self.assertGreater(len(feats), 0)
def test_sexmachine_sgd_load_method_returns_correct_result(self):
    """Check that the model from ``sgd_load`` predicts 2 for a sample row.

    The prediction is compared with ``np.array_equal`` rather than
    ``assertEqual``: ``assertEqual`` on arrays depends on the truth value of
    an element-wise comparison, which raises ``ValueError`` as soon as the
    arrays hold more than one element.
    """
    s = DameSexmachine()
    m = s.sgd_load()
    # Single feature row (34 values) fed to the loaded model.
    sample = [[
        0, 0, 1, 0, 21, 0, 0, 0, 0, 34, 2,
        0, 0, 0, 0, 0, 0, 0, 0, 5,
        0, 0, 0, 0, 0, 2, 0, 0, 0, 34,
        1, 0, 1, 0,
    ]]
    predicted = m.predict(sample)
    expected = np.array([2])
    self.assertTrue(np.array_equal(expected, predicted))
def test_sexmachine_bernoulliNB_load_method_returns_correct_result(self):
    """Check that the model from ``bernoulliNB_load`` predicts 2 for a sample row."""
    # Single feature row (34 values) fed to the loaded model.
    sample = [[
        0, 0, 1, 0, 21, 0, 0, 0, 0, 34, 2,
        0, 0, 0, 0, 0, 0, 0, 0, 5,
        0, 0, 0, 0, 0, 2, 0, 0, 0, 34,
        1, 0, 1, 0,
    ]]
    model = DameSexmachine().bernoulliNB_load()
    outcome = model.predict(sample)
    self.assertTrue(np.array_equal(outcome, np.array([2])))
def test_string2array_method_returns_correct_result(self):
    """Check that ``string2array`` splits a phrase into its four words."""
    # NOTE(review): the phrase means "many blank spaces"; the literal
    # presumably contained runs of several spaces at some point -- confirm
    # against the project history before relying on this input.
    phrase = "muchos espacios en blanco"
    words = DameSexmachine().string2array(phrase)
    self.assertEqual(["muchos", "espacios", "en", "blanco"], words)
def test_sexmachine_features_list(self):
    """Check that the default ``features_list()`` has more than 20 entries."""
    rows = DameSexmachine().features_list()
    self.assertGreater(len(rows), 20)
# along with Damegender; see the file LICENSE. If not, write to # the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, # Boston, MA 02110-1301 USA, from app.dame_sexmachine import DameSexmachine from app.dame_perceval import DamePerceval from app.dame_utils import DameUtils import sys import argparse parser = argparse.ArgumentParser() parser.add_argument("url", help="Uniform Resource Link") parser.add_argument('--directory') parser.add_argument('--version', action='version', version='0.1') args = parser.parse_args() if (len(sys.argv) > 1): s = DameSexmachine() gg = DamePerceval() du = DameUtils() l = gg.list_mailers(args.url) l = du.delete_duplicated(l) females = 0 males = 0 unknowns = 0 for g in l: sm = s.guess(g, binary=True) if (sm == 0): females = females + 1 elif (sm == 1): males = males + 1 else:
def test_sexmachine_accuracy(self):
    """Check that ``accuracy`` on partial.csv is above 0.5."""
    machine = DameSexmachine()
    score = machine.accuracy(path="files/names/partial.csv")
    self.assertGreater(score, 0.5)
def test_sexmachine_classifier_load(self):
    """Load the stored classifier and classify the features of "David".

    As written, this only verifies that loading and classifying do not
    raise.
    """
    machine = DameSexmachine()
    loaded = machine.classifier_load()
    feats = machine.features("David")
    loaded.classify(feats)
    # NOTE(review): assertTrue(1, feats) always passes -- 1 is truthy and
    # the second argument is only the failure message -- so the result of
    # classify() is never checked. Confirm the intended expectation.
    self.assertTrue(1, feats)
def test_sexmachine_classifier(self):
    """Build the "us" locale classifier and classify the features of "David".

    As written, this only verifies that building and classifying do not
    raise.
    """
    machine = DameSexmachine()
    built = machine.classifier(locale="us")
    feats = machine.features("David")
    built.classify(feats)
    # NOTE(review): assertTrue(1, feats) always passes -- 1 is truthy and
    # the second argument is only the failure message -- so the result of
    # classify() is never checked. Confirm the intended expectation.
    self.assertTrue(1, feats)
def test_sexmachine_string2gender_method_returns_correct_result(self):
    """Check that ``string2gender`` finds the gender of a name inside a string.

    One input has the given name after the surnames and a comma; the other
    has it after a filler token.
    """
    s = DameSexmachine()
    gender1 = s.string2gender("Arroyo Menéndez, David")
    gender2 = s.string2gender("xxxxx Laura")
    # assertEqual, not assertTrue: assertTrue would take the expected label
    # as the failure message and pass for any truthy return value.
    self.assertEqual(gender1, 'male')
    self.assertEqual(gender2, 'female')
def test_sexmachine_gender_list_all(self):
    """Check that ``gender_list`` on all.csv yields more than 1000 entries."""
    genders = DameSexmachine().gender_list(path="files/names/all.csv")
    self.assertGreater(len(genders), 1000)
def test_dame_sexmachine_guess_surname_method_returns_correct_result(self):
    """Check that ``guess_surname("Smith")`` returns a truthy value."""
    machine = DameSexmachine()
    outcome = machine.guess_surname("Smith")
    self.assertTrue(outcome)
def test_sexmachine_features_list_all(self):
    """Check that ``features_list`` on all.csv yields more than 1000 entries."""
    rows = DameSexmachine().features_list(path="files/names/all.csv")
    self.assertGreater(len(rows), 1000)
def test_dame_sexmachine_guess(self):
    """Check ``guess`` for several names, output modes and ``ml`` back ends."""
    machine = DameSexmachine()
    # (name, keyword arguments, expected result), evaluated in this order.
    cases = [
        ("David", {}, 'male'),
        ("David", {"binary": True, "ml": "svc"}, 1),
        ("Laura", {"binary": True, "ml": "svc"}, 0),
        ("Palabra", {"binary": True, "ml": "svc"}, 1),
        ("Laura", {"binary": True, "ml": "sgd"}, 0),
        ("Palabra", {"binary": True, "ml": "svc"}, 1),
        ("David", {"binary": True, "ml": "gaussianNB"}, 1),
        ("David", {"binary": True, "ml": "multinomialNB"}, 1),
        ("David", {"binary": True, "ml": "bernoulliNB"}, 1),
        ("Laura", {}, 'female'),
        # With accents:
        ("Inés", {}, 'female'),
        # Without accents:
        ("Ines", {}, 'female'),
        ("Nodiccionario", {}, 'male'),
        ("Nadiccionaria", {}, 'female'),
        ("David", {"binary": True}, 1),
        ("Laura", {"binary": True}, 0),
        ("Nodiccionario", {"binary": True}, 1),
        ("Nadiccionaria", {"binary": True}, 0),
    ]
    for name, options, wanted in cases:
        self.assertEqual(machine.guess(name, **options), wanted)
def test_dame_sexmachine_guess_method_returns_correct_result(self):
    """Check ``guess`` in label mode, binary mode and with explicit ``ml``."""
    machine = DameSexmachine()

    self.assertEqual(machine.guess("David"), 'male')

    # Binary guesses with an explicit classifier: (name, ml, expected).
    with_backend = [
        ("David", "svc", 1),
        ("Laura", "svc", 0),
        ("Palabra", "svc", 1),
        ("Inés", "svc", 0),
        ("David", "sgd", 1),
        ("Laura", "sgd", 0),
        ("Palabra", "svc", 1),
        ("David", "gaussianNB", 1),
        ("David", "multinomialNB", 1),
        ("David", "bernoulliNB", 1),
    ]
    for name, backend, wanted in with_backend:
        self.assertEqual(machine.guess(name, binary=True, ml=backend), wanted)

    # Label guesses with default arguments: (name, expected).
    as_labels = [
        ("Laura", 'female'),
        ("Inés", 'female'),            # with accent
        ("Ines", 'female'),            # without accent
        ("Nodiccionario", 'male'),     # not in the dictionary
        ("Nadiccionaria", 'female'),   # in the dictionary
    ]
    for name, wanted in as_labels:
        self.assertEqual(machine.guess(name), wanted)

    # Binary guesses with the default classifier: (name, expected).
    as_binary = [
        ("David", 1),
        ("Laura", 0),
        ("Nodiccionario", 1),
        ("Nadiccionaria", 0),
    ]
    for name, wanted in as_binary:
        self.assertEqual(machine.guess(name, binary=True), wanted)
def test_dame_sexmachine_features_int_method_returns_correct_result(self):
    """Check that ``features_int("David")`` is non-empty with positive 'syllables'."""
    feats = DameSexmachine().features_int("David")
    self.assertGreater(feats['syllables'], 0)
    self.assertGreater(len(feats), 0)
from app.dame_sexmachine import DameSexmachine from app.dame_utils import DameUtils import argparse parser = argparse.ArgumentParser() parser.add_argument('ml', choices=[ 'nltk', 'svc', 'sgd', 'gaussianNB', 'multinomialNB', 'bernoulliNB', 'forest', 'tree', 'mlp' ]) parser.add_argument('--noshow', dest='noshow', action='store_true') parser.add_argument('--verbose', default=False, action="store_true") args = parser.parse_args() ds = DameSexmachine() X = np.array(ds.features_list(path="files/names/allnoundefined.csv")) y = ds.gender_list(path="files/names/allnoundefined.csv") X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42) if (args.verbose): print(X) print(y) if (args.ml == "svc"): svc = SVC(random_state=42) svc.fit(X_train, y_train) svc_disp = plot_roc_curve(svc, X_test, y_test) elif (args.ml == "forest"): rfc = RandomForestClassifier(n_estimators=10, random_state=42)
fo = open(pathmales, "w") for m in males: fo.write(m+"\n") fo.close() fo2 = open(pathfemales, "w") for f in females: fo2.write(f+"\n") fo2.close() if (args.dataset): print(args.dataset) create_file(args.dataset) if (args.ml): s = DameSexmachine() if (args.ml == "nltk"): s.classifier() elif (args.ml == "sgd"): s.sgd() elif (args.ml == "svc"): s.svc() elif (args.ml == "gaussianNB"): s.gaussianNB() elif (args.ml == "multinomialNB"): s.multinomialNB() elif (args.ml == "bernoulliNB"): s.bernoulliNB() elif (args.ml == "forest"): s.forest() elif (args.ml == "adaboost"):