def test_sexmachine_features(self):
     """Check selected entries of the feature dict built for the name "David"."""
     extracted = DameSexmachine().features("David")
     expectations = (
         ('has(a)', True),
         ('count(i)', 1),
         ('count(v)', 1),
         ('last_letter', 'd'),
         ('first_letter', 'd'),
     )
     # Checked in a fixed order so the first mismatch reported stays the same.
     for key, wanted in expectations:
         self.assertEqual(extracted[key], wanted)
Example #2
0
 def test_sexmachine_gender_list_method_returns_correct_result(self):
     """gender_list() with default arguments returns the expected codes.

     The females/males/unknown counters on the instance are checked after
     the call; they match the number of 0, 1 and 2 entries in the list.
     """
     machine = DameSexmachine()
     genders = machine.gender_list()
     expected = [
         1, 1, 1, 1, 2, 1, 0,
         0, 1, 1, 2, 1, 1, 1,
         1, 1, 1, 1, 0, 1, 1,
     ]
     self.assertEqual(genders, expected)
     self.assertEqual(len(genders), 21)
     self.assertEqual(machine.females, 3)
     self.assertEqual(machine.males, 16)
     self.assertEqual(machine.unknown, 2)
 def test_sexmachine_tree_load(self):
     """The model returned by tree_load() predicts [0] for a fixed feature row."""
     s = DameSexmachine()
     m = s.tree_load()
     predicted = m.predict([[
         0, 0, 1, 0, 21, 0, 0, 0, 0, 34, 2, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0,
         0, 0, 0, 2, 0, 0, 0, 34, 1, 0, 1
     ]])
     a = np.array([0])
     # Use np.array_equal rather than assertEqual: assertEqual evaluates
     # `predicted == a` in a boolean context, which raises ValueError
     # (instead of reporting a test failure) as soon as the arrays hold
     # more than one element. This also matches the sibling *_load tests.
     self.assertTrue(np.array_equal(predicted, a))
 def test_sexmachine_sgd_load(self):
     """The model returned by sgd_load() predicts [1] for a fixed feature row."""
     s = DameSexmachine()
     m = s.sgd_load()
     predicted = m.predict([[
         0, 0, 1, 0, 21, 0, 0, 0, 0, 34, 2, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0,
         0, 0, 0, 2, 0, 0, 0, 34, 1, 0, 1
     ]])
     n = np.array([1])
     # Use np.array_equal rather than assertEqual: assertEqual evaluates
     # `n == predicted` in a boolean context, which raises ValueError
     # (instead of reporting a test failure) as soon as the arrays hold
     # more than one element. This also matches the sibling *_load tests.
     self.assertTrue(np.array_equal(predicted, n))
 def test_sexmachine_bernoulliNB_load(self):
     """The model returned by bernoulliNB_load() predicts [2] for a fixed row."""
     feature_row = [
         0, 0, 1, 0, 21, 0, 0, 0, 0, 34, 2, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0,
         0, 0, 0, 2, 0, 0, 0, 34, 1, 0, 1
     ]
     model = DameSexmachine().bernoulliNB_load()
     outcome = model.predict([feature_row])
     self.assertTrue(np.array_equal(outcome, np.array([2])))
 def test_sexmachine_adaboost_load(self):
     """The model returned by adaboost_load() predicts [1] for a fixed row."""
     feature_row = [
         0, 0, 1, 0, 21, 0, 0, 0, 0, 34, 2, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0,
         0, 0, 0, 2, 0, 0, 0, 34, 1, 0, 1
     ]
     model = DameSexmachine().adaboost_load()
     outcome = model.predict([feature_row])
     self.assertTrue(np.array_equal(outcome, np.array([1])))
Example #7
0
 def test_dame_sexmachine_guess_list_method_returns_correct_result(self):
     """guess_list() on partial.csv returns the expected values per backend.

     Covers the string output (binary=False), the default binary output,
     the explicit "nltk" backend and the "svc" backend.
     """
     ds = DameSexmachine()
     csv_path = "files/names/partial.csv"

     expected_labels = [
         'male', 'male', 'male', 'male', 'female', 'male', 'female',
         'female', 'male', 'male', 'male', 'male', 'male', 'male',
         'female', 'male', 'male', 'male', 'female', 'male', 'male',
     ]
     self.assertEqual(expected_labels,
                      ds.guess_list(path=csv_path, binary=False))

     # The default backend and the explicit "nltk" one share one expectation.
     expected_default = [
         1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1,
     ]
     self.assertEqual(expected_default,
                      ds.guess_list(path=csv_path, binary=True))
     self.assertEqual(expected_default,
                      ds.guess_list(path=csv_path, binary=True, ml="nltk"))

     # sgd_model = ds.sgd_load()
     # self.assertEqual([0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0], ds.guess_list(path="files/names/partial.csv",binary=True, ml="sgd"))

     # svc_load() is still invoked before the svc guess; its result is unused.
     ds.svc_load()
     expected_svc = [
         1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1,
     ]
     self.assertEqual(expected_svc,
                      ds.guess_list(path=csv_path, binary=True, ml="svc"))
 def test_sexmachine_gender_list(self):
     """gender_list() on partial.csv returns the expected codes and counters."""
     machine = DameSexmachine()
     genders = machine.gender_list(path="files/names/partial.csv")
     expected = [
         1, 1, 1, 1, 2, 1, 0,
         0, 1, 1, 2, 1, 1, 1,
         1, 1, 1, 1, 0, 1, 1,
     ]
     self.assertEqual(genders, expected)
     self.assertEqual(len(genders), 21)
     # Counters on the instance are read only after gender_list() has run.
     self.assertEqual(machine.females, 3)
     self.assertEqual(machine.males, 16)
     self.assertEqual(machine.unknown, 2)
Example #9
0
 def test_sexmachine_multinomialNB_load_method_returns_correct_result(self):
     """The model from multinomialNB_load() predicts [1, 1] for two fixed rows."""
     first_row = [
         0, 0, 1, 0, 21, 0, 0, 0, 0, 34, 2, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 5, 0, 0, 0, 0, 0, 2, 0, 0, 0, 34, 1, 0, 0,
     ]
     second_row = [
         0, 0, 0, 0, 21, 0, 0, 0, 0, 34, 0, 0, 0, 0, 0, 1, 0,
         0, 0, 0, 5, 0, 0, 1, 0, 0, 1, 0, 0, 1, 34, 0, 0, 1,
     ]
     model = DameSexmachine().multinomialNB_load()
     outcome = model.predict([first_row, second_row])
     self.assertTrue(np.array_equal(outcome, np.array([1, 1])))
 def test_dame_gender_guess_list(self):
     """guess_list() on partial.csv: string labels, then "nltk" binary codes."""
     ds = DameSexmachine()
     csv_path = "files/names/partial.csv"

     expected_labels = [
         'male', 'male', 'male', 'male', 'male', 'male', 'female',
         'female', 'male', 'male', 'male', 'male', 'male', 'male',
         'male', 'male', 'male', 'male', 'female', 'male', 'male',
     ]
     labels = ds.guess_list(path=csv_path, binary=False)
     self.assertEqual(expected_labels, labels)

     expected_codes = [
         1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1,
     ]
     codes = ds.guess_list(path=csv_path, binary=True, ml="nltk")
     self.assertEqual(expected_codes, codes)
 def test_dame_gender_confusion_matrix_gender(self):
     """confusion_matrix_gender() on min.csv yields one matrix for all variants.

     The call is exercised with default options, with ml="nltk", and with
     both an explicit jsonf file and ml="nltk".
     """
     ds = DameSexmachine()
     expected = [[1, 0, 0], [0, 5, 0], [0, 5, 0]]
     option_sets = (
         {},
         {"ml": "nltk"},
         {"jsonf": "files/names/min.csv.json", "ml": "nltk"},
     )
     for options in option_sets:
         matrix = ds.confusion_matrix_gender(path="files/names/min.csv",
                                             **options)
         self.assertEqual(matrix, expected)
 def test_sexmachine_features_int(self):
     """Check the integer-encoded features produced for the name "David".

     First/last letters are stored as code points (decoded with chr()),
     letter counts and vowel/consonant totals are integers, and the
     feature dict is expected to hold more than 30 entries.
     """
     s = DameSexmachine()
     dicc = s.features_int("David")
     self.assertEqual(chr(dicc['last_letter']), 'd')
     self.assertEqual(chr(dicc['first_letter']), 'd')
     self.assertEqual(dicc['count(a)'], 1)
     self.assertEqual(dicc['count(b)'], 0)
     self.assertEqual(dicc['count(c)'], 0)
     self.assertEqual(dicc['count(d)'], 2)
     self.assertEqual(dicc['count(e)'], 0)
     self.assertEqual(dicc['count(f)'], 0)
     self.assertEqual(dicc['count(h)'], 0)
     self.assertEqual(dicc['count(i)'], 1)
     self.assertEqual(dicc['count(v)'], 1)
     self.assertTrue(dicc['count(a)'] > 0)
     # These two were `assertTrue(value, expected)`, which treats the second
     # argument as the failure message and only checks truthiness; compare
     # the actual totals instead ("David" has vowels a, i and consonants
     # d, v, d).
     self.assertEqual(dicc['vocals'], 2)
     self.assertEqual(dicc['consonants'], 3)
     self.assertEqual(dicc['first_letter_vocal'], 0)
     self.assertEqual(dicc['last_letter_vocal'], 0)
     self.assertTrue(len(dicc.values()) > 30)
 def test_dame_sexmachine_features_int(self):
     """features_int("David") returns a non-empty collection."""
     encoded = DameSexmachine().features_int("David")
     self.assertGreater(len(encoded), 0)
Example #14
0
 def test_sexmachine_sgd_load_method_returns_correct_result(self):
     """The model returned by sgd_load() predicts [2] for a fixed feature row."""
     s = DameSexmachine()
     m = s.sgd_load()
     predicted = m.predict([[ 0,  0,  1,  0, 21,  0,  0,  0,  0, 34,  2,  0,  0,  0,  0,  0, 0,  0,  0,  5,  0,  0,  0,  0,  0,  2,  0,  0,  0, 34,  1,  0, 1, 0]])
     n = np.array([2])
     # Use np.array_equal rather than assertEqual: assertEqual evaluates
     # `n == predicted` in a boolean context, which raises ValueError
     # (instead of reporting a test failure) as soon as the arrays hold
     # more than one element. This also matches the sibling *_load tests.
     self.assertTrue(np.array_equal(predicted, n))
Example #15
0
 def test_sexmachine_bernoulliNB_load_method_returns_correct_result(self):
     """The model returned by bernoulliNB_load() predicts [2] for a fixed row."""
     feature_row = [
         0, 0, 1, 0, 21, 0, 0, 0, 0, 34, 2, 0, 0, 0, 0, 0, 0,
         0, 0, 5, 0, 0, 0, 0, 0, 2, 0, 0, 0, 34, 1, 0, 1, 0,
     ]
     model = DameSexmachine().bernoulliNB_load()
     outcome = model.predict([feature_row])
     self.assertTrue(np.array_equal(outcome, np.array([2])))
Example #16
0
 def test_string2array_method_returns_correct_result(self):
     """string2array() splits on whitespace runs without yielding empty items."""
     text = "muchos    espacios en blanco"
     words = DameSexmachine().string2array(text)
     self.assertEqual(["muchos", "espacios", "en", "blanco"], words)
 def test_sexmachine_features_list(self):
     """features_list() with default arguments returns more than 20 entries."""
     rows = DameSexmachine().features_list()
     self.assertGreater(len(rows), 20)
Example #18
0
# along with Damegender; see the file LICENSE.  If not, write to
# the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
# Boston, MA 02110-1301 USA,

from app.dame_sexmachine import DameSexmachine
from app.dame_perceval import DamePerceval
from app.dame_utils import DameUtils
import sys
import argparse
parser = argparse.ArgumentParser()
parser.add_argument("url", help="Uniform Resource Link")
parser.add_argument('--directory')
parser.add_argument('--version', action='version', version='0.1')
args = parser.parse_args()
if (len(sys.argv) > 1):
    s = DameSexmachine()
    gg = DamePerceval()
    du = DameUtils()
    l = gg.list_mailers(args.url)
    l = du.delete_duplicated(l)

    females = 0
    males = 0
    unknowns = 0
    for g in l:
        sm = s.guess(g, binary=True)
        if (sm == 0):
            females = females + 1
        elif (sm == 1):
            males = males + 1
        else:
 def test_sexmachine_accuracy(self):
     """accuracy() on partial.csv must exceed 0.5."""
     score = DameSexmachine().accuracy(path="files/names/partial.csv")
     self.assertGreater(score, 0.5)
 def test_sexmachine_classifier_load(self):
     """The classifier from classifier_load() labels the "David" features."""
     s = DameSexmachine()
     m = s.classifier_load()
     n = s.features("David")
     guess = m.classify(n)
     # The previous `assertTrue(1, n)` could never fail: 1 is always truthy
     # and `n` was only used as the failure message, so `guess` went
     # unchecked. Assert on the classification result instead.
     # TODO(review): pin the exact expected label once the classifier's
     # label set is confirmed.
     self.assertIsNotNone(guess)
 def test_sexmachine_classifier(self):
     """The classifier built by classifier(locale="us") labels "David"."""
     s = DameSexmachine()
     classifier = s.classifier(locale="us")
     n = s.features("David")
     guess = classifier.classify(n)
     # The previous `assertTrue(1, n)` could never fail: 1 is always truthy
     # and `n` was only used as the failure message, so `guess` went
     # unchecked. Assert on the classification result instead.
     # TODO(review): pin the exact expected label once the classifier's
     # label set is confirmed.
     self.assertIsNotNone(guess)
Example #22
0
 def test_sexmachine_string2gender_method_returns_correct_result(self):
     """string2gender() extracts the gender from strings containing a name."""
     s = DameSexmachine()
     gender1 = s.string2gender("Arroyo Menéndez, David")
     gender2 = s.string2gender("xxxxx Laura")
     # These were `assertTrue(gender, 'male')`, which only checks that the
     # result is truthy and uses the expected value as the failure message.
     self.assertEqual(gender1, 'male')
     self.assertEqual(gender2, 'female')
 def test_sexmachine_gender_list_all(self):
     """gender_list() on all.csv returns more than 1000 entries."""
     genders = DameSexmachine().gender_list(path="files/names/all.csv")
     self.assertGreater(len(genders), 1000)
Example #24
0
 def test_dame_sexmachine_guess_surname_method_returns_correct_result(self):
     """guess_surname("Smith") returns a truthy value."""
     outcome = DameSexmachine().guess_surname("Smith")
     self.assertTrue(outcome)
 def test_sexmachine_features_list_all(self):
     """features_list() on all.csv returns more than 1000 entries."""
     rows = DameSexmachine().features_list(path="files/names/all.csv")
     self.assertGreater(len(rows), 1000)
 def test_dame_sexmachine_guess(self):
     """guess() returns the expected gender across names and ML backends.

     Each case is (name, keyword options, expected result); the cases run
     in order and the first mismatch fails the test.
     """
     machine = DameSexmachine()
     svc = {"binary": True, "ml": "svc"}
     cases = [
         ("David", {}, 'male'),
         ("David", svc, 1),
         ("Laura", svc, 0),
         ("Palabra", svc, 1),
         ("Laura", {"binary": True, "ml": "sgd"}, 0),
         ("Palabra", svc, 1),
         ("David", {"binary": True, "ml": "gaussianNB"}, 1),
         ("David", {"binary": True, "ml": "multinomialNB"}, 1),
         ("David", {"binary": True, "ml": "bernoulliNB"}, 1),
         ("Laura", {}, 'female'),
         # With accents:
         ("Inés", {}, 'female'),
         # Without accents:
         ("Ines", {}, 'female'),
         ("Nodiccionario", {}, 'male'),
         ("Nadiccionaria", {}, 'female'),
         ("David", {"binary": True}, 1),
         ("Laura", {"binary": True}, 0),
         ("Nodiccionario", {"binary": True}, 1),
         ("Nadiccionaria", {"binary": True}, 0),
     ]
     for name, options, expected in cases:
         self.assertEqual(machine.guess(name, **options), expected)
Example #27
0
 def test_dame_sexmachine_guess_method_returns_correct_result(self):
     """guess() returns the expected gender across names and ML backends.

     Each case is (name, keyword options, expected result); the cases run
     in order and the first mismatch fails the test.
     """
     machine = DameSexmachine()
     svc = {"binary": True, "ml": "svc"}
     sgd = {"binary": True, "ml": "sgd"}
     cases = [
         ("David", {}, 'male'),
         ("David", svc, 1),
         ("Laura", svc, 0),
         ("Palabra", svc, 1),
         ("Inés", svc, 0),
         ("David", sgd, 1),
         ("Laura", sgd, 0),
         ("Palabra", svc, 1),
         ("David", {"binary": True, "ml": "gaussianNB"}, 1),
         ("David", {"binary": True, "ml": "multinomialNB"}, 1),
         ("David", {"binary": True, "ml": "bernoulliNB"}, 1),
         ("Laura", {}, 'female'),
         ("Inés", {}, 'female'),  # with accent
         ("Ines", {}, 'female'),  # without accent
         ("Nodiccionario", {}, 'male'),  # not in the dictionary
         ("Nadiccionaria", {}, 'female'),  # noted as "in dictionary"
         ("David", {"binary": True}, 1),
         ("Laura", {"binary": True}, 0),
         ("Nodiccionario", {"binary": True}, 1),
         ("Nadiccionaria", {"binary": True}, 0),
     ]
     for name, options, expected in cases:
         self.assertEqual(machine.guess(name, **options), expected)
Example #28
0
 def test_dame_sexmachine_features_int_method_returns_correct_result(self):
     """features_int("David") has a positive 'syllables' entry and is non-empty."""
     encoded = DameSexmachine().features_int("David")
     self.assertGreater(encoded['syllables'], 0)
     self.assertGreater(len(encoded), 0)
Example #29
0
from app.dame_sexmachine import DameSexmachine
from app.dame_utils import DameUtils

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('ml',
                    choices=[
                        'nltk', 'svc', 'sgd', 'gaussianNB', 'multinomialNB',
                        'bernoulliNB', 'forest', 'tree', 'mlp'
                    ])
parser.add_argument('--noshow', dest='noshow', action='store_true')
parser.add_argument('--verbose', default=False, action="store_true")
args = parser.parse_args()

ds = DameSexmachine()
X = np.array(ds.features_list(path="files/names/allnoundefined.csv"))
y = ds.gender_list(path="files/names/allnoundefined.csv")
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

if (args.verbose):
    print(X)
    print(y)

if (args.ml == "svc"):
    svc = SVC(random_state=42)
    svc.fit(X_train, y_train)
    svc_disp = plot_roc_curve(svc, X_test, y_test)

elif (args.ml == "forest"):
    rfc = RandomForestClassifier(n_estimators=10, random_state=42)
Example #30
0
        fo = open(pathmales, "w")
        for m in males:
            fo.write(m+"\n")
        fo.close()
        fo2 = open(pathfemales, "w")
        for f in females:
            fo2.write(f+"\n")
        fo2.close()


if (args.dataset):
    print(args.dataset)
    create_file(args.dataset)

if (args.ml):
    s = DameSexmachine()
    if (args.ml == "nltk"):
        s.classifier()
    elif (args.ml == "sgd"):
        s.sgd()
    elif (args.ml == "svc"):
        s.svc()
    elif (args.ml == "gaussianNB"):
        s.gaussianNB()
    elif (args.ml == "multinomialNB"):
        s.multinomialNB()
    elif (args.ml == "bernoulliNB"):
        s.bernoulliNB()
    elif (args.ml == "forest"):
        s.forest()
    elif (args.ml == "adaboost"):