コード例 #1
0
ファイル: dame_all.py プロジェクト: jgbarah-tests/damegender
 def average(self, name, surname):
     """Average the binary gender guesses of every enabled guesser.

     DameGenderGuesser is always consulted.  DameGenderApi, DameGenderize
     and DameNamsor are consulted only when the matching key of
     ``self.config['DEFAULT']`` equals ``'yes'``.  A guess equal to 2 is
     left out of the average.

     Args:
         name: given name handed to every guesser.
         surname: family name; only the Namsor guesser receives it.

     Returns:
         The mean of the guesses that were not 2, or 2 when no consulted
         guesser produced such a guess (previously this case raised
         ZeroDivisionError).
     """
     guesses = [int(DameGenderGuesser().guess(name, binary="True"))]
     settings = self.config['DEFAULT']
     if settings['genderapi'] == 'yes':
         guesses.append(int(DameGenderApi().guess(name, binary="True")))
     if settings['genderize'] == 'yes':
         guesses.append(int(DameGenderize().guess(name, binary="True")))
     if settings['namsor'] == 'yes':
         guesses.append(
             int(DameNamsor().guess(str(name), str(surname), binary="True")))
     # 2 is the value this method treats as "no usable answer".
     usable = [guess for guess in guesses if guess != 2]
     if not usable:
         # Nothing to average: report 2 rather than dividing by zero.
         return 2
     return sum(usable) / len(usable)
コード例 #2
0
ファイル: test_dame_namsor.py プロジェクト: uritau/damegender
 def test_dame_namsor_get_method_returns_correct_result(self):
     # Each row: expected result, first name, binary flag.  The surname is
     # "Arroyo" for every call.
     namsor = DameNamsor()
     cases = (
         (['male', -1.0], "David", False),
         (['male', -1.0], "David", True),
         (['female', 1.0], "Karen", True),
     )
     for expected, first_name, binary in cases:
         self.assertEqual(expected,
                          namsor.get(first_name, "Arroyo", binary=binary))
コード例 #3
0
 def test_dame_namsor_accuracy_score_dame(self):
     # Each row: the two lists handed to accuracy_score_dame and the score
     # the test expects back.
     namsor = DameNamsor()
     expectations = [
         (([1, 1], [1, 1]), 1),
         (([1, 1, 1, 0], [1, 1, 2, 0]), 0.75),
         (([1, 1, 1, 1, 2, 1], [1, 1, 1, 1, 2, 1]), 1),
     ]
     for (first, second), wanted in expectations:
         self.assertEqual(namsor.accuracy_score_dame(first, second), wanted)
コード例 #4
0
ファイル: test_dame_namsor.py プロジェクト: uritau/damegender
 def test_dame_namsor_accuracy_score_dame_method_returns_correct_result(
         self):
     # Three fixed list pairs and the score expected for each of them.
     score = DameNamsor().accuracy_score_dame
     self.assertEqual(score([1, 1], [1, 1]), 1)
     self.assertEqual(score([1, 1, 1, 0], [1, 1, 2, 0]), 0.75)
     self.assertEqual(
         score([1, 1, 1, 1, 2, 1], [1, 1, 1, 1, 2, 1]), 1)
コード例 #5
0
 def test_dame_namsor_download(self):
     namsor = DameNamsor()
     utils = DameUtils()
     csv_path = "files/names/min.csv"
     # Nothing is checked unless namsor is switched on in the configuration.
     if namsor.config['DEFAULT']['namsor'] != 'yes':
         return
     namsor.download(csv_path)
     # After download() the test expects this JSON file to exist.
     json_path = "files/names/namsor" + utils.path2file(csv_path) + ".json"
     self.assertTrue(os.path.isfile(json_path))
コード例 #6
0
 def test_dame_namsor_json2guess_list(self):
     # The same JSON file is read twice: once with binary=False (string
     # results expected) and once with binary=True (integer results expected).
     namsor = DameNamsor()
     json_file = "files/names/namsorfiles_names_min.csv.json"
     as_labels = namsor.json2guess_list(jsonf=json_file, binary=False)
     self.assertEqual(['male', 'male', 'male', 'male', 'male', 'female'],
                      as_labels)
     as_codes = namsor.json2guess_list(jsonf=json_file, binary=True)
     self.assertEqual([1, 1, 1, 1, 1, 0], as_codes)
コード例 #7
0
 def test_dame_namsor_get(self):
     namsor = DameNamsor()
     # Nothing is checked unless namsor is switched on in the configuration.
     if namsor.config['DEFAULT']['namsor'] != 'yes':
         return
     # Each row: first name, binary flag, expected [label, rounded value].
     cases = [
         ("David", False, ['male', -1.0]),
         ("David", True, ['male', -1.0]),
         ("Karen", True, ['female', 1.0]),
     ]
     for first_name, binary, expected in cases:
         answer = namsor.get(first_name, "Arroyo", binary=binary)
         self.assertEqual(expected, [answer[0], round(answer[1])])
コード例 #8
0
ファイル: test_dame_namsor.py プロジェクト: uritau/damegender
 def test_dame_namsor_gender_list_method_returns_correct_result(self):
     namsor = DameNamsor()
     genders = namsor.gender_list()
     expected = [
         1, 1, 1, 1, 2, 1, 0, 0, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1
     ]
     self.assertEqual(genders, expected)
     self.assertEqual(len(genders), 21)
     # Attributes read from the instance after gender_list() has run.
     for attribute, total in (('females', 3), ('males', 16), ('unknown', 2)):
         self.assertEqual(getattr(namsor, attribute), total)
コード例 #9
0
 def test_dame_namsor_gender_list(self):
     namsor = DameNamsor()
     genders = namsor.gender_list(path="files/names/partial.csv")
     expected = [
         1, 1, 1, 1, 2, 1, 0, 0, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1
     ]
     self.assertEqual(genders, expected)
     self.assertEqual(len(genders), 21)
     # Attributes read from the instance after gender_list() has run.
     self.assertEqual(namsor.females, 3)
     self.assertEqual(namsor.males, 16)
     self.assertEqual(namsor.unknown, 2)
コード例 #10
0
ファイル: test_dame_namsor.py プロジェクト: uritau/damegender
 def test_dame_namsor_guess_list_method_returns_correct_result(self):
     # guess_list over the same CSV, first with binary=False (strings
     # expected) and then with binary=True (integers expected).
     namsor = DameNamsor()
     csv_path = "files/names/partial.csv"
     expected_labels = [
         'male', 'male', 'male', 'male', 'male', 'male', 'female', 'female',
         'male', 'male', 'male', 'male', 'male', 'male', 'male', 'male',
         'male', 'male', 'female', 'male', 'male'
     ]
     self.assertEqual(expected_labels,
                      namsor.guess_list(path=csv_path, binary=False))
     expected_codes = [
         1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1
     ]
     self.assertEqual(expected_codes,
                      namsor.guess_list(path=csv_path, binary=True))
コード例 #11
0
 def test_dame_namsor_guess_list(self):
     namsor = DameNamsor()
     # Nothing is checked unless namsor is switched on in the configuration.
     if namsor.config['DEFAULT']['namsor'] != 'yes':
         return
     csv_path = "files/names/partial.csv"
     expected_labels = [
         'male', 'male', 'male', 'male', 'male', 'male', 'female',
         'female', 'male', 'male', 'male', 'male', 'male', 'male',
         'male', 'male', 'male', 'male', 'female', 'male', 'male'
     ]
     self.assertEqual(expected_labels,
                      namsor.guess_list(path=csv_path, binary=False))
     expected_codes = [
         1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1
     ]
     self.assertEqual(expected_codes,
                      namsor.guess_list(path=csv_path, binary=True))
コード例 #12
0
ファイル: confusion.py プロジェクト: uritau/damegender
import argparse
# Command-line options: the CSV to evaluate, which API to run, the ML
# backend name and the shape of the confusion matrix to print.
parser = argparse.ArgumentParser()
parser.add_argument('--csv', default="files/names/min.csv")
parser.add_argument('--api', default="all")
parser.add_argument('--ml', default="nltk",
                    choices=['nltk', 'svc', 'sgd', 'gaussianNB',
                             'multinomialNB', 'bernoulliNB'])
parser.add_argument('--dimensions', default="3x2", choices=['3x2', '3x3'])
args = parser.parse_args()


print("A confusion matrix C is such that Ci,j is equal to the number of observations known to be in group i but predicted to be in group j.")
print("If the classifier is nice, the diagonal is high because there are true positives")


if args.api == "all":
    # Namsor: 3x2 prints through the helper, 3x3 prints the returned matrix.
    dn = DameNamsor()
    if args.dimensions == "3x2":
        print("Namsor confusion matrix:\n")
        dn.print_confusion_matrix_dame(path=args.csv)
    elif args.dimensions == "3x3":
        namsor_confusion_matrix = dn.confusion_matrix(path=args.csv)
        print("Namsor confusion matrix:\n %s" % namsor_confusion_matrix)

    # Genderize: same two layouts.  The instance is created once and shared
    # by both branches, mirroring the Namsor section above.
    dg = DameGenderize()
    if args.dimensions == "3x2":
        print("Genderize confusion matrix:\n")
        dg.print_confusion_matrix_dame(path=args.csv)
    elif args.dimensions == "3x3":
        genderize_confusion_matrix = dg.confusion_matrix(path=args.csv)
        print("Genderize confusion matrix:\n %s" % genderize_confusion_matrix)
コード例 #13
0
ファイル: errors.py プロジェクト: jgbarah-tests/damegender
    # Tail of a branch whose header is outside this view; `d` was created there.
    # gl1 comes from gender_list() over the CSV given by args.csv.
    gl1 = d.gender_list(path=args.csv)
    # gl2 is read from the file at args.jsondownloaded when that file exists;
    # otherwise it is obtained by calling guess_list() on the CSV.
    if (os.path.isfile(args.jsondownloaded)):
        gl2 = d.json2guess_list(jsonf=args.jsondownloaded, binary=True)
    else:
        gl2 = d.guess_list(path=args.csv, binary=True)
    # Print the four measures computed from the two lists.
    ec = d.error_coded(gl1, gl2)
    print("+ The error code: %s" % ec)
    ecwa = d.error_coded_without_na(gl1, gl2)
    print("+ The error code without na: %s" % ecwa)
    naCoded = d.na_coded(gl1, gl2)
    print("+ The na coded: %s" % naCoded)
    egb = d.error_gender_bias(gl1, gl2)
    print("+ The error gender bias: %s" % egb)
elif (args.api == "namsor"):
    # Same report as above, produced with a DameNamsor instance.
    d = DameNamsor()
    print("Namsor with %s has: " % args.csv)
    gl1 = d.gender_list(path=args.csv)
    # Prefer the already-downloaded JSON when it exists on disk.
    if (os.path.isfile(args.jsondownloaded)):
        gl2 = d.json2guess_list(jsonf=args.jsondownloaded, binary=True)
    else:
        gl2 = d.guess_list(path=args.csv, binary=True)
    ec = d.error_coded(gl1, gl2)
    print("+ The error code: %s" % ec)
    ecwa = d.error_coded_without_na(gl1, gl2)
    print("+ The error code without na: %s" % ecwa)
    naCoded = d.na_coded(gl1, gl2)
    print("+ The na coded: %s" % naCoded)
    egb = d.error_gender_bias(gl1, gl2)
    print("+ The error gender bias: %s" % egb)
コード例 #14
0
 def test_dame_namsor_features_list(self):
     # features_list() must return more than 20 entries.
     features = DameNamsor().features_list()
     self.assertTrue(len(features) > 20)
コード例 #15
0
 def test_dame_namsor_scale(self):
     namsor = DameNamsor()
     # Nothing is checked unless namsor is switched on in the configuration.
     if namsor.config['DEFAULT']['namsor'] != 'yes':
         return
     rounded = round(namsor.scale("David", "Arroyo"))
     self.assertEqual(-1.0, rounded)
コード例 #16
0
 def test_dame_namsor_gender_guess(self):
     namsor = DameNamsor()
     # Nothing is checked unless namsor is switched on in the configuration.
     if namsor.config['DEFAULT']['namsor'] != 'yes':
         return
     # Each row: expected guess, first name, surname.
     cases = (
         (1, "David", "Arroyo"),
         (0, "Andrea", "Arroyo"),
         (1, "Asdf", "qwer"),
     )
     for wanted, first_name, surname in cases:
         self.assertEqual(wanted,
                          namsor.guess(first_name, surname, binary=True))
コード例 #17
0
ファイル: confusion.py プロジェクト: jgbarah-tests/damegender
# Command-line options (`parser` itself is created above this fragment).
parser.add_argument('--csv', type=str, required=True, help="files/names/min.csv")
parser.add_argument('--jsondownloaded', default="", help="files/names/genderapifiles_names_min.csv.json")
parser.add_argument('--api', required=True, choices=['namsor', 'genderize', 'genderapi', 'genderguesser', 'damegender', 'nameapi', 'all'])
#parser.add_argument('--ml', required=True, choices=['nltk', 'svc', 'sgd', 'gaussianNB', 'multinomialNB', 'bernoulliNB', 'forest', 'tree', 'mlp', 'adaboost'])
parser.add_argument('--reverse', default=False, action="store_true")
parser.add_argument('--dimensions', default="2x3", choices=['1x1', '1x2', '1x3', '2x1', '2x2', '2x3', '3x1', '3x2', '3x3'])
args = parser.parse_args()

print("A confusion matrix C is such that Ci,j is equal to the number of observations known to be in group i but predicted to be in group j.")
print("If the classifier is nice, the diagonal is high because there are true positives")


# "all": run every API whose DEFAULT config key is 'yes', then the guessers
# that are not gated by a config key.
if (args.api == "all"):
    # The Gender instance is used here only to read the configuration.
    dg = Gender()
    # NOTE(review): inside this branch args.api is "all", so
    # api=args.api.title() passes "All" to every pretty_cm call below
    # rather than the individual API name - confirm that is intended.
    if (dg.config['DEFAULT']['namsor'] == 'yes'):
        dn = DameNamsor()
        dn.pretty_cm(path=args.csv, jsonf=args.jsondownloaded, reverse=args.reverse, dimensions=args.dimensions, api=args.api.title())

    if (dg.config['DEFAULT']['genderize'] == 'yes'):
        # NOTE(review): this rebinds `dg`; when this branch runs, the
        # genderapi check below reads .config from the DameGenderize
        # instance instead of the Gender() created above.
        dg = DameGenderize()
        dg.pretty_cm(path=args.csv, jsonf=args.jsondownloaded, reverse=args.reverse, dimensions=args.dimensions, api=args.api.title())
#        dg.print_confusion_matrix_gender(path=args.csv, dimensions=args.dimensions)

    if (dg.config['DEFAULT']['genderapi'] == 'yes'):
        dga = DameGenderApi()
        dga.pretty_cm(path=args.csv, jsonf=args.jsondownloaded, reverse=args.reverse, dimensions=args.dimensions, api=args.api.title())

    # DameGenderGuesser is run without consulting the configuration.
    dgg = DameGenderGuesser()
    dgg.print_confusion_matrix_gender(path=args.csv, dimensions=args.dimensions)

    # The use of this instance lies beyond the end of this fragment.
    ds = DameSexmachine()
コード例 #18
0
 def test_dame_namsor_getGeo(self):
     # NOTE(review): despite the test name, the call exercised here is get().
     namsor = DameNamsor()
     if namsor.config['DEFAULT']['namsor'] != 'yes':
         return
     answer = namsor.get("David", "Arroyo", binary=False)
     label = answer[0]
     rounded = round(answer[1])
     self.assertEqual(['male', -1.0], [label, rounded])
コード例 #19
0
 def test_dame_namsor_json2names(self):
     # json2names() over the sample JSON file must return these six names
     # in this order.
     expected_names = [
         'Pierre', 'Raul', 'Adriano', 'Ralf', 'Guillermo', 'Sabina'
     ]
     namsor = DameNamsor()
     names = namsor.json2names(
         jsonf="files/names/namsorfiles_names_min.csv.json")
     self.assertEqual(expected_names, names)
コード例 #20
0
 def test_dame_namsor_init(self):
     # A freshly built instance has all three counters at zero.
     namsor = DameNamsor()
     for counter in ('males', 'females', 'unknown'):
         self.assertEqual(getattr(namsor, counter), 0)
コード例 #21
0
ファイル: test_dame_namsor.py プロジェクト: uritau/damegender
 def test_dame_namsor_init_method_returns_correct_result(self):
     # males, females and unknown are all 0 right after construction.
     fresh = DameNamsor()
     counters = [fresh.males, fresh.females, fresh.unknown]
     self.assertEqual(counters[0], 0)
     self.assertEqual(counters[1], 0)
     self.assertEqual(counters[2], 0)
コード例 #22
0
args = parser.parse_args()

# Dispatch on --api: each branch builds the matching client and calls its
# download() on the CSV given by --csv.  Apart from genderize, every API
# must be enabled ('yes') under DEFAULT in the client's config.
if args.api == 'genderize':
    dg = DameGenderize()
    text1 = dg.download(path=args.csv, surnames=args.surnames)
elif args.api == 'genderapi':
    dga = DameGenderApi()
    if dga.config['DEFAULT']['genderapi'] == 'yes':
        if not dga.apikey_limit_exceeded_p():
            text1 = dga.download(path=args.csv)
        elif dga.apikey_count_requests() < len(dga.csv2names(args.csv)):
            print("You don't have enough requests with this api key")
        else:
            # The count is >= the number of names here.  The original
            # re-evaluated both calls for this test and followed it with
            # an `else` ("You have not money with this api key") that
            # could never run.
            text1 = dga.download(path=args.csv)
    else:
        print("You must enable genderapi in config.cfg")
elif args.api == 'namsor':
    dn = DameNamsor()
    if dn.config['DEFAULT']['namsor'] == 'yes':
        text1 = dn.download(path=args.csv)
    else:
        print("You must enable namsor in config.cfg")
elif args.api == 'nameapi':
    dna = DameNameapi()
    if dna.config['DEFAULT']['nameapi'] == 'yes':
        text1 = dna.download(path=args.csv)
    else:
        print("You must enable nameapi in config.cfg")
コード例 #23
0
                    choices=['accuracy', 'precision', 'recall', 'f1score'])
# --api is mandatory and restricted to the names listed here.
parser.add_argument('--api',
                    required=True,
                    choices=[
                        'customsearch', 'namsor', 'genderize', 'genderguesser',
                        'damegender', 'genderapi', 'nameapi', 'all'
                    ])
args = parser.parse_args()

du = DameUtils()

# "all": call pretty_gg_list for every API enabled ('yes') in the DEFAULT
# section of the configuration.
if (args.api == "all"):
    # The Gender instance is used here only to read the configuration.
    dg = Gender()

    if (dg.config['DEFAULT']['namsor'] == 'yes'):
        dn = DameNamsor()
        dn.pretty_gg_list(path=args.csv,
                          jsonf=args.jsondownloaded,
                          measure=args.measure,
                          api='Namsor')

    if (dg.config['DEFAULT']['genderize'] == 'yes'):
        # NOTE(review): this rebinds `dg`, so any later dg.config lookup
        # reads from the DameGenderize instance, not the Gender() above.
        dg = DameGenderize()
        dg.pretty_gg_list(path=args.csv,
                          jsonf=args.jsondownloaded,
                          measure=args.measure,
                          api='Genderize')

    dgg = DameGenderGuesser()
    dgg.pretty_gg_list(path=args.csv,
                       jsonf=args.jsondownloaded,
コード例 #24
0
ファイル: api2gender.py プロジェクト: uritau/damegender
du = DameUtils()

# Only act when at least one command-line argument was supplied; each
# branch builds the client selected by --api and prints what it returns.
if len(sys.argv) > 1:
    if args.api == "genderguesser":
        dgg = DameGenderGuesser()
        print(dgg.guess(args.name))
    elif args.api == "genderapi":
        dga = DameGenderApi()
        print(dga.guess(args.name, binary=False))
        print("accuracy: %s" % str(dga.accuracy(args.name)))
    elif args.api == "genderize":
        dg = DameGenderize()
        print(dg.guess(args.name))
        print("probability: %s" % str(dg.prob(args.name)))
    elif args.api == "namsor":
        dn = DameNamsor()
        # Namsor is only queried when a surname was supplied.
        if not du.is_not_blank(args.surname):
            print("Surname is required in namsor api")
        else:
            print(dn.guess(str(args.name), str(args.surname)))
            print("scale: %s" % str(
                dn.scale(str(args.name), str(args.surname))))
    elif args.api == "nameapi":
        # The name `dn` is reused here for the Nameapi client.
        dn = DameNameapi()
        print(dn.guess(str(args.name), str(args.surname)))
        print("confidence: %s" % str(
            dn.confidence(str(args.name), str(args.surname))))
    elif args.api == "average":
        da = DameAll()
        average = da.average(args.name, args.surname)
        print("average: %s" % str(average))
コード例 #25
0
ファイル: test_dame_namsor.py プロジェクト: uritau/damegender
 def test_dame_namsor_scale_method_returns_correct_result(self):
     # scale("David", "Arroyo") is expected to be exactly -1.0.
     observed = DameNamsor().scale("David", "Arroyo")
     self.assertEqual(-1.0, observed)
コード例 #26
0
ファイル: test_dame_namsor.py プロジェクト: uritau/damegender
 def test_dame_namsor_gender_guess_method_returns_correct_result(self):
     # Each row: expected guess, first name, surname.
     namsor = DameNamsor()
     cases = (
         (1, "David", "Arroyo"),
         (0, "Andrea", "Arroyo"),
         (0, "Asdf", "qwer"),
     )
     for wanted, first_name, surname in cases:
         self.assertEqual(wanted,
                          namsor.guess(first_name, surname, binary=True))
コード例 #27
0
ファイル: test_dame_namsor.py プロジェクト: uritau/damegender
 def test_dame_namsor_features_list_method_returns_correct_result(self):
     # The list returned by features_list() must hold more than 20 items.
     namsor = DameNamsor()
     how_many = len(namsor.features_list())
     self.assertTrue(how_many > 20)
コード例 #28
0
ファイル: accuracy.py プロジェクト: uritau/damegender
from app.dame_genderize import DameGenderize
from app.dame_genderguesser import DameGenderGuesser
from app.dame_genderapi import DameGenderApi
from app.dame_nameapi import DameNameapi
from app.dame_customsearch import DameCustomsearch

import argparse
# Command-line options: the CSV to score, the API to evaluate and the ML
# backend name.
parser = argparse.ArgumentParser()
parser.add_argument('--csv', default="files/names/min.csv")
# 'all' has to be an accepted choice: the script branches on
# args.api == "all", a value argparse would otherwise reject as invalid.
parser.add_argument('--api', default="damegender",
                    choices=['customsearch', 'namsor', 'genderize',
                             'genderguesser', 'damegender', 'genderapi',
                             'nameapi', 'all'])
parser.add_argument('--ml', default="nltk",
                    choices=['nltk', 'svc', 'sgd', 'gaussianNB',
                             'multinomialNB', 'bernoulliNB'])
args = parser.parse_args()

# "all": report the accuracy of each API in turn (the branch continues past
# the end of this fragment).
if (args.api == "all"):

    # Namsor: gl comes from gender_list() and sl from guess_list(), both over
    # the CSV given by --csv; both lists are printed before being scored.
    dn = DameNamsor()
    print("################### Namsor!!")
    gl = dn.gender_list(path=args.csv)
    print("Gender list: " + str(gl))
    sl = dn.guess_list(path=args.csv, binary=True)
    print("Guess list:  " +str(sl))

    # Score the two lists against each other.
    namsor_accuracy = dn.accuracy_score_dame(gl, sl)

    #print(dn.accuracy_score_dame([1, 1, 1, 1, 2, 1], [1, 1, 1, 1, 1, 1]))
    #namsor_accuracy = dn.accuracy_score_dame([1, 1, 1, 1, 2, 1], [1, 1, 1, 1, 2, 1])

    print("Namsor accuracy: %s" % namsor_accuracy)

    # Genderize section starts here.
    dg = DameGenderize()
    print("################### Genderize!!")
コード例 #29
0
ファイル: errors.py プロジェクト: uritau/damegender
    # Tail of a branch whose header is outside this view; dn, gl1 and gl2
    # were set there.
    naCoded = dn.na_coded(gl1, gl2)
    print("+ The na coded: %s" %  naCoded)
    egb = dn.error_gender_bias(gl1, gl2)
    print("+ The error gender bias: %s" %  egb)
elif (args.api == "nameapi"):
    # Nameapi: gl1 comes from gender_list() and gl2 from guess_list(), both
    # over the CSV given by args.csv; four measures are printed from them.
    dn = DameNameapi()
    print("Nameapi with %s has: " % args.csv)
    gl1 = dn.gender_list(path=args.csv)
    gl2 = dn.guess_list(path=args.csv, binary=True)
    ec = dn.error_coded(gl1, gl2)
    print("+ The error code: %s" % ec)
    ecwa = dn.error_coded_without_na(gl1, gl2)
    print("+ The error code without na: %s" %  ecwa)
    naCoded = dn.na_coded(gl1, gl2)
    print("+ The na coded: %s" %  naCoded)
    egb = dn.error_gender_bias(gl1, gl2)
    print("+ The error gender bias: %s" %  egb)
elif (args.api == "namsor"):
    # Namsor: the same report, produced with a DameNamsor instance.
    dn = DameNamsor()
    print("Namsor with %s has: " % args.csv)
    gl1 = dn.gender_list(path=args.csv)
    gl2 = dn.guess_list(path=args.csv, binary=True)
    ec = dn.error_coded(gl1, gl2)
    print("+ The error code: %s" % ec)
    ecwa = dn.error_coded_without_na(gl1, gl2)
    print("+ The error code without na: %s" %  ecwa)
    naCoded = dn.na_coded(gl1, gl2)
    print("+ The na coded: %s" %  naCoded)
    egb = dn.error_gender_bias(gl1, gl2)
    print("+ The error gender bias: %s" %  egb)