def average(self, name, surname):
    """Average the binary gender guesses of every enabled backend.

    DameGenderGuesser is always consulted. DameGenderApi, DameGenderize
    and DameNamsor are consulted only when their flag in
    ``self.config['DEFAULT']`` equals ``'yes'``. A guess equal to 2 is
    left out of the average.

    Args:
        name: given name handed to every backend.
        surname: family name; only DameNamsor receives it.

    Returns:
        The mean of the guesses that were not 2, or ``None`` when no
        backend produced such a guess (the original code divided by
        zero in that situation).
    """
    # NOTE(review): binary is passed as the *string* "True", exactly as the
    # original did; presumably the backends only test truthiness -- confirm.
    votes = [int(DameGenderGuesser().guess(name, binary="True"))]

    defaults = self.config['DEFAULT']
    if defaults['genderapi'] == 'yes':
        votes.append(int(DameGenderApi().guess(name, binary="True")))
    if defaults['genderize'] == 'yes':
        votes.append(int(DameGenderize().guess(name, binary="True")))
    if defaults['namsor'] == 'yes':
        votes.append(
            int(DameNamsor().guess(str(name), str(surname), binary="True")))

    # 2 is skipped by the original accumulation as well.
    counted = [vote for vote in votes if vote != 2]
    if not counted:
        # Nothing to average: avoid ZeroDivisionError.
        return None
    return sum(counted) / len(counted)
def test_dame_namsor_get_method_returns_correct_result(self):
    """DameNamsor.get() returns the expected [gender, scale] pair."""
    namsor = DameNamsor()
    # (expected pair, given name, binary flag); the surname is always Arroyo.
    cases = (
        (['male', -1.0], "David", False),
        (['male', -1.0], "David", True),
        (['female', 1.0], "Karen", True),
    )
    for expected, given_name, binary in cases:
        self.assertEqual(
            expected, namsor.get(given_name, "Arroyo", binary=binary))
def test_dame_namsor_accuracy_score_dame(self):
    """accuracy_score_dame() yields the expected score for three inputs."""
    namsor = DameNamsor()
    # (first list, second list, expected score)
    cases = (
        ([1, 1], [1, 1], 1),
        ([1, 1, 1, 0], [1, 1, 2, 0], 0.75),
        ([1, 1, 1, 1, 2, 1], [1, 1, 1, 1, 2, 1], 1),
    )
    for first, second, expected in cases:
        score = namsor.accuracy_score_dame(first, second)
        self.assertEqual(score, expected)
def test_dame_namsor_accuracy_score_dame_method_returns_correct_result(self):
    """accuracy_score_dame() yields 1, 0.75 and 1 for the three samples."""
    namsor = DameNamsor()
    samples = (
        (([1, 1], [1, 1]), 1),
        (([1, 1, 1, 0], [1, 1, 2, 0]), 0.75),
        (([1, 1, 1, 1, 2, 1], [1, 1, 1, 1, 2, 1]), 1),
    )
    for list_pair, expected in samples:
        self.assertEqual(namsor.accuracy_score_dame(*list_pair), expected)
def test_dame_namsor_download(self):
    """With namsor enabled, download() leaves the expected JSON file on disk."""
    namsor = DameNamsor()
    utils = DameUtils()
    csv_path = "files/names/min.csv"
    if namsor.config['DEFAULT']['namsor'] != 'yes':
        # Nothing is checked unless the config enables namsor.
        return
    namsor.download(csv_path)
    json_path = "files/names/namsor" + utils.path2file(csv_path) + ".json"
    self.assertTrue(os.path.isfile(json_path))
def test_dame_namsor_json2guess_list(self):
    """json2guess_list() decodes the stored JSON as labels and as codes."""
    namsor = DameNamsor()

    as_labels = namsor.json2guess_list(
        jsonf="files/names/namsorfiles_names_min.csv.json", binary=False)
    expected_labels = ['male', 'male', 'male', 'male', 'male', 'female']
    self.assertEqual(expected_labels, as_labels)

    as_codes = namsor.json2guess_list(
        jsonf="files/names/namsorfiles_names_min.csv.json", binary=True)
    expected_codes = [1, 1, 1, 1, 1, 0]
    self.assertEqual(expected_codes, as_codes)
def test_dame_namsor_get(self):
    """With namsor enabled, get() returns the gender and a scale that rounds as expected."""
    namsor = DameNamsor()
    if namsor.config['DEFAULT']['namsor'] != 'yes':
        return
    # (expected [gender, rounded scale], given name, binary flag)
    cases = (
        (['male', -1.0], "David", False),
        (['male', -1.0], "David", True),
        (['female', 1.0], "Karen", True),
    )
    for expected, given_name, binary in cases:
        answer = namsor.get(given_name, "Arroyo", binary=binary)
        self.assertEqual(expected, [answer[0], round(answer[1])])
def test_dame_namsor_gender_list_method_returns_correct_result(self):
    """gender_list() with no arguments returns 21 codes and sets the counters."""
    namsor = DameNamsor()
    genders = namsor.gender_list()
    expected = [
        1, 1, 1, 1, 2, 1, 0, 0, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1
    ]
    self.assertEqual(genders, expected)
    self.assertEqual(len(genders), 21)
    # Counters read from the instance after the call.
    for counter, total in (("females", 3), ("males", 16), ("unknown", 2)):
        self.assertEqual(getattr(namsor, counter), total)
def test_dame_namsor_gender_list(self):
    """gender_list() on partial.csv returns 21 codes and sets the counters."""
    namsor = DameNamsor()
    genders = namsor.gender_list(path="files/names/partial.csv")
    expected = [
        1, 1, 1, 1, 2, 1, 0, 0, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1
    ]
    self.assertEqual(genders, expected)
    self.assertEqual(len(genders), 21)
    for counter, total in (("females", 3), ("males", 16), ("unknown", 2)):
        self.assertEqual(getattr(namsor, counter), total)
def test_dame_namsor_guess_list_method_returns_correct_result(self):
    """guess_list() on partial.csv matches the expected labels and codes."""
    namsor = DameNamsor()

    expected_labels = [
        'male', 'male', 'male', 'male', 'male', 'male', 'female', 'female',
        'male', 'male', 'male', 'male', 'male', 'male', 'male', 'male',
        'male', 'male', 'female', 'male', 'male'
    ]
    labels = namsor.guess_list(path="files/names/partial.csv", binary=False)
    self.assertEqual(expected_labels, labels)

    expected_codes = [
        1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1
    ]
    codes = namsor.guess_list(path="files/names/partial.csv", binary=True)
    self.assertEqual(expected_codes, codes)
def test_dame_namsor_guess_list(self):
    """With namsor enabled, guess_list() on partial.csv matches labels and codes."""
    namsor = DameNamsor()
    if namsor.config['DEFAULT']['namsor'] != 'yes':
        return

    expected_labels = [
        'male', 'male', 'male', 'male', 'male', 'male', 'female', 'female',
        'male', 'male', 'male', 'male', 'male', 'male', 'male', 'male',
        'male', 'male', 'female', 'male', 'male'
    ]
    labels = namsor.guess_list(path="files/names/partial.csv", binary=False)
    self.assertEqual(expected_labels, labels)

    expected_codes = [
        1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1
    ]
    codes = namsor.guess_list(path="files/names/partial.csv", binary=True)
    self.assertEqual(expected_codes, codes)
import argparse

# Command-line interface: CSV path, which API to evaluate, an ML backend
# name and the confusion-matrix shape.
parser = argparse.ArgumentParser()
parser.add_argument('--csv', default="files/names/min.csv")
parser.add_argument('--api', default="all")
# NOTE(review): --ml is parsed but not read anywhere in the visible part of this script.
parser.add_argument('--ml', default="nltk", choices=['nltk', 'svc', 'sgd', 'gaussianNB', 'multinomialNB', 'bernoulliNB'])
parser.add_argument('--dimensions', default="3x2", choices=['3x2', '3x3'])
args = parser.parse_args()

# Short reminder of how to read the matrices printed below.
print("A confusion matrix C is such that Ci,j is equal to the number of observations known to be in group i but predicted to be in group j.")
print("If the classifier is nice, the diagonal is high because there are true positives")

if (args.api == "all"):
    # Namsor: "3x2" delegates printing to the object, "3x3" prints the
    # value returned by confusion_matrix().
    dn = DameNamsor()
    if (args.dimensions == "3x2"):
        print("Namsor confusion matrix:\n")
        dn.print_confusion_matrix_dame(path=args.csv)
    elif (args.dimensions == "3x3"):
        namsor_confusion_matrix = dn.confusion_matrix(path=args.csv)
        print("Namsor confusion matrix:\n %s" % namsor_confusion_matrix)
    # Genderize: same two shapes.
    dg = DameGenderize()
    if (args.dimensions == "3x2"):
        print("Genderize confusion matrix:\n")
        dg.print_confusion_matrix_dame(path=args.csv)
    elif (args.dimensions == "3x3"):
        # NOTE(review): dg is instantiated a second time here; looks redundant.
        dg = DameGenderize()
        genderize_confusion_matrix = dg.confusion_matrix(path=args.csv)
        print("Genderize confusion matrix:\n %s" % genderize_confusion_matrix)
gl1 = d.gender_list(path=args.csv) if (os.path.isfile(args.jsondownloaded)): gl2 = d.json2guess_list(jsonf=args.jsondownloaded, binary=True) else: gl2 = d.guess_list(path=args.csv, binary=True) # gl2 = d.guess_list(path=args.csv, binary=True) ec = d.error_coded(gl1, gl2) print("+ The error code: %s" % ec) ecwa = d.error_coded_without_na(gl1, gl2) print("+ The error code without na: %s" % ecwa) naCoded = d.na_coded(gl1, gl2) print("+ The na coded: %s" % naCoded) egb = d.error_gender_bias(gl1, gl2) print("+ The error gender bias: %s" % egb) elif (args.api == "namsor"): d = DameNamsor() print("Namsor with %s has: " % args.csv) gl1 = d.gender_list(path=args.csv) if (os.path.isfile(args.jsondownloaded)): gl2 = d.json2guess_list(jsonf=args.jsondownloaded, binary=True) else: gl2 = d.guess_list(path=args.csv, binary=True) ec = d.error_coded(gl1, gl2) print("+ The error code: %s" % ec) ecwa = d.error_coded_without_na(gl1, gl2) print("+ The error code without na: %s" % ecwa) naCoded = d.na_coded(gl1, gl2) print("+ The na coded: %s" % naCoded) egb = d.error_gender_bias(gl1, gl2) print("+ The error gender bias: %s" % egb)
def test_dame_namsor_features_list(self):
    """features_list() returns more than 20 items."""
    features = DameNamsor().features_list()
    how_many = len(features)
    self.assertTrue(how_many > 20)
def test_dame_namsor_scale(self):
    """With namsor enabled, scale() for David Arroyo rounds to -1.0."""
    namsor = DameNamsor()
    if namsor.config['DEFAULT']['namsor'] != 'yes':
        return
    rounded_scale = round(namsor.scale("David", "Arroyo"))
    self.assertEqual(-1.0, rounded_scale)
def test_dame_namsor_gender_guess(self):
    """With namsor enabled, guess(binary=True) returns the expected codes."""
    namsor = DameNamsor()
    if namsor.config['DEFAULT']['namsor'] != 'yes':
        return
    # (expected code, given name, surname)
    cases = (
        (1, "David", "Arroyo"),
        (0, "Andrea", "Arroyo"),
        (1, "Asdf", "qwer"),
    )
    for expected, given_name, family_name in cases:
        self.assertEqual(
            expected, namsor.guess(given_name, family_name, binary=True))
# Command-line options (the parser object is created before this fragment).
parser.add_argument('--csv', type=str, required=True, help="files/names/min.csv")
parser.add_argument('--jsondownloaded', default="", help="files/names/genderapifiles_names_min.csv.json")
parser.add_argument('--api', required=True, choices=['namsor', 'genderize', 'genderapi', 'genderguesser', 'damegender', 'nameapi', 'all'])
#parser.add_argument('--ml', required=True, choices=['nltk', 'svc', 'sgd', 'gaussianNB', 'multinomialNB', 'bernoulliNB', 'forest', 'tree', 'mlp', 'adaboost'])
parser.add_argument('--reverse', default=False, action="store_true")
parser.add_argument('--dimensions', default="2x3", choices=['1x1', '1x2', '1x3', '2x1', '2x2', '2x3', '3x1', '3x2', '3x3'])
args = parser.parse_args()

# Short reminder of how to read the matrices printed below.
print("A confusion matrix C is such that Ci,j is equal to the number of observations known to be in group i but predicted to be in group j.")
print("If the classifier is nice, the diagonal is high because there are true positives")

if (args.api == "all"):
    # dg starts as a Gender instance, used here only to read the config flags.
    dg = Gender()
    # NOTE(review): in this branch args.api is "all", so api=args.api.title()
    # passes "All" to every pretty_cm call below -- confirm that is intended.
    if (dg.config['DEFAULT']['namsor'] == 'yes'):
        dn = DameNamsor()
        dn.pretty_cm(path=args.csv, jsonf=args.jsondownloaded, reverse=args.reverse, dimensions=args.dimensions, api=args.api.title())
    if (dg.config['DEFAULT']['genderize'] == 'yes'):
        # dg is rebound to a DameGenderize instance; the genderapi check
        # below then reads the config from that object.
        dg = DameGenderize()
        dg.pretty_cm(path=args.csv, jsonf=args.jsondownloaded, reverse=args.reverse, dimensions=args.dimensions, api=args.api.title())
        # dg.print_confusion_matrix_gender(path=args.csv, dimensions=args.dimensions)
    if (dg.config['DEFAULT']['genderapi'] == 'yes'):
        dga = DameGenderApi()
        dga.pretty_cm(path=args.csv, jsonf=args.jsondownloaded, reverse=args.reverse, dimensions=args.dimensions, api=args.api.title())
    # DameGenderGuesser is run without consulting any config flag.
    dgg = DameGenderGuesser()
    dgg.print_confusion_matrix_gender(path=args.csv, dimensions=args.dimensions)
    ds = DameSexmachine()
def test_dame_namsor_getGeo(self):
    """With namsor enabled, get() for David Arroyo is male with scale rounding to -1.0."""
    namsor = DameNamsor()
    if namsor.config['DEFAULT']['namsor'] != 'yes':
        return
    answer = namsor.get("David", "Arroyo", binary=False)
    gender_and_scale = [answer[0], round(answer[1])]
    self.assertEqual(['male', -1.0], gender_and_scale)
def test_dame_namsor_json2names(self):
    """json2names() lists the six names stored in the min.csv JSON file."""
    namsor = DameNamsor()
    expected_names = [
        'Pierre', 'Raul', 'Adriano', 'Ralf', 'Guillermo', 'Sabina'
    ]
    names = namsor.json2names(
        jsonf="files/names/namsorfiles_names_min.csv.json")
    self.assertEqual(expected_names, names)
def test_dame_namsor_init(self):
    """A fresh DameNamsor has males, females and unknown all equal to 0."""
    namsor = DameNamsor()
    for counter in ("males", "females", "unknown"):
        self.assertEqual(getattr(namsor, counter), 0)
def test_dame_namsor_init_method_returns_correct_result(self):
    """The three gender counters are 0 right after construction."""
    fresh = DameNamsor()
    counters = (fresh.males, fresh.females, fresh.unknown)
    males, females, unknown = counters
    self.assertEqual(males, 0)
    self.assertEqual(females, 0)
    self.assertEqual(unknown, 0)
args = parser.parse_args()

# Download guesses for the names in --csv from the API selected with --api.
if (args.api == 'genderize'):
    # Unlike the branches below, genderize is not gated on a config flag.
    dg = DameGenderize()
    text1 = dg.download(path=args.csv, surnames=args.surnames)
elif (args.api == 'genderapi'):
    dga = DameGenderApi()
    if (dga.config['DEFAULT']['genderapi'] == 'yes'):
        if (dga.apikey_limit_exceeded_p() == False):
            text1 = dga.download(path=args.csv)
        elif (dga.apikey_count_requests() < len(dga.csv2names(args.csv))):
            print("You don't have enough requests with this api key")
        elif (dga.apikey_count_requests() >= len(dga.csv2names(args.csv))):
            text1 = dga.download(path=args.csv)
        else:
            # NOTE(review): the two elif tests above are `<` and `>=` on the
            # same operands, so this branch looks unreachable for ordinary
            # numbers -- confirm the intended quota logic.
            print("You have not money with this api key")
    else:
        print("You must enable genderapi in config.cfg")
elif (args.api == 'namsor'):
    dn = DameNamsor()
    if (dn.config['DEFAULT']['namsor'] == 'yes'):
        text1 = dn.download(path=args.csv)
    else:
        print("You must enable namsor in config.cfg")
elif (args.api == 'nameapi'):
    dna = DameNameapi()
    if (dna.config['DEFAULT']['nameapi'] == 'yes'):
        text1 = dna.download(path=args.csv)
    else:
        print("You must enable nameapi in config.cfg")
choices=['accuracy', 'precision', 'recall', 'f1score']) parser.add_argument('--api', required=True, choices=[ 'customsearch', 'namsor', 'genderize', 'genderguesser', 'damegender', 'genderapi', 'nameapi', 'all' ]) args = parser.parse_args() du = DameUtils() if (args.api == "all"): dg = Gender() if (dg.config['DEFAULT']['namsor'] == 'yes'): dn = DameNamsor() dn.pretty_gg_list(path=args.csv, jsonf=args.jsondownloaded, measure=args.measure, api='Namsor') if (dg.config['DEFAULT']['genderize'] == 'yes'): dg = DameGenderize() dg.pretty_gg_list(path=args.csv, jsonf=args.jsondownloaded, measure=args.measure, api='Genderize') dgg = DameGenderGuesser() dgg.pretty_gg_list(path=args.csv, jsonf=args.jsondownloaded,
du = DameUtils()
# Dispatch on --api only when at least one command-line argument was given.
# `args` is parsed before this fragment.
if (len(sys.argv) > 1):
    if (args.api == "genderguesser"):
        dgg = DameGenderGuesser()
        print(dgg.guess(args.name))
    elif (args.api == "genderapi"):
        dga = DameGenderApi()
        print(dga.guess(args.name, binary=False))
        print("accuracy: " + str(dga.accuracy(args.name)))
    elif (args.api == "genderize"):
        dg = DameGenderize()
        print(dg.guess(args.name))
        print("probability: " + str(dg.prob(args.name)))
    elif (args.api == "namsor"):
        dn = DameNamsor()
        # Namsor is queried only when a non-blank surname was supplied.
        if (du.is_not_blank(args.surname)):
            print(dn.guess(str(args.name), str(args.surname)))
            print("scale: " + str(dn.scale(str(args.name), str(args.surname))))
        else:
            print("Surname is required in namsor api")
    elif (args.api == "nameapi"):
        # The name dn is reused here for a DameNameapi instance.
        # NOTE(review): no blank-surname check here, unlike the namsor branch.
        dn = DameNameapi()
        print(dn.guess(str(args.name), str(args.surname)))
        print("confidence: " + str(dn.confidence(str(args.name), str(args.surname))))
    elif (args.api == "average"):
        da = DameAll()
        average = da.average(args.name, args.surname)
        print("average: " + str(average))
def test_dame_namsor_scale_method_returns_correct_result(self):
    """scale() for David Arroyo equals -1.0 (no rounding applied)."""
    namsor = DameNamsor()
    scale = namsor.scale("David", "Arroyo")
    self.assertEqual(-1.0, scale)
def test_dame_namsor_gender_guess_method_returns_correct_result(self):
    """guess(binary=True) returns the expected code for three name pairs."""
    namsor = DameNamsor()
    # (expected code, given name, surname)
    cases = (
        (1, "David", "Arroyo"),
        (0, "Andrea", "Arroyo"),
        (0, "Asdf", "qwer"),
    )
    for expected, given_name, family_name in cases:
        self.assertEqual(
            expected, namsor.guess(given_name, family_name, binary=True))
def test_dame_namsor_features_list_method_returns_correct_result(self):
    """The list returned by features_list() has more than 20 entries."""
    namsor = DameNamsor()
    feature_count = len(namsor.features_list())
    self.assertTrue(feature_count > 20)
from app.dame_genderize import DameGenderize
from app.dame_genderguesser import DameGenderGuesser
from app.dame_genderapi import DameGenderApi
from app.dame_nameapi import DameNameapi
from app.dame_customsearch import DameCustomsearch
import argparse

# Command-line interface: CSV with names, the API to evaluate and an ML
# backend name.
parser = argparse.ArgumentParser()
parser.add_argument('--csv', default="files/names/min.csv")
# 'all' has to be listed in choices: argparse rejects any value outside the
# list, which made the `args.api == "all"` branch below impossible to reach.
parser.add_argument('--api', default="damegender", choices=['customsearch', 'namsor', 'genderize', 'genderguesser', 'damegender', 'genderapi', 'nameapi', 'all'])
parser.add_argument('--ml', default="nltk", choices=['nltk', 'svc', 'sgd', 'gaussianNB', 'multinomialNB', 'bernoulliNB'])
args = parser.parse_args()

if (args.api == "all"):
    # Namsor: compare the genders read from the CSV with the guessed ones.
    dn = DameNamsor()
    print("################### Namsor!!")
    gl = dn.gender_list(path=args.csv)
    print("Gender list: " + str(gl))
    sl = dn.guess_list(path=args.csv, binary=True)
    print("Guess list: " +str(sl))
    namsor_accuracy = dn.accuracy_score_dame(gl, sl)
    print("Namsor accuracy: %s" % namsor_accuracy)
    dg = DameGenderize()
    print("################### Genderize!!")
naCoded = dn.na_coded(gl1, gl2) print("+ The na coded: %s" % naCoded) egb = dn.error_gender_bias(gl1, gl2) print("+ The error gender bias: %s" % egb) elif (args.api == "nameapi"): dn = DameNameapi() print("Nameapi with %s has: " % args.csv) gl1 = dn.gender_list(path=args.csv) gl2 = dn.guess_list(path=args.csv, binary=True) ec = dn.error_coded(gl1, gl2) print("+ The error code: %s" % ec) ecwa = dn.error_coded_without_na(gl1, gl2) print("+ The error code without na: %s" % ecwa) naCoded = dn.na_coded(gl1, gl2) print("+ The na coded: %s" % naCoded) egb = dn.error_gender_bias(gl1, gl2) print("+ The error gender bias: %s" % egb) elif (args.api == "namsor"): dn = DameNamsor() print("Namsor with %s has: " % args.csv) gl1 = dn.gender_list(path=args.csv) gl2 = dn.guess_list(path=args.csv, binary=True) ec = dn.error_coded(gl1, gl2) print("+ The error code: %s" % ec) ecwa = dn.error_coded_without_na(gl1, gl2) print("+ The error code without na: %s" % ecwa) naCoded = dn.na_coded(gl1, gl2) print("+ The na coded: %s" % naCoded) egb = dn.error_gender_bias(gl1, gl2) print("+ The error gender bias: %s" % egb)