def average(self, name, surname):
    """Return the mean of the binary gender guesses for a name.

    DameGenderGuesser is always consulted.  DameGenderApi, DameGenderize
    and DameNamsor are consulted only when their option in the 'DEFAULT'
    section of ``self.config`` equals 'yes'.  A guess of 2 is treated as
    "no decision" and is left out of the mean.

    Args:
        name: first name passed to every service.
        surname: surname, only passed to DameNamsor (as ``str``).

    Returns:
        The arithmetic mean of the guesses that were not 2, or 2 when no
        consulted service produced a decision (the original code raised
        ZeroDivisionError in that case).
    """
    undecided = 2

    # NOTE(review): binary is passed as the string "True", not the
    # boolean; kept exactly as before -- confirm against guess().
    guesses = [int(DameGenderGuesser().guess(name, binary="True"))]

    options = self.config['DEFAULT']
    # Each optional service is only instantiated when it is enabled.
    if options['genderapi'] == 'yes':
        guesses.append(int(DameGenderApi().guess(name, binary="True")))
    if options['genderize'] == 'yes':
        guesses.append(int(DameGenderize().guess(name, binary="True")))
    if options['namsor'] == 'yes':
        guesses.append(
            int(DameNamsor().guess(str(name), str(surname), binary="True"))
        )

    decided = [guess for guess in guesses if guess != undecided]
    if not decided:
        # Nothing to average: report "no decision" instead of dividing
        # by zero.
        return undecided
    return sum(decided) / len(decided)
def test_dame_genderize_get(self):
    """Compare the records returned by DameGenderize.get for "peter".

    Nothing is asserted unless the 'genderize' option in the 'DEFAULT'
    configuration section is 'yes'.
    """
    genderize = DameGenderize()
    if genderize.config['DEFAULT']['genderize'] != 'yes':
        return

    # Query by first name only.
    by_name = genderize.get("peter")
    self.assertEqual(
        by_name,
        {'probability': 0.99, 'count': 165452, 'name': 'peter', 'gender': 'male'},
    )

    # Query with surname and country as keyword arguments.
    by_name_and_country = genderize.get(
        name="peter", surname="smith", country_id="US"
    )
    self.assertEqual(
        by_name_and_country,
        {'count': 6521, 'country_id': 'US', 'gender': 'male', 'name': 'peter', 'probability': 0.99},
    )
def test_dame_genderize_get2to10(self):
    """Compare DameGenderize.get2to10 results for a 3-name and an 8-name list.

    Nothing is asserted unless the 'genderize' option in the 'DEFAULT'
    configuration section is 'yes'.
    """
    genderize = DameGenderize()
    if genderize.config['DEFAULT']['genderize'] != 'yes':
        return

    three_names = genderize.get2to10(["peter", "lois", "stevie"])
    self.assertEqual(
        three_names,
        [
            {'count': 165452, 'gender': 'male', 'name': 'peter', 'probability': 0.99},
            {'count': 2510, 'gender': 'female', 'name': 'lois', 'probability': 0.58},
            {'count': 2568, 'gender': 'male', 'name': 'stevie', 'probability': 0.87},
        ],
    )

    eight_names = genderize.get2to10(
        ["peter", "lois", "stevie", "john", "paul", "mike", "mary", "anna"]
    )
    self.assertEqual(
        eight_names,
        [
            {"name": "peter", "gender": "male", "probability": 0.99, "count": 165452},
            {"name": "lois", "gender": "female", "probability": 0.58, "count": 2510},
            {"name": "stevie", "gender": "male", "probability": 0.87, "count": 2568},
            {"name": "john", "gender": "male", "probability": 0.99, "count": 218952},
            {"name": "paul", "gender": "male", "probability": 0.99, "count": 148099},
            {"name": "mike", "gender": "male", "probability": 0.99, "count": 109844},
            {"name": "mary", "gender": "female", "probability": 0.99, "count": 142684},
            {"name": "anna", "gender": "female", "probability": 0.98, "count": 383713},
        ],
    )
def test_dame_genderize_guess_list(self):
    """Check DameGenderize.json2guess_list against three stored JSON files.

    Each file is read with binary=True and the returned list is compared
    with a literal list of 0/1 values.
    """
    # NOTE(review): another test with this same name appears elsewhere in
    # the visible source; if both are in one class the later definition
    # replaces the earlier one -- confirm.
    dg = DameGenderize()
    # Smallest fixture: six entries.
    gl1 = dg.json2guess_list(jsonf="files/names/genderizefiles_names_min.csv.json", binary=True)
    self.assertEqual(gl1, [1, 1, 1, 1, 1, 0])
    gl2 = dg.json2guess_list(jsonf="files/names/genderizefiles_names_partialnoundefined.csv.json", binary=True)
    self.assertEqual(gl2, [1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1])
    # Largest fixture; the expected list is kept verbatim.
    gl3 = dg.json2guess_list(jsonf="files/names/genderizefiles_names_allnoundefined0.csv.json", binary=True)
    self.assertEqual(gl3, [1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1,
0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
def test_dame_genderize_gender_list(self):
    """Check gender_list on partial.csv and the counters it leaves behind.

    The returned list must match the literal below and hold 21 entries;
    afterwards the instance's females, males and unknown attributes are
    compared with 3, 16 and 2.
    """
    genderize = DameGenderize()
    genders = genderize.gender_list(path="files/names/partial.csv")

    expected_genders = [1, 1, 1, 1, 2, 1, 0, 0, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1]
    self.assertEqual(genders, expected_genders)
    self.assertEqual(len(genders), 21)

    # Counters are checked in the same order as before: females, males,
    # unknown.
    self.assertEqual(genderize.females, 3)
    self.assertEqual(genderize.males, 16)
    self.assertEqual(genderize.unknown, 2)
def test_dame_genderize_guess_list(self):
    """Check the first ten guesses of guess_list on partial.csv.

    The list is requested once with binary=False (string labels) and once
    with binary=True (0/1 values).  Nothing is asserted unless the
    'genderize' option in the 'DEFAULT' configuration section is 'yes'.
    """
    genderize = DameGenderize()
    if genderize.config['DEFAULT']['genderize'] != 'yes':
        return

    expected_labels = ['male', 'male', 'male', 'male', 'male', 'male', 'female', 'female', 'male', 'male']
    label_guesses = genderize.guess_list(path="files/names/partial.csv", binary=False)
    self.assertEqual(expected_labels, label_guesses[0:10])

    expected_codes = [1, 1, 1, 1, 1, 1, 0, 0, 1, 1]
    code_guesses = genderize.guess_list(path="files/names/partial.csv", binary=True)
    self.assertEqual(expected_codes, code_guesses[0:10])
def test_dame_genderize_gender_list_method_returns_correct_result(self):
    """Check gender_list() called without arguments, plus its counters.

    Nothing is asserted unless the 'genderize' option in the 'DEFAULT'
    configuration section is 'yes'.
    """
    genderize = DameGenderize()
    if genderize.config['DEFAULT']['genderize'] != 'yes':
        return

    genders = genderize.gender_list()
    self.assertEqual(
        genders,
        [1, 1, 1, 1, 2, 1, 0, 0, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1],
    )
    self.assertEqual(len(genders), 21)

    # Same order as before: females, males, unknown.
    for counter, expected in (("females", 3), ("males", 16), ("unknown", 2)):
        self.assertEqual(getattr(genderize, counter), expected)
def test_dame_genderize_guess_file_method_returns_correct_result(self):
    """Check that guess("David", binary=True) yields 1.

    Nothing is asserted unless the 'genderize' option in the 'DEFAULT'
    configuration section is 'yes'.
    """
    genderize = DameGenderize()
    if genderize.config['DEFAULT']['genderize'] != 'yes':
        return
    binary_guess = genderize.guess("David", binary=True)
    self.assertEqual(binary_guess, 1)
args = parser.parse_args() du = DameUtils() if (args.api == "all"): dg = Gender() if (dg.config['DEFAULT']['namsor'] == 'yes'): dn = DameNamsor() dn.pretty_gg_list(path=args.csv, jsonf=args.jsondownloaded, measure=args.measure, api='Namsor') if (dg.config['DEFAULT']['genderize'] == 'yes'): dg = DameGenderize() dg.pretty_gg_list(path=args.csv, jsonf=args.jsondownloaded, measure=args.measure, api='Genderize') dgg = DameGenderGuesser() dgg.pretty_gg_list(path=args.csv, jsonf=args.jsondownloaded, measure=args.measure, api='Genderguesser') ds = DameSexmachine() ds.pretty_gg_list(path=args.csv, jsonf=args.jsondownloaded, measure=args.measure,
# Accuracy report: for each service, print the gender list read from the
# CSV given in args.csv, the binary guesses made for the same file, and
# (for Namsor and Genderize) the score from accuracy_score_dame.

# --- Namsor ---
dn = DameNamsor()
print("################### Namsor!!")
gl = dn.gender_list(path=args.csv)
print("Gender list: " + str(gl))
sl = dn.guess_list(path=args.csv, binary=True)
print("Guess list: " + str(sl))
namsor_accuracy = dn.accuracy_score_dame(gl, sl)
print("Namsor accuracy: %s" % namsor_accuracy)

# --- Genderize ---
dg = DameGenderize()
print("################### Genderize!!")
gl = dg.gender_list(path=args.csv)
print("Gender list: " + str(gl))
sl = dg.guess_list(path=args.csv, binary=True)
# Fixed: this line used to print gl (the gender list) under the
# "Guess list" label instead of the guesses in sl.
print("Guess list: " + str(sl))
genderize_accuracy = dg.accuracy_score_dame(gl, sl)
print("Genderize accuracy: %s" % genderize_accuracy)

# --- GenderGuesser ---
dgg = DameGenderGuesser()
print("################### GenderGuesser!!")
gl = dgg.gender_list(path=args.csv)
print("Gender list: " + str(gl))
sl = dgg.guess_list(path=args.csv, binary=True)
print("Guess list: " + str(sl))
# Confusion-matrix report.  Two explanatory lines are always printed; the
# per-service matrices are only produced when args.api is "all".
print("A confusion matrix C is such that Ci,j is equal to the number of observations known to be in group i but predicted to be in group j.")
print("If the classifier is nice, the diagonal is high because there are true positives")
if (args.api == "all"):
    # For every service: "3x2" delegates printing to
    # print_confusion_matrix_dame, "3x3" prints the value returned by
    # confusion_matrix.  Any other args.dimensions value prints nothing.
    dn = DameNamsor()
    if (args.dimensions == "3x2"):
        print("Namsor confusion matrix:\n")
        dn.print_confusion_matrix_dame(path=args.csv)
    elif (args.dimensions == "3x3"):
        namsor_confusion_matrix = dn.confusion_matrix(path=args.csv)
        print("Namsor confusion matrix:\n %s" % namsor_confusion_matrix)
    dg = DameGenderize()
    if (args.dimensions == "3x2"):
        print("Genderize confusion matrix:\n")
        dg.print_confusion_matrix_dame(path=args.csv)
    elif (args.dimensions == "3x3"):
        # NOTE(review): dg was already created just above; this second
        # DameGenderize() looks redundant -- confirm before removing.
        dg = DameGenderize()
        genderize_confusion_matrix = dg.confusion_matrix(path=args.csv)
        print("Genderize confusion matrix:\n %s" % genderize_confusion_matrix)
    dgg = DameGenderGuesser()
    if (args.dimensions == "3x2"):
        print("Gender Guesser confusion matrix:\n")
        dgg.print_confusion_matrix_dame(path=args.csv)
    elif (args.dimensions == "3x3"):
        genderguesser_confusion_matrix = dgg.confusion_matrix(path=args.csv)
        print("Gender Guesser confusion matrix:\n %s" % genderguesser_confusion_matrix)
parser.add_argument('--csv', type=str, required=True, default="files/names/min.csv", help='input file for names') parser.add_argument('--api', required=True, choices=['namsor', 'genderize', 'genderapi', 'nameapi']) parser.add_argument("--surnames", default=False, action="store_true", help="Flag to surnames") args = parser.parse_args() if (args.api == 'genderize'): dg = DameGenderize() text1 = dg.download(path=args.csv, surnames=args.surnames) elif (args.api == 'genderapi'): dga = DameGenderApi() if (dga.config['DEFAULT']['genderapi'] == 'yes'): if (dga.apikey_limit_exceeded_p() == False): text1 = dga.download(path=args.csv) elif (dga.apikey_count_requests() < len(dga.csv2names(args.csv))): print("You don't have enough requests with this api key") elif (dga.apikey_count_requests() >= len(dga.csv2names(args.csv))): text1 = dga.download(path=args.csv) else: print("You have not money with this api key") else: print("You must enable genderapi in config.cfg") elif (args.api == 'namsor'):
ec = d.error_coded(gl1, gl2) print("+ The error code: %s" % ec) ecwa = d.error_coded_without_na(gl1, gl2) print("+ The error code without na: %s" % ecwa) naCoded = d.na_coded(gl1, gl2) print("+ The na coded: %s" % naCoded) egb = d.error_gender_bias(gl1, gl2) print("+ The error gender bias: %s" % egb) elif ((args.api != "damegender") and (args.ml in [ 'svc', 'sgd', 'gaussianNB', 'multinomialNB', 'bernoulliNB', 'forest', 'xgboost' ])): print("The machine learning prediction is only for damegender") elif (args.api == "genderize"): d = DameGenderize() print("Genderize with %s has: " % args.csv) gl1 = d.gender_list(path=args.csv) if (os.path.isfile(args.jsondownloaded)): gl2 = d.json2guess_list(jsonf=args.jsondownloaded, binary=True) else: gl2 = d.guess_list(path=args.csv, binary=True) ec = d.error_coded(gl1, gl2) print("+ The error code: %s" % ec) ecwa = d.error_coded_without_na(gl1, gl2) print("+ The error code without na: %s" % ecwa) naCoded = d.na_coded(gl1, gl2) print("+ The na coded: %s" % naCoded) egb = d.error_gender_bias(gl1, gl2) print("+ The error gender bias: %s" % egb) elif (args.api == "genderapi"):
# Command-line dispatch: parse the arguments, then print the guess of the
# service selected with args.api for args.name (and args.surname where
# the service takes one).
parser.add_argument('--version', action='version', version='0.1')
args = parser.parse_args()
du = DameUtils()
# Only dispatch when at least one command-line argument was given.
if (len(sys.argv) > 1):
    if (args.api == "genderguesser"):
        dgg = DameGenderGuesser()
        print(dgg.guess(args.name))
    elif (args.api == "genderapi"):
        dga = DameGenderApi()
        print(dga.guess(args.name, binary=False))
        print("accuracy: " + str(dga.accuracy(args.name)))
    elif (args.api == "genderize"):
        dg = DameGenderize()
        print(dg.guess(args.name))
        print("probability: " + str(dg.prob(args.name)))
    elif (args.api == "namsor"):
        dn = DameNamsor()
        # Namsor is only queried when a non-blank surname was supplied.
        if (du.is_not_blank(args.surname)):
            print(dn.guess(str(args.name), str(args.surname)))
            print("scale: " + str(dn.scale(str(args.name), str(args.surname))))
        else:
            print("Surname is required in namsor api")
    elif (args.api == "nameapi"):
        # NOTE(review): unlike the namsor branch, the surname is not
        # checked for blank before being passed on -- confirm intended.
        dn = DameNameapi()
        print(dn.guess(str(args.name), str(args.surname)))
        print("confidence: " + str(dn.confidence(str(args.name), str(args.surname))))
    elif (args.api == "average"):
        # Only the DameAll instance is created in the visible code.
        da = DameAll()
def test_dame_genderize_json2names(self):
    """Check the names json2names reads from the stored min.csv JSON file."""
    genderize = DameGenderize()
    names = genderize.json2names(
        jsonf="files/names/genderizefiles_names_min.csv.json"
    )
    expected_names = ['Pierre', 'Raul', 'Adriano', 'Ralf', 'Guillermo', 'Sabina']
    self.assertEqual(expected_names, names)
def test_dame_genderize_prob(self):
    """Check that prob("David") equals 0.99.

    Nothing is asserted unless the 'genderize' option in the 'DEFAULT'
    configuration section is 'yes'.
    """
    genderize = DameGenderize()
    if genderize.config['DEFAULT']['genderize'] != 'yes':
        return
    probability = genderize.prob("David")
    self.assertEqual(probability, 0.99)
def test_dame_genderize_guess(self):
    """Check guess("David") as a label and, with binary=True, as a code.

    Nothing is asserted unless the 'genderize' option in the 'DEFAULT'
    configuration section is 'yes'.
    """
    genderize = DameGenderize()
    if genderize.config['DEFAULT']['genderize'] != 'yes':
        return

    label = genderize.guess("David")
    self.assertEqual(label, "male")

    code = genderize.guess("David", binary=True)
    self.assertEqual(code, 1)