def download(self, path="files/names/min.csv"):
    """Query the gender of every name/surname pair in a CSV and save it as JSON.

    The pairs are read with ``self.csv2names(path, surnames=True)`` and each
    one is resolved with ``self.get(name=..., surname=..., binary=True)``.
    The result is written to
    ``files/names/namsor<du.path2file(path)>.json`` as a JSON-like array of
    objects with the keys ``name``, ``surname``, ``gender`` and ``scale``.

    Args:
        path: CSV file holding the names to look up.
    """
    du = DameUtils()
    # Read the input first so that a failure here does not truncate an
    # already existing output file.
    names = self.csv2names(path, surnames=True)
    last = len(names) - 1
    target = "files/names/namsor" + du.path2file(path) + ".json"
    # The context manager closes the file even when a lookup raises.
    with open(target, "w+") as namsorjson:
        namsorjson.write("[")
        for i, row in enumerate(names):
            name = row[0]
            surname = row[1]
            namsorjson.write('{"name":"' + str(name) + '",\n')
            namsorjson.write('"surname":"' + str(surname) + '",\n')
            dnget = self.get(name=name, surname=surname, binary=True)
            namsorjson.write('"gender":"' + str(dnget[0]) + '",\n')
            namsorjson.write('"scale":' + str(dnget[1]) + '\n')
            # No separating comma after the last object.
            if i == last:
                namsorjson.write('} \n')
            else:
                namsorjson.write('}, \n')
        namsorjson.write("]")
def test_dame_utils_clean_list_method_returns_correct_result(self):
    # The empty string and the masked e-mail entry must not survive
    # clean_list(); the three person names must be kept in order.
    utils = DameUtils()
    raw_entries = [
        '',
        'H. Peter Anvin',
        '*****@*****.**',
        'Ram Yalamanchili',
        'Ferenc Wagner',
    ]
    cleaned = utils.clean_list(raw_entries)
    expected = ['H. Peter Anvin', 'Ram Yalamanchili', 'Ferenc Wagner']
    self.assertEqual(cleaned, expected)
def download(self, path='files/names/partial.csv', surnames=False):
    """Resolve the names of a CSV file and back the answers up on disk.

    With ``surnames`` set, every (name, surname) pair returned by
    ``self.csv2names(path, surnames=True)`` is resolved one by one with
    ``self.get`` and the surname is stored in the answer under the
    ``"surname"`` key.  Otherwise the names are resolved in batches of ten
    through ``self.get2to10``.

    The textual result (single quotes replaced by double quotes and
    ``None`` replaced by ``"unknown"``) is written to
    ``files/names/genderize<du.path2file(path)>.json`` and returned.

    Args:
        path: CSV file holding the names to look up.
        surnames: whether the CSV also provides surnames.

    Returns:
        The string that was written to the backup file.
    """
    du = DameUtils()
    if surnames:
        lresult = []
        for row in self.csv2names(path, surnames=True):
            answer = self.get(row[0], surname=row[1])
            answer["surname"] = row[1]
            lresult.append(answer)
        res = str(lresult)
    else:
        names = self.csv2names(path)
        # We must split the list in different lists with size 10
        batches = [names[i:i + 10] for i in range(0, len(names), 10)]
        lresult = [self.get2to10(batch) for batch in batches]
        # NOTE(review): the per-batch results are concatenated back to back;
        # if get2to10 returns a list, more than ten names yields "[...][...]",
        # which is not a single JSON document -- confirm the intended format.
        res = "".join(str(chunk) for chunk in lresult)
    res = res.replace("'", '"').replace('None', '"unknown"')
    # The context manager closes the backup file even if the write fails.
    target = "files/names/genderize" + du.path2file(path) + ".json"
    with open(target, "w+") as backup:
        backup.write(res)
    return res
def test_dame_namsor_download(self):
    # The download is only attempted when the configuration enables namsor;
    # afterwards the JSON dump for the input CSV must exist on disk.
    namsor = DameNamsor()
    utils = DameUtils()
    csv_path = "files/names/min.csv"
    if namsor.config['DEFAULT']['namsor'] != 'yes':
        return
    namsor.download(csv_path)
    json_path = "files/names/namsor" + utils.path2file(csv_path) + ".json"
    self.assertTrue(os.path.isfile(json_path))
def test_split(self):
    # 13 items in chunks of 5: two full chunks and a remainder of 3.
    utils = DameUtils()
    thirteen = list(range(1, 14))
    chunks_of_five = utils.split(thirteen, 5)
    self.assertEqual(
        chunks_of_five,
        [list(range(1, 6)), list(range(6, 11)), list(range(11, 14))])
    # 99 items in chunks of 10: only the first two chunks are checked.
    ninety_nine = list(range(1, 100))
    chunks_of_ten = utils.split(ninety_nine, 10)
    self.assertEqual(
        chunks_of_ten[0:2],
        [list(range(1, 11)), list(range(11, 21))])
def test_dame_genderapi_download(self):
    # The download is only attempted when the configuration enables
    # genderapi; afterwards the JSON dump for the input CSV must exist.
    api = DameGenderApi()
    utils = DameUtils()
    csv_path = "files/names/min.csv"
    if api.config['DEFAULT']['genderapi'] != 'yes':
        return
    api.download(csv_path)
    json_path = "files/names/genderapi" + utils.path2file(csv_path) + ".json"
    self.assertTrue(os.path.isfile(json_path))
def test_dame_utils_csvcolumn2list(self):
    # Column 0 of partial.csv, read with header=True, must hold these 21
    # values (the surrounding double quotes are part of each value).
    expected = [
        '"pierre"', '"raul"', '"adriano"', '"ralf"', '"teppei"',
        '"guillermo"', '"catherine"', '"sabina"', '"ralf"', '"karl"',
        '"sushil"', '"clemens"', '"gregory"', '"lester"', '"claude"',
        '"martin"', '"vlad"', '"pasquale"', '"lourdes"', '"bruno"',
        '"thomas"',
    ]
    utils = DameUtils()
    column = utils.csvcolumn2list('files/names/partial.csv', 0, header=True)
    self.assertEqual(len(column), 21)
    self.assertEqual(expected, column)
def test_dame_utils_clean_list(self):
    utils = DameUtils()
    # Case 1: the empty string and the masked e-mail entry are dropped.
    committers = [
        '',
        'H. Peter Anvin',
        '*****@*****.**',
        'Ram Yalamanchili',
        'Ferenc Wagner',
    ]
    self.assertEqual(
        utils.clean_list(committers),
        ['H. Peter Anvin', 'Ram Yalamanchili', 'Ferenc Wagner'])
    # Case 2: a list that is mostly empty strings keeps only the codes.
    sparse_codes = [
        '', '', 'de', '', '', 'ar', '', '', '', '', '', '', '', '', '', '',
        '', '', '', '', 'ca', 'cl', '', '', ''
    ]
    self.assertEqual(utils.clean_list(sparse_codes), ['de', 'ar', 'ca', 'cl'])
def locale_match(self, surname, path, locale):
    """Return ``locale`` if ``surname`` appears in the surnames CSV at ``path``.

    The surname is stripped of accents and upper-cased, then looked for as a
    substring of the second column of every 11-column row.  The first row of
    the file is skipped.

    Args:
        surname: surname to look for.
        path: CSV file to scan (comma delimited, ``|`` as quote character).
        locale: value returned when the surname is found.

    Returns:
        ``locale`` on a match, otherwise the empty string.
    """
    du = DameUtils()
    surname = du.drop_accents(surname).upper()
    with open(path) as csvfile:
        freader = csv.reader(csvfile, delimiter=',', quotechar='|')
        next(freader, None)  # skip the first row
        for row in freader:
            # One hit decides the result, so stop reading the file here.
            if len(row) == 11 and surname in row[1]:
                return locale
    return ""
def test_dame_utils_files_one_level_drop_pwd_method_returns_correct_result(
        self):
    # The listing of <cwd>/files/datamodels must come back as paths
    # relative to the working directory.
    utils = DameUtils()
    models_dir = os.getcwd() + "/files/datamodels"
    listed = sorted(utils.files_one_level_drop_pwd(models_dir))
    expected = [
        'files/datamodels/bernoulliNB_model.sav',
        'files/datamodels/gaussianNB_model.sav',
        'files/datamodels/multinomialNB_model.sav',
        'files/datamodels/sgd_model.sav',
        'files/datamodels/svc_model.sav',
    ]
    self.assertEqual(listed, expected)
def string2gender(self, string):
    """Guess the gender from the first token of ``string`` that is not a surname.

    The string is tokenised with ``DameUtils.string2array``; the first
    non-empty token for which ``self.guess_surname(token, locale="us")[0]``
    is falsy is handed to ``self.guess``.  When no token qualifies,
    ``self.guess("")`` is returned.
    """
    # TODO: take care with trash strings before the name
    tokens = DameUtils().string2array(string)
    # The result is not used; the call is kept as in the original code.
    self.features_int(string)
    candidate = ""
    for token in tokens:
        if not self.guess_surname(token, locale="us")[0] and len(string) > 0:
            candidate = token
            # An empty token does not end the search.
            if candidate != "":
                break
    return self.guess(candidate)
def test_dame_utils_files_one_level_drop_pwd(self):
    # The "*sav" pattern under <cwd>/files/datamodels must list the ten
    # model files as paths relative to the working directory.
    utils = DameUtils()
    pattern = os.getcwd() + "/files/datamodels/*sav"
    listed = sorted(utils.files_one_level_drop_pwd(pattern))
    expected = [
        'files/datamodels/adaboost_model.sav',
        'files/datamodels/bernoulliNB_model.sav',
        'files/datamodels/forest_model.sav',
        'files/datamodels/gaussianNB_model.sav',
        'files/datamodels/mlp_model.sav',
        'files/datamodels/multinomialNB_model.sav',
        'files/datamodels/nltk_model.sav',
        'files/datamodels/sgd_model.sav',
        'files/datamodels/svc_model.sav',
        'files/datamodels/tree_model.sav',
    ]
    self.assertEqual(listed, expected)
def surname2ethnicity(self, surname):
    """Look up the per-group figures stored for a surname.

    The surname is stripped of accents, upper-cased and compared with the
    first column of ``files/names/names_us/surnames.csv`` (first row
    skipped).  Columns 5 to 10 of a matching row are returned under the keys
    ``white``, ``black``, ``api``, ``aian``, ``doublerace`` and ``hispanic``;
    when several rows match, the last one wins.

    Returns:
        The dictionary described above, or ``False`` when no row matched
        (or the matching row holds only empty strings).
    """
    wanted = DameUtils().drop_accents(surname).upper()
    # Key order follows the column order 5..10 of the CSV:
    # api = Asian Pacific American, aian = American Indian and Alaska Native,
    # doublerace = the "2prace" column.
    labels = ("white", "black", "api", "aian", "doublerace", "hispanic")
    figures = dict.fromkeys(labels, "")
    with open('files/names/names_us/surnames.csv') as csvfile:
        rows = csv.reader(csvfile, delimiter=',', quotechar='|')
        next(rows, None)
        for row in rows:
            if row[0] == wanted:
                figures = {
                    label: row[column]
                    for column, label in enumerate(labels, start=5)
                }
    if all(value == "" for value in figures.values()):
        return False
    return figures
def name_frec(self, name, *args, **kwargs):
    """Return the male and female figures stored for ``name`` in a dataset.

    The name is stripped of accents and compared case-insensitively with the
    first column of the dataset's male and female CSV files.  The second
    column of the last matching row (``0`` when nothing matches) is passed
    through ``DameUtils.drop_dots``.

    Keyword Args:
        dataset: ``'es'`` (default), ``'ine'``, ``'uy'``, ``'uk'`` or
            ``'us'``; any other value falls back to the ``'es'`` files.

    Returns:
        A dictionary with the keys ``"females"`` and ``"males"``.
    """
    # guess list method
    dataset = kwargs.get('dataset', 'es')
    du = DameUtils()
    name = du.drop_accents(name)
    es_paths = ('files/names/names_es/esmasculinos.csv',
                'files/names/names_es/esfemeninos.csv')
    paths_by_dataset = {
        'ine': es_paths,
        'es': es_paths,
        'uy': ('files/names/names_uy/uymasculinos.csv',
               'files/names/names_uy/uyfemeninos.csv'),
        'uk': ('files/names/names_uk/ukmales.csv',
               'files/names/names_uk/ukfemales.csv'),
        'us': ('files/names/names_us/usmales.csv',
               'files/names/names_us/usfemales.csv'),
    }
    wanted = name.lower()
    counts = []
    # Males first, then females -- the same order the files were read before.
    for path in paths_by_dataset.get(dataset, es_paths):
        count = 0
        # The context manager closes each CSV once it has been scanned.
        with open(path, 'r') as csvfile:
            for row in csv.reader(csvfile, delimiter=',', quotechar='|'):
                if len(row) > 1 and row[0].lower() == wanted:
                    count = row[1]
        counts.append(du.drop_dots(count))
    males, females = counts
    return {"females": females, "males": males}
def getGeo(self, name, surname, locale, binary=False):
    """Ask the Namsor ``genderGeo`` endpoint for the gender of a person.

    The API key is read from the first line of
    ``files/apikeys/namsorpass.txt`` and sent in the ``X-API-KEY`` header.

    Args:
        name: given name.
        surname: family name.
        locale: locale segment appended to the request URL.
        binary: accepted for interface compatibility; not used here.

    Returns:
        A two-item list: the ``likelyGender`` and ``genderScale`` fields of
        the JSON answer.
    """
    from urllib.parse import quote

    # obtaining data from namsor
    # Read-only access is enough, and the file is closed right after use.
    with open("files/apikeys/namsorpass.txt", "r") as fichero:
        contenido = fichero.readline().rstrip()
    url = 'https://v2.namsor.com/NamSorAPIv2/api2/json/genderGeo/'
    # Percent-encode each path segment so that characters such as "/", "?"
    # or "#" inside a name cannot alter the structure of the URL.
    url = url + '/'.join(
        quote(segment, safe='') for segment in (name, surname, locale))
    headers = {
        'content-type': 'application/json',
        'Accept-Charset': 'UTF-8',
        'X-API-KEY': contenido
    }
    r = requests.get(url, headers=headers)
    d = json.loads(r.text)
    return [d['likelyGender'], d['genderScale']]
# the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, # Boston, MA 02110-1301 USA, from app.dame_sexmachine import DameSexmachine from app.dame_perceval import DamePerceval from app.dame_utils import DameUtils import sys import argparse parser = argparse.ArgumentParser() parser.add_argument("url", help="Uniform Resource Link") parser.add_argument('--directory') parser.add_argument('--version', action='version', version='0.1') args = parser.parse_args() if (len(sys.argv) > 1): ds = DameSexmachine() du = DameUtils() dp = DamePerceval() l1 = dp.list_committers(args.url, args.directory) l2 = du.delete_duplicated(l1) l3 = du.clean_list(l2) females = 0 males = 0 unknowns = 0 for g in l3: sm = ds.guess(g, binary=True) if (sm == 0): females = females + 1 elif (sm == 1): males = males + 1 else:
def test_dame_utils_files_one_level_method_returns_correct_result(self):
    # <cwd>/files/ is expected to list more than ten entries.
    utils = DameUtils()
    files_dir = os.getcwd() + '/files/'
    entries = utils.files_one_level(files_dir)
    self.assertTrue(len(entries) > 10)
def test_drop_quotes_method_returns_correct_result(self):
    # Both a stray double quote and a pair of single quotes must vanish.
    utils = DameUtils()
    without_double = utils.drop_quotes('Hola "Mexico')
    self.assertEqual('Hola Mexico', without_double)
    without_single = utils.drop_quotes("Hola' 'Mexico")
    self.assertEqual("Hola Mexico", without_single)
def test_dame_utils_delete_duplicated_method_returns_correct_result(self):
    # Repeated items are removed and the first-seen order is kept.
    utils = DameUtils()
    with_repeats = [1, 2, 2, 1, 3]
    unique = utils.delete_duplicated(with_repeats)
    self.assertEqual(unique, [1, 2, 3])
def test_drop_accents_method_returns_correct_result(self):
    # The accented "é" must come back as a plain "e".
    utils = DameUtils()
    plain = utils.drop_accents("Inés")
    self.assertEqual("Ines", plain)
def test_drop_white_space_method_returns_correct_result(self):
    utils = DameUtils()
    # A trailing blank is removed.
    trimmed = utils.drop_white_space("In ")
    self.assertEqual("In", trimmed)
    # drop_white_space composes with drop_accents.
    trimmed_plain = utils.drop_accents(utils.drop_white_space("Inés "))
    self.assertEqual("Ines", trimmed_plain)
def test_split_method_returns_correct_result(self):
    # 13 items in chunks of 5: two full chunks and a remainder of 3.
    utils = DameUtils()
    thirteen = list(range(1, 14))
    chunks = utils.split(thirteen, 5)
    expected = [list(range(1, 6)), list(range(6, 11)), list(range(11, 14))]
    self.assertEqual(chunks, expected)
def test_drop_dots_method_returns_correct_result(self):
    # Dropping the dot of 12.12 must leave something int() reads as 1212.
    utils = DameUtils()
    undotted = utils.drop_dots(12.12)
    self.assertEqual(1212, int(undotted))
def test_is_not_blank_method_returns_correct_result(self):
    utils = DameUtils()
    # A whitespace-only string counts as blank ...
    blank_answer = utils.is_not_blank(" ")
    self.assertEqual(blank_answer, False)
    # ... while a word does not.
    word_answer = utils.is_not_blank("ok")
    self.assertEqual(word_answer, True)
def test_represents_int_method_returns_correct_result(self):
    utils = DameUtils()
    # A digit string is recognised as an integer ...
    digits_answer = utils.represents_int("23")
    self.assertEqual(digits_answer, True)
    # ... a word is not.
    word_answer = utils.represents_int("ok")
    self.assertEqual(word_answer, False)
# Boston, MA 02110-1301 USA, from app.dame_sexmachine import DameSexmachine from app.dame_perceval import DamePerceval from app.dame_utils import DameUtils import sys import argparse parser = argparse.ArgumentParser() parser.add_argument("url", help="Uniform Resource Link") parser.add_argument('--directory') parser.add_argument('--version', action='version', version='0.1') args = parser.parse_args() if (len(sys.argv) > 1): s = DameSexmachine() gg = DamePerceval() du = DameUtils() l = gg.list_mailers(args.url) l = du.delete_duplicated(l) females = 0 males = 0 unknowns = 0 for g in l: sm = s.guess(g, binary=True) if (sm == 0): females = females + 1 elif (sm == 1): males = males + 1 else: unknowns = unknowns + 1
def test_drop_white_space(self):
    utils = DameUtils()
    # A trailing blank is removed.
    trimmed = utils.drop_white_space("In ")
    self.assertEqual("In", trimmed)
    # drop_white_space composes with drop_accents.
    trimmed_plain = utils.drop_accents(utils.drop_white_space("Inés "))
    self.assertEqual("Ines", trimmed_plain)
    # Blanks between words are removed as well.
    squeezed = utils.drop_accents(utils.drop_white_space("Juan Carlos I "))
    self.assertEqual("JuanCarlosI", squeezed)
# Everything below runs only after an affirmative answer.
if yesornot in ("Yes", "yes", "Y", "y"):
    print("We are creating files/names/nam_dict_list.txt")
    g.namdict2file()
    print("We are creating .sav files data models in files/datamodels")
    print("This process take a long time, you can rest.")
    # Each call below is expected to leave one model under files/datamodels.
    s = DameSexmachine()
    s.classifier()
    s.gaussianNB()
    s.svc()
    s.sgd()
    s.multinomialNB()
    s.bernoulliNB()
    s.tree()
    s.mlp()
    print("This process has finished. You have the models in files/datamodels/*.sav")
    du = DameUtils()
    print("Creating the file files/names/allnoundefined.csv from files/names/all.csv")
    # Both files are managed by the with-statement, so the output is flushed
    # and closed even when a row cannot be processed.
    with open('files/names/all.csv') as csvfile, \
            open('files/names/allnoundefined.csv', 'w+') as filenou:
        reader = csv.reader(csvfile, delimiter=',', quotechar='|')
        for row in reader:
            # A separate name keeps the `g` object used above from being
            # overwritten by a plain string.
            gender_code = du.drop_quotes(row[4])
            # Keep only the rows whose fifth column is "m" or "f".
            if gender_code in ("m", "f"):
                filenou.write(
                    ','.join((row[0], row[1], row[2], row[3], row[4], row[5]))
                    + '\n')
def test_drop_white_space_around(self):
    # Leading and trailing blanks go away; blanks between words stay.
    utils = DameUtils()
    strip = utils.drop_white_space_around
    self.assertEqual("In", strip(" In"))
    self.assertEqual("Juan Carlos I", strip(" Juan Carlos I"))
    self.assertEqual("Juan Carlos I", strip(" Juan Carlos I "))
    self.assertEqual("Juan Carlos I", strip(" Juan Carlos I "))
    # Composition with drop_accents, in both nesting orders used so far.
    unaccented_pair = utils.drop_accents(" José María ")
    self.assertEqual("Jose Maria", strip(unaccented_pair))
    unaccented_single = utils.drop_accents("Inés ")
    self.assertEqual("Ines", strip(unaccented_single))
    self.assertEqual("Ana", strip(" Ana"))
def test_drop_white_space_around(self):
    # Despite its name, this test exercises white_space_inside_by(): the
    # blank between the two words is replaced by "+" and the outer blanks
    # are gone.
    utils = DameUtils()
    unaccented = utils.drop_accents(" María José ")
    joined = utils.white_space_inside_by(unaccented, "+")
    self.assertEqual("Maria+Jose", joined)