Esempio n. 1
0
 def download(self, path="files/names/min.csv"):
     """Look up the gender of every name in a CSV file and save it as JSON.

     Names and surnames are read with ``self.csv2names`` and each pair is
     resolved through ``self.get(..., binary=True)``.  The answers are
     written to ``files/names/namsor<du.path2file(path)>.json`` as a
     hand-assembled JSON array of objects with the keys ``name``,
     ``surname``, ``gender`` and ``scale``.

     :param path: CSV file holding the names to query.
     :returns: None; the result is only written to disk.
     """
     du = DameUtils()
     # Read the input before touching the output file so an unreadable CSV
     # does not truncate a previously generated JSON file.
     names = self.csv2names(path, surnames=True)
     last = len(names) - 1
     # The context manager closes the file even when a lookup raises.
     with open("files/names/namsor" + du.path2file(path) + ".json",
               "w+") as namsorjson:
         namsorjson.write("[")
         for i, row in enumerate(names):
             name = row[0]
             surname = row[1]
             # NOTE(review): values are not JSON-escaped; a quote inside a
             # name would produce an invalid document.
             namsorjson.write('{"name":"' + str(name) + '",\n')
             namsorjson.write('"surname":"' + str(surname) + '",\n')
             dnget = self.get(name=name, surname=surname, binary=True)
             namsorjson.write('"gender":"' + str(dnget[0]) + '",\n')
             namsorjson.write('"scale":' + str(dnget[1]) + '\n')
             # No trailing comma after the last object of the array.
             namsorjson.write('} \n' if i == last else '}, \n')
         namsorjson.write("]")
Esempio n. 2
0
 def test_dame_utils_clean_list_method_returns_correct_result(self):
     # The empty string and the masked e-mail entry must be filtered out.
     raw = [
         '', 'H. Peter Anvin', '*****@*****.**',
         'Ram Yalamanchili', 'Ferenc Wagner'
     ]
     expected = ['H. Peter Anvin', 'Ram Yalamanchili', 'Ferenc Wagner']
     utils = DameUtils()
     cleaned = utils.clean_list(raw)
     self.assertEqual(cleaned, expected)
Esempio n. 3
0
 def download(self, path='files/names/partial.csv', surnames=False):
     """Resolve the names of a CSV file and back the answers up as JSON text.

     With ``surnames`` set, every (name, surname) row is resolved one by
     one through ``self.get`` and the surname is attached to each answer.
     Otherwise the names are resolved in batches of at most ten through
     ``self.get2to10``.  The textual result is written to
     ``files/names/genderize<du.path2file(path)>.json`` and returned.

     :param path: CSV file holding the names to query.
     :param surnames: whether the CSV rows carry a surname to send along.
     :returns: the string that was written to the backup file.
     """
     du = DameUtils()
     lresult = []
     if surnames:
         for row in self.csv2names(path, surnames=True):
             d = self.get(row[0], surname=row[1])
             d["surname"] = row[1]
             lresult.append(d)
         res = str(lresult)
     else:
         names = self.csv2names(path)
         # The names are sent in slices of (at most) ten elements.
         batches = [names[i:i + 10] for i in range(0, len(names), 10)]
         for batch in batches:
             lresult.append(self.get2to10(batch))
         res = "".join(str(k) for k in lresult)
     # Turn the Python repr into JSON-looking text: double quotes instead
     # of single ones and a readable marker instead of None.
     res = res.replace("\'", "\"")
     res = res.replace('None', '"unknown"')
     # The context manager closes the backup even if the write fails.
     with open("files/names/genderize" + du.path2file(path) + ".json",
               "w+") as backup:
         backup.write(res)
     return res
Esempio n. 4
0
 def test_dame_namsor_download(self):
     # Only runs the download when namsor is enabled in the configuration.
     namsor = DameNamsor()
     utils = DameUtils()
     csv_path = "files/names/min.csv"
     if (namsor.config['DEFAULT']['namsor'] != 'yes'):
         return
     namsor.download(csv_path)
     json_path = "files/names/namsor" + utils.path2file(csv_path) + ".json"
     self.assertTrue(os.path.isfile(json_path))
 def test_split(self):
     # split() must cut a list into consecutive chunks of the given size,
     # leaving a shorter chunk at the end when needed.
     utils = DameUtils()
     thirteen = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]
     by_five = utils.split(thirteen, 5)
     self.assertEqual(by_five,
                      [[1, 2, 3, 4, 5], [6, 7, 8, 9, 10], [11, 12, 13]])
     ninety_nine = list(range(1, 100))
     by_ten = utils.split(ninety_nine, 10)
     first_two = by_ten[0:2]
     self.assertEqual(first_two, [
         [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
         [11, 12, 13, 14, 15, 16, 17, 18, 19, 20],
     ])
 def test_dame_genderapi_download(self):
     # Only runs the download when genderapi is enabled in the configuration.
     api = DameGenderApi()
     utils = DameUtils()
     csv_path = "files/names/min.csv"
     if (api.config['DEFAULT']['genderapi'] != 'yes'):
         return
     api.download(csv_path)
     json_path = ("files/names/genderapi" + utils.path2file(csv_path) +
                  ".json")
     self.assertTrue(os.path.isfile(json_path))
 def test_dame_utils_csvcolumn2list(self):
     # Column 0 of partial.csv (header skipped) must yield these 21 quoted
     # names, in this order.
     expected = [
         '"pierre"', '"raul"', '"adriano"', '"ralf"', '"teppei"',
         '"guillermo"', '"catherine"', '"sabina"', '"ralf"', '"karl"',
         '"sushil"', '"clemens"', '"gregory"', '"lester"', '"claude"',
         '"martin"', '"vlad"', '"pasquale"', '"lourdes"', '"bruno"',
         '"thomas"'
     ]
     utils = DameUtils()
     column = utils.csvcolumn2list('files/names/partial.csv', 0,
                                   header=True)
     self.assertEqual(len(column), 21)
     self.assertEqual(expected, column)
 def test_dame_utils_clean_list(self):
     utils = DameUtils()
     # Case 1: empty strings and a masked e-mail mixed with real names.
     people = [
         '', 'H. Peter Anvin', '*****@*****.**',
         'Ram Yalamanchili', 'Ferenc Wagner'
     ]
     self.assertEqual(
         utils.clean_list(people),
         ['H. Peter Anvin', 'Ram Yalamanchili', 'Ferenc Wagner'])
     # Case 2: a few country codes scattered among many empty strings.
     codes = [
         '', '', 'de', '', '', 'ar', '', '', '', '', '', '', '', '', '', '',
         '', '', '', '', 'ca', 'cl', '', '', ''
     ]
     cleaned_codes = utils.clean_list(codes)
     self.assertEqual(cleaned_codes, ['de', 'ar', 'ca', 'cl'])
Esempio n. 9
0
 def locale_match(self, surname, path, locale):
     """Return ``locale`` when the surname appears in the CSV at ``path``.

     The surname is normalised (accents dropped, upper-cased) and searched
     as a substring of the second column of every 11-column row after the
     header.  An empty string is returned when nothing matches.
     """
     needle = DameUtils().drop_accents(surname).upper()
     matched = ""
     with open(path) as handle:
         rows = csv.reader(handle, delimiter=',', quotechar='|')
         # Skip the header line (tolerating an empty file).
         next(rows, None)
         for fields in rows:
             if len(fields) != 11:
                 continue
             if needle in fields[1]:
                 matched = locale
     return matched
Esempio n. 10
0
 def test_dame_utils_files_one_level_drop_pwd_method_returns_correct_result(
         self):
     # The listing of files/datamodels must come back relative to the
     # current working directory.
     expected = [
         'files/datamodels/bernoulliNB_model.sav',
         'files/datamodels/gaussianNB_model.sav',
         'files/datamodels/multinomialNB_model.sav',
         'files/datamodels/sgd_model.sav',
         'files/datamodels/svc_model.sav'
     ]
     utils = DameUtils()
     here = os.getcwd()
     listed = utils.files_one_level_drop_pwd(here + "/files/datamodels")
     self.assertEqual(sorted(listed), expected)
Esempio n. 11
0
 def string2gender(self, string):
     """Guess the gender of the first token of ``string`` that is not a surname.

     The string is tokenised with ``DameUtils.string2array``; the first
     non-empty token that ``self.guess_surname(..., locale="us")`` does not
     flag as a surname is handed to ``self.guess``.  When no token
     qualifies, ``self.guess`` receives an empty string.
     """
     # TODO: take care with trash strings before the name
     tokens = DameUtils().string2array(string)
     # Result is unused here; the call is kept exactly as in the original
     # flow in case it has side effects.
     features_int = self.features_int(string)
     candidate = ""
     for token in tokens:
         is_surname = self.guess_surname(token, locale="us")[0]
         if (not is_surname) and (len(string) > 0):
             candidate = token
         if candidate != "":
             break
     return self.guess(candidate)
Esempio n. 12
0
 def test_dame_utils_files_one_level_drop_pwd(self):
     # Globbing *sav under files/datamodels must list the ten model files
     # relative to the current working directory.
     expected = [
         'files/datamodels/adaboost_model.sav',
         'files/datamodels/bernoulliNB_model.sav',
         'files/datamodels/forest_model.sav',
         'files/datamodels/gaussianNB_model.sav',
         'files/datamodels/mlp_model.sav',
         'files/datamodels/multinomialNB_model.sav',
         'files/datamodels/nltk_model.sav',
         'files/datamodels/sgd_model.sav',
         'files/datamodels/svc_model.sav',
         'files/datamodels/tree_model.sav'
     ]
     utils = DameUtils()
     here = os.getcwd()
     listed = utils.files_one_level_drop_pwd(here + "/files/datamodels/*sav")
     self.assertEqual(sorted(listed), expected)
Esempio n. 13
0
 def surname2ethnicity(self, surname):
     """Return the ethnicity columns stored for a surname in the US file.

     The surname is normalised (accents dropped, upper-cased) and compared
     with the first column of ``files/names/names_us/surnames.csv``.  On a
     match, columns 5 to 10 are returned as a dict keyed ``white``,
     ``black``, ``api``, ``aian``, ``doublerace`` and ``hispanic``; the last
     matching row wins.  ``False`` is returned when every value is empty,
     which includes the case where the surname is not found.
     """
     # Column order in the CSV, starting at index 5:
     #   white, black, api (Asian Pacific American),
     #   aian (American Indian and Alaska Native), doublerace (2prace),
     #   hispanic
     labels = ("white", "black", "api", "aian", "doublerace", "hispanic")
     wanted = DameUtils().drop_accents(surname).upper()
     shares = {label: "" for label in labels}
     with open('files/names/names_us/surnames.csv') as handle:
         rows = csv.reader(handle, delimiter=',', quotechar='|')
         # Skip the header line.
         next(rows, None)
         for fields in rows:
             if fields[0] != wanted:
                 continue
             shares = {
                 label: fields[column]
                 for column, label in enumerate(labels, start=5)
             }
     if all(value == "" for value in shares.values()):
         return False
     return shares
Esempio n. 14
0
    def name_frec(self, name, *args, **kwargs):
        """Return how often ``name`` appears as a male and as a female name.

        The accent-stripped name is looked up (case-insensitively) in the
        first column of the male and female CSV files of the selected
        dataset; the second column of the last matching row, passed through
        ``DameUtils.drop_dots``, is the frequency.

        :param name: given name to look up.
        :param dataset: keyword argument; one of ``'es'``, ``'ine'``,
            ``'uy'``, ``'uk'`` or ``'us'``.  Any other value falls back to
            the Spanish files, as does omitting it.
        :returns: ``{"females": ..., "males": ...}`` where a missing name
            is reported as the integer ``0``.
        """
        # guess list method
        dataset = kwargs.get('dataset', 'es')

        du = DameUtils()
        name = du.drop_accents(name)
        # (male file, female file) for every supported dataset.
        spanish = ('files/names/names_es/esmasculinos.csv',
                   'files/names/names_es/esfemeninos.csv')
        paths = {
            'ine': spanish,
            'es': spanish,
            'uy': ('files/names/names_uy/uymasculinos.csv',
                   'files/names/names_uy/uyfemeninos.csv'),
            'uk': ('files/names/names_uk/ukmales.csv',
                   'files/names/names_uk/ukfemales.csv'),
            'us': ('files/names/names_us/usmales.csv',
                   'files/names/names_us/usfemales.csv'),
        }
        path_males, path_females = paths.get(dataset, spanish)

        wanted = name.lower()
        counts = []
        for path in (path_males, path_females):
            count = 0
            # The context manager makes sure each CSV file gets closed.
            with open(path, 'r') as csvfile:
                reader = csv.reader(csvfile, delimiter=',', quotechar='|')
                for row in reader:
                    if (len(row) > 1) and (row[0].lower() == wanted):
                        count = du.drop_dots(row[1])
            counts.append(count)
        males, females = counts
        dicc = {"females": females, "males": males}

        return dicc
Esempio n. 15
0
 def getGeo(self, name, surname, locale, binary=False):
     """Ask the Namsor ``genderGeo`` endpoint about a name in a locale.

     The API key is the first line of ``files/apikeys/namsorpass.txt``.

     :param name: given name, placed verbatim in the request URL.
     :param surname: surname, placed verbatim in the request URL.
     :param locale: locale code, placed verbatim in the request URL.
     :param binary: accepted for interface compatibility; not used here.
     :returns: ``[likelyGender, genderScale]`` taken from the JSON answer.
     """
     # obtaining data from namsor
     # Read-only access is enough for the key file, and the context manager
     # closes it right after the key has been read.
     with open("files/apikeys/namsorpass.txt", "r") as fichero:
         contenido = fichero.readline().rstrip()
     url = 'https://v2.namsor.com/NamSorAPIv2/api2/json/genderGeo/'
     url = url + name + '/' + surname + '/' + locale
     headers = {
         'content-type': 'application/json',
         'Accept-Charset': 'UTF-8',
         'X-API-KEY': contenido
     }
     r = requests.get(url, headers=headers)
     d = json.loads(r.text)
     v = [d['likelyGender'], d['genderScale']]
     return v
Esempio n. 16
0
# the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
# Boston, MA 02110-1301 USA,

from app.dame_sexmachine import DameSexmachine
from app.dame_perceval import DamePerceval
from app.dame_utils import DameUtils
import sys
import argparse
parser = argparse.ArgumentParser()
parser.add_argument("url", help="Uniform Resource Link")
parser.add_argument('--directory')
parser.add_argument('--version', action='version', version='0.1')
args = parser.parse_args()
if (len(sys.argv) > 1):
    ds = DameSexmachine()
    du = DameUtils()
    dp = DamePerceval()
    l1 = dp.list_committers(args.url, args.directory)
    l2 = du.delete_duplicated(l1)
    l3 = du.clean_list(l2)

    females = 0
    males = 0
    unknowns = 0
    for g in l3:
        sm = ds.guess(g, binary=True)
        if (sm == 0):
            females = females + 1
        elif (sm == 1):
            males = males + 1
        else:
Esempio n. 17
0
 def test_dame_utils_files_one_level_method_returns_correct_result(self):
     # The files/ directory is expected to hold more than ten entries.
     utils = DameUtils()
     here = os.getcwd()
     entries = utils.files_one_level(here + '/files/')
     self.assertTrue(len(entries) > 10)
Esempio n. 18
0
 def test_drop_quotes_method_returns_correct_result(self):
     # Both double and single quotes must disappear from the text.
     utils = DameUtils()
     without_double = utils.drop_quotes('Hola "Mexico')
     self.assertEqual('Hola Mexico', without_double)
     without_single = utils.drop_quotes("Hola' 'Mexico")
     self.assertEqual("Hola Mexico", without_single)
Esempio n. 19
0
 def test_dame_utils_delete_duplicated_method_returns_correct_result(self):
     # Repeated elements are dropped, keeping first-occurrence order.
     utils = DameUtils()
     unique = utils.delete_duplicated([1, 2, 2, 1, 3])
     self.assertEqual(unique, [1, 2, 3])
Esempio n. 20
0
 def test_drop_accents_method_returns_correct_result(self):
     # The accented vowel must be replaced by its plain counterpart.
     utils = DameUtils()
     plain = utils.drop_accents("Inés")
     self.assertEqual("Ines", plain)
Esempio n. 21
0
 def test_drop_white_space_method_returns_correct_result(self):
     utils = DameUtils()
     # Trailing blank removed on its own ...
     trimmed = utils.drop_white_space("In ")
     self.assertEqual("In", trimmed)
     # ... and in combination with accent removal.
     trimmed_accented = utils.drop_white_space("Inés ")
     self.assertEqual("Ines", utils.drop_accents(trimmed_accented))
Esempio n. 22
0
 def test_split_method_returns_correct_result(self):
     # Thirteen items in chunks of five: two full chunks and a remainder.
     utils = DameUtils()
     numbers = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]
     expected = [[1, 2, 3, 4, 5], [6, 7, 8, 9, 10], [11, 12, 13]]
     chunks = utils.split(numbers, 5)
     self.assertEqual(chunks, expected)
Esempio n. 23
0
 def test_drop_dots_method_returns_correct_result(self):
     # Removing the dot from 12.12 must leave something int() reads as 1212.
     utils = DameUtils()
     dotless = utils.drop_dots(12.12)
     self.assertEqual(1212, int(dotless))
Esempio n. 24
0
 def test_is_not_blank_method_returns_correct_result(self):
     # Whitespace-only text counts as blank; real text does not.
     utils = DameUtils()
     blank_answer = utils.is_not_blank("  ")
     self.assertEqual(blank_answer, False)
     text_answer = utils.is_not_blank("ok")
     self.assertEqual(text_answer, True)
Esempio n. 25
0
 def test_represents_int_method_returns_correct_result(self):
     # A digit string is an integer representation; a word is not.
     utils = DameUtils()
     digits_answer = utils.represents_int("23")
     self.assertEqual(digits_answer, True)
     word_answer = utils.represents_int("ok")
     self.assertEqual(word_answer, False)
Esempio n. 26
0
# Boston, MA 02110-1301 USA,

from app.dame_sexmachine import DameSexmachine
from app.dame_perceval import DamePerceval
from app.dame_utils import DameUtils
import sys
import argparse
# Command line: a mandatory mailing-list URL plus optional --directory and
# --version flags.
parser = argparse.ArgumentParser()
parser.add_argument("url", help="Uniform Resource Link")
parser.add_argument('--directory')
parser.add_argument('--version', action='version', version='0.1')
args = parser.parse_args()
# NOTE(review): "url" is a required positional, so parse_args() should have
# already exited when no argument was given; this guard looks redundant.
if (len(sys.argv) > 1):
    s = DameSexmachine()
    gg = DamePerceval()
    du = DameUtils()
    # Collect the mailers found at the URL and drop repeated entries.
    l = gg.list_mailers(args.url)
    l = du.delete_duplicated(l)

    # Tally the guesses: 0 is counted as female, 1 as male and any other
    # value as unknown.
    females = 0
    males = 0
    unknowns = 0
    for g in l:
        sm = s.guess(g, binary=True)
        if (sm == 0):
            females = females + 1
        elif (sm == 1):
            males = males + 1
        else:
            unknowns = unknowns + 1
Esempio n. 27
0
 def test_drop_white_space(self):
     utils = DameUtils()
     # A trailing blank is removed.
     trimmed = utils.drop_white_space("In ")
     self.assertEqual("In", trimmed)
     # Works together with accent removal.
     trimmed_accented = utils.drop_white_space("Inés ")
     self.assertEqual("Ines", utils.drop_accents(trimmed_accented))
     # Inner blanks are removed as well, not only the surrounding ones.
     squeezed = utils.drop_white_space("Juan Carlos I ")
     self.assertEqual("JuanCarlosI", utils.drop_accents(squeezed))
Esempio n. 28
0
    #print(yesornot)

    # On an affirmative answer: regenerate the name dictionary file, train
    # and store every data model, then derive the CSV file that only keeps
    # rows with a defined gender.
    if yesornot in ("Yes", "yes", "Y", "y"):
        print("We are creating files/names/nam_dict_list.txt")
        g.namdict2file()
        print("We are creating .sav files data models in files/datamodels")
        print("This process take a long time, you can rest.")
        s = DameSexmachine()
        s.classifier()
        s.gaussianNB()
        s.svc()
        s.sgd()
        s.multinomialNB()
        s.bernoulliNB()
        s.tree()
        s.mlp()
        print("This process has finished. You have the models in files/datamodels/*.sav")

        du = DameUtils()

        print("Creating the file files/names/allnoundefined.csv from files/names/all.csv")
        # Both files are managed by the with statement so they get closed
        # even when a malformed row raises while copying.
        with open('files/names/all.csv') as csvfile, \
                open('files/names/allnoundefined.csv', 'w+') as filenou:
            reader = csv.reader(csvfile, delimiter=',', quotechar='|')
            for row in reader:
                # Column 4 holds the gender; only "m" and "f" rows are kept.
                # NOTE: the name g is rebound from here on, as before.
                g = du.drop_quotes(row[4])
                if g in ("m", "f"):
                    # Copy the first six columns of the row unchanged.
                    filenou.write(','.join(row[i] for i in range(6)) + '\n')
Esempio n. 29
0
 def test_drop_white_space_around(self):
     # Only the blanks surrounding the text are removed; inner blanks stay.
     utils = DameUtils()
     strip = utils.drop_white_space_around
     self.assertEqual("In", strip(" In"))
     king = "Juan Carlos I"
     self.assertEqual(king, strip(" Juan Carlos I"))
     self.assertEqual(king, strip(" Juan Carlos I  "))
     self.assertEqual(king, strip(" Juan Carlos I "))
     # Combined with accent removal.
     unaccented_pair = utils.drop_accents(" José María ")
     self.assertEqual("Jose Maria", strip(unaccented_pair))
     unaccented_single = utils.drop_accents("Inés ")
     self.assertEqual("Ines", strip(unaccented_single))
     self.assertEqual("Ana", strip(" Ana"))
Esempio n. 30
0
 def test_drop_white_space_around(self):
     # After accent removal, the surrounding blanks must be gone and the
     # inner blank replaced by the given separator.
     utils = DameUtils()
     unaccented = utils.drop_accents(" María José ")
     joined = utils.white_space_inside_by(unaccented, "+")
     self.assertEqual("Maria+Jose", joined)