Beispiel #1
0
 def get_gender(self, n):
     """Return the salutation matching the guessed gender of first name ``n``.

     ``n`` is converted with ``str()`` and capitalised before the lookup.

     Returns:
         ``'madame'`` when the detector answers ``'female'`` or
         ``'mostly_female'``; ``'Mister'`` for every other answer.
     """
     # Build one detector and look the name up once; the previous version
     # constructed a second Detector and repeated the same lookup.
     guessed = gender.Detector().get_gender(str(n).capitalize())
     if guessed in ('female', 'mostly_female'):
         return 'madame'
     return 'Mister'
Beispiel #2
0
def gender_breakdown(names, graph=True, text=True, plot_options={}):
    """Summarise the guessed gender of ``names`` as text and/or a bar chart.

    Each name is reduced with ``get_first_name``, classified by the gender
    detector and collapsed with ``compress_gender`` before counting.

    Args:
        names: iterable of customer names.
        graph: when true, draw a bar chart with one bar per category in
            ``Female`` / ``Male`` / ``Andro/Unknown`` order.
        text: when true, print the count and percentage of every category
            that occurred.
        plot_options: extra keyword arguments forwarded to ``plt.bar``.
            The default dict is only unpacked, never mutated.
    """
    categories = ['Female', 'Male', 'Andro/Unknown']

    d = gender_detector.Detector()
    gender_dict = Counter(
        compress_gender(d.get_gender(get_first_name(name))) for name in names)
    total_names = sum(gender_dict.values())

    if text:
        print("Out of {} total customers:".format(total_names))
        for key, count in gender_dict.items():
            print("\t{} ({:.2f}%) customers were {}".format(
                count, 100 * count / total_names, key))

    if graph:
        # Always plot exactly the three fixed categories. Using
        # len(gender_dict) for the x positions broke the chart whenever the
        # data did not contain exactly three distinct categories.
        positions = range(len(categories))
        plt.style.use('ggplot')
        plt.figure(dpi=200)
        plt.bar(positions,
                [gender_dict[category] for category in categories],
                align='center',
                **plot_options)
        plt.xticks(positions, categories)
        plt.title("Gender Breakdown of Customers",
                  fontsize=24,
                  fontname='Pier Sans')
        plt.xlabel("Gender")
Beispiel #3
0
def predict_sex(name):
    """Encode the guessed sex of each full name as an integer code.

    Args:
        name: pandas Series of full-name strings; the text before the first
            space is treated as the first name.

    Returns:
        Series of ints: -2 female, -1 mostly_female, 0 unknown or
        androgynous, 1 mostly_male, 2 male.
    """
    sex_predictor = gender.Detector(unknown_value=u"unknown",
                                    case_sensitive=False)
    first_name = name.str.split(' ').str.get(0)
    sex = first_name.apply(sex_predictor.get_gender)
    # 'andy' (androgynous) is a possible detector answer; without an entry
    # it mapped to NaN and the astype(int) below raised.
    sex_dict = {
        'female': -2,
        'mostly_female': -1,
        'andy': 0,
        'unknown': 0,
        'mostly_male': 1,
        'male': 2,
    }
    return sex.map(sex_dict).astype(int)
Beispiel #4
0
    def do_classify_in_threads(self) -> dict:
        """
        Fetch every URL in URLS_ES on a thread pool, scrape the speakers of
        each page and encode every speaker by guessed gender.

        Each fetch goes through ``self.connect`` and is expected to yield a
        (page, year) pair; the page is handed to ``self.scrapper_es``.
        A failing URL is reported on stdout and skipped.

        :return: dict keyed by year (as str) whose values are lists where
                 1 marks a speaker guessed as 'female' and 0 anything else,
                 e.g. {'2016': [1, 0, 0, 1], '2018': [0, 1, 0]}
        """
        speakers_by_year = {}

        with concurrent.futures.ThreadPoolExecutor(max_workers=6) as pool:
            pending = {}
            for target in URLS_ES:
                pending[pool.submit(self.connect, target)] = target

            for finished in concurrent.futures.as_completed(pending):
                source_url = pending[finished]
                try:
                    outcome = finished.result()
                    page = outcome[0]
                    year = outcome[1]
                    speakers_by_year[str(year)] = self.scrapper_es(page, year)
                except Exception as exc:
                    print(f"{source_url} generated the following exception: \n{exc}")

        # At this point the values are lists of names. Overwrite each name
        # in place with 1 when its first word is guessed 'female', else 0.
        detector = gender.Detector()
        for speakers in speakers_by_year.values():
            for position, full_name in enumerate(speakers):
                guess = detector.get_gender(full_name.split()[0])
                speakers[position] = 1 if guess == 'female' else 0

        return speakers_by_year
def guess_sex(s: pd.Series):
    """Guess the sex for every entry of a Series of first name(s).

    Only the part before the first space or hyphen is looked up, converted
    to title case. The detector's "mostly_" answers are folded into their
    plain counterparts and "andy" (androgynous) is reported as "unknown".

    Keyword arguments
    s -- a pandas Series of strings holding the first name(s)

    Returns a Series with the values 'male', 'female' or 'unknown'.
    """

    detector = gender.Detector()

    def leading_name(full_name):
        # Compound names are not recognised, so keep the first part only.
        return re.split(' |-', full_name)[0].title()

    simplified = {
        'male': 'male',
        'female': 'female',
        'mostly_female': 'female',
        'mostly_male': 'male',
        'andy': 'unknown',
        'unknown': 'unknown',
    }

    guesses = s.apply(leading_name).apply(detector.get_gender)
    return guesses.map(simplified)
Beispiel #6
0
    def create_new_actor(self):
        """Create and return a ``Person`` for ``self.name`` with an actor.

        Actor selection:
          * exactly one ``Actor`` whose name starts (case-insensitively)
            with the person's first name: use it;
          * several such actors: pick one of them at random;
          * none: guess the gender from the first name and ask
            ``self.get_random_actor`` for a matching actor, or for any
            actor when the guess is inconclusive.
        """
        # We need just the first name to match against our actors
        first_name = self.get_first_name()

        # e.g. John would get ['John Wayne', 'Johnny Depp', 'John Goodman']
        name_matches = Actor.objects.filter(name__istartswith=first_name)

        count = len(name_matches)
        if count == 1:
            actor = name_matches.first()
        elif count > 1:
            # More than one match: use a random one *from those*
            actor = name_matches[randint(0, count - 1)]
        else:
            guesser = gender_detector.Detector()
            guessed_gender = guesser.get_gender(first_name)
            # Previously every answer other than 'male' -- including
            # 'mostly_male' and the androgynous 'andy' -- became 'F'.
            if guessed_gender in ('male', 'mostly_male'):
                actor = self.get_random_actor('M')
            elif guessed_gender in ('female', 'mostly_female'):
                actor = self.get_random_actor('F')
            else:
                # Unknown or androgynous name: just use any actor
                actor = self.get_random_actor()

        return Person.objects.create(name=self.name, actor=actor)
Beispiel #7
0
    def __init__(self, connection):
        """Create the Twitter and Riot API clients and cache static names.

        Stores the tweepy API client (``self.api``), a gender detector
        (``self.gen``), the ASCII-encoded item and champion names fetched
        for region 'na1' (``self.item_name`` / ``self.champions_name``) and
        the given ``connection``. Both name lists are printed.

        NOTE(review): the API credentials are embedded as literals below;
        consider loading them from configuration instead.
        """
        twitter_auth = tweepy.OAuthHandler(
            '2XAQTR3zNwrBtD2i6AGxZDeP6',
            '7e9xsaOSvyyI0nY5ZDN1cYx0phndLtclhdxukaC1rSjztsH9Q2')
        twitter_auth.set_access_token(
            '3333417351-X483ie45LpKqy6vw3LEUa84LN1bG6aMuZXRUlF7',
            'FQnPzJbLjvODZXiph48eb5bK2UJRHhjdhBw5thKDFvZer')
        self.api = tweepy.API(twitter_auth)
        self.gen = gender.Detector()

        region = 'na1'
        riot = RiotWatcher('RGAPI-11ab2328-9ef7-47f8-a4c5-e4afcf0baab0')
        champion_payload = riot.static_data.champions(region)
        item_payload = riot.static_data.items(region)
        champions_by_key = champion_payload['data']
        items_by_key = item_payload['data']

        # Names are encoded to ASCII, silently dropping other characters.
        collected_items = [
            items_by_key[key]['name'].encode('ascii', 'ignore')
            for key in list(items_by_key.keys())
        ]
        collected_champions = [
            champions_by_key[key]['name'].encode('ascii', 'ignore')
            for key in list(champions_by_key.keys())
        ]
        self.item_name = collected_items
        self.champions_name = collected_champions

        print(collected_items)
        print(collected_champions)

        self.connection = connection
Beispiel #8
0
def name_to_gender(name, api_key=None, name_dict={}):
    """
    This function uses the gender-guesser pip package (https://pypi.org/project/gender-guesser/)
    and optionally the gender API (https://gender-api.com/) or a dictionary to guess the gender of
    the given name.

    Note that a major limitation of this code and the original paper is that this is a guess at the
    gender of a person based only on a first name, and it does not reflect the chosen gender of the
    author. Further, there are greater limitations if the name is not a traditionally western name.
    Last, the gender API has limitations in that it doesn't include all types of experienced genders.

    Inputs
    ------
    name : string
        The first name whose gender you want to guess.
    api_key : string
        The API key for the gender API. You can sign up for a free account and get an API key on
        the gender-api.com website. Optional.
    name_dict : dict
        Dictionary containing gender guesses and accuracy, the dictionary is used if the gender-guesser
        package returns 'unknown'. It is updated (and written to NAME_DICT_PATH) if a gender-api
        request is made. The default dict is a single object shared by all calls that omit it.

    Outputs
    -------
    gender : string
        The guessed gender of the name.

    accuracy : int or None
        The accuracy of the gender guess, in percent. 0 for names shorter than two characters and
        None when no accuracy is available (e.g. the guess came from the gender-guesser package).
    """
    # use the _gender_detector as a global variable to avoid re-generating it each time
    global _gender_detector

    # If the name is just an initial, return unknown
    if len(name) < 2:
        return "unknown", 0

    # create the gender-guesser detector if it doesn't already exist
    if "_gender_detector" not in globals():
        _gender_detector = gender_detecor.Detector(case_sensitive=False)

    gender = _gender_detector.get_gender(name)
    accuracy = None
    if gender == "unknown":
        if name in name_dict:
            gender = name_dict[name]["gender"]
            accuracy = name_dict[name]["accuracy"]
        elif api_key:
            # Let requests build the query string so that names containing
            # characters such as '&', '#' or spaces are encoded correctly.
            response = requests.get(
                "https://gender-api.com/get",
                params={"key": api_key, "name": name},
            ).json()
            gender = response["gender"]
            accuracy = response["accuracy"]
            name_dict[name] = {"gender": gender, "accuracy": accuracy}
            # save the updated name_dict
            with open(NAME_DICT_PATH, 'w') as name_dict_file:
                json.dump(name_dict, name_dict_file, indent=2)
        # if still unknown and there is a dash in the name, try on the first part of the name
        if gender == "unknown" and "-" in name:
            # Forward the caller's dictionary; the builtin ``dict`` type was
            # passed here before, which made the recursive lookup fail.
            return name_to_gender(name.split("-")[0], api_key, name_dict)
    return gender, accuracy
Beispiel #9
0
class Genderize():
    """Namespace around one shared, case-insensitive gender detector."""

    # Built once when the class body runs and reused by every lookup.
    detector = gender.Detector(case_sensitive=False)

    @staticmethod
    def guess_from_name(firstname):
        """Return the detector's gender guess for ``firstname``.

        Declared static because it takes no ``self``; calling it on the
        class keeps working and calling it on an instance now works too.
        """
        return Genderize.detector.get_gender(firstname)
Beispiel #10
0
def split_data_by_gender(_file):
    """Split the rows of an Excel sheet by the guessed gender of the customer.

    The gender is guessed from the text before the first space of the
    ``CustomerName`` column. Rows guessed exactly ``'male'`` or ``'female'``
    form the two returned frames; every other answer counts as "other",
    which is only reported in the printed summary.

    Args:
        _file: path or file object accepted by ``pandas.read_excel``.

    Returns:
        ``(male_data, female_data)`` -- DataFrames with the columns and
        index labels of the source sheet.
    """
    data = pd.read_excel(_file)
    d = gender.Detector()

    print('all index: ', len(data))

    # Classify every row once, then select with boolean masks. The previous
    # row-by-row DataFrame.append was quadratic and no longer exists in
    # pandas 2.0.
    guessed = np.array(
        [d.get_gender(full_name.split(' ', 1)[0])
         for full_name in data['CustomerName']],
        dtype=object)
    is_male = guessed == 'male'
    is_female = guessed == 'female'

    male_data = data.loc[is_male].copy()
    female_data = data.loc[is_female].copy()
    other_data = data.loc[~(is_male | is_female)].copy()

    print('male_data.shape: ', male_data.shape)
    print('female_data.shape: ', female_data.shape)
    print('other_data.shape: ', other_data.shape)
    return male_data, female_data
Beispiel #11
0
    def __init__(self):
        """Open the log file and database connection and build the detectors.

        Side effects, in order: appends a start message to the log file,
        connects to the ``eureka01`` database, registers a psycopg2 adapter
        so ``dict`` values are adapted as JSON, and executes the SELECT that
        leaves the ``detailed_info`` rows pending on ``self.cur``.
        """
        # Settings
        self.all_data_file_name = './csv_files/altgender4_2017_12_12_upwork_analysis_unitedstates_allskills.csv'  # Filename for all data
        self.data_log_file_name = './log_files/alt_gender4_log_upwork_data_analysis_2017_12_12_unitedstates_allskills.txt'

        # Write a log (opened in append mode; the handle stays open on self.log)
        self.log = open(self.data_log_file_name, 'a')
        self.log.write("We have started analyzing data!" + "\n")
        self.log.flush()

        # Connect to the database; rows from this cursor are DictCursor rows
        self.conn = psycopg2.connect("dbname=eureka01")
        self.cur = self.conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
        # Process-wide registration: dict parameters are adapted as JSON
        psycopg2.extensions.register_adapter(dict, psycopg2.extras.Json)

        # Get detailed_info from workers in our database
        # (the result is not fetched here; it stays pending on self.cur)
        self.cur.execute(
            "SELECT detailed_info FROM upwork_unitedstates_allskills_2017_12_12;"
        )

        # Counter starting at 1; how it is used is outside this block
        self.user_count = 1

        # Initialize gender detectors
        # NOTE(review): self.d and self.gender_guesser both come from a
        # ``Detector()`` of (presumably) the same package -- confirm whether
        # both attributes are needed.
        self.d = gender.Detector()
        self.gc = GenderComputer('./nameLists')
        self.us_detector = GenderDetector('us')
        self.ar_detector = GenderDetector('ar')
        self.uk_detector = GenderDetector('uk')
        self.uy_detector = GenderDetector('uy')
        self.gender_guesser = gender_guesser.Detector()
Beispiel #12
0
    def create_auto_reply(self,original):
        """Build the automatic reply message for the email ``original``.

        Steps: create the reply headers, append the original message to
        'cv.txt', derive a gendered salutation, run 'categorize.py' and
        append the lines it leaves in 'coreCategory.txt' to the body, then
        delete both scratch files. The body is attached as HTML.

        NOTE(review): the salutation name is taken from
        ``self.from_address`` rather than from ``original`` -- confirm this
        is intended.

        Returns the MIMEMultipart reply.
        """
        mail = MIMEMultipart('alternative')
        mail['Message-ID'] = make_msgid()
        replied_id = original['Message-ID']
        mail['References'] = replied_id
        mail['In-Reply-To'] = replied_id
        mail['Subject'] = 'Re: ' + str(original['Subject'])
        mail['From'] = self.from_address
        mail['To'] = original['Reply-To'] or original['From']

        # Hand the original message over to the categoriser via cv.txt
        with open('cv.txt', 'a') as scratch:
            scratch.write(str(original))

        # Keep only the display-name words (drop anything containing '@')
        sender = self.from_address
        display_name = ' '.join(
            word for word in sender.split() if '@' not in word)

        print (display_name)

        # Name splitting
        parsed = HumanName(display_name)
        salutation = "".join([
            parsed.last, ",", parsed.middle, " ", parsed.suffix, " ",
            parsed.nickname, ",", parsed.title, " ", parsed.first, ":\n",
        ])

        # Gender detection on the first name picks the form of address
        guess = gender.Detector().get_gender(parsed.first)
        if guess in ("male", "mostly_male"):
            opening = "Mr. "
        elif guess in ("female", "mostly_female"):
            opening = "Miss. "
        else:
            opening = "Dear/Honorable "

        core_body = opening + " " + salutation + self.body_html

        # Categorise the body that was written to cv.txt
        os.system('python3 categorize.py')

        with open('coreCategory.txt') as category_file:
            categories = "".join(
                line + "/" for line in category_file.readlines())

        print (categories)

        full_body = f"{core_body}\n#EmailClassifies as:/{categories}"

        # Remove the scratch files
        os.system('rm -rf cv.txt coreCategory.txt')

        mail.attach(MIMEText(full_body, 'html'))
        return mail
Beispiel #13
0
def processNamesColumn(column):
    """Encode each entry of ``column`` as a gender code from its words.

    Every entry is converted to ``str``, non-letters become spaces and a
    space is inserted before each capital letter, then the resulting words
    are looked up one by one. A definite 'female'/'male' answer sets the
    code and stops the scan; a 'mostly_*' answer sets the code but the scan
    goes on, so a later word can override it.

    Returns an ``(len(column), 1)`` int array: 0 no match, 1 male,
    2 female.
    """
    encoded = np.ndarray(shape=(len(column), 1), dtype=int)
    detector = gender.Detector()
    non_letters = re.compile('[^a-zA-Z]')

    # detector answer -> (code, whether the answer is final)
    outcomes = {
        'mostly_female': (2, False),
        'mostly_male': (1, False),
        'female': (2, True),
        'male': (1, True),
    }

    for row in range(len(column)):
        encoded[row, 0] = 0

        letters_only = non_letters.sub(' ', str(column[row]))
        spaced = re.sub(r'([A-Z])', r' \1', letters_only)

        for token in spaced.split():
            outcome = outcomes.get(detector.get_gender(token))
            if outcome is None:
                continue
            code, final = outcome
            encoded[row, 0] = code
            if final:
                break

    return encoded
Beispiel #14
0
def get_percentage_of_female_speakers(first_names):
    """Run gender_guesser on the names returning a percentage
       of female speakers (female and mostly_female),
       rounded to 2 decimal places.

       Returns 0.0 when ``first_names`` is empty instead of dividing
       by zero."""
    names = list(first_names)
    if not names:
        return 0.0
    d = gender.Detector()
    c = Counter(d.get_gender(name) for name in names)
    return round((c['female'] + c['mostly_female']) / sum(c.values()) * 100, 2)
Beispiel #15
0
def create_minister():
    """Merge the minister dump files into 'ministers.csv'.

    Every file in the dump directory whose name does not contain
    "Parliament" is read as a '|'-separated CSV, enriched with ``start`` /
    ``end`` (via ``find_date``), ``minister_number`` (first run of digits in
    the file name) and ``sex`` (guessed from the first word of ``name``),
    and appended to one frame. The frame is written sorted by ``sex`` with
    ``sex`` as the last column.
    """
    dump_dir = "/home/flex_lev/Dev/Perso/canadian_ministry/dump"
    dump_prefix = "/home/flex_lev/Dev/Perso/canadian_ministry/dump/"
    d = gender.Detector()

    def guess_from_full_name(full_name):
        return d.get_gender(full_name.split(" ")[0])

    df_p = pd.DataFrame()
    for file in os.listdir(dump_dir):
        if "Parliament" in file:
            continue

        print(file)
        df = pd.read_csv(dump_prefix + file, sep="|")
        df = df.rename(columns={'Unnamed: 0': 'index_rows'})
        index_number = re.findall(r'\d+', file)[0]
        df["start"] = df["date"].apply(lambda value: find_date(value, 0))
        df["end"] = df["date"].apply(lambda value: find_date(value, 1))
        df["minister_number"] = int(index_number)
        df["sex"] = df["name"].apply(guess_from_full_name)

        # The first frame is taken as is; later ones are concatenated.
        if df_p.shape[0] == 0:
            df_p = df
        else:
            df_p = pd.concat([df_p, df], ignore_index=True)

    print(df_p.shape)
    ordered_columns = [col for col in df_p.columns if "sex" not in col]
    ordered_columns = ordered_columns + ["sex"]
    df_p[ordered_columns].sort_values(by=["sex"]).to_csv("ministers.csv",
                                                         sep="|",
                                                         index=False)
Beispiel #16
0
    def annotate(self, file_name):
        """Write an entity-annotated copy of ``file_name``.

        The text is read from ``self.path``. For every entity in
        ``self.entities`` that ``check_if_valid`` accepts for the current
        text, the extracted facts are appended to ``self.knowledge`` and
        each occurrence of the entity's full name, its last word, or one of
        its space-delimited pronouns (from ``genders_person``, as given and
        title-cased) is wrapped in an ``<entity name="...">`` tag. The
        result goes to './TD/Annotations/' with '.txt' replaced by
        '_ant.txt'.
        """
        with open('./TD/Annotations/' + file_name.replace('.txt', '_ant.txt'),
                  'w') as fw:
            with open(self.path + '/' + file_name) as fr:
                text = fr.read()
                for item in self.entities:
                    if not check_if_valid(text, item):
                        continue

                    self.knowledge.append(self.extract_birth_day(text, item))
                    self.knowledge.append(self.extract_type(text, item))
                    self.knowledge.append(self.extract_pattern(text, item))
                    self.knowledge.append(self.extract_marriage(text, item))
                    self.knowledge.append(
                        self.extract_pattern(text,
                                             item,
                                             pattern="appeared in"))

                    last_word = item.split(' ')[-1].strip()
                    pronouns = genders_person[item].split()
                    pros = pronouns + [x.title() for x in pronouns]

                    # Escape the literal alternatives so that names with
                    # regex metacharacters (e.g. "Jr.") match literally.
                    # Pronouns only match when surrounded by spaces.
                    alternatives = [re.escape(item), re.escape(last_word)]
                    alternatives += [
                        " " + re.escape(pronoun) + " " for pronoun in pros
                    ]
                    regex = "(" + "|".join(alternatives) + ")"
                    text = re.sub(
                        regex, r'<entity name="' + self.entities[item] +
                        '">\\1</entity>', text)
            fw.write(text)
Beispiel #17
0
def get_author_gender_guesser(author):
    """
    Guess an author's gender with the gender guesser module, using the first
    word of ``format_author(author)``.

    'mostly_male' / 'mostly_female' answers count as 'male' / 'female'.
    Any other answer (e.g. 'andy' or 'unknown') yields None.

    >>> from gender_novels.corpus_gen import get_author_gender_guesser
    >>> get_author_gender_guesser("Cuthbert, Michael")
    'male'
    >>> get_author_gender_guesser("Li, Michelle")
    'female'
    >>> get_author_gender_guesser("Duan, Mingfei") # should return None


    :param author: str
    :return: 'male', 'female' or None
    """

    simplified = {
        'male': 'male',
        'mostly_male': 'male',
        'female': 'female',
        'mostly_female': 'female',
    }
    first_name = format_author(author).split()[0]
    guess = gender_guesser.Detector().get_gender(first_name)
    return simplified.get(guess)
Beispiel #18
0
def get_male_female_words_count(nlp_doc):
    """Count male, female and neutral references in ``nlp_doc``.

    A token counts when its text or lemma is in ``male_words``,
    ``female_words`` or ``neutral_words`` (checked in that order). Each
    name from ``get_named_persons(nlp_doc)`` adds one to the male count
    ('male', 'mostly_male'), the female count ('female', 'mostly_female')
    or the neutral count ('andy', 'unknown').

    Returns:
        dict with the keys ``male_count``, ``female_count`` and
        ``neutral_count``.
    """
    gender_detector = gender.Detector()
    male_count = 0
    female_count = 0
    neutral_count = 0
    for token in nlp_doc:
        if token.text in male_words or token.lemma_ in male_words:
            male_count += 1
        elif token.text in female_words or token.lemma_ in female_words:
            female_count += 1
        elif token.text in neutral_words or token.lemma_ in neutral_words:
            neutral_count += 1

    for name in get_named_persons(nlp_doc):
        guess = gender_detector.get_gender(name)
        # Membership tests: the female and neutral branches used to compare
        # the guess to a list with ``==`` and therefore never matched.
        if guess in ("male", "mostly_male"):
            male_count += 1
        elif guess in ("female", "mostly_female"):
            female_count += 1
        elif guess in ("andy", "unknown"):
            neutral_count += 1

    return {
        "male_count": male_count,
        "female_count": female_count,
        "neutral_count": neutral_count
    }
Beispiel #19
0
def get_percentage_of_female_speakers(first_names):
    """Run gender_guesser on the names returning a percentage
       of female speakers (female and mostly_female), rounded to
       2 decimal places.

       Returns 0.0 for an empty ``first_names`` instead of dividing
       by zero."""
    total = len(first_names)
    if total == 0:
        return 0.0
    d = gender.Detector()
    genders = Counter(d.get_gender(x) for x in first_names)
    female = genders['female'] + genders['mostly_female']
    return round((female / total) * 100, 2)
Beispiel #20
0
def populate_genders(users):
    """Add a guessed ``gender`` to every user record that lacks one.

    A record is updated in place only when it has no "gender" key and has a
    "twitter_profile" containing a "name"; the guess is made from the text
    before the first space of that name and "gender_source" is set to
    "gender-guesser".
    """
    detector = gender.Detector()
    for user in list(users.values()):
        if "gender" in user:
            continue
        if "twitter_profile" not in user:
            continue
        profile = user["twitter_profile"]
        if "name" not in profile:
            continue
        first_word = profile["name"].split(" ")[0]
        user["gender"] = detector.get_gender(first_word)
        user["gender_source"] = "gender-guesser"
Beispiel #21
0
def get_percentage_of_female_speakers(first_names: list) -> float:
    """Run gender_guesser on the names returning a percentage
       of female speakers (female and mostly_female), rounded to
       2 decimal places. The lookup is case-insensitive.

       Returns 0.0 for an empty list instead of dividing by zero."""
    if len(first_names) == 0:
        return 0.0
    d = gender.Detector(case_sensitive=False)
    count = Counter(d.get_gender(name) for name in first_names)
    female_percentage = ((count["female"] + count["mostly_female"]) /
                         len(first_names) * 100)

    return round(female_percentage, 2)
Beispiel #22
0
def get_percentage_of_female_speakers(first_names):
    """Run gender_guesser on the names returning a percentage
       of female speakers (female and mostly_female), rounded to
       2 decimal places.

       Returns 0.0 for an empty ``first_names`` instead of dividing
       by zero."""
    name_count = len(first_names)
    if name_count == 0:
        return 0.0
    detector = gender.Detector()
    gender_counter = Counter(detector.get_gender(name)
                             for name in first_names)
    perc = (gender_counter['female'] +
            gender_counter['mostly_female']) * 100. / name_count
    return round(perc, 2)
Beispiel #23
0
def select_en_base():
    """Guess a gender for every row of ``nom_des_voies`` and store it.

    For each row, a first name is picked from the words of ``ligne[1]``
    (skipping up to two leading stopwords), classified with
    ``sc.check_genre`` and written to the ``genre`` column of the row whose
    ``voie_id`` is ``ligne[0]``.

    NOTE(review): ``ligne[1]`` is presumably the street name -- confirm
    against the table schema.
    NOTE(review): both UPDATE statements are built by string concatenation;
    consider a parameterized query if the database helper supports one.
    """
    # Only the ``debug == 2`` print in the IndexError handler is active
    # with this value; the ``debug == 1`` prints are disabled.
    debug = 2
    conn = database.create_connection()

    cur = database.query_create_select(conn, "Select * From nom_des_voies;")

    # Words that are not treated as a first name when they come first.
    stopwords = [
        'la', 'le', 'des', 'de', 'Père', 'point', 'Saint', 'Place', 'Rue',
        'Avenue', 'Allée', 'Quai', 'Rond', 'Chemin', 'Passage', 'Cours',
        'Boulevard', 'Impasse', 'Général', 'Lieutenant', 'Route', 'Cour',
        'Galerie', 'Président', 'Prosper', 'ème', 'Régiment', 'Jardin',
        'Champ', 'La', 'Le', 'et', 'Lys', 'Docteur', 'ter', 'Capitaine',
        'Parc', 'Square', 'Stade', 'bis', 'Voie', 'Pont', 'Commandant',
        'Sainte', 'Colonel', 'Espace'
    ]

    for ligne in cur:
        try:
            if debug == 1:
                print(ligne[0], ligne[1])
            # Start from the second word and move on while the candidate is
            # a stopword (at most two steps). Any index past the end raises
            # IndexError, handled below.
            prenom = ligne[1].split(" ")[1]
            if prenom in stopwords:
                prenom = ligne[1].split(" ")[2]
                if prenom in stopwords:
                    prenom = ligne[1].split(" ")[3]

            # ``nom`` is only used by the debug print below.
            nom = ligne[1].split(" ")[2]
            if debug == 1:
                print(prenom, nom)
            d = sc.check_genre(prenom)
            #            data = d.get_gender(prenom)
            if debug == 1:
                print(f'prenom :{prenom} genre:{d}')

            # Update the gender in the database
            requete = "update nom_des_voies set genre = '" + d + "' Where voie_id = " + str(
                ligne[0]) + ";"
            try:
                database.query_create_select(conn, requete)
            except:
                # Any failure of the update is reported and otherwise ignored.
                print("Erreur")

            if debug == 1:
                print(requete)

        except IndexError:
            # Reached when the text has too few words for the lookups above.
            print("Juste un nom dans la rue")
            print(ligne[1])
            # NOTE(review): ``prenom`` may be unassigned here (first row) or
            # still hold the value from a previous row when the very first
            # lookup above raised -- verify which name should be used.
            d = gender.Detector()
            data = d.get_gender(prenom)
            if debug == 2:
                print(f'prenom :{prenom} genre:{data}')

            # Update the gender in the database
            # NOTE(review): this statement is built but not executed in the
            # visible code -- confirm whether the execution step is missing.
            requete = "update nom_des_voies set genre = '" + data + "' Where voie_id = " + str(
                ligne[0]) + ";"
Beispiel #24
0
def guess_gender(author):
    """Guess the gender of ``author`` from the first word of the name.

    The detector's answer is returned when it is exactly "male" or
    "female"; for any other answer the result of
    ``search_person_for_gender(author)`` is returned instead.
    """
    detector = gender.Detector()
    first_name = author.split(" ")[0]
    guess = detector.get_gender(first_name)
    if guess in ["male", "female"]:
        return guess
    return search_person_for_gender(author)
def guess_gender(goodreads_data, gender_col="gender"):
    """Add first-name and guessed-gender columns to ``goodreads_data``.

    "First.Name" receives the text before the first space of "Author"
    (an empty string when the author is missing) and ``gender_col`` the
    detector's guess for that first name. The frame is modified in place
    and returned.
    """
    detector = gender.Detector()

    first_names = []
    for parts in goodreads_data["Author"].str.split(" ").fillna(""):
        # Missing authors were replaced by "" above; real values are lists.
        first_names.append(parts[0] if parts != "" else "")
    goodreads_data["First.Name"] = first_names

    guesses = []
    for first_name in goodreads_data["First.Name"]:
        guesses.append(detector.get_gender(first_name))
    goodreads_data.loc[:, gender_col] = guesses

    return goodreads_data
Beispiel #26
0
	def get_author_gender(self,name):
		"""Return the first conclusive gender guess for ``self.author``.

		``self.author`` is split on non-word characters and each piece is
		looked up in order; the first answer other than 'unknown' is
		returned, or 'unknown' when no piece is recognised. The ``name``
		argument is not used.
		"""
		import re
		import gender_guesser.detector as gender
		detector = gender.Detector()
		# Raw string: '\W' in a plain literal is an invalid escape sequence.
		for word in re.split(r'\W', self.author):
			guess = detector.get_gender(word)
			if guess != 'unknown':
				return guess
		return 'unknown'
Beispiel #27
0
def get_percentage_of_female_speakers(first_names):
    """Run gender_guesser on the names returning a percentage
       of female speakers (female and mostly_female),
       rounded to 2 decimal places.

       Returns 0.0 for an empty ``first_names`` instead of dividing
       by zero."""
    if len(first_names) == 0:
        return 0.0
    d = gender.Detector()
    # One lookup per name; the name used to be looked up twice.
    females = sum(
        1 for name in first_names
        if d.get_gender(name) in ('female', 'mostly_female'))
    return round((females / len(first_names)) * 100, 2)
Beispiel #28
0
 def show_gender_by_year(self, articles):
     """Count guessed author genders per publication year.

     ``articles['pub_date']`` is replaced in place by its year. Rows
     without an author are ignored, the gender is guessed from the text
     before the first space of ``author`` and 'unknown' guesses are
     dropped.

     Returns the ``value_counts`` of ``gender`` grouped by ``pub_date``.
     """
     detector = gender.Detector()
     articles['pub_date'] = pd.to_datetime(articles['pub_date']).dt.year

     has_author = articles.author.notnull()
     authored = articles.loc[has_author, ['author', 'pub_date']]
     authored['name'] = authored.author.str.split(' ', expand=True)[0]
     authored['gender'] = authored['name'].apply(detector.get_gender)

     is_known = authored.gender != 'unknown'
     known = authored.loc[is_known, ['pub_date', 'gender']]
     return known.groupby('pub_date').gender.value_counts()
    def __init__(self, text):
        """Prepare the title lists, the detector and the names in ``text``.

        ``self.all_names`` is the result of ``self.all_possible_names``
        applied to ``spacy_names(text)``.
        """
        # Lower-case titles that indicate a male person
        self.male_title = [
            'mr.', 'sir', 'monsieur', 'captain', 'chief', 'master',
            'lord', 'baron', 'mister', 'mr', 'prince', 'king',
        ]
        # Lower-case titles that indicate a female person
        self.female_title = [
            'mrs.', 'ms.', 'miss', 'lady', 'mademoiselle', 'baroness',
            'mistress', 'mrs', 'ms', 'queen', 'princess', 'madam',
            'madame',
        ]
        self.detector = gender.Detector()

        # Base names, used for removal
        found_names = spacy_names(text)
        self.all_names = self.all_possible_names(found_names)
Beispiel #30
0
def processData():
	"""Load the training CSV and return a cleaned DataFrame.

	'Owner Name' is replaced by an 'Owner Sex' column computed with
	``get_binary_gender``, and missing values in numeric columns are
	filled with the column mean.
	"""
	df = pd.read_csv('Dumitrescu_Gabriel_Horia_train.csv')

	d = gender.Detector()
	df['Owner Sex'] = np.vectorize(get_binary_gender)(d, df['Owner Name'])

	df.drop(['Owner Name'], axis=1, inplace=True)

	# numeric_only=True keeps the means restricted to numeric columns;
	# without it pandas >= 2.0 raises when a non-numeric column is present.
	df.fillna(df.mean(numeric_only=True), inplace=True)

	return df