コード例 #1
0
ファイル: import_data.py プロジェクト: Sun-Kev/MACS30200proj
def run_pred_census_ln(subset_df, census_year):
    """
    Impute a race label for each teacher from their last name by running
    ``pred_census_ln`` on the rows that have a last name.

    Input:
        - subset_df: a dataframe that is a subset of teacher information;
          it must have a ``teacher_last`` column. Rows where that column
          is null are excluded before prediction.
        - census_year: forwarded unchanged as the third positional
          argument of ``pred_census_ln`` (presumably the census year of
          the model to use -- confirm against the library version).
    Output:
        - df: a dataframe with a single column, ``census_lastname``,
          holding the predicted race label with 'api' recoded to 'asian'
    """
    # Work on a copy so the prediction step cannot touch the caller's frame.
    has_last_name_df = subset_df[subset_df.teacher_last.notnull()].copy()
    df = pred_census_ln(has_last_name_df, 'teacher_last', census_year)

    # Generalized race categories: map the library's 'api' label to 'asian'.
    recode_dict = {'api': 'asian'}

    # Recode on the race Series only. A bare {old: new} mapping is
    # unambiguous for Series.replace, whereas DataFrame.replace(dict,
    # value=None) can be interpreted as per-column lookups (dict keys taken
    # as column names) depending on the pandas version, which would leave
    # the 'api' label untouched.
    census_lastname = df['race'].replace(recode_dict)

    # Keep only the (renamed) race column.
    return census_lastname.to_frame(name='census_lastname')
コード例 #2
0
def guessActorInfo(actor_name):
    """Guess an actor's gender and race from the name alone.

    Used as the fallback when no NNDB bio page is found (see the message
    printed below). The text before the first space is treated as the first
    name and passed to ``gender_detector.get_gender``; everything after the
    first space is treated as the last name and passed to
    ``pred_census_ln``. Both raw guesses are then translated through
    ``gender_table`` / ``race_table``; a guess missing from its table
    becomes "Others".

    Returns:
        A ``(gender, race)`` tuple of the translated values.
    """
    print("\t\tNNDB bio page not found...Using Python modules to guess")

    # Split once on the first space: head is the first name, tail the rest.
    first_name, _, last_name = actor_name.partition(" ")

    gender_guessed = gender_detector.get_gender(first_name)
    if gender_guessed == "andy":
        # An "andy" result is overridden with "Male"; a getGenders(first_name)
        # lookup used to be done here but is disabled.
        print("\t\tUsing getGenders utility")
        gender_guessed = "Male"
    gender = gender_table.get(gender_guessed, "Others")

    # pred_census_ln works on a dataframe, so wrap the single last name.
    surname_frame = pd.DataFrame([{'name': last_name}])
    race_guessed = pred_census_ln(surname_frame, 'name')["race"][0]
    race = race_table.get(race_guessed, "Others")

    print("\t\tGuess:(" + gender_guessed + "," + race_guessed + ")")
    return (gender, race)
コード例 #3
0
def predict_ethnicity_1(df: pd.DataFrame, name_attr: str) -> pd.DataFrame:
    """Return the last four columns of ``ec.pred_census_ln`` for ``df``.

    The prediction is run with ``year=2010`` on the column named by
    ``name_attr``. The last four columns of the result are relabelled
    'asian', 'black', 'hispanic', 'white' (presumably the per-group
    scores -- confirm the column order against the library) and re-indexed
    to match ``df``.

    Side effect: the 'race' column is dropped from the caller's ``df`` in
    place, so ``df`` must have that column by the time the prediction call
    returns (presumably added by ``pred_census_ln`` -- confirm).
    """
    predicted = ec.pred_census_ln(df, name_attr, year=2010)

    # Keep only the trailing four columns and give them fixed labels.
    scores = predicted.iloc[:, -4:]
    scores.columns = ['asian', 'black', 'hispanic', 'white']
    scores.index = df.index

    # Remove the 'race' column from the input frame itself.
    df.drop(columns='race', inplace=True)
    return scores
コード例 #4
0
#!/usr/bin/python
# -*- coding: utf-8 -*-

import pandas as pd

from ethnicolr import census_ln, pred_census_ln

# Three sample surnames, one record per name.
names = [{'name': surname} for surname in ('smith', 'zhang', 'jackson')]
df = pd.DataFrame(names)

# Show the input frame first.
print(df)

# Census lookup by last name: once with the default third argument,
# once with 2010 passed explicitly.
print(census_ln(df, 'name'))
print(census_ln(df, 'name', 2010))

# Model-based prediction from the same last-name column.
print(pred_census_ln(df, 'name'))
コード例 #5
0
logging.info("ending the get username program at " +
             str(datetime.datetime.now()))
logging.info("total: " + str(count))
# close must be *called*; a bare `myFile.close` only looks the method up and
# leaves the handle open. Presumably myFile is what wrote "tmp_" + filename,
# so it has to be flushed and closed before that file is read back below.
myFile.close()

#adding race prediction
logging.info("adding race prediction")
print("adding race prediction")
# Load only the columns needed for the output; read_csv already returns a
# DataFrame, so no further wrapping is required.
df = pd.read_csv("tmp_" + filename,
                 usecols=[
                     'username', 'firstname', 'lastname', 'groupname',
                     'recordingTime', 'num_record', 'num_groupuser'
                 ])
# NOTE(review): this shadows the built-in `dict` and is not used in the
# visible part of the script; kept in case later code relies on it.
dict = {}
# Predict race from the 'lastname' column and write the result to the final file.
result = pred_census_ln(df, 'lastname')
result.to_csv(filename, sep=',')
logging.info("finished race prediction")
print("finished race prediction")
print(filename)

#f.write(str(datetime.datetime.now())+","+str(count)+","+str(client.get_participants(channel,limit = 0).total)+","+channel[13:])
#f.close()

#adding to database

#filename = "dx_chain_user_first.csv"
#groupname = "dxchain"

#connect to database
print("connecting db")
コード例 #6
0
ファイル: name.py プロジェクト: jaemoonseok/Race_Predictor
import pandas as pd
import numpy as np
import csv
import sys
from ethnicolr import census_ln, pred_census_ln

# The input CSV path is the first command-line argument.
filepath = sys.argv[1]
file = pd.read_csv(filepath)

# Run the last-name prediction on the 'LAST NAME' column, passing 2010 as
# the third positional argument.
dataframe_stat = pred_census_ln(file, 'LAST NAME', 2010)

# Keep the two name columns plus the predicted label, exposing the label
# under the header 'ETHNICITY'.
refreshed_dataframe = dataframe_stat[['FIRST NAME', 'LAST NAME', 'race']].rename(
    columns={'race': 'ETHNICITY'})
refreshed_dataframe.to_csv('Consumer_Data_10394_Sample2.csv', index=False)