Python categorical Exemples, scikits.statsmodels.api.categorical Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : py_stats_analysis.py Projet : along1x/r_vs_py

def age_design(indices):
  """Assemble the design matrix for the rows selected by ``indices``.

  Reads the module-level ``hrdat`` data and row count ``n``.  The columns
  are, in order: a prepended constant, the dummy columns for 'sex', 'educ'
  and 'PTFT', then age and age squared.
  """
  # The first two columns of each sm.categorical result are discarded
  # (presumably the raw variable and the reference level -- confirm
  # against the sm.categorical documentation).
  factor_blocks = [sm.categorical(hrdat[col][indices])[:,2:]
                   for col in ('sex', 'educ', 'PTFT')]
  age = hrdat['age'].reshape(n,1)[indices,:]
  age_squared = (hrdat['age']**2).reshape(n,1)[indices,:]
  stacked = np.hstack(factor_blocks + [age, age_squared])
  return sm.add_constant(stacked, prepend = True)

Exemple #2

0

Afficher le fichier

def age_design(indices):
  """Return the design matrix restricted to the rows in ``indices``.

  Uses the module-level ``hrdat`` and ``n``.  Column order: constant
  (prepended), dummies for 'sex', 'educ', 'PTFT', age, age squared.
  """
  def dummies(field):
    # Encode the selected rows of one factor and drop the first two columns
    # of the encoding (presumably raw values + reference level -- confirm).
    return sm.categorical(hrdat[field][indices])[:,2:]

  def as_column(values):
    # Reshape the full-length vector to (n, 1) first, then select the rows.
    return values.reshape(n,1)[indices,:]

  pieces = (dummies('sex'),
            dummies('educ'),
            dummies('PTFT'),
            as_column(hrdat['age']),
            as_column(hrdat['age']**2))
  return sm.add_constant(np.hstack(pieces), prepend = True)

Exemple #3

0

Afficher le fichier

Fichier : py_stats_analysis.py Projet : flashus/r_vs_py

def age_design(indices):
    """Build the design matrix for the subset of rows given by ``indices``.

    Relies on the module-level ``hrdat`` and ``n``.  The result holds a
    prepended constant, the dummy columns of "sex", "educ" and "PTFT",
    and finally age and age squared.
    """
    columns = []

    # Factor variables: encode the selected rows, then discard the first two
    # columns of each encoding (presumably the raw variable and the
    # reference level -- confirm against sm.categorical).
    for factor in ("sex", "educ", "PTFT"):
        encoded = sm.categorical(hrdat[factor][indices])
        columns.append(encoded[:, 2:])

    # Continuous terms: reshape to a column over all n rows, then subset.
    age = hrdat["age"]
    for series in (age, age ** 2):
        columns.append(series.reshape(n, 1)[indices, :])

    design = np.hstack(columns)
    return sm.add_constant(design, prepend=True)

Exemple #4

0

Afficher le fichier

Fichier : py_stats_analysis.py Projet : flashus/r_vs_py

# Baseline model: weighted least squares of log(hrwage) on an indicator for
# sex == 2 plus a prepended constant, weighted by A_ERNLWT.
X1 = hrdat["sex"] == 2
X1 = sm.add_constant(X1, prepend=True)
model1 = sm.WLS(np.log(hrdat["hrwage"]), X1, weights=hrdat["A_ERNLWT"])
results1 = model1.fit()

# Parenthesized so the line parses on Python 3 and prints the same on Python 2.
print(results1.summary())


# Pre-build the model-matrix components reused by the more complicated models.
#   dat_mat   : variable name -> design columns for that variable
#   dat_names : variable name -> list of labels for those columns
n = len(hrdat)
factor_vars = ["sex", "educ", "PTFT", "ind", "occ", "marstat", "GEDIV", "race", "hispanic", "disabled"]
dat_mat = {}
dat_names = {}

for name in factor_vars:
    encoded, level_map = sm.categorical(hrdat[name], dictnames=True)
    # Discard the first two columns of the encoding (presumably the raw
    # variable and the reference level -- confirm against sm.categorical).
    dat_mat[name] = encoded[:, 2:]
    # Label the remaining columns: sorted level values minus the first one,
    # each prefixed with the variable name.
    fact_names = sorted(level_map.values())[1:]
    dat_names[name] = [name + str(val) for val in fact_names]

# Continuous terms and the intercept, each stored as an (n, 1) column.
dat_mat["age"] = hrdat["age"].reshape(n, 1)
dat_names["age"] = ["age"]
dat_mat["age^2"] = (hrdat["age"] ** 2).reshape(n, 1)
dat_names["age^2"] = ["age^2"]
dat_mat["const"] = np.ones((n, 1))
dat_names["const"] = ["const"]

# helper function to spit out design matrix and names

Exemple #5

0

Afficher le fichier

Fichier : py_stats_analysis.py Projet : along1x/r_vs_py

# Simplest model: WLS of log(hrwage) on a sex == 2 indicator with a
# prepended constant, using A_ERNLWT as the observation weights.
X1 = hrdat['sex']==2
X1 = sm.add_constant(X1, prepend=True)
model1 = sm.WLS(np.log(hrdat['hrwage']), X1, weights = hrdat['A_ERNLWT'])
results1 = model1.fit()

# print() with one argument behaves the same on Python 2 and Python 3;
# the bare print statement is a SyntaxError on Python 3.
print(results1.summary())




#More complicated model, log(hrwage)~sex+educ+age+PTFT
n = len(hrdat)
logwage = np.log(hrdat['hrwage'])
w = hrdat['A_ERNLWT']

# Each factor is dummy-encoded and its first two columns are dropped
# (presumably the raw variable and the reference level -- confirm against
# sm.categorical); age enters as a single (n, 1) column.
X2 = np.hstack((sm.categorical(hrdat['sex'])[:,2:],
                sm.categorical(hrdat['educ'])[:,2:],
                hrdat['age'].reshape(n,1),
                sm.categorical(hrdat['PTFT'])[:,2:]))

X2 = sm.add_constant(X2, prepend=True)
model2 = sm.WLS(logwage, X2, weights = w)
results2 = model2.fit()

# Parenthesized so the line is valid on Python 3 as well as Python 2.
print(results2.summary())



#Now include ind and occ (industry and occupation codings)
X2_5 = np.hstack((sm.categorical(hrdat['sex'])[:,2:],
                    sm.categorical(hrdat['educ'])[:,2:],

Exemple #6

0

Afficher le fichier

def getInstructionIdentifier(array_str, array_nr, str):
    """Return the identifier paired with the first entry equal to ``str``.

    ``array_str`` and ``array_nr`` are parallel numpy arrays: the value
    returned is ``array_nr[i]`` for the first ``i`` whose ``array_str``
    entry equals ``str``.  When nothing matches, an error message is
    printed and the sentinel -100 is returned.
    """
    # Walk every element, including the last one (a bound of size - 1 would
    # silently skip it).
    for position, candidate in enumerate(array_str.flat):
        if candidate == str:
            return array_nr[position]
    print("Error could not find instruction")
    return -100


def column(matrix, i):
    """Return a list holding element ``i`` of every row in ``matrix``."""
    picked = []
    for row in matrix:
        picked.append(row[i])
    return picked


# Turn every instruction string into an integer identifier: encode the
# strings with categorical(), then take the index of the largest entry in
# each row.  (An earlier variant read "input/input_train.csv" here.)
X_Dictionary_str = numpy.genfromtxt("input/input_Dict.csv", dtype=str)
X_Dictionary = categorical(X_Dictionary_str, drop=True).argmax(1)

# The training inputs are the dictionary itself rather than a separately
# loaded and encoded train file.
X_train_str, X_train = X_Dictionary_str, X_Dictionary

# Target power and performance values
Y_train = numpy.loadtxt("input/input_target.csv", delimiter=",")

# Test inputs; X_test starts as zeros, one slot per test string.
X_test_str = numpy.genfromtxt("input/input_test.csv", dtype=str)
X_test = numpy.zeros(X_test_str.size)
for x in range(0, X_test_str.size - 1):

Exemple #7

0

Afficher le fichier

def getInstructionIdentifier(array_str, array_nr, str):
    """Return the identifier paired with the first entry equal to ``str``.

    ``array_str`` and ``array_nr`` are parallel numpy arrays: the value
    returned is ``array_nr[x]`` for the first ``x`` whose ``array_str``
    entry equals ``str``.  When nothing matches, two diagnostic lines are
    printed (the last index examined, then an error message) and None is
    returned.
    """
    # Stays -1 when array_str is empty so the diagnostic below never hits an
    # unbound name.
    x = -1
    # Visit every element, including the last one (a bound of size - 1 would
    # silently skip it).
    for x, candidate in enumerate(array_str.flat):
        if candidate == str:
            return array_nr[x]
    # Single-argument print() calls produce the same output on Python 2 and 3.
    print("%s %d" % ("instruction number is ", x))
    print("Error could not find instruction")
    return None


def column(matrix, i):
    """Collect entry ``i`` from each row of ``matrix`` into a new list."""
    return list(map(lambda row: row[i], matrix))


# Turn every instruction string into an integer identifier: encode with
# categorical(), then take the index of the largest entry in each row.
X_Dictionary_str = numpy.genfromtxt("input/input_train.csv", dtype=str)
X_Dictionary = categorical(X_Dictionary_str, drop=True)
X_Dictionary = X_Dictionary.argmax(1)

# The training inputs are loaded from the same file and encoded the same way.
X_train_str = numpy.genfromtxt("input/input_train.csv", dtype=str)
X_train = categorical(X_train_str, drop=True)
X_train = X_train.argmax(1)

# Target power and performance values
Y_train = numpy.loadtxt("input/input_target.csv", delimiter=",")

# Test inputs: map each test string to the identifier of the matching
# training string.
X_test_str = numpy.genfromtxt("input/input_test.csv", dtype=str)
X_test = numpy.zeros(X_test_str.size)
# The range covers every index so the final test entry is encoded too,
# instead of being left at its initial 0.
for x in range(X_test_str.size):
    X_test[x] = getInstructionIdentifier(X_train_str, X_train, X_test_str[x])
#Float

Exemple #8

0

Afficher le fichier

# First model: WLS of log(hrwage) on an indicator for sex == 2 plus a
# prepended constant; A_ERNLWT supplies the weights.
X1 = hrdat['sex']==2
X1 = sm.add_constant(X1, prepend=True)
model1 = sm.WLS(np.log(hrdat['hrwage']), X1, weights = hrdat['A_ERNLWT'])
results1 = model1.fit()

# Function-call form: valid on Python 3, same output on Python 2.
print(results1.summary())




#More complicated model, log(hrwage)~sex+educ+age+PTFT
n = len(hrdat)
logwage = np.log(hrdat['hrwage'])
w = hrdat['A_ERNLWT']

# Factors are dummy-encoded with the first two columns of each encoding
# dropped (presumably raw variable + reference level -- confirm against
# sm.categorical); age is added as one (n, 1) column.
X2 = np.hstack((sm.categorical(hrdat['sex'])[:,2:],
                sm.categorical(hrdat['educ'])[:,2:],
                hrdat['age'].reshape(n,1),
                sm.categorical(hrdat['PTFT'])[:,2:]))

X2 = sm.add_constant(X2, prepend=True)
model2 = sm.WLS(logwage, X2, weights = w)
results2 = model2.fit()

# Function-call form: valid on Python 3, same output on Python 2.
print(results2.summary())



#Now include ind and occ (industry and occupation codings)
X2_5 = np.hstack((sm.categorical(hrdat['sex'])[:,2:],
                    sm.categorical(hrdat['educ'])[:,2:],