def impute_missing_age_values(ds: pd.DataFrame) -> pd.DataFrame:
    """Fill missing ``Age`` values with the rounded mean age of the row's group.

    Rows are grouped by the ``Sex`` and ``Family`` columns.  Every missing
    ``Age`` is replaced by the mean of the known ages in the same group,
    rounded to a whole number; known ages are left untouched.

    The per-row guess is aligned on the frame's own index.  The earlier
    implementation looked the guess up by the ``PassengerId`` *value*, which
    is only correct when the index labels happen to equal the passenger ids
    (with a default ``RangeIndex`` and 1-based ids it picked the guess of a
    different row and could raise ``KeyError`` on the last one).

    Args:
        ds: Frame containing ``Sex``, ``Family`` and ``Age`` columns.  It is
            modified in place.

    Returns:
        The same ``ds`` object with ``Age`` imputed.  Rows whose group has no
        known age at all keep ``NaN`` instead of raising during the integer
        cast of an undefined mean.
    """
    # transform() broadcasts each group's mean back onto the original rows,
    # so the result shares ds.index and no per-row lookup is needed.
    group_mean_age = ds.groupby(['Sex', 'Family'])['Age'].transform('mean').round()
    ds['Age'] = ds['Age'].fillna(group_mean_age)
    return ds
# In the previous lesson we learned about Series; a DataFrame can in fact be
# split column by column, each column being a Series.
stu_names = student_3.Name
print(stu_names)
# or, equivalently, with bracket access
stu_names = student_3['Name']
print(stu_names)

# Elements of the extracted Series can be indexed further to obtain a value.
print(stu_names['a'])

# Can an element of the DataFrame be obtained directly?
print(student_3.Name['a'])  # Yes, this works too.

# Elements of a DataFrame can be modified.
print(student_1)
student_1.Age = 20  # the entire column is set to 20
print(student_1)

student_1.ID = range(5)
print(student_1)

# NOTE(review): this is chained assignment (column access followed by item
# assignment). The pandas documentation warns that such a write may not reach
# the DataFrame (SettingWithCopyWarning / Copy-on-Write) -- confirm against the
# pandas version in use.
student_1.Name[0] = 'Kevin'
print(student_1)
# Please take note of the different modification approaches shown above.

# A DataFrame can be transposed like a matrix.
student_1 = student_1.T
print(student_1)

# Can a list or a Series be added to the DataFrame as a new column?
Math = [90, 80, 82, 100, 96]
# Example no. 3
# 0
# Replacement values obtained from the ``dp`` helper.
meanFare = dp.avgMethod("Fare")
replaceNanInEmvarked = dp.replaceNanInEmvarked1("Embarked")

# The corresponding values obtained from the ``dptest`` helper.
t_MeanAge = dptest.avgMethod("Age")
t_replaceNanInSex = dptest.replaceNanInSex("Sex")
t_meanFare = dptest.avgMethod("Fare")
t_replaceNanInEmvarked = dptest.replaceNanInEmvarked1("Embarked")

# --- testFrame: replace NaN column by column, then encode the categories ---
testFrame['PassengerId'] = testFrame['PassengerId'].replace(np.nan, 1)
testFrame['Pclass'] = testFrame['Pclass'].replace(np.nan, 3)
# Sex: fill NaN first, then map female -> 0 and male -> 1.
testFrame['Sex'] = (
    testFrame['Sex']
    .replace(np.nan, replaceNanInSex)
    .replace('female', 0)
    .replace('male', 1)
)
# Missing ages get the fixed value 29 (t_MeanAge is not used in this section).
testFrame['Age'] = testFrame['Age'].replace(np.nan, 29)
testFrame['Fare'] = testFrame['Fare'].replace(np.nan, meanFare)
# Cabin is left untouched: its NaN -> "unknown" replacement is disabled.
testFrame['SibSp'] = testFrame['SibSp'].replace(np.nan, 0)
testFrame['Parch'] = testFrame['Parch'].replace(np.nan, 0)
# Embarked: fill NaN first, then map S -> 1, C -> 2, Q -> 3.
testFrame['Embarked'] = (
    testFrame['Embarked']
    .replace(np.nan, replaceNanInEmvarked)
    .replace('S', 1)
    .replace('C', 2)
    .replace('Q', 3)
)

# --- trainFrame: same treatment (Survived is left as is) ---
trainFrame['PassengerId'] = trainFrame['PassengerId'].replace(np.nan, 1)
trainFrame['Pclass'] = trainFrame['Pclass'].replace(np.nan, 3)
trainFrame['Sex'] = (
    trainFrame['Sex']
    .replace(np.nan, replaceNanInSex)
    .replace('female', 0)
    .replace('male', 1)
)
# Example no. 4
# 0
def sex(x):
    """Return 0 for ``'male'`` and 1 for any other value."""
    if x == 'male':
        return 0
    return 1


def age(x):
    """Map an age to a score by range; values matching no range give 0.8."""
    if x <= 1:
        return 1
    if x <= 15:
        return 0.9
    if x < 20:
        return 0.8
    if x <= 30:
        return 0.5
    if x <= 50:
        return 0.3
    if x <= 60:
        return 0.6
    return 0.8


def pclass(x):
    """Return 0 for class 3, 0.5 for class 2 and 1 for anything else."""
    if x == 3:
        return 0
    if x == 2:
        return 0.5
    return 1


def fare(x):
    """Map a fare to a score by range; values matching no range give 1."""
    if x < 8:
        return 0
    if x < 15:
        return 0.3
    if x < 31:
        return 0.5
    if x < 100:
        return 0.7
    return 1


def embarked(x):
    """Return 0.5 for ``'S'``, 1 for ``'Q'`` and 0.7 for anything else."""
    if x == 'S':
        return 0.5
    if x == 'Q':
        return 1
    return 0.7


def name(x):
    """Score a name by the first title substring it contains, else 0."""
    if 'Mr.' in x:
        return 0
    if 'Dr.' in x:
        return 0.5
    if 'Master.' in x:
        return 0.6
    if ('Mrs.' in x) or ('Miss' in x) or ('Lady' in x) or ('Ms' in x):
        return 0.8
    return 0


def sibsp(x):
    """Return the value divided by 8."""
    return x / 8


def parch(x):
    """Return the value divided by 6."""
    return x / 6
print(trainFrame.describe())

# Each listed column is overwritten with the result of applying its scoring
# function element-wise.  The training frame is processed first, then the test
# frame, with the columns handled in the order given here.
_COLUMN_SCORERS = (
    ('Pclass', pclass),
    ('Sex', sex),
    ('Age', age),
    ('Fare', fare),
    ('Embarked', embarked),
    ('Name', name),
    ('SibSp', sibsp),
    ('Parch', parch),
)

for _frame in (trainFrame, testFrame):
    for _column, _scorer in _COLUMN_SCORERS:
        _frame[_column] = _frame[_column].apply(_scorer)