def impute_missing_age_values(ds: pd.DataFrame) -> pd.DataFrame:
    """Fill missing ``Age`` values with the rounded mean age of the row's group.

    Rows are grouped by the ``Sex`` and ``Family`` columns; every row whose
    ``Age`` is NaN receives the rounded mean ``Age`` of its group. Rows that
    already have an age are left untouched.

    Args:
        ds: Frame with ``Sex``, ``Family`` and ``Age`` columns. It is modified
            in place.

    Returns:
        The same ``ds`` object, with ``Age`` imputed.

    Notes:
        If a group contains no known age at all, its rows keep NaN instead of
        raising during an integer cast.
    """
    # transform('mean') broadcasts each group's mean back onto the rows of
    # that group, so the guess is aligned with ds by index. Looking the guess
    # up by PassengerId (as was done before) only works when the index happens
    # to equal PassengerId.
    group_mean_age = (
        ds.groupby(['Sex', 'Family'])['Age'].transform('mean').round()
    )
    ds['Age'] = ds['Age'].fillna(group_mean_age)
    return ds
# In the previous lesson we learned about Series; each column of a DataFrame
# can be pulled out as a Series.
# (student_1 and student_3 are DataFrames created earlier in this script.)
stu_names = student_3.Name
print(stu_names)

# Or, equivalently, with bracket access
stu_names = student_3['Name']
print(stu_names)

# Elements of the extracted Series can be indexed further to get a value
print(stu_names['a'])

# Can an element of a DataFrame be obtained directly?
print(student_3.Name['a'])  # Yes, that works too.

# Elements of a DataFrame can be modified
print(student_1)
student_1.Age = 20  # the whole column is set to 20
print(student_1)
student_1.ID = range(5)
print(student_1)
# Write a single cell with one .loc call. The chained form
# `student_1.Name[0] = ...` may assign to a temporary Series and silently
# leave the DataFrame unchanged.
# NOTE(review): assumes student_1 has the row label 0 in its index — confirm.
student_1.loc[0, 'Name'] = 'Kevin'
print(student_1)
# Take note of the different ways of modifying data shown above

# A DataFrame can be transposed like a matrix
student_1 = student_1.T
print(student_1)

# Can a list or a Series be added to a DataFrame as a new column?
Math = [90, 80, 82, 100, 96]
# Replacement values obtained from the `dp` helper; these are the ones used
# for the NaN fills below.
meanFare = dp.avgMethod("Fare")
replaceNanInEmvarked = dp.replaceNanInEmvarked1("Embarked")

# The same quantities obtained from the `dptest` helper. They are computed
# here but not used by the statements in this section.
t_MeanAge = dptest.avgMethod("Age")
t_replaceNanInSex = dptest.replaceNanInSex("Sex")
t_meanFare = dptest.avgMethod("Fare")
t_replaceNanInEmvarked = dptest.replaceNanInEmvarked1("Embarked")

# --- testFrame: replace NaN, then turn the string labels into integers ---
testFrame['PassengerId'] = testFrame['PassengerId'].replace(np.nan, 1)
testFrame['Pclass'] = testFrame['Pclass'].replace(np.nan, 3)
# NaN must be replaced first so the substituted label is encoded as well.
testFrame['Sex'] = (
    testFrame['Sex']
    .replace(np.nan, replaceNanInSex)
    .replace('female', 0)
    .replace('male', 1)
)
testFrame['Age'] = testFrame['Age'].replace(np.nan, 29)
testFrame['Fare'] = testFrame['Fare'].replace(np.nan, meanFare)
# Cabin is left as-is; its NaN replacement is disabled:
# testFrame.Cabin = testFrame.Cabin.replace(np.nan,"unknown")
testFrame['SibSp'] = testFrame['SibSp'].replace(np.nan, 0)
testFrame['Parch'] = testFrame['Parch'].replace(np.nan, 0)
testFrame['Embarked'] = (
    testFrame['Embarked']
    .replace(np.nan, replaceNanInEmvarked)
    .replace('S', 1)
    .replace('C', 2)
    .replace('Q', 3)
)

# --- trainFrame: same treatment for the columns handled in this section ---
trainFrame['PassengerId'] = trainFrame['PassengerId'].replace(np.nan, 1)
# Survived is left as-is; its NaN replacement is disabled:
# trainFrame.Survived = trainFrame.Survived.replace(np.nan,1)
trainFrame['Pclass'] = trainFrame['Pclass'].replace(np.nan, 3)
trainFrame['Sex'] = (
    trainFrame['Sex']
    .replace(np.nan, replaceNanInSex)
    .replace('female', 0)
    .replace('male', 1)
)
# Hand-tuned scoring functions, one per feature column. Each takes a single
# cell value and returns a numeric score.
# NOTE(review): `sex`, `embarked` and `name` compare against raw string
# values; if a column was already mapped to integers upstream, every row takes
# the fallback branch — confirm the input encoding.


def sex(x):
    """Return 0 for 'male' and 1 for any other value."""
    return 0 if x == 'male' else 1


def age(x):
    """Return the score of the age band that contains *x*.

    A value for which every comparison is false (e.g. NaN) falls through to
    the last band and scores 0.8.
    """
    if x <= 1:
        return 1
    if x <= 15:
        return 0.9
    if x < 20:
        return 0.8
    if x <= 30:
        return 0.5
    if x <= 50:
        return 0.3
    if x <= 60:
        return 0.6
    return 0.8


def pclass(x):
    """Return 0 for class 3, 0.5 for class 2 and 1 for anything else."""
    if x == 3:
        return 0
    if x == 2:
        return 0.5
    return 1


def fare(x):
    """Return the score of the fare band that contains *x*."""
    if x < 8:
        return 0
    if x < 15:
        return 0.3
    if x < 31:
        return 0.5
    if x < 100:
        return 0.7
    return 1


def embarked(x):
    """Return 0.5 for 'S', 1 for 'Q' and 0.7 for anything else."""
    if x == 'S':
        return 0.5
    if x == 'Q':
        return 1
    return 0.7


def name(x):
    """Score a passenger name by the title it contains.

    The checks run in order, so a name matching several titles gets the
    score of the first match. Names with no recognised title score 0.
    """
    if 'Mr.' in x:
        return 0
    if 'Dr.' in x:
        return 0.5
    if 'Master.' in x:
        return 0.6
    if any(title in x for title in ('Mrs.', 'Miss', 'Lady', 'Ms')):
        return 0.8
    return 0


def sibsp(x):
    """Scale a SibSp count by dividing it by 8."""
    return x / 8


def parch(x):
    """Scale a Parch count by dividing it by 6."""
    return x / 6


# Column name -> scoring function, in the order the columns are rewritten.
_FEATURE_ENCODERS = (
    ('Pclass', pclass),
    ('Sex', sex),
    ('Age', age),
    ('Fare', fare),
    ('Embarked', embarked),
    ('Name', name),
    ('SibSp', sibsp),
    ('Parch', parch),
)


def _encode_features(frame):
    """Replace each listed column of *frame* with its scored values, in place."""
    for column, encoder in _FEATURE_ENCODERS:
        frame[column] = frame[column].apply(encoder)


# Summary statistics are printed before any column is rewritten.
print(trainFrame.describe())

_encode_features(trainFrame)
_encode_features(testFrame)