# Deleting outliers as identified by EDA train.drop(train[(train['grlivarea'] > 4000) & (train['saleprice'] < 300000)].index, inplace=True) train_objs_num = len(train) y = train['saleprice'] dataset = pd.concat(objs=[train.drop(columns=['saleprice']), test], axis=0, ignore_index=True) all_data = dataset.copy() all_data.shape import Clean, Feature_Engineering, Simple_Stacking all_data = Clean.model(all_data, train_objs_num) all_data = Feature_Engineering.model(all_data) ## Apply Log tranformation to target variable as it is right skewed y = np.log1p(y) f_train = all_data[:train_objs_num] f_test = all_data[train_objs_num:] predictions = Simple_Stacking.model(f_train, y, f_test) final_predictions = predictions submission = pd.DataFrame({ 'Id': sub_index, 'SalePrice': final_predictions.astype(float) })