Exemple #1
0
# Column-name groups. `cat_columns2` is handed to DropColumns in
# `cat_pipeline`; the other two lists are not referenced in the visible code.
cat_columns2 = [
    "PassengerId",
    "Name",
    "Embarked",
]
cat_columns3 = [
    "Fare",
    "Cabin",
]
cat_columns4 = [
    "Cabin",
]


# Numeric preprocessing: fill missing values with the column median, then
# scale using quantile-based (outlier-robust) statistics.
num_pipeline = Pipeline(
    [
        # ("columns", DropColumns(cat_columns)),
        ("imputer", SimpleImputer(strategy="median")),
        # RobustScaler's parameters are keyword-only in current scikit-learn,
        # so the former positional call `RobustScaler(1, 99)` raises TypeError
        # there; on older releases it bound 1/99 to with_centering and
        # with_scaling instead of the quantile range. Pass the 1st-99th
        # percentile range explicitly.
        ("std_scaler", RobustScaler(quantile_range=(1.0, 99.0))),
    ]
)
# Categorical pipeline: its only active step removes the columns listed in
# `cat_columns2`. The imputing / one-hot steps below are disabled.
cat_pipeline = Pipeline(
    steps=[
        ("columns", DropColumns(cat_columns2)),
        # ("imputer_cat", SimpleImputer(strategy="most_frequent")),
        # ("cat3", OneHotEncoder(handle_unknown="ignore", sparse=False)),
    ]
)
# Single-step pipeline wrapping the project's AddFeaturesFromCat transformer
# (its behaviour is defined elsewhere and is not visible here).
cat_pipeline2 = Pipeline(steps=[("cabins", AddFeaturesFromCat())])
# Two-step pipeline: the project's GetDeck transformer followed by a dense
# one-hot encoding that ignores categories unseen during fit.
# NOTE(review): `sparse` was renamed `sparse_output` in scikit-learn 1.2 and
# later removed; switch the keyword when the project's minimum version allows.
cat_pipeline3 = Pipeline(
    steps=[
        ("decks", GetDeck()),
        ("decks2", OneHotEncoder(sparse=False, handle_unknown="ignore")),
    ]
)
Exemple #2
0
    ("cat3", OneHotEncoder(handle_unknown="ignore", sparse=False))
])
# Pipeline for the categorical columns that get transformed: currently only
# the project's GetDeck step is active; the encoder variants are disabled.
cat_transform_pipeline = Pipeline(
    steps=[
        ("cabin", GetDeck()),
        # ("decks", Combine()),
        # ("decks2", OneHotEncoder(handle_unknown="ignore", sparse=False)),
        # ("decks2", OrdinalEncoder()),
    ]
)
# Route each group of columns to its dedicated sub-pipeline.
transformer = ColumnTransformer(
    transformers=[
        ("num", num_pipeline, num_columns),
        (
            "num_transform",
            num_to_transform_pipeline,
            num_columns_to_transform,
        ),
        ("cat_stay", cat_stay_pipeline, cat_columns_stay),
        (
            "cat_transform",
            cat_transform_pipeline,
            cat_columns_to_transform,
        ),
    ]
)

# End-to-end preprocessing: drop `columns_to_drop` first, then apply the
# per-column-group transformer.
full_pipeline = Pipeline(
    steps=[
        ("1", DropColumns(columns_to_drop)),
        ("2", transformer),
    ]
)
# train_data.drop(["PassengerId","Name"], axis= 1, inplace= True)
#%%
# split = StratifiedShuffleSplit(n_splits=1,test_size=0.3,random_state=42)
# for train_index, test_index in split.split(train_data,train_data["Sex"]):
#     train_data_train = train_data.iloc[train_index]
#     val_data = train_data.iloc[test_index]

# Separate the "Survived" target from the features, then hold out 10% of the
# rows for validation with a fixed seed so the split is reproducible.
y_train_data = train_data["Survived"]
X_train_data = train_data.drop(columns=["Survived"])
X_train, X_val, y_train, y_val = train_test_split(
    X_train_data,
    y_train_data,
    test_size=0.1,
    random_state=42,
)
#%%
Exemple #3
0
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.impute import KNNImputer
from sklearn.impute import SimpleImputer
from sklearn.model_selection import GridSearchCV
from lightgbm import LGBMClassifier
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.neural_network import MLPClassifier
from titanic_functions import plot_learning_curves, get_num_of_cabins, get_real_fare, get_deck, DropColumns, AddFeaturesFromCat, GetDeck, print_results

# Absolute path of the directory that contains this script.
_this_file = os.path.abspath(__file__)
here = os.path.dirname(_this_file)

# Small toy frame (7 rows, 4 columns) used to experiment with the pipelines.
# NOTE(review): 'np.nan' in col2 is the literal string, not a missing value;
# confirm that is intended.
d = dict(
    col1=[1, 10, 5, 2, 17, 3, 27],
    col2=['a', 'b', 'np.nan', 'c', 'd', 'e', 'f'],
    col3=['sd', 'asdb', 'asdas', 'cgh', 'fgh', 'fgh', 'werf'],
    col4=[0.2, 0.10, 0.35, 0.52, 0.6917, 30, 0.627],
)
data = pd.DataFrame(d)
data  # bare expression: displays the frame when run as a notebook cell
#%%
# Dense one-hot encoding pipeline; defined but not wired into the
# transformer below (its entry is commented out).
pipeline1 = Pipeline(
    steps=[("1", OneHotEncoder(sparse=False, handle_unknown="ignore"))]
)
# Pipeline whose single step is the project's DropColumns for "col1".
pipeline2 = Pipeline(steps=[("1", DropColumns(["col1"]))])
transformer = ColumnTransformer(
    transformers=[
        # ("p1", pipeline1, data.columns),
        ("p2", pipeline2, data.columns),
    ]
)
full = Pipeline(steps=[("f1", transformer)])
# Fit on the toy frame and inspect the container type of the result.
data_prepared = full.fit_transform(data)
type(data_prepared)