# Train a random-forest species classifier on the "penguins_cleaned" dataset
# and persist both the fitted label encoders and the fitted model as pickles.
import pickle

import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder

data = pd.read_csv('https://raw.githubusercontent.com/dataprofessor/code/master/streamlit/part3/penguins_cleaned.csv')

# Work on a copy so the raw frame in `data` keeps its original string labels.
df = data.copy()

# One encoder per categorical column, so each mapping can be saved and
# reused independently of the others.
encoder_species = LabelEncoder()
encoder_island = LabelEncoder()
encoder_sex = LabelEncoder()

df['species'] = encoder_species.fit_transform(df['species'])
df['island'] = encoder_island.fit_transform(df['island'])
df['sex'] = encoder_sex.fit_transform(df['sex'])


def encoder_save():
    """Pickle the three fitted label encoders into the working directory.

    Writes ``penguins_enc_sp.pkl`` (species), ``penguins_enc_is.pkl``
    (island) and ``penguins_enc_sx.pkl`` (sex). Each file is opened in a
    ``with`` block so the handle is flushed and closed even if
    ``pickle.dump`` raises.
    """
    targets = (
        (encoder_species, 'penguins_enc_sp.pkl'),
        (encoder_island, 'penguins_enc_is.pkl'),
        (encoder_sex, 'penguins_enc_sx.pkl'),
    )
    for encoder, path in targets:
        with open(path, 'wb') as fh:
            pickle.dump(encoder, fh)


encoder_save()

# Features are every column except the target; the target is the encoded species.
x = df.drop('species', axis=1)
y = df['species']

model = RandomForestClassifier()
model.fit(x, y)

# Close the model file deterministically instead of relying on garbage collection.
with open('penguins_model.pkl', 'wb') as fh:
    pickle.dump(model, fh)

print('finished')
# Multiple Linear Regression
#
# Data preprocessing: load the dataset, one-hot encode the categorical
# column at index 3, drop one dummy column, and split into train/test sets.

# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

# Importing the dataset
# X takes every column except the last; y takes the column at index 4.
dataset = pd.read_csv('50_Startups.csv')
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, 4].values

# Encoding categorical data
# Column 3 is first mapped to integer codes, then expanded into dummy columns.
labelencoder = LabelEncoder()
X[:, 3] = labelencoder.fit_transform(X[:, 3])

# `OneHotEncoder(categorical_features=...)` was removed from scikit-learn;
# selecting the column is now ColumnTransformer's job. As with the old API,
# the encoded columns come first, followed by the untouched remaining columns.
# NOTE: ColumnTransformer fits a clone, so the fitted encoder is available as
# `column_transformer.named_transformers_['onehot']`, not `onehotencoder`.
onehotencoder = OneHotEncoder()
column_transformer = ColumnTransformer(
    [('onehot', onehotencoder, [3])],
    remainder='passthrough',
    sparse_threshold=0,  # always return a dense array (was `.toarray()`)
)
# Cast to float to match the numeric array the old encoder produced.
X = np.asarray(column_transformer.fit_transform(X)).astype(float)

# Avoiding the Dummy Variable Trap
# Drop the first dummy column; it is implied by the remaining ones.
X = X[:, 1:]

# Splitting the dataset into the Training set and Test set
# `sklearn.cross_validation` was removed; train_test_split now lives in
# `sklearn.model_selection`.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)