# Attach the oil price range and the two exchange-rate series to the
# `google` frame as additional feature columns.
google['oil_high'] = oil['High']
google['oil_low'] = oil['Low']
google['eur_myr'] = eur_myr['Unnamed: 1']
google['usd_myr'] = usd_myr['Unnamed: 1']

# In[4]:

# Parse the first column as datetimes and keep it as a plain Python list.
date_ori = pd.to_datetime(google.iloc[:, 0]).tolist()
google.head()

# In[5]:

# `minmax` is fitted on the column at position 4 only, reshaped to a single
# feature column as the scaler requires 2-D input.
# NOTE(review): presumably this is the target column, kept so predictions can
# be inverse-transformed later -- confirm against the rest of the notebook.
minmax = MinMaxScaler().fit(google.iloc[:, 4].values.reshape(-1, 1))

# Scale every column except the first to [0, 1] with a separate scaler and
# wrap the resulting array in a DataFrame.
df_log = pd.DataFrame(
    MinMaxScaler().fit_transform(google.iloc[:, 1:].astype('float32'))
)
df_log.head()

# In[6]:

# Reduce the scaled features with the project's autoencoder helper.
# NOTE(review): the positional arguments (4, 0.001, 128, 100) are not
# documented in this chunk; check `autoencoder.reducedimension` for their
# meaning.
thought_vector = autoencoder.reducedimension(df_log.values, 4, 0.001, 128, 100)

# In[7]:

thought_vector.shape

# In[8]:

# Hyper-parameters used by later cells.
num_layers, size_layer, timestamp, epoch = 1, 128, 5, 500
from IPython.display import display
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder

# Load the Titanic train/test splits from the data directory.
data_path = 'data/'
df_train = pd.read_csv(data_path + 'titanic_train.csv')
df_test = pd.read_csv(data_path + 'titanic_test.csv')

# Keep the label and the test ids aside, then strip them from the features
# and stack train + test into one frame.
train_Y = df_train['Survived']
ids = df_test['PassengerId']
df_train = df_train.drop(columns=['PassengerId', 'Survived'])
df_test = df_test.drop(columns=['PassengerId'])
df = pd.concat([df_train, df_test])
df.head()


# Check the missing-value status of a DataFrame
def na_check(df_data):
    """Display the columns of ``df_data`` that contain missing values.

    For each column the percentage of null entries is computed; columns with
    no missing values are dropped, the rest are sorted from highest to lowest
    percentage, and the first ten rows are shown via ``display`` under the
    heading 'Missing Ratio'.  Nothing is returned.
    """
    missing_pct = df_data.isnull().sum() / len(df_data) * 100
    fully_present = missing_pct[missing_pct == 0].index
    missing_pct = missing_pct.drop(fully_present).sort_values(ascending=False)
    display(pd.DataFrame({'Missing Ratio': missing_pct}).head(10))


na_check(df)

# The following cells In[3]~In[10] are just one set of feature engineering
# for the Titanic prediction, and the parameters were tuned against this set.
# If a different feature-engineering scheme is used, the parameters in In[10]
# have to be tuned again.
# Sex: map directly to male 0, female 1