# Load the non-category feature columns of train.csv / test.csv, using the
# column -> dtype mapping from the project-local `dtypes` module, and replace
# missing values with 0.
import gc
import time

import numpy as np
import pandas as pd
# from sklearn.metrics import roc_auc_score
from sklearn.preprocessing import LabelEncoder

from dtypes import dtypes

gc.enable()

# Every column whose declared dtype is not 'category'.  Strings are compared
# with `!=`: an identity test (`is not`) only works when the interpreter
# happens to intern both strings and raises a SyntaxWarning on Python 3.8+.
num_types = {
    name: dtype for name, dtype in dtypes.items() if dtype != 'category'
}
num_columns = list(num_types)

print('Load numeric features')
train = pd.read_csv(
    'train.csv',
    dtype=num_types,
    usecols=num_columns,
    low_memory=True,
    nrows=None,
)

# 'HasDetections' is dropped from the column list for the test read only and
# then put back, which leaves it as the last entry of `num_columns`.
num_columns.remove('HasDetections')
test = pd.read_csv(
    'test.csv',
    dtype=num_types,
    usecols=num_columns,
    low_memory=True,
    nrows=None,
)
num_columns += ['HasDetections']

# Replace missing values with 0 in both frames.
train = train.replace(np.nan, 0)
test = test.replace(np.nan, 0)
# Load the category feature columns of train.csv / test.csv, using the
# column -> dtype mapping from the project-local `dtypes` module, and replace
# missing values with 0.
import gc
import time

import numpy as np
import pandas as pd
# from sklearn.metrics import roc_auc_score
from keras.layers import Dense, Embedding
from keras.models import Sequential
from sklearn.preprocessing import LabelEncoder

from dtypes import dtypes

gc.enable()

# Every column whose declared dtype is 'category'.  Strings are compared
# with `==`: an identity test (`is`) only works when the interpreter happens
# to intern both strings and raises a SyntaxWarning on Python 3.8+.
cat_types = {
    name: dtype for name, dtype in dtypes.items() if dtype == 'category'
}
cat_columns = list(cat_types)

# 'MachineIdentifier' is excluded from the columns that get loaded.
# `remove` raises ValueError if it is not a category column; `pop` does not.
cat_columns.remove('MachineIdentifier')
cat_types.pop('MachineIdentifier', None)

print('Load category features')
train = pd.read_csv(
    'train.csv',
    dtype=cat_types,
    usecols=cat_columns,
    low_memory=True,
    nrows=None,
)
test = pd.read_csv(
    'test.csv',
    dtype=cat_types,
    usecols=cat_columns,
    low_memory=True,
    nrows=None,
)

print('Transform category features')
# Counter and total initialised here; not used within this section.
col = 1
total_cols = len(cat_columns)

# Replace missing values with 0 in both frames.
# NOTE(review): how `replace(np.nan, 0)` treats category-dtype columns
# depends on the pandas version -- confirm against the version in use.
train = train.replace(np.nan, 0)
test = test.replace(np.nan, 0)