# Ejemplo n.º 1
import numpy as np
import pandas as pd
import gc
import dataproc as dproc
from sklearn.preprocessing import OneHotEncoder

# Setup for the feature-extraction example: load project parameters and
# metadata, then build the training feature table via the project-local
# `dataproc` helpers.
do_prediction = False  # flag, not consumed in this excerpt — presumably gates test-set scoring
loaded_test = False    # flag, not consumed in this excerpt — presumably tracks test-feature load state
########################### Data and Parameters Import ##########################
# Class-id mapping, label column names, class list and per-class weights
# come from the project configuration helper.
target_map, label_features, all_classes, all_class_weights \
    = dproc.getDataParameters()

train_meta, test_meta_data = dproc.getMetaData()
train = pd.read_csv('input/training_set.csv')
# perpb=False: presumably disables per-passband feature computation — TODO confirm in dataproc
train_full, train_features = dproc.getFullData(train, train_meta, perpb=False)
# Raw light-curve table is no longer needed once features are built; free it.
del train
gc.collect()
#train_full[['object_id', 'period']].to_csv("train_periods.csv", index=False)
#target_id list: will be used in one hot encoding of labels
all_clmap_vals = np.array(list(target_map.values()))
print("Train Feats: {0}".format(train_features))
print("Train Data All COLS: {0}".format(train_full.columns))

# Per-passband column names: one entry per base feature and passband index
# (0-5), e.g. 'period--0' ... 'Eta_e--5', preceded by the object identifier.
calc_feats = ['period', 'power', 'Eta_e']
label_lc_feats = ['object_id'] + [
    feat + '--' + str(band) for feat in calc_feats for band in range(6)
]
# Ejemplo n.º 2
import dataproc as dproc
from keras import regularizers
from sklearn.model_selection import StratifiedKFold
import numpy as np
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras import metrics
import seaborn as sns
#import matplotlib.pyplot as plt   
import pandas as pd 

# Autoencoder example: load the training feature table and define the
# encoder half of a dense autoencoder over the extracted features.
#
# FIX: Input, Dense and Dropout were referenced below but never imported,
# so the original snippet raised NameError at model-definition time.
# keras is already a dependency of this script (see the imports above).
from keras.layers import Input, Dense, Dropout

# Class-id mapping, label column names, class list and per-class weights
# come from the project configuration helper.
target_map, label_features, all_classes, all_class_weights \
    = dproc.getDataParameters()

train_meta, test_meta_data = dproc.getMetaData()
train_full = pd.read_csv('input/training_set.csv')
train_full, train_features = dproc.getFullData(ts_data=train_full,
                                               meta_data=train_meta)

# Impute missing values with per-column means so the network sees no NaNs.
train_mean = train_full.mean(axis=0)
train_full.fillna(train_mean, inplace=True)

# this is the size of our encoded representations
encoding_dim = 64

# this is our input placeholder
input_layer = Input(shape=(len(train_features),))
# "encoded" is the encoded representation of the input: a funnel of
# progressively narrower dense layers, each followed by heavy dropout
# for regularization (256 -> 128 -> 64 units).
encoded = Dense(256, activation='relu')(input_layer)
encoded = Dropout(0.5)(encoded)
encoded = Dense(128, activation='relu')(encoded)
encoded = Dropout(0.5)(encoded)
encoded = Dense(64, activation='relu')(encoded)
encoded = Dropout(0.5)(encoded)
# Ejemplo n.º 3
# LightGBM training prep: load features, extract labels and build the
# per-class weight map used for the competition metric.
#
# FIX: the original snippet called gc.enable() and pd.read_csv() without
# importing `gc` or `pandas`, raising NameError — both imports added below.
import gc
import glob
import itertools
import pickle
import gzip

import lightgbm as lgb
import pandas as pd
import seaborn as sns
from sklearn.preprocessing import StandardScaler
#from catboost import Pool, CatBoostClassifier

import dataproc as dproc

gc.enable()

# Class-id mapping, label column names, class list and per-class weights
# come from the project configuration helper.
target_map, label_features, all_classes, all_class_weights \
    = dproc.getDataParameters()
train_meta, test_meta_data = dproc.getMetaData()
train = pd.read_csv('training_set.csv')
train_full, train_features = dproc.getFullData(train, train_meta)

if 'target' in train_full:
    y = train_full['target']
#    del full_train['target']
#    del full_train['target_id']
# NOTE(review): `y` is only bound when a 'target' column exists; the line
# below assumes training data and would raise NameError otherwise — confirm
# this script is never run on unlabeled input.
classes = sorted(y.unique())

# Taken from Giba's topic : https://www.kaggle.com/titericz
# https://www.kaggle.com/c/PLAsTiCC-2018/discussion/67194
# with Kyle Boone's post https://www.kaggle.com/kyleboone
# Every class weighs 1 except classes 64 and 15, which weigh 2.
class_weight = {c: 1 for c in classes}
for c in [64, 15]:
    class_weight[c] = 2

print('Unique classes : ', classes)
# Ejemplo n.º 4
# Training-driver setup: load project parameters and metadata, build the
# training feature table, and report the extracted feature names.
#
# FIX: the original snippet used pd.read_csv() and np.array() without
# importing `pandas` or `numpy`, raising NameError — both imports added.
import gc

import numpy as np
import pandas as pd
import lightgbm as lgbm
import xgboost as xgb
from sklearn.preprocessing import OneHotEncoder

import dataproc as dproc
from model_train import trainLGB, trainLGBXGB

do_prediction = True   # flag, not consumed in this excerpt — presumably gates test-set scoring
loaded_test = False    # flag, not consumed in this excerpt — presumably tracks test-feature load state
########################### Data and Parameters Import ##########################
# Class-id mapping, label column names, class list and per-class weights
# come from the project configuration helper.
target_map, label_features, all_classes, all_class_weights \
    = dproc.getDataParameters()

train_meta, test_meta_data = dproc.getMetaData()
train = pd.read_csv('input/training_set.csv')
train_full, train_features = dproc.getFullData(train, train_meta)
# Raw light-curve table is no longer needed once features are built; free it.
del train
gc.collect()

#train_full[['object_id', 'period']].to_csv("train_periods.csv", index=False)
#target_id list: will be used in one hot encoding of labels
all_clmap_vals = np.array(list(target_map.values()))
print("Train Feats: {0}".format(train_features))
print("Train Data All COLS: {0}".format(train_full.columns))

# Per-passband column names: one entry per base feature and passband index
# (0-5), e.g. 'period--0' ... 'Eta_e--5', preceded by the object identifier.
calc_feats = ['period', 'power', 'Eta_e']
label_lc_feats = ['object_id']
label_lc_feats.extend(
    feat + '--' + str(band) for feat in calc_feats for band in range(6)
)