def validation_curve():
    # Test decision tree using cross validation
    # Preprocess data
    data = pd.read_csv('./arrhythmia.data', header=None, na_values='?')
    data = fill_na(data=data)
    features = data.columns.tolist()[:-1]
    target = data.columns.tolist()[-1]
    feature_types = implicit_feature_type_inferrence(data=data[features], num_unique_values=3)
    train_set, test_set = train_test_split(data=data, train_fraction=0.8, reindex=False, random_seed=0)

    max_depth_cv = list()
    training_error_cv = list()
    test_error_cv = list()

    # Start cross-validation
    for i in range(2, 21, 2):
        tree_max_depth = i
        print("Tree Max Depth: %d" % tree_max_depth)
        max_depth_cv.append(tree_max_depth)
        tree = DecisionTree(tree_max_depth)
        training_error, test_error = cross_validation(data=data, features=features, target=target,
                                                      feature_types=feature_types, model=tree,
                                                      fold=3, random_seed=0)
        training_error_cv.append(training_error)
        test_error_cv.append(test_error)
        print("Training Error: %f" % training_error)
        print("Test Error: %f" % test_error)

    plot_curve(max_depth=max_depth_cv, training_error=training_error_cv, test_error=test_error_cv)
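The cross_validation helper called above is not shown in this snippet. A minimal sketch, assuming a k-fold split and hypothetical DecisionTree train/predict methods that take the same keyword arguments used above, could look like this:

# Hypothetical sketch of cross_validation; the model.train/model.predict
# signatures are assumptions, not the repository's actual API.
import numpy as np

def cross_validation(data, features, target, feature_types, model, fold=3, random_seed=0):
    rng = np.random.RandomState(random_seed)
    indices = rng.permutation(len(data))
    folds = np.array_split(indices, fold)
    training_errors, test_errors = [], []
    for k in range(fold):
        test_idx = folds[k]
        train_idx = np.concatenate([folds[j] for j in range(fold) if j != k])
        train_fold, test_fold = data.iloc[train_idx], data.iloc[test_idx]
        # Fit on k-1 folds, evaluate misclassification rate on both splits
        model.train(data=train_fold, features=features, target=target, feature_types=feature_types)
        training_errors.append(np.mean(model.predict(train_fold[features]) != train_fold[target]))
        test_errors.append(np.mean(model.predict(test_fold[features]) != test_fold[target]))
    return np.mean(training_errors), np.mean(test_errors)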
def _loss(self, predictions):
    with tf.name_scope("loss"):
        err = tf.square(predictions - self.labels)
        err_filled = utils.fill_na(err, 0)
        finite_count = tf.reduce_sum(tf.cast(tf.is_finite(err), tf.float32))
        mse = tf.reduce_sum(err_filled) / finite_count
        return mse
def loss(predictions, labels, alpha=1.):
    err = tf.square(predictions - labels)
    err_filled = utils.fill_na(err, 0)
    finite_count = tf.reduce_sum(tf.cast(tf.is_finite(err), tf.float32))
    mse = alpha * tf.reduce_sum(err_filled) / finite_count
    # mse = tf.reduce_mean(err) / 2
    tf.add_to_collection('losses', mse / 2.)
    return mse / 2.
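These masked-MSE losses depend on a utils.fill_na helper that zeros out non-finite entries so NaN labels do not poison the sum, while finite_count keeps the average over valid entries only. The actual utility is not shown here; a minimal TF1-style sketch (an assumption, not the project's implementation) is:

# Hypothetical TF1-style fill_na: replace NaN/Inf entries with fill_value.
import tensorflow as tf

def fill_na(tensor, fill_value=0.):
    fill = tf.fill(tf.shape(tensor), tf.cast(fill_value, tensor.dtype))
    # Keep finite values, substitute fill_value elsewhere
    return tf.where(tf.is_finite(tensor), tensor, fill)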
def _loss(self, predictions):
    with tf.name_scope("loss"):
        # if training then crop center of y, else, padding was applied
        slice_amt = int((np.sum(self.filter_sizes) - len(self.filter_sizes)) / 2)  # cast: slice indices must be ints
        slice_y = self.y_norm[:, slice_amt:-slice_amt, slice_amt:-slice_amt]
        _y = tf.cond(self.is_training, lambda: slice_y, lambda: self.y_norm)
        # tf.subtract(predictions, _y)  # no-op: result was never used
        err = tf.square(predictions - _y)
        err_filled = utils.fill_na(err, 0)
        finite_count = tf.reduce_sum(tf.cast(tf.is_finite(err), tf.float32))
        mse = tf.reduce_sum(err_filled) / finite_count
        return mse
def _loss(self, predictions):
    with tf.name_scope("loss"):
        # if training then crop center of y, else, padding was applied
        slice_amt = int((np.sum(self.filter_sizes) - len(self.filter_sizes)) / 2)
        slice_y = self.y_norm[:, slice_amt:-slice_amt, slice_amt:-slice_amt]
        _y = tf.cond(self.is_training, lambda: slice_y, lambda: self.y_norm)
        # tf.subtract(predictions, _y)
        err = tf.square(predictions - _y)
        err_filled = utils.fill_na(err, 0)
        finite_count = tf.reduce_sum(tf.cast(tf.is_finite(err), tf.float32))
        mse = tf.reduce_sum(err_filled) / finite_count
        return mse
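For intuition about the cropping formula (example values assumed, not taken from the source): with VALID convolutions each layer shrinks the spatial dimensions by filter_size - 1, so for filter_sizes = [9, 1, 5] the total shrinkage is (9-1) + (1-1) + (5-1) = 15 - 3 = 12 pixels, giving slice_amt = 6 cropped from each border of y during training.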
import pandas as pd
import tensorflow as tf
# from importlib import reload
# import model
# reload(model)
from model import deepFM
from utils import fill_na, preprocess

# --------- prepare dataset -------
# read data
dftrain = pd.read_csv('titanic-train.csv')
dfeval = pd.read_csv('titanic-eval.csv')
dftrain.info()

# fill na values
fill_na(dftrain)
fill_na(dfeval)

# preprocess dataset
meta_cate = preprocess(dftrain, cate_cols=['sex', 'class', 'deck', 'embark_town', 'alone'])
preprocess(dfeval, cate_cols=['sex', 'class', 'deck', 'embark_town', 'alone'], existed_cate=meta_cate)

# target column
y_train = dftrain.pop('survived')
y_eval = dfeval.pop('survived')

# transform dataframe to tensor
trainset = tf.data.Dataset.from_tensor_slices(
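The fill_na and preprocess utilities imported above are not shown. A minimal in-place sketch, assuming fill_na imputes numeric medians and categorical modes, and preprocess integer-encodes the categorical columns while returning the vocabularies so the eval set reuses the training encoding, might look like this (hypothetical, not the repository's actual helpers):

# Hypothetical utils: median/mode imputation plus consistent categorical encoding.
import pandas as pd

def fill_na(df):
    for col in df.columns:
        if pd.api.types.is_numeric_dtype(df[col]):
            df[col] = df[col].fillna(df[col].median())
        else:
            df[col] = df[col].fillna(df[col].mode().iloc[0])

def preprocess(df, cate_cols, existed_cate=None):
    meta = existed_cate if existed_cate is not None else {}
    for col in cate_cols:
        if col not in meta:
            # Build the vocabulary from the training frame on first use
            meta[col] = {v: i for i, v in enumerate(sorted(df[col].astype(str).unique()))}
        # Unseen categories in the eval frame map to -1
        df[col] = df[col].astype(str).map(meta[col]).fillna(-1).astype(int)
    return meta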
matrix = feature_engineering.add_month_days(matrix)
matrix = feature_engineering.add_seasons(matrix)
matrix = feature_engineering.add_december_distance(matrix)
matrix = feature_engineering.add_first_last_sale(matrix)

# Check for time patterns
utils.check_time_patterns(train)

# Drop first 12 months
matrix = matrix[matrix.date_block_num > 11]

# Fill na values created from lags
matrix = utils.fill_na(matrix)
matrix.to_pickle('datasets/data.pkl')

data = pd.read_pickle('datasets/data.pkl')

# fill null values for LinearRegression to work
data = utils.fill_null(data)

### Modelling
X_train, Y_train, X_valid, Y_valid, X_test = modelling.split_tests(data)

# Run Light GBM
light_val_pred, light_test_pred = modelling.run_light_gbm(X_train, Y_train, X_valid, X_test)

# Run Cat Boost
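Here utils.fill_na handles the NaNs that lag features introduce for the earliest months. A common sketch (an assumption about the helper, not its actual code) is to zero-fill only the lag columns, since a missing lag means no recorded sales, and to downcast to save memory:

# Hypothetical utils.fill_na for lag features: zero-fill and downcast.
import pandas as pd

def fill_na(df):
    for col in df.columns:
        if 'lag' in col and df[col].isnull().any():
            df[col] = df[col].fillna(0).astype('float32')
    return df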