Example #1
import pandas as pd

def validation_curve():

    # Test the decision tree's max depth using cross-validation
    # (fill_na and the other helpers below are project-local, not library APIs)

    # Preprocess data
    data = pd.read_csv('./arrhythmia.data', header=None, na_values='?')
    data = fill_na(data=data)

    features = data.columns.tolist()[:-1]
    target = data.columns.tolist()[-1]

    feature_types = implicit_feature_type_inferrence(data=data[features], num_unique_values=3)

    # note: this split is not used below; cross_validation works on the full data
    train_set, test_set = train_test_split(data=data, train_fraction=0.8, reindex=False, random_seed=0)

    max_depth_cv = list()
    training_error_cv = list()
    test_error_cv = list()

    # Start cross-validation
    for tree_max_depth in range(2, 21, 2):
        print("Tree Max Depth: %d" % tree_max_depth)
        max_depth_cv.append(tree_max_depth)
        tree = DecisionTree(tree_max_depth)

        training_error, test_error = cross_validation(
            data=data, features=features, target=target,
            feature_types=feature_types, model=tree, fold=3, random_seed=0)
        training_error_cv.append(training_error)
        test_error_cv.append(test_error)
        print("Training Error: %f" % training_error)
        print("Test Error: %f" % test_error)

    plot_curve(max_depth=max_depth_cv, training_error=training_error_cv, test_error=test_error_cv)
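
A note on the helpers above: fill_na, implicit_feature_type_inferrence, train_test_split, cross_validation, DecisionTree, and plot_curve are all project-local, not library APIs. As a rough sketch of what this repo's fill_na plausibly does (the median strategy is an assumption, not confirmed from the source):

import pandas as pd

def fill_na(data):
    # hypothetical reimplementation: fill each numeric column's missing
    # values with that column's median; the real helper may use a
    # different strategy (mean, mode, a sentinel value, ...)
    return data.fillna(data.median(numeric_only=True))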
Example #2
def _loss(self, predictions):
    with tf.name_scope("loss"):
        # squared error per element; missing labels (NaN) propagate here
        err = tf.square(predictions - self.labels)
        # zero out the non-finite entries, then average over the count of
        # finite ones so missing labels do not bias the MSE
        err_filled = utils.fill_na(err, 0)
        finite_count = tf.reduce_sum(tf.cast(tf.is_finite(err), tf.float32))
        mse = tf.reduce_sum(err_filled) / finite_count
        return mse
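
Examples #2 through #5 all call the same utils.fill_na helper, whose source is not shown on this page. A minimal sketch of what it presumably does, written against the same TF1-era API these snippets use (the tf.where masking is an assumption):

import tensorflow as tf

def fill_na(x, fill_value=0.0):
    # hypothetical helper: replace NaN/Inf entries with fill_value so
    # the masked sum in _loss stays finite
    return tf.where(tf.is_finite(x), x, tf.zeros_like(x) + fill_value)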
Example #3
def loss(predictions, labels, alpha=1.):
    err = tf.square(predictions - labels)
    err_filled = utils.fill_na(err, 0)
    finite_count = tf.reduce_sum(tf.cast(tf.is_finite(err), tf.float32))
    mse = alpha * tf.reduce_sum(err_filled) / finite_count
    # mse = tf.reduce_mean(err) / 2
    tf.add_to_collection('losses', mse / 2.)
    return mse / 2.
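
Example #3 additionally registers the loss in TF1's 'losses' collection. The snippet does not show the consumer, but the conventional pattern is to sum the collection with any weight-decay terms added elsewhere in the graph:

# typical TF1 usage of the collection (not part of the snippet above)
total_loss = tf.add_n(tf.get_collection('losses'), name='total_loss')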
Example #4
def _loss(self, predictions):
    with tf.name_scope("loss"):
        # if training then crop center of y; else, padding was applied
        # (cast to int: in Python 3, / returns a float, which cannot be
        # used as a slice bound)
        slice_amt = int((np.sum(self.filter_sizes) - len(self.filter_sizes)) / 2)
        slice_y = self.y_norm[:, slice_amt:-slice_amt, slice_amt:-slice_amt]
        _y = tf.cond(self.is_training, lambda: slice_y, lambda: self.y_norm)
        err = tf.square(predictions - _y)
        err_filled = utils.fill_na(err, 0)
        finite_count = tf.reduce_sum(tf.cast(tf.is_finite(err), tf.float32))
        mse = tf.reduce_sum(err_filled) / finite_count
        return mse
Example #5
def _loss(self, predictions):
    with tf.name_scope("loss"):
        # if training then crop center of y; else, padding was applied
        slice_amt = int((np.sum(self.filter_sizes) - len(self.filter_sizes)) / 2)
        slice_y = self.y_norm[:, slice_amt:-slice_amt, slice_amt:-slice_amt]
        _y = tf.cond(self.is_training, lambda: slice_y, lambda: self.y_norm)

        err = tf.square(predictions - _y)
        err_filled = utils.fill_na(err, 0)
        finite_count = tf.reduce_sum(tf.cast(tf.is_finite(err), tf.float32))
        mse = tf.reduce_sum(err_filled) / finite_count
        return mse
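
The int(...) cast around slice_amt in these _loss variants matters: in Python 3, / always returns a float, and float slice bounds raise a TypeError. A tiny self-contained demonstration (the filter sizes are made-up values):

import numpy as np

filter_sizes = [9, 1, 5]
slice_amt = (np.sum(filter_sizes) - len(filter_sizes)) / 2  # 6.0, a float
y = np.zeros((1, 32, 32))
# y[:, slice_amt:-slice_amt, slice_amt:-slice_amt]          # TypeError
cropped = y[:, int(slice_amt):-int(slice_amt), int(slice_amt):-int(slice_amt)]
print(cropped.shape)                                        # (1, 20, 20)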
Example #6
import pandas as pd
import tensorflow as tf

from model import deepFM
from utils import fill_na, preprocess

# --------- prepare dataset -------

# read data
dftrain = pd.read_csv('titanic-train.csv')
dfeval = pd.read_csv('titanic-eval.csv')
dftrain.info()

# fill NA values in place
fill_na(dftrain)
fill_na(dfeval)

# preprocess dataset (encode categorical columns; reuse the train vocabulary for eval)
meta_cate = preprocess(
    dftrain, cate_cols=['sex', 'class', 'deck', 'embark_town', 'alone'])
preprocess(dfeval,
           cate_cols=['sex', 'class', 'deck', 'embark_town', 'alone'],
           existed_cate=meta_cate)

# pop the target column
y_train = dftrain.pop('survived')
y_eval = dfeval.pop('survived')

# transform dataframe to a tf.data.Dataset
trainset = tf.data.Dataset.from_tensor_slices(
    (dict(dftrain), y_train))  # assumed completion; the source is truncated here
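
As in the earlier examples, fill_na and preprocess come from this repo's own utils module. A plausible sketch consistent with how they are called above (in-place NaN filling, then integer-coding the listed categorical columns; every detail below is a guess):

import pandas as pd

def fill_na(df):
    # hypothetical: numeric NaNs -> column median, others -> a token
    for col in df.columns:
        if pd.api.types.is_numeric_dtype(df[col]):
            df[col] = df[col].fillna(df[col].median())
        else:
            df[col] = df[col].fillna('unknown')

def preprocess(df, cate_cols, existed_cate=None):
    # hypothetical: map categorical values to integer ids, reusing the
    # training vocabulary (existed_cate) when encoding the eval set
    meta = existed_cate if existed_cate is not None else {}
    for col in cate_cols:
        if col not in meta:
            meta[col] = {v: i for i, v in enumerate(df[col].unique())}
        df[col] = df[col].map(meta[col]).fillna(0).astype(int)
    return meta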
Example #7
matrix = feature_engineering.add_month_days(matrix)
matrix = feature_engineering.add_seasons(matrix)
matrix = feature_engineering.add_december_distance(matrix)
matrix = feature_engineering.add_first_last_sale(matrix)

# Check for time patterns
utils.check_time_patterns(train)

# Drop first 12 months
matrix = matrix[matrix.date_block_num > 11]

# Fill na values created from lags
matrix = utils.fill_na(matrix)

matrix.to_pickle('datasets/data.pkl')
data = pd.read_pickle('datasets/data.pkl')

# fill null values for LinearRegression to work
data = utils.fill_null(data)

### Modelling
X_train, Y_train, X_valid, Y_valid, X_test = modelling.split_tests(data)

# Run Light GBM
light_val_pred, light_test_pred = modelling.run_light_gbm(
    X_train, Y_train, X_valid, X_test)

# Run Cat Boost
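
In Example #7, fill_na serves a different purpose than in the TensorFlow snippets: lag features have no history for the earliest months, so the merges leave NaNs that are conventionally zero-filled ("no prior sales"). A plausible sketch (the '_lag_' column-naming convention is an assumption):

import pandas as pd

def fill_na(df):
    # hypothetical: zero-fill the NaNs introduced by lag features
    lag_cols = [c for c in df.columns if '_lag_' in c]
    df[lag_cols] = df[lag_cols].fillna(0)
    return df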