Ejemplo n.º 1
0
    def read_data(self):
        """Load and preprocess the train/test data, storing the results on self.

        Builds a ``Dataset`` from ``self.train_file_name`` and
        ``self.test_file_name`` (rooted at ``self.data_dir``), calls its
        ``preprocess`` with ``reload=True`` and unpacks the five returned
        values into ``train_X``, ``train_Y``, ``test``, ``train_features``
        and ``cate_features``.
        """
        loader = Dataset(
            self.train_file_name,
            self.test_file_name,
            base_dir=self.data_dir,
        )

        # NOTE(review): reload=True presumably forces recomputation instead of
        # using a cached result -- confirm against Dataset.preprocess.
        prepared = loader.preprocess(reload=True)
        (
            self.train_X,
            self.train_Y,
            self.test,
            self.train_features,
            self.cate_features,
        ) = prepared
Ejemplo n.º 2
0
    def read_data(self):
        """Load and preprocess the data and set the default stratification values.

        Sets ``self.set_train_test_bool`` to True, builds a ``Dataset`` from
        the configured file names and data directory, and unpacks the five
        values returned by ``preprocess(reload=True)`` into ``train_X``,
        ``train_Y``, ``test``, ``train_features`` and ``cate_features``.
        Finally ``self.stratified_values`` is initialised from
        ``self.train_Y.values``.
        """
        # The flag is raised before any loading happens, as in the original.
        self.set_train_test_bool = True

        loader = Dataset(
            self.train_file_name,
            self.test_file_name,
            base_dir=self.data_dir,
        )
        prepared = loader.preprocess(reload=True)
        (
            self.train_X,
            self.train_Y,
            self.test,
            self.train_features,
            self.cate_features,
        ) = prepared

        # By default, stratify on the raw target values.
        self.stratified_values = self.train_Y.values
Ejemplo n.º 3
0
    def set_train_outlier(self):
        """Return the training rows whose ``'outliers'`` value is 0, with their target.

        Loads the data through ``Dataset.preprocess(reload=True)`` and stores
        the five returned values on the instance. A ``'target'`` column holding
        ``self.train_Y`` is then attached to ``self.train_X`` (this column
        stays on ``self.train_X`` after the call).

        Returns:
            A ``(train_df, target)`` pair: the rows of ``self.train_X`` where
            ``'outliers' == 0`` with the ``'target'`` column removed, and that
            removed ``'target'`` column.
        """
        loader = Dataset(train_path=self.train_file, test_path=self.test_file)
        (
            self.train_X,
            self.train_Y,
            self.test,
            self.features,
            self.cate_features,
        ) = loader.preprocess(reload=True)

        frame = self.train_X
        frame['target'] = self.train_Y

        # NOTE(review): set_outlier_col is presumably expected to add the
        # 'outliers' column to the frame in place -- confirm in Dataset.
        has_outlier_flag = 'outliers' in frame.columns
        if not has_outlier_flag:
            loader.set_outlier_col(frame)

        # Keep only the rows flagged as non-outliers.
        inliers = frame[frame['outliers'] == 0]

        # pop() hands back the column and deletes it from the filtered frame.
        labels = inliers.pop('target')
        return inliers, labels
Ejemplo n.º 4
0
 def load_data(self):
     """Load the aggregated train/test CSVs and keep the preprocessed pieces.

     Creates a ``Dataset`` for ``'train_agg_id1.csv'`` / ``'test_agg_id1.csv'``,
     calls ``preprocess(reload=True)`` and unpacks the five returned values
     into ``train_X``, ``target``, ``test``, ``features`` and
     ``cate_features`` on the instance.
     """
     loader = Dataset('train_agg_id1.csv', 'test_agg_id1.csv')
     prepared = loader.preprocess(reload=True)
     (
         self.train_X,
         self.target,
         self.test,
         self.features,
         self.cate_features,
     ) = prepared
Ejemplo n.º 5
0
            param_dict[key] = params[index]
        # set some non-numeric parameters too
        for key in self.non_numeric_param:
            param_dict[key] = self.non_numeric_param[key]

        print(param_dict)
        cv_error, _ = self._train(param_dict)
        return cv_error, param_dict

    def predict(self):
        """Placeholder: no prediction logic is implemented; returns None."""
        return None


if __name__ == "__main__":
    # Script entry point: convert the aggregated CSVs with
    # Dataset.format_transformer, then train an FFMModel on the resulting
    # text files.

    # The same two file names are written by the transformer and read by the
    # model, so they are named once here.
    ffm_train_file = 'alltrainffm_agg1.txt'
    ffm_test_file = 'alltestffm_agg1.txt'

    # NOTE(review): 'hist_category_1_sum' appears twice in this list; it is
    # kept as-is to preserve behavior -- confirm whether that is intended.
    ffm_fields = [
        'feature_1',
        'feature_2',
        'feature_3',
        'elapsed_time',
        'hist_month_lag_max',
        'hist_category_1_sum',
        'hist_weekend_sum',
        'hist_category_3_mean_mean',
        'hist_category_1_sum',
        'hist_category_1_mean',
        'hist_authorized_flag_sum',
        'hist_authorized_flag_mean',
        'hist_purchase_date_max',
    ]

    dataset = Dataset(
        train_path='df_train_agg1.csv',
        test_path='df_test_agg1.csv',
    )
    dataset.format_transformer(
        train_file_name=ffm_train_file,
        test_file_name=ffm_test_file,
        fields=ffm_fields,
    )

    model = FFMModel(
        debug=True,
        train_name=ffm_train_file,
        test_name=ffm_test_file,
    )
    model.train()