def convert(raw_dir, max_features):
    """Convert the raw UCI classification files and drop features 4 and 15.

    The raw data is loaded through ``util.convert_uci_classif`` using the
    ``info`` and ``file_name`` names from the enclosing scope, with
    ``y_first=True``.

    Args:
        raw_dir: Directory handed to ``util.convert_uci_classif``.
        max_features: Accepted as part of the signature; not read by this
            function.

    Returns:
        Whatever ``util.remove_features`` returns for the converted dataset
        and the feature list ``[4, 15]``.
    """
    # Feature 15 is constant, thus useless.
    # Feature 4 is the most important feature (according to random forest),
    # but with it the problem is too easy, i.e. most decent learning
    # algorithms classify the test set perfectly.
    dropped_features = [4, 15]

    converted = util.convert_uci_classif(
        info,
        raw_dir,
        file_name,
        y_first=True,
    )
    return util.remove_features(converted, dropped_features)
def convert(raw_dir, max_features):
    """Return a dictionary containing the required fields for the dataset.

    The CSV files are looked up in the ``ml-prove`` sub-directory of
    ``raw_dir`` and converted with ``util.convert_uci_classif`` (using the
    ``info`` name from the enclosing scope). Features 51 and 53 are then
    removed via ``util.remove_features``.

    Args:
        raw_dir: Directory that contains the ``ml-prove`` folder.
        max_features: Accepted as part of the signature; not read by this
            function.

    Returns:
        The result of ``util.remove_features`` applied to the converted
        dataset.
    """
    source_dir = os.path.join(raw_dir, 'ml-prove')
    csv_names = ['test.csv', 'validation.csv', 'train.csv']
    converted = util.convert_uci_classif(info, source_dir, csv_names)
    # NOTE(review): the reason for dropping features 51 and 53 is not
    # recorded here -- confirm and document.
    return util.remove_features(converted, [51, 53])
def convert(raw_dir, max_features):
    """Return a dictionary containing the required fields for the dataset.

    Reads ``test.csv``, ``validation.csv`` and ``train.csv`` (passed in that
    order) from ``<raw_dir>/ml-prove`` through ``util.convert_uci_classif``,
    then strips features 51 and 53 with ``util.remove_features``.

    Args:
        raw_dir: Parent directory of the ``ml-prove`` data folder.
        max_features: Part of the signature, but not used in this function.

    Returns:
        The value produced by ``util.remove_features``.
    """
    unwanted_features = [51, 53]
    dataset = util.convert_uci_classif(
        info,
        os.path.join(raw_dir, 'ml-prove'),
        ['test.csv', 'validation.csv', 'train.csv'],
    )
    return util.remove_features(dataset, unwanted_features)
def convert(raw_dir, max_features):
    """Load the UCI classification data and remove features 4 and 15.

    Conversion is delegated to ``util.convert_uci_classif`` (called with
    ``y_first=True`` and the ``info`` / ``file_name`` names from the
    enclosing scope); the result is filtered by ``util.remove_features``.

    Args:
        raw_dir: Directory passed through to ``util.convert_uci_classif``.
        max_features: Part of the signature, but not used in this function.

    Returns:
        The value produced by ``util.remove_features``.
    """
    raw_dataset = util.convert_uci_classif(info, raw_dir, file_name, y_first=True)

    # Why these two features are removed:
    #   * Feature 15 is constant, thus useless.
    #   * Feature 4 is the most important feature (according to random
    #     forest), but with it the problem is too easy, i.e. most decent
    #     learning algorithms classify the test set perfectly.
    return util.remove_features(raw_dataset, [4, 15])