コード例 #1
0
ファイル: mushroom.py プロジェクト: StartE/mlbench
def convert(raw_dir, max_features):
    """
    Build the dataset dictionary and strip two unwanted features from it.

    The raw data is loaded by ``util.convert_uci_classif`` from ``raw_dir``,
    using the ``info`` and ``file_name`` names defined outside this function.
    ``y_first=True`` presumably tells the loader that the label precedes the
    features in each record -- confirm against ``util``.

    ``max_features`` is accepted but not used by this converter.
    """
    # Feature 15 is constant, hence useless.
    # Feature 4 is the most important feature (according to random forest),
    # but keeping it makes the problem too easy: most decent learning
    # algorithms then classify the test set perfectly.
    features_to_drop = [4, 15]

    loaded = util.convert_uci_classif(info, raw_dir, file_name, y_first=True)
    return util.remove_features(loaded, features_to_drop)
コード例 #2
0
ファイル: ml_prove.py プロジェクト: StartE/mlbench
def convert(raw_dir, max_features):
    """
    Return a dictionary containing the required fields for the dataset.

    The files are read from the ``ml-prove`` sub-directory of ``raw_dir`` and
    handed to ``util.convert_uci_classif`` in the order test, validation,
    train. Features 51 and 53 are then removed from the result (the reason
    is not recorded here).

    ``max_features`` is accepted but not used by this converter.
    """
    split_files = ['test.csv', 'validation.csv', 'train.csv']
    source_dir = os.path.join(raw_dir, 'ml-prove')

    loaded = util.convert_uci_classif(info, source_dir, split_files)
    return util.remove_features(loaded, [51, 53])
コード例 #3
0
def convert(raw_dir, max_features):
    """
    Return a dictionary containing the required fields for the dataset.

    Loads ``test.csv``, ``validation.csv`` and ``train.csv`` (in that order)
    from ``<raw_dir>/ml-prove`` via ``util.convert_uci_classif`` and drops
    features 51 and 53 from what comes back.

    ``max_features`` is part of the signature but is not read here.
    """
    csv_names = ['test.csv', 'validation.csv', 'train.csv']
    excluded = [51, 53]

    converted = util.convert_uci_classif(
        info,
        os.path.join(raw_dir, 'ml-prove'),
        csv_names,
    )
    pruned = util.remove_features(converted, excluded)
    return pruned
コード例 #4
0
def convert(raw_dir, max_features):
    """
    Load the raw data and return it with features 4 and 15 removed.

    Loading is delegated to ``util.convert_uci_classif`` (called with
    ``y_first=True``) and the pruning to ``util.remove_features``. ``info``
    and ``file_name`` come from outside this function.

    ``max_features`` is part of the signature but is not read here.
    """
    converted = util.convert_uci_classif(info, raw_dir, file_name, y_first=True)

    # Why these two features are excluded:
    #   15 -- constant, so it is useless.
    #    4 -- the most important feature (according to random forest); with
    #         it the problem is too easy, i.e. most decent learning
    #         algorithms classify the test set perfectly.
    excluded = [4, 15]

    pruned = util.remove_features(converted, excluded)
    return pruned