Esempio n. 1
0
File: exp.py Progetto: eliavw/aaai20
def collect_results(dataset,
                    q_codes,
                    results: Dict[str, List[float]],
                    identifier='weka',
                    q_idx: List[int] = None,
                    **extra_fields):
    """Gather per-query results into one DataFrame, one row per query.

    Parameters
    ----------
    dataset
        Value written to the ``dataset`` column of every row.
    q_codes
        2-D array of query codes; its shape is read as
        ``(number of queries, number of attributes)``.
    results
        Mapping from column name to per-query values. Every entry becomes
        a column of the output.
    identifier
        Value written to the ``identifier`` column of every row.
    q_idx
        Optional query indices for the ``q_idx`` column. When omitted,
        queries are numbered ``0 .. number of queries - 1``.
    **extra_fields
        Additional columns, added last, keyed by keyword name.

    Returns
    -------
    pandas.DataFrame
        Columns, in order: ``missing_percentage``, ``difficulty``,
        ``q_idx``, the keys of `results`, ``identifier``, ``dataset`` and
        finally the keys of `extra_fields`.
    """
    n_queries, n_attributes = q_codes.shape

    # Code against which query entries are matched; presumably the encoding
    # of a "missing" attribute -- confirm against `encode_attribute`.
    missing_code = encode_attribute(2, [0], [1])
    missing_counts = np.sum((q_codes == missing_code), axis=1)

    frame = pd.DataFrame()

    # Despite the column name this is a fraction in [0, 1], not a percentage.
    frame['missing_percentage'] = missing_counts / n_attributes
    frame['difficulty'] = _convert_percentage_missing_to_difficulty(
        frame['missing_percentage'].values)
    frame['q_idx'] = range(n_queries) if q_idx is None else q_idx

    for column, values in results.items():
        frame[column] = values

    # Scalars are broadcast by pandas to every row.
    frame['identifier'] = identifier
    frame['dataset'] = dataset

    for column, value in extra_fields.items():
        frame[column] = value

    return frame
def setup_classification():
    """Fit a MERCS model on the nursery data and prepare a query.

    Returns
    -------
    tuple
        ``(train, test, code, model, y_true)`` where ``code`` is the query
        code (a list of nine integers) and ``y_true`` holds the values of
        the ``test`` columns selected by that code.
    """
    train, test = datasets.load_nursery()

    model = MERCS()
    # Induction (`ind_*`) and selection (`sel_*`) settings, passed to `fit`
    # in the same keyword order as before.
    fit_kwargs = dict(
        ind_type='RF',
        ind_n_estimators=10,
        ind_max_depth=4,
        sel_type='Base',
        sel_its=4,
        sel_param=2,
    )
    model.fit(train, **fit_kwargs)

    # Eight attributes coded 0 followed by one coded 1.
    code = [0] * 8 + [1]

    # Presumably the code that marks a target attribute -- confirm against
    # `encode_attribute`.
    target_code = encode_attribute(2, [1], [2])
    is_target = np.array(code) == target_code

    target_columns = test.columns.values[is_target]
    y_true = test[target_columns].values
    return train, test, code, model, y_true
Esempio n. 3
0
def test_perform_imputation():
    """Check that `perform_imputation` fills in the missing query attributes.

    The imputer is fitted on the nursery training data with the
    ``most_frequent`` strategy, so every attribute flagged as missing in the
    query code is expected to hold a single value in the imputed output.
    """
    # Prelims
    train, test = ds.load_nursery()
    query_code = np.array([0, -1, -1, -1, -1, -1, 0, 0, 1])

    # NOTE(review): `Imputer(..., axis=0)` looks like the legacy scikit-learn
    # API -- confirm it is still available in the pinned version.
    imputator = Imputer(missing_values='NaN',
                        strategy='most_frequent',
                        axis=0)
    imputator.fit(train)

    # Actual test
    obs = perform_imputation(test, query_code, imputator)

    assert test.shape == obs.shape
    assert isinstance(obs, np.ndarray)

    # Build a per-attribute boolean mask by comparing the query code with the
    # encoding (presumably the "missing" code -- confirm). Indexing with the
    # bare encoding would select a single column by position and make the
    # loop below trivially true.
    boolean_missing = query_code == encode_attribute(0, [1], [2])

    # Guard against a vacuous loop: the query must flag at least one column.
    assert boolean_missing.any()

    for column in obs[:, boolean_missing].T:
        assert len(np.unique(column)) == 1
def setup():
    """Fit a MERCS model on the fertility data and prepare a query.

    Returns
    -------
    tuple
        ``(train, test, code, model, y_true, target_boolean)`` where ``code``
        is the query code (a list of ten integers), ``target_boolean`` is the
        boolean mask of attributes whose code matches
        ``encode_attribute(2, [1], [2])`` and ``y_true`` holds the values of
        the ``test`` columns selected by that mask.
    """
    train, test = datasets.load_fertility()
    model = MERCS()

    # Ensure attributes are correctly recognized as nominal/numeric.
    # `pd.factorize` numbers values by order of first appearance, so train
    # and test are factorized together to give each season the same integer
    # code in both frames. Train comes first, which keeps its codes identical
    # to factorizing train on its own.
    n_train = len(train)
    season_codes = pd.factorize(
        pd.concat([train['season'], test['season']]))[0]
    train['season'] = season_codes[:n_train]
    test['season'] = season_codes[n_train:]

    ind_parameters = {'ind_type':           'RF',
                      'ind_n_estimators':   10,
                      'ind_max_depth':      4}

    sel_parameters = {'sel_type':           'Base',
                      'sel_its':            8,
                      'sel_param':          2}

    model.fit(train, **ind_parameters, **sel_parameters)

    # Eight attributes coded 0 followed by two coded 1.
    code = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1]

    # Presumably selects the target attributes -- confirm against
    # `encode_attribute`.
    target_boolean = np.array(code) == encode_attribute(2, [1], [2])
    y_true = test[test.columns.values[target_boolean]].values
    return train, test, code, model, y_true, target_boolean