コード例 #1
0
def selection(fold, seedval, path, agg):
    """Grow an ensemble step by step for one fold and score it.

    The columns of the training frame are scored with
    ``common.fmax_score`` against the training labels and sorted
    according to ``common.greater_is_better``.  On every iteration
    ``select_candidate_enhanced`` chooses one more member; the growing
    ensemble is then evaluated with ``get_performance`` on both the
    training and the test frame.  The training records decide (via
    ``common.get_best_performer``) which prefix of the ensemble is used
    for the final test predictions.

    Args:
        fold: Fold identifier handed to ``common.read_fold``,
            ``get_performance`` and ``get_predictions``.
        seedval: Value handed to ``seed`` before any other work, and
            forwarded to ``get_performance`` / ``get_predictions``.
        path: Location handed to ``common.read_fold``.
        agg: Second argument of ``common.unbag`` for both frames.

    Returns:
        A 2-tuple: the result of ``get_predictions`` for the selected
        ensemble on the test frame, and a DataFrame built from the
        per-iteration test performance records.
    """
    seed(seedval)
    # Upper bound on the number of selection iterations.
    max_ensemble_size = 50

    train_df, train_labels, test_df, test_labels = common.read_fold(path, fold)
    train_df = common.unbag(train_df, agg)
    test_df = common.unbag(test_df, agg)

    # One score per column of train_df, ordered by common.greater_is_better.
    best_classifiers = train_df.apply(
        lambda x: common.fmax_score(train_labels, x)
    ).sort_values(ascending=not common.greater_is_better)

    train_performance = []
    test_performance = []
    ensemble = []
    for i in range(min(max_ensemble_size, len(best_classifiers))):
        best_candidate = select_candidate_enhanced(
            train_df, train_labels, best_classifiers, ensemble, i)
        ensemble.append(best_candidate)
        train_performance.append(
            get_performance(train_df, ensemble, fold, seedval))
        test_performance.append(
            get_performance(test_df, ensemble, fold, seedval))

    train_performance_df = pd.DataFrame.from_records(train_performance)
    best_ensemble_size = common.get_best_performer(
        train_performance_df).ensemble_size.values
    # Keep the 'ensemble' entries up to and including the best size
    # (first value of the best performer's ensemble_size).
    best_ensemble = train_performance_df.ensemble[
        :best_ensemble_size.item(0) + 1]
    return (
        get_predictions(test_df, best_ensemble, fold, seedval),
        pd.DataFrame.from_records(test_performance),
    )
コード例 #2
0
ファイル: stacking.py プロジェクト: shwhalen/datasink
def stacked_generalization(fold):
    """Fit the module-level ``stacker`` on one fold and tabulate its output.

    Reads the fold through ``common.read_fold`` using the module-level
    ``path``.  When the module-level ``method`` equals ``'aggregate'``
    both frames are first passed through ``common.unbag`` with
    ``bag_count``.

    Args:
        fold: Fold identifier handed to ``common.read_fold`` and stored
            in the ``fold`` column of the result.

    Returns:
        A DataFrame with the columns ``fold``, ``id``, ``label``,
        ``prediction`` and ``diversity``.
    """
    train_df, train_labels, test_df, test_labels = common.read_fold(path, fold)
    if method == 'aggregate':
        train_df = common.unbag(train_df, bag_count)
        test_df = common.unbag(test_df, bag_count)

    fitted_stacker = stacker.fit(train_df, train_labels)
    # Column 1 of the probability matrix (presumably the positive class
    # -- confirm against the label encoding).
    column_one_proba = fitted_stacker.predict_proba(test_df)[:, 1]

    result_columns = {
        'fold': fold,
        'id': test_df.index.get_level_values('id'),
        'label': test_labels,
        'prediction': column_one_proba,
        'diversity': common.diversity_score(test_df.values),
    }
    return DataFrame(result_columns)
コード例 #3
0
ファイル: stacking.py プロジェクト: Web5design/datasink
def stacked_generalization(fold):
    """Fit the module-level ``stacker`` on one fold and evaluate it.

    ``seed`` is called with the module-level ``seedval`` first.  The fold
    is read with ``read_fold`` from the module-level ``path``; when the
    module-level ``method`` equals ``'aggregate'`` both frames go through
    ``unbag`` with ``bag_count``.

    Args:
        fold: Fold identifier handed to ``read_fold`` and included in the
            last argument of ``eval_metrics``.

    Returns:
        Whatever ``eval_metrics`` returns for the test frame, the test
        labels, the predictions and ``[[fold], [seedval]]``.
    """
    seed(seedval)
    train_df, train_labels, test_df, test_labels = read_fold(path, fold)
    if method == 'aggregate':
        train_df = unbag(train_df, bag_count)
        test_df = unbag(test_df, bag_count)

    fitted_stacker = stacker.fit(train_df, train_labels)
    probability_matrix = fitted_stacker.predict_proba(test_df)
    # Only column 1 of the probability matrix is evaluated.
    column_one_proba = probability_matrix[:, 1]
    return eval_metrics(
        test_df, test_labels, column_one_proba, [[fold], [seedval]])
コード例 #4
0
def stacked_generalization(fold):
    """Train the module-level ``stacker`` on a fold and return its metrics.

    The module-level ``seedval`` is passed to ``seed`` before the fold is
    loaded with ``read_fold(path, fold)``.  If the module-level ``method``
    is ``'aggregate'``, the training and test frames are replaced by
    ``unbag(frame, bag_count)``.

    Args:
        fold: Fold identifier; also reported to ``eval_metrics`` together
            with ``seedval``.

    Returns:
        The result of ``eval_metrics``.
    """
    seed(seedval)
    train_df, train_labels, test_df, test_labels = read_fold(path, fold)
    if method == 'aggregate':
        train_df = unbag(train_df, bag_count)
        test_df = unbag(test_df, bag_count)

    model = stacker.fit(train_df, train_labels)
    # Second column (index 1) of predict_proba's output.
    scores = model.predict_proba(test_df)[:, 1]
    run_labels = [[fold], [seedval]]
    return eval_metrics(test_df, test_labels, scores, run_labels)
コード例 #5
0
ファイル: ensemble.py プロジェクト: linhuawang/LargeGOPred
def stacked_generalization(path, stacker_name, stacker, fold, agg):
    """Fit ``stacker`` on one fold and tabulate its test predictions.

    Args:
        path: Location handed to ``common.read_fold``.
        stacker_name: Not used by this function; kept for the existing
            call signature.
        stacker: Estimator exposing ``fit`` and ``predict_proba`` and/or
            ``predict``.
        fold: Fold identifier handed to ``common.read_fold`` and stored
            in the ``fold`` column of the result.
        agg: Second argument of ``common.unbag`` for both frames.

    Returns:
        A DataFrame with the columns ``fold``, ``id``, ``label``,
        ``prediction`` and ``diversity``.
    """
    train_df, train_labels, test_df, test_labels = common.read_fold(path, fold)
    train_df = common.unbag(train_df, agg)
    test_df = common.unbag(test_df, agg)

    # Fit once and reuse the fitted estimator for the fallback instead of
    # training a second time.
    fitted = stacker.fit(train_df, train_labels)
    try:
        test_predictions = fitted.predict_proba(test_df)[:, 1]
    except Exception:
        # Best-effort fallback for stackers whose predict_proba is missing
        # or fails.  `Exception` (rather than a bare except) lets
        # KeyboardInterrupt / SystemExit propagate.
        # NOTE(review): `predict` commonly returns a 1-D array, in which
        # case `[:, 1]` raises IndexError -- confirm the intended fallback.
        test_predictions = fitted.predict(test_df)[:, 1]

    df = pd.DataFrame({
        'fold': fold,
        'id': test_df.index.get_level_values('id'),
        'label': test_labels,
        'prediction': test_predictions,
        'diversity': common.diversity_score(test_df.values),
    })
    return df
コード例 #6
0
ファイル: ensemble.py プロジェクト: linhuawang/LargeGOPred
def bestbase_fmax(path,fold_count=5,agg=1):
    """Return the best per-column ``common.fmax_score`` over all folds.

    The test frames of folds ``0 .. fold_count - 1`` are passed through
    ``common.unbag`` and concatenated; their labels are joined with
    ``append``.  Each column of the concatenated frame is then scored
    against the joined labels.

    Args:
        path: Existing location handed to ``common.read_fold``; an
            ``analysis`` sub-directory is created there if missing.
        fold_count: Number of folds to read.
        agg: Second argument of ``common.unbag``.

    Returns:
        The maximum of the per-column scores.
    """
    assert exists(path)
    analysis_dir = '%s/analysis' % path
    if not exists(analysis_dir):
        mkdir(analysis_dir)

    fold_frames = []
    all_labels = []
    for fold in range(fold_count):
        _, _, fold_test, fold_labels = common.read_fold(path, fold)
        fold_frames.append(common.unbag(fold_test, agg))
        all_labels = append(all_labels, fold_labels)

    stacked = pd.concat(fold_frames)
    column_scores = []
    # Positional access so that every column is scored exactly once.
    for position in range(len(stacked.columns)):
        column_scores.append(
            common.fmax_score(all_labels, stacked.iloc[:, position]))
    return max(column_scores)
コード例 #7
0
ファイル: ensemble.py プロジェクト: linhuawang/LargeGOPred
def mean_fmax(path,fold_count=5,agg=1):
    """Score the row-wise mean of the test frames with ``common.fmax_score``.

    For each fold ``0 .. fold_count - 1`` the test frame is passed through
    ``common.unbag`` and averaged along ``axis=1``; the averages and the
    labels of all folds are joined with ``append`` before scoring.

    Args:
        path: Existing location handed to ``common.read_fold``; an
            ``analysis`` sub-directory is created there if missing.
        fold_count: Number of folds to read.
        agg: Second argument of ``common.unbag``.

    Returns:
        The score formatted to three decimals and converted back to float.
    """
    assert exists(path)
    analysis_dir = '%s/analysis' % path
    if not exists(analysis_dir):
        mkdir(analysis_dir)

    pooled_means = []
    pooled_labels = []
    for fold in range(fold_count):
        _, _, fold_test, fold_labels = common.read_fold(path, fold)
        row_means = common.unbag(fold_test, agg).mean(axis=1).values
        pooled_means = append(pooled_means, row_means)
        pooled_labels = append(pooled_labels, fold_labels)

    score = common.fmax_score(pooled_labels, pooled_means)
    # Round-trip through a string to keep three decimal places.
    return float('%.3f' % score)
コード例 #8
0
def mean_fmax(path):
    """Score the row-wise mean of the test frames with ``common.fmax_score``.

    The number of folds comes from the ``foldCount`` entry returned by
    ``common.load_properties(path)``.  Every fold's test frame is passed
    through ``common.unbag`` with a fixed second argument of 10 and
    averaged along ``axis=1``; the averages and labels of all folds are
    collected into plain lists before scoring.

    Args:
        path: Existing location handed to ``common.load_properties`` and
            ``common.read_fold``; an ``analysis`` sub-directory is created
            there if missing.

    Returns:
        The score as a string formatted to three decimals.
    """
    assert exists(path)
    analysis_dir = '%s/analysis' % path
    if not exists(analysis_dir):
        mkdir(analysis_dir)

    properties = common.load_properties(path)
    n_folds = int(properties['foldCount'])

    pooled_means = []
    pooled_labels = []
    for fold in range(n_folds):
        _, _, fold_test, fold_labels = common.read_fold(path, fold)
        fold_test = common.unbag(fold_test, 10)
        # Extend element by element, exactly as in-place list addition does.
        pooled_means.extend(fold_test.mean(axis=1).values)
        pooled_labels.extend(fold_labels)

    score = common.fmax_score(pooled_labels, pooled_means)
    return '%.3f' % (score)
コード例 #9
0
def bestbase_fmax(path):
    """Return the best per-column score over the concatenated test frames.

    The number of folds comes from the ``foldCount`` entry returned by
    ``common.load_properties(path)``.  Every fold's test frame is passed
    through ``common.unbag`` with a fixed second argument of 10; the
    frames are concatenated and the labels collected into one list.  Each
    column is then scored against the labels with ``common.fmax_core``.

    Args:
        path: Existing location handed to ``common.load_properties`` and
            ``common.read_fold``; an ``analysis`` sub-directory is created
            there if missing.

    Returns:
        The maximum of the per-column scores.
    """
    assert exists(path)
    analysis_dir = '%s/analysis' % path
    if not exists(analysis_dir):
        mkdir(analysis_dir)

    properties = common.load_properties(path)
    n_folds = int(properties['foldCount'])

    fold_frames = []
    all_labels = []
    for fold in range(n_folds):
        _, _, fold_test, fold_labels = common.read_fold(path, fold)
        fold_frames.append(common.unbag(fold_test, 10))
        all_labels.extend(fold_labels)

    stacked = concat(fold_frames)
    column_scores = []
    for column in stacked.columns:
        # NOTE(review): other call sites use common.fmax_score -- confirm
        # that common.fmax_core exists and is the intended scorer.
        column_scores.append(
            common.fmax_core(all_labels, stacked[column].tolist()))
    return max(column_scores)