Ejemplo n.º 1
0
            MINDIFF, PERSISTENCE)
    print 'Using spambase data folded {} ways. Testing on fold {}.'.format(
        opt['folds'], opt['testfold'])

    # stopping conditions
    # Pick the stopping-condition callable once, depending on whether a
    # round count was supplied in opt['rounds'].
    if opt['rounds'] is None:

        def doround(err, rndct):
            """Return checkconverged(err); rndct is accepted but ignored."""
            return checkconverged(err)
    else:

        def doround(err, rndct):
            """Return countdown(rndct); err is accepted but ignored."""
            return countdown(rndct)

    # load from file
    spambase.load()

    # roll into folds
    folds = [[] for i in xrange(opt['folds'])]
    k = 0  # kurrent fold
    for dp in spambase.data:
        # change the 0,1 labels to -1,1
        dp.label = 1 if dp.label else -1
        # add to the current fold & switch to the next fold
        folds[k].append(dp)
        k = (k + 1) % opt['folds']

    # unroll to testing & training
    testing = folds.pop(opt['testfold'])
    training = reduce(lambda acc, cur: acc + cur, folds)
    del folds
Ejemplo n.º 2
0
def main():
    """Load 'spambase.data' and build a StumpCollection.

    The collection is constructed from spambase.trainset after the load.
    """
    spambase.load('spambase.data')
    # NOTE(review): sc is not used in the lines visible here; this excerpt
    # may be cut short -- confirm against the full file.
    sc = StumpCollection(spambase.trainset)
Ejemplo n.º 3
0
    auc = resultset.auc(roc)
    with open('{}-{}_lambda={}_auc={}'.format(regression.__name__, gdname,
                                       suffix, auc).lower(),
              mode='wb') as fd:
        for fpr, tpr in roc:
            fd.write('{}, {}\n'.format(fpr, tpr))


###############################################################################


if __name__ == '__main__':
    print

    # load from file
    spambase.load()
    d = spambase.data

    # zscore feature values
    def pre(lon):
        """Return stats.zscore of ``lon`` using its own centre and spread.

        The centre is stats.mean(lon); the spread is stats.stddev applied
        to stats.mvue(lon, centre).
        """
        center = stats.mean(lon)
        # presumably mvue yields a variance estimate -- confirm in stats
        estimate = stats.mvue(lon, center)
        spread = stats.stddev(estimate)
        return stats.zscore(lon, center, spread)
    dataset.applykernel(d, pre)

    # insert phantom features
    for dp in d:
        dp.features = [1.0] + list(dp.features)

    # roll into folds
    folds = [[] for i in xrange(FOLDCOUNT)]
Ejemplo n.º 4
0
def main():
    """Load 'spambase.data' and build a StumpCollection.

    The collection is constructed from spambase.trainset after the load.
    """
    spambase.load('spambase.data')
    # NOTE(review): sc is not used in the lines visible here; this excerpt
    # may be cut short -- confirm against the full file.
    sc = StumpCollection(spambase.trainset)