Beispiel #1
0
# Run one cross-validation trial of the booster with settings taken from the
# command line, then save the settings and the resulting scores under simdir.
#
# Positional arguments:
#   1    simdir      directory that receives the output files
#   2    simnum      integer trial number used in the output file names
#   3    num_rounds  integer forwarded to xu.xgb_cv
#   4-9  eta, gamma, max_depth, min_child_weight, colsample_bytree, subsample
#
# Files written:
#   <simdir>/param_<simnum>.txt  JSON dump of cv_params
#   <simdir>/log_<simnum>.txt    np.savetxt of the value returned by xu.xgb_cv
if len(sys.argv) < 10:
    # Report a usage line instead of an IndexError traceback when an argument
    # is missing; sys.exit() with a string still exits with status 1.
    sys.exit(
        "usage: %s simdir simnum num_rounds eta gamma max_depth "
        "min_child_weight colsample_bytree subsample" % sys.argv[0]
    )

# set booster parameters
simdir = sys.argv[1]
simnum = int(sys.argv[2])
num_rounds = int(sys.argv[3])
cv_params = {
    "eval_metric": "mlogloss",
    "objective": "multi:softprob",
    "silent": 1,
    "num_class": 9,
    "nthread": 16,
    # The tunable values below are kept as the raw command-line strings, so
    # they are also written to the parameter file as JSON strings.
    "eta": sys.argv[4],
    "gamma": sys.argv[5],
    "max_depth": sys.argv[6],
    "min_child_weight": sys.argv[7],
    "colsample_bytree": sys.argv[8],
    "subsample": sys.argv[9],
    "early_stopping_rounds": 10,
}

# A single parenthesised argument prints identically whether print is a
# statement or a function.
print(cv_params)

# Record the parameters before the cross-validation call, so the settings are
# on disk even if that call fails.
mkdir_p(simdir)
paramfile = "%s/param_%d.txt" % (simdir, simnum)
with open(paramfile, "w") as fp:
    json.dump(cv_params, fp)

# NOTE(review): dtrain is not defined in this fragment; it is expected to be
# set up earlier in the script.
logfile = "%s/log_%d.txt" % (simdir, simnum)
lls = xu.xgb_cv(cv_params, dtrain, num_rounds, nfold=3)
np.savetxt(logfile, lls)
Beispiel #2
0
# Cross-validate a StandardScaler + SVC pipeline configured from the command
# line, then save the score summary and the parameters used.
#
# Positional arguments:
#   1 simdir   directory that receives the output files
#   2 simnum   integer trial number used in the output file names
#   3 kernel   passed to svm.SVC as kernel
#   4 C        float, passed to svm.SVC as C
#   5 gamma    float, passed to svm.SVC as gamma
#
# Files written:
#   <simdir>/<simnum>.txt   one line: mean and standard deviation of the scores
#   <simdir>/<simnum>p.txt  JSON dump of cv_params
if len(sys.argv) < 6:
    # Report a usage line instead of an IndexError traceback when an argument
    # is missing; sys.exit() with a string still exits with status 1.
    sys.exit("usage: %s simdir simnum kernel C gamma" % sys.argv[0])

cv_params = {
    'simdir': sys.argv[1],
    'simnum': int(sys.argv[2]),
    'kernel': sys.argv[3],
    'C': float(sys.argv[4]),
    'gamma': float(sys.argv[5]),
}


clf_svc = svm.SVC(probability=True, verbose=False,
                  C=cv_params['C'], kernel=cv_params['kernel'],
                  gamma=cv_params['gamma'], cache_size=1000)

# Features are standardized before they reach the SVC.
clf = make_pipeline(preprocessing.StandardScaler(), clf_svc)

mkdir_p(cv_params['simdir'])

# NOTE(review): train, labels, n_cvs and logloss_mc are not defined in this
# fragment; they are expected to be set up earlier in the script.
scores = cross_validation.cross_val_score(clf, train, labels,
                                          cv=n_cvs, n_jobs=-1,
                                          scoring=logloss_mc,
                                          verbose=1)
# Formatted explicitly so the output is the same whether print is a statement
# or a function (label, two spaces, then the scores).
print("scores:  %s" % (scores,))

# Build both output names from one prefix. Deriving the parameter file name
# with logfile.replace('.txt', 'p.txt') would also rewrite any '.txt' that
# happens to occur inside simdir and point at a different directory.
out_prefix = '%s/%d' % (cv_params['simdir'], cv_params['simnum'])

logfile = out_prefix + '.txt'
with open(logfile, 'w') as fscores:
    fscores.write('%f %f\n' % (scores.mean(), scores.std()))

pfile = out_prefix + 'p.txt'
with open(pfile, 'w') as f:
    json.dump(cv_params, f)
Beispiel #3
0
    sys.path.append(pardir)


import xgb_utils as xu
import otto_utils as ou

# Exactly one command-line value is consumed: an integer that must be one of
# 0-4 (presumably the cross-validation fold index; confirm against the rest
# of the script). A missing or out-of-range value exits with status 1.
if len(sys.argv) < 2:
    sys.exit(1)

ncv = int(sys.argv[1])
if ncv not in (0, 1, 2, 3, 4):
    sys.exit(1)

# Output goes to kfcv/<ncv>; mkdir_p is called on the parent first and then
# on the per-run directory.
base_dir = 'kfcv'
ou.mkdir_p(base_dir)
simdir = '%s/%d' % (base_dir, ncv)
ou.mkdir_p(simdir)

# Fixed booster settings, kept as a JSON document and decoded in place.
num_rounds = 2000
params = json.loads(
    """{"eval_metric": "mlogloss", "early_stopping_rounds": 10, "colsample_bytree": "0.5", "num_class": 9, "silent": 1, "nthread": 16, "min_child_weight": "4", "subsample": "0.8", "eta": "0.0125","objective": "multi:softprob", "max_depth": "14", "gamma": "0.025"}"""
)

# Input data locations.
train_csv = '../data/train.csv'
train_buf = 'data/train.buffer'