# Base binary classifier wrapped by the multi-label strategies below.
# Alternatives that were tried and are kept for reference:
# binary_model = LinearSVC(random_state=0)
# binary_model = GaussianNB()
# binary_model = BernoulliNB()
binary_model = LogisticRegression()

# Alternative multi-label models, currently disabled:
# model = HOMER(base_clf=OneVsRestClassifier(binary_model, n_jobs=3),
#               k=3,
#               max_iter=20,
#               # random_state=123456,
#               # verbose=True,
#               verbose=False)
# model = OneVsRestClassifier(binary_model, n_jobs=-1)

# Every model in this list is run through the same experiment.
models = [
    # ClassifierChain(binary_model, n_jobs=5, verbose=2),
    OneVsRestClassifier(binary_model, n_jobs=-1),
]

# Single-argument print(...) calls produce the same output under Python 2
# (where the parentheses are plain grouping) and Python 3.
# train_X.shape supplies both %d values, so it must be a 2-tuple.
print("%d samples, %d features of training set:" % train_X.shape)
print(label_summary(train_y))

for model in models:
    # Two spaces after the colon reproduce what the Python 2 statement
    # `print "Using model: ", model` emitted (literal + separator space).
    print("Using model:  %s" % (model,))
    print("#" * 20)
    run_experiment(model, train_X, train_y, test_X, test_y)
    # Blank line after each run.
    # NOTE(review): assumed to belong inside the loop - confirm.
    print("")
# Sample a subset of all the data.
# A fixed seed keeps both the sub-sample and the split reproducible.
rng = np.random.RandomState(0)

# Number of rows to keep; a falsy value (None / 0) means "use everything".
# SAMPLE_N = None
SAMPLE_N = 10000

if SAMPLE_N:
    # Never ask for more rows than exist. Without the clamp, the split below
    # would draw indices up to SAMPLE_N - 1 and index past the end of X
    # whenever X has fewer than SAMPLE_N rows.
    SAMPLE_N = min(SAMPLE_N, X.shape[0])
    print("Sample size: %d" % SAMPLE_N)
    rows = rng.permutation(X.shape[0])[:SAMPLE_N]
    X = X[rows, :]
    y = y[rows, :]
else:
    print("Sample size: all data")
    SAMPLE_N = X.shape[0]

# Split the (possibly sub-sampled) rows into train and test sets.
train_ratio = 0.9
train_n = int(SAMPLE_N * train_ratio)
rows = rng.permutation(SAMPLE_N)
train_rows = rows[:train_n]
test_rows = rows[train_n:]

train_X = X[train_rows, :]
train_y = y[train_rows, :]
test_X = X[test_rows, :]
test_y = y[test_rows, :]

# NOTE(review): `model` and `label_names` are not defined in this part of
# the script; they are expected to exist already - confirm against the
# preceding code.
from exp_util import run_experiment
run_experiment(model, train_X, train_y, test_X, test_y, label_names)