all_B_over_U = data['all_B_over_U'] print 'Done' N = 500 random_hyperparams = { 'count__ngram_range': [(1, n) for n in npr.randint(1, 4, N)], 'count__max_df': npr.uniform(.75, 1.0, N), 'tfidf__norm': npr.choice(['l1', 'l2', None], N), 'tfidf__use_idf': npr.choice([True, False], N), 'svc__base_estimator__C': npr.choice(np.logspace(-3, 4, 10000), N) } score_kwds = { 'xs': train_data['text'], 'ys': train_data['is_multiple'], 'bs': train_data['is_biased'], 'all_B_over_U': all_B_over_U, 'fit_weight_kwd': 'svc__sample_weight', 'n_cv_splits': 5, 'random_seed': random_seed } print 'Starting Experiments...' t0 = time() experiments = random_search(model, random_hyperparams, 'best_svm_mult_silver.pkl', **score_kwds) print 'Done {}:{}:{} seconds. Writing out experiments'.format(*hms(time() - t0)) joblib.dump(experiments, 'svm_mult_silver_dev.pkl') print 'All done'
print 'Done' N = 500 random_hyperparams = { 'count__ngram_range': [(1, n) for n in npr.randint(1, 4, N)], 'count__max_df': npr.uniform(.75, 1.0, N), 'tfidf__norm': npr.choice(['l1', 'l2', None], N), 'tfidf__use_idf': npr.choice([True, False], N), 'rf__n_estimators': npr.choice(range(10, 200), N), 'rf__max_features': npr.choice(['log2', 'sqrt'], N) } score_kwds = { 'xs': train_data['text'], 'ys': train_data['is_multiple'], 'bs': train_data['is_biased'], 'all_B_over_U': all_B_over_U, 'fit_weight_kwd': 'rf__sample_weight', 'n_cv_splits': 5, 'random_seed': random_seed } print 'Starting Experiments...' t0 = time() experiments = random_search(model, random_hyperparams, 'best_rf_mult_gold.pkl', **score_kwds) print 'Done {}:{}:{} seconds. Writing out experiments'.format(*hms(time() - t0)) joblib.dump(experiments, 'rf_mult_gold_dev.pkl') print 'All done'
all_B_over_U = data['all_B_over_U'] print 'Done' N = 500 random_hyperparams = { 'count__ngram_range': [(1, n) for n in npr.randint(1, 4, N)], 'count__max_df': npr.uniform(.75, 1.0, N), 'tfidf__norm': npr.choice(['l1', 'l2', None], N), 'tfidf__use_idf': npr.choice([True, False], N), 'svc__base_estimator__C': npr.choice(np.logspace(-3, 4, 10000), N) } score_kwds = { 'xs': train_data['text'], 'ys': train_data['is_foodborne'], 'bs': train_data['is_biased'], 'all_B_over_U': all_B_over_U, 'fit_weight_kwd': 'svc__sample_weight', 'n_cv_splits': 5, 'random_seed': random_seed } print 'Starting Experiments...' t0 = time() experiments = random_search(model, random_hyperparams, 'best_svm_sick_biased.pkl', **score_kwds) print 'Done {}:{}:{} seconds. Writing out experiments'.format(*hms(time() - t0)) joblib.dump(experiments, 'svm_sick_biased_dev.pkl') print 'All done'
train_data = data['train_data'] all_B_over_U = data['all_B_over_U'] print 'Done' N = 500 random_hyperparams = { 'count__ngram_range':[(1,n) for n in npr.randint(1,4, N)], 'count__max_df':npr.uniform(.75, 1.0, N), 'tfidf__norm':npr.choice(['l1', 'l2', None], N), 'tfidf__use_idf':npr.choice([True, False], N), 'logreg__C':npr.choice(np.logspace(-3,4, 10000), N), 'logreg__penalty':npr.choice(['l1', 'l2'], N) } score_kwds = { 'xs':train_data['text'], 'ys':train_data['is_foodborne'], 'bs':train_data['is_biased'], 'all_B_over_U':all_B_over_U, 'fit_weight_kwd':'logreg__sample_weight', 'n_cv_splits':5, 'random_seed':random_seed } print 'Starting Experiments...' t0 = time() experiments = random_search(model, random_hyperparams, 'best_lr_sick_gold.pkl', **score_kwds) print 'Done {}:{}:{} seconds. Writing out experiments'.format(*hms(time()-t0)) joblib.dump(experiments, 'lr_sick_gold_dev.pkl') print 'All done'