all_B_over_U = data['all_B_over_U']
# Parenthesised single-argument print produces the same output under the
# Python 2 print statement and the Python 3 print function, so the script
# no longer fails to parse on Python 3.
print('Done')

# Number of random hyperparameter settings to sample.
N = 500
# Each entry holds N independently drawn values for one hyperparameter.
# NOTE(review): the 'count__' / 'tfidf__' / 'svc__' prefixes presumably
# address named steps of `model` -- confirm where `model` is built.
random_hyperparams = {
    # n-gram range (1, n) with n in {1, 2, 3}; randint's upper bound is
    # exclusive.
    'count__ngram_range': [(1, n) for n in npr.randint(1, 4, N)],
    # Uniform draw from [0.75, 1.0).
    'count__max_df': npr.uniform(.75, 1.0, N),
    'tfidf__norm': npr.choice(['l1', 'l2', None], N),
    'tfidf__use_idf': npr.choice([True, False], N),
    # Picked from a 10000-point log-spaced grid spanning 1e-3 .. 1e4.
    'svc__base_estimator__C': npr.choice(np.logspace(-3, 4, 10000), N)
}

# Keyword arguments forwarded unchanged to random_search.
score_kwds = {
    'xs': train_data['text'],
    'ys': train_data['is_multiple'],
    'bs': train_data['is_biased'],
    'all_B_over_U': all_B_over_U,
    'fit_weight_kwd': 'svc__sample_weight',
    'n_cv_splits': 5,
    'random_seed': random_seed
}

print('Starting Experiments...')
t0 = time()
experiments = random_search(model, random_hyperparams,
                            'best_svm_mult_silver.pkl', **score_kwds)
# hms is unpacked into the three '{}' placeholders below.
# NOTE(review): presumably it returns (hours, minutes, seconds) -- confirm.
elapsed = hms(time() - t0)
print('Done {}:{}:{} seconds. Writing out experiments'.format(*elapsed))
joblib.dump(experiments, 'svm_mult_silver_dev.pkl')
print('All done')
# Example no. 2
0
    # Record this step's evaluation summaries: the test loss under the "xent"
    # tag, followed by whatever tag_values holds.
    test_summaries_writer.add_summary(
        u.explicit_summaries({"xent": test_loss}), step)
    test_summaries_writer.add_summary(u.explicit_summaries(tag_values), step)
    # One image summary per debug image, tagged with its position in the list.
    for idx, img in enumerate(stats['debug_imgs']):
        debug_img_summary = u.pil_image_to_tf_summary(img,
                                                      tag="debug_img_%d" % idx)
        test_summaries_writer.add_summary(debug_img_summary, step)
    test_summaries_writer.flush()

    # report one liner
    # The fields are collected in a list and joined with tabs into one line.
    log = []
    log.append("step %d/%d" % (step, opts.steps))
    # Whole seconds elapsed since start_time.
    log.append("time %d" % int(time.time() - start_time))
    log.append("train_loss %f" % train_loss)
    # NOTE(review): test_loss is formatted with %s while train_loss uses %f --
    # presumably test_loss is not always a plain float; confirm.
    log.append("test_loss %s" % test_loss)
    log.append("test stats { p:%0.2f, r:%0.2f, f1:%0.2f }" %
               tuple([stats[k] for k in ['precision', 'recall', 'f1']]))
    print("\t".join(log))

    # check if done by steps or time
    # NOTE(review): `done` is presumably read by the enclosing loop, which is
    # outside this view -- confirm.
    step += 1  # TODO: fetch global_step from keras model (?)
    if step >= opts.steps:
        done = True
    # The time budget is optional; it is only checked when opts.secs is set.
    if opts.secs is not None:
        run_time = time.time() - start_time
        remaining_time = opts.secs - run_time
        # NOTE(review): this is printed before the check below, so u.hms can
        # receive a negative remaining_time on the final pass -- confirm it
        # handles that.
        print("run_time %s remaining_time %s" %
              (u.hms(run_time), u.hms(remaining_time)))
        if remaining_time < 0:
            done = True