def load_x(ds, preset):
    # Load the raw feature parts and the prediction parts named by the preset.
    feature_parts = [Dataset.load_part(ds, part) for part in preset.get('features', [])]
    prediction_parts = [load_prediction(ds, p, mode=preset.get('predictions_mode', 'fulltrain')) for p in preset.get('predictions', [])]

    # Clip each prediction away from zero and reshape it into a column vector.
    prediction_parts = [p.clip(lower=0.1).values.reshape((p.shape[0], 1)) for p in prediction_parts]

    if 'prediction_transform' in preset:
        # A list comprehension instead of map(): under Python 3, map() returns
        # a lazy iterator, which would break the list concatenation below.
        prediction_parts = [preset['prediction_transform'](p) for p in prediction_parts]

    return hstack(feature_parts + prediction_parts)
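For reference, a minimal sketch of the preset dict that load_x reads; the prediction name 'xgb-base' and the np.log1p transform are illustrative assumptions, not values taken from the source:

import numpy as np

# Hypothetical preset; only the keys matter, the values are made up.
preset = {
    'features': ['numeric', 'categorical_dummy'],  # passed to Dataset.load_part
    'predictions': ['xgb-base'],                   # passed to load_prediction
    'predictions_mode': 'fulltrain',               # the default when absent
    'prediction_transform': np.log1p,              # applied to each column
}

train_x = load_x('train', preset)  # stacked features + clipped predictions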
Example #2
import argparse

import numpy as np

import util  # repo-local helper module (load_kernel, vis_hwoi, stacking, viewer)


def main():
    p = argparse.ArgumentParser(description='Display a kernel.')
    p.add_argument('-out', help='output to *.png file instead of viewing')
    # nargs='+' requires at least one kernel path, so `out` can no longer
    # reach util.border() as None when no files are given.
    p.add_argument('k', nargs='+', help='path to kernel(s)')
    args = p.parse_args()

    out = None

    for fn in args.k:
        print('Loading', fn)
        step, kernel = util.load_kernel(fn)
        print('  Step', step)
        print('  Kernel shape is', kernel.shape)
        print('  Min', np.min(kernel))
        print('  Max', np.max(kernel))
        print('  Mean', np.mean(kernel))
        print('  Sum', np.sum(kernel))
        print('  Sum of abs', np.sum(np.abs(kernel)))
        print('  RMS', np.sqrt(np.mean(kernel * kernel)))

        # Visualize the kernel and attach a filename label alongside it.
        render = util.vis_hwoi(kernel, doubles=2)
        render = util.hstack([render, util.make_label(fn)], 5)

        # Stack each kernel's labeled render vertically into one image.
        if out is None:
            out = render
        else:
            out = util.vstack([out, render], 5)

    out = util.border(out, 5)

    if args.out is not None:
        util.save_image(args.out, out)
        print('Written to', args.out)
    else:
        print('Press ESC to close window.')

        def render_fn():
            return out

        util.viewer(None, render_fn)
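Given the argparse setup above, a typical invocation (the script filename here is hypothetical) is python show_kernel.py -out kernels.png path/to/kernel1 path/to/kernel2, which renders one labeled row per kernel, stacks them vertically, and writes the result to kernels.png instead of opening the viewer.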
Example #3
def render():
    """Returns an image showing the current weights and output."""
    # Closure over names defined in the enclosing script: sess, actual_img,
    # diff, w1, vimg1, vimg2, args and render_time.
    # TODO: vertically align labels.
    t0 = time.time()
    rout, rdiff, rw = sess.run([actual_img, diff, w1])
    render_out = util.vstack([
        util.hstack([
            util.vstack([util.cache_label('input:'), vimg1], 5),
            util.vstack([
                util.cache_label('actual:'),
                util.vis_nhwc(rout, doubles=0, gamma=args.gamma)
            ], 5),
            util.vstack([util.cache_label('expected:'), vimg2], 5),
        ], 5),
        util.cache_label('difference:'),
        util.vis_nhwc(rdiff, doubles=0),
        util.cache_label('kernel:'),
        util.vis_hwoi(rw, doubles=2),
    ], 5)
    render_out = util.border(render_out, 5)
    t1 = time.time()
    render_time[0] += t1 - t0
    return render_out
Example #4
    opt_eval_x = train_x[opt_eval_idx]
    opt_eval_y = train_y[opt_eval_idx]
    opt_eval_r = train_r.slice(opt_eval_idx)

    if len(feature_builders) > 0:  # TODO: Move inside of bagging loop
        print("    Building per-fold features...")

        opt_train_x = [opt_train_x]
        opt_eval_x = [opt_eval_x]

        for fb in feature_builders:
            opt_train_x.append(fb.fit_transform(opt_train_r))
            opt_eval_x.append(fb.transform(opt_eval_r))

        opt_train_x = hstack(opt_train_x)
        opt_eval_x = hstack(opt_eval_x)
    param_grid = preset.get('param_grid', [])

    if len(param_grid) > 0:
        if preset['opt_method'] != "grid_search":
            preset['model'].optimize(
                opt_train_x,
                y_transform(opt_train_y),
                opt_eval_x,
                y_transform(opt_eval_y),
                param_grid,
                eval_func=lambda yt, yp: accuracy_score(
                    y_inv_transform(yt), y_inv_transform(yp)))
        else:
            preset['model'].grid_search(
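The feature builders in this example follow a fit-on-the-training-fold, transform-on-the-eval-fold protocol, so encoding statistics are never computed from evaluation rows. A minimal sketch of a builder honoring that interface, assuming records expose .cat (category codes) and .y (targets); the class name and the target-mean encoding are illustrative, not taken from the source:

import numpy as np

class TargetMeanBuilder:
    """Hypothetical per-fold feature builder: encodes a categorical column
    by the mean target observed in the training fold only."""

    def fit_transform(self, records):
        # Group targets by category using training-fold rows only.
        groups = {}
        for c, y in zip(records.cat, records.y):
            groups.setdefault(c, []).append(y)
        self.global_mean_ = float(np.mean(records.y))
        self.means_ = {c: float(np.mean(v)) for c, v in groups.items()}
        return self.transform(records)

    def transform(self, records):
        # Categories unseen in the training fold fall back to its global mean.
        return np.array([[self.means_.get(c, self.global_mean_)]
                         for c in records.cat])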
Example #5
import numpy as np

from sklearn.decomposition import TruncatedSVD
from sklearn.preprocessing import scale

# Dataset, hstack and vstack are assumed to be the repo's local helpers
# (the same names used in the examples above).
from util import Dataset, hstack, vstack

n_components = 500  # 500 components explain 99.8% of variance

print "Loading data..."

train_num = Dataset.load_part('train', 'numeric')
train_cat = Dataset.load_part('train', 'categorical_dummy')

test_num = Dataset.load_part('test', 'numeric')
test_cat = Dataset.load_part('test', 'categorical_dummy')

train_cnt = train_num.shape[0]

print "Combining data..."

all_data = hstack((
    scale(vstack((train_num, test_num)).astype(np.float64)).astype(np.float32),
    vstack((train_cat, test_cat)),
))

del train_num, train_cat, test_num, test_cat

print "Fitting svd..."

svd = TruncatedSVD(n_components)
res = svd.fit_transform(all_data)

print "Explained variance ratio: %.5f" % np.sum(svd.explained_variance_ratio_)

print "Saving..."

Dataset.save_part_features('svd', ['svd%d' % i for i in range(n_components)])
Dataset(svd=res[:train_cnt]).save('train')
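Downstream code can then reload the saved part through the same Dataset API used elsewhere in these examples, e.g. train_svd = Dataset.load_part('train', 'svd'), a (train_cnt, n_components) matrix (a usage sketch, assuming save_part_features and save register the part under the name 'svd').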