Example #1
def plot():
    import pickle
    import matplotlib.pyplot as plt
    import matplotlib as mpl
    mpl.rcParams['font.family'] = 'serif'
    mpl.rcParams['image.interpolation'] = 'none'
    with open('results.pkl', 'rb') as f:  # pickle files must be read in binary mode
        transform_parser, transform_vects, train_losses, tests_losses = pickle.load(f)

    RS = RandomState((seed, "plotting"))
    fig = plt.figure(0)
    fig.clf()
    ax = fig.add_subplot(111)
    omniglot.show_alphabets(omniglot.load_rotated_alphabets(RS, normalize=False, angle=90), ax=ax)
    ax.plot([0, 20 * 28], [5 * 28, 5 * 28], '--k')
    ax.text(-15, 5 * 28 * 3 / 2 - 60, "Rotated alphabets", rotation='vertical')
    plt.savefig("all_alphabets.png")
    # Plotting transformations
    names = ['no_sharing', 'full_sharing', 'learned_sharing']
    title_strings = {'no_sharing'      : 'Independent nets',
                     'full_sharing'    : 'Shared bottom layer',
                     'learned_sharing' : 'Learned sharing'}
    covar_imgs = {name : build_covar_image(transform_vects[name]) for name in names}

    for i, name in enumerate(names):
        fig = plt.figure(0)
        fig.clf()
        fig.set_size_inches((2, 6))
        ax = fig.add_subplot(111)
        ax.matshow(covar_imgs[name], cmap=mpl.cm.binary)
        ax.set_xticks([])
        ax.set_yticks([])
        plt.savefig('learned_corr_{0}.png'.format(i))
        plt.savefig('learned_corr_{0}.pdf'.format(i))
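
build_covar_image is defined elsewhere in the experiment script. In Example #1 its result is drawn as a single image, while in Examples #2 and #3 it yields one (scripts x scripts) matrix per layer, with values in [0, 1] (matshow is called there with vmin=0.0, vmax=1.0). A minimal sketch of the per-layer variant under those assumptions; the reshape layout and the normalization are guesses, not the script's actual code:

import numpy as np

N_scripts, N_layers = 5, 3  # illustrative sizes only

def build_covar_image(transform_vect):
    # Hypothetical: split the flat transform vector into one
    # (N_scripts x N_scripts) matrix per layer and rescale to [0, 1].
    mats = transform_vect.reshape((N_layers, N_scripts, N_scripts))
    return [np.abs(m) / max(np.abs(m).max(), 1e-12) for m in mats]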
Example #2
def plot():
    import pickle
    import matplotlib.pyplot as plt
    import matplotlib as mpl
    mpl.rcParams['font.family'] = 'serif'
    mpl.rcParams['image.interpolation'] = 'none'
    with open('results.pkl', 'rb') as f:
        transform_parser, transform_vects, train_losses, tests_losses = pickle.load(f)

    RS = RandomState((seed, "plotting"))
    fig = plt.figure(0)
    fig.clf()
    ax = fig.add_subplot(111)
    alphabets = omniglot.load_rotated_alphabets(RS, normalize=False, angle=90)
    num_cols = 15
    num_rows = 5
    omniglot.show_alphabets(alphabets, ax=ax, n_cols=num_cols)
    ax.plot([0, num_cols * 28], [num_rows * 28, num_rows * 28], '--k')
    #ax.text(-15, 5 * 28 * 3 / 2 - 60, "Rotated alphabets", rotation='vertical')
    plt.savefig("all_alphabets.png", bbox_inches='tight')

    # Plotting transformations
    names = ['no_sharing', 'full_sharing', 'learned_sharing']
    title_strings = {
        'no_sharing': 'Independent nets',
        'full_sharing': 'Shared bottom layer',
        'learned_sharing': 'Learned sharing'
    }
    covar_imgs = {
        name: build_covar_image(transform_vects[name])
        for name in names
    }

    for model_ix, model_name in enumerate(names):
        image_list = covar_imgs[model_name]
        for layer_ix, image in enumerate(image_list):
            fig = plt.figure(0)
            fig.clf()
            fig.set_size_inches((1, 1))
            ax = fig.add_subplot(111)
            ax.matshow(image, cmap=mpl.cm.binary, vmin=0.0, vmax=1.0)
            ax.set_xticks([])
            ax.set_yticks([])
            plt.savefig('minifigs/learned_corr_{0}_{1}.png'.format(model_name, layer_ix),
                        bbox_inches='tight')
            plt.savefig('minifigs/learned_corr_{0}_{1}.pdf'.format(model_name, layer_ix),
                        bbox_inches='tight')

    # Write results to a nice latex table for paper.
    with open('results_table.tex', 'w') as f:
        f.write(" & No Sharing & Full Sharing & Learned \\\\\n")
        f.write("Training loss & {:2.2f} & {:2.2f} & {:2.2f} \\\\\n".format(
            train_losses['no_sharing'], train_losses['full_sharing'],
            train_losses['learned_sharing']))
        f.write("Test loss & {:2.2f} & {:2.2f} & \\bf {:2.2f} ".format(
            tests_losses['no_sharing'], tests_losses['full_sharing'],
            tests_losses['learned_sharing']))
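
For reference, the format strings above emit a LaTeX table body of the following shape (the numbers here are placeholders, not results; the fragment is presumably \input into a four-column tabular by the paper source):

 & No Sharing & Full Sharing & Learned \\
Training loss & 1.11 & 1.22 & 1.33 \\
Test loss & 1.11 & 1.22 & \bf 1.33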
Example #3
def plot():
    import pickle
    import matplotlib.pyplot as plt
    import matplotlib as mpl

    mpl.rcParams["font.family"] = "serif"
    mpl.rcParams["image.interpolation"] = "none"
    with open("results.pkl") as f:
        transform_parser, transform_vects, train_losses, tests_losses = pickle.load(f)

    RS = RandomState((seed, "plotting"))
    fig = plt.figure(0)
    fig.clf()
    ax = fig.add_subplot(111)
    alphabets = omniglot.load_rotated_alphabets(RS, normalize=False, angle=90)
    num_cols = 15
    num_rows = 5
    omniglot.show_alphabets(alphabets, ax=ax, n_cols=num_cols)
    ax.plot([0, num_cols * 28], [num_rows * 28, num_rows * 28], "--k")
    # ax.text(-15, 5 * 28 * 3 / 2 - 60, "Rotated alphabets", rotation='vertical')
    plt.savefig("all_alphabets.png", bbox_inches="tight")

    # Plotting transformations
    names = ["no_sharing", "full_sharing", "learned_sharing"]
    title_strings = {
        "no_sharing": "Independent nets",
        "full_sharing": "Shared bottom layer",
        "learned_sharing": "Learned sharing",
    }
    covar_imgs = {name: build_covar_image(transform_vects[name]) for name in names}

    for model_ix, model_name in enumerate(names):
        image_list = covar_imgs[model_name]
        for layer_ix, image in enumerate(image_list):
            fig = plt.figure(0)
            fig.clf()
            fig.set_size_inches((1, 1))
            ax = fig.add_subplot(111)
            ax.matshow(image, cmap=mpl.cm.binary, vmin=0.0, vmax=1.0)
            ax.set_xticks([])
            ax.set_yticks([])
            plt.savefig("minifigs/learned_corr_{0}_{1}.png".format(model_name, layer_ix), bbox_inches="tight")
            plt.savefig("minifigs/learned_corr_{0}_{1}.pdf".format(model_name, layer_ix), bbox_inches="tight")

    # Write results to a nice latex table for paper.
    with open("results_table.tex", "w") as f:
        f.write(" & No Sharing & Full Sharing & Learned \\\\\n")
        f.write(
            "Training loss & {:2.2f} & {:2.2f} & {:2.2f} \\\\\n".format(
                train_losses["no_sharing"], train_losses["full_sharing"], train_losses["learned_sharing"]
            )
        )
        f.write(
            "Test loss & {:2.2f} & {:2.2f} & \\bf {:2.2f} ".format(
                tests_losses["no_sharing"], tests_losses["full_sharing"], tests_losses["learned_sharing"]
            )
        )
Example #4
def run():
    RS = RandomState((seed, "top_rs"))
    all_data = omniglot.load_rotated_alphabets(RS)
    train_data, tests_data = random_partition(all_data, RS, [12, 3])
    w_parser, pred_fun, loss_fun, frac_err = make_nn_funs(layer_sizes)
    N_weights = w_parser.vect.size
    script_parser = VectorParser()
    for i_script in range(N_scripts):
        script_parser[i_script] = np.zeros(N_weights)
    transform_parser = make_transform([0] * N_layers)

    def get_layers(vect):
        layers = []
        for i_layer in range(N_layers):
            weights_by_scripts = vect.reshape((N_scripts, N_weights))
            weights_idxs, _ = w_parser.idxs_and_shapes[('weights', i_layer)]
            biases_idxs, _  = w_parser.idxs_and_shapes[('biases',  i_layer)]
            assert weights_idxs.stop == biases_idxs.start
            layer_idxs = slice(weights_idxs.start, biases_idxs.stop)
            layers.append(weights_by_scripts[:, layer_idxs])
        return layers

    def transform_weights(z_vect, transform_vect):
        z_layers = get_layers(z_vect)
        transform = transform_parser.new_vect(transform_vect)
        w_layers = [np.dot(transform[i], z) for i, z in enumerate(z_layers)]
        return np.concatenate(w_layers, axis=1).ravel()

    def likelihood_loss(w_vect, data):
        w = script_parser.new_vect(w_vect)
        return sum([loss_fun(w[i], **script_data) for i, script_data in enumerate(data)])

    def regularization(z_vect):
        return np.dot(z_vect, z_vect) * np.exp(log_L2)

    def train_z(data, transform_vect, RS):
        def primal_loss(z_vect, transform_vect, i_primal, record_results=False):
            w_vect = transform_weights(z_vect, transform_vect)
            loss = likelihood_loss(w_vect, data)
            reg = regularization(z_vect)
            if record_results and i_primal % N_thin == 0:
                print "Iter {0}: train: {1}".format(i_primal, getval(loss) / N_scripts)
            return loss + reg
        z_vect_0 = RS.randn(script_parser.vect.size) * np.exp(log_init_scale)
        return sgd(grad(primal_loss), transform_vect, z_vect_0, alpha, beta, N_iters)

    def train_sharing():
        def hyperloss(transform_vect, i_hyper):
            RS = RandomState((seed, i_hyper, "hyperloss"))
            cur_train_data, cur_valid_data = random_partition(train_data, RS, [10, 2])
            z_vect_final = train_z(cur_train_data, transform_vect, RS)
            w_vect_final = transform_weights(z_vect_final, transform_vect)
            return likelihood_loss(w_vect_final, cur_valid_data) / N_scripts
        hypergrad = grad(hyperloss)
        cur_transform_vect = make_transform([init_script_corr] * N_layers).vect
        for i_hyper in range(N_meta_iter):
            print "Hyper iter {0}".format(i_hyper)
            grad_transform = hypergrad(cur_transform_vect, i_hyper)
            cur_transform_vect = cur_transform_vect - grad_transform * meta_alpha
        return cur_transform_vect

    transform_vects, train_losses, tests_losses = {}, {}, {}
    transform_vects['no_sharing']      = make_transform([0, 0, 0]).vect
    transform_vects['full_sharing']    = make_transform([1, 0, 0]).vect
    transform_vects['learned_sharing'] = train_sharing()
    for name in transform_vects.keys():
        RS = RandomState("final_training")
        tv = transform_vects[name]
        trained_z = train_z(train_data, tv, RS)
        trained_w = transform_weights(trained_z, tv)
        train_losses[name] = likelihood_loss(trained_w, train_data) / N_scripts
        tests_losses[name] = likelihood_loss(trained_w, tests_data) / N_scripts
        print "{0} : train: {1}, test: {2}".format(name, train_losses[name], tests_losses[name])
    return transform_parser, transform_vects, train_losses, tests_losses
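
The mechanics of get_layers and transform_weights are easiest to see on toy shapes: the flat z vector is reshaped to (N_scripts x N_weights), each layer's contiguous weights-plus-biases block is sliced out, and a (scripts x scripts) matrix mixes the scripts' parameters layerwise. An identity matrix leaves the nets independent ('no_sharing'); a uniform averaging matrix ties them together ('full_sharing'). A self-contained illustration with explicit matrices standing in for make_transform, whose parameterization is not shown here:

import numpy as np

N_scripts, layer_size = 3, 4  # toy sizes, not the experiment's values
Z = np.arange(N_scripts * layer_size, dtype=float).reshape((N_scripts, layer_size))

T_independent = np.eye(N_scripts)                            # no sharing
T_shared = np.full((N_scripts, N_scripts), 1.0 / N_scripts)  # full sharing

W_ind = np.dot(T_independent, Z)  # each script keeps its own weights
W_shr = np.dot(T_shared, Z)       # every script gets the average weights

assert np.allclose(W_ind, Z)
assert np.allclose(W_shr, W_shr[0])  # identical rows under full sharing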
Example #5
def run():
    """Three different parsers:
    w_parser[('biases', i_layer)] : neural net weights/biases per layer for a single  script
    script_parser[i_script]       : weights vector for each script
    transform_parser[i_layer]     : transform matrix (scripts x scripts) for each alphabet"""
    RS = RandomState((seed, "top_rs"))
    train_data, valid_data, tests_data = omniglot.load_rotated_alphabets([11, 2, 2], RS)
    w_parser, pred_fun, loss_fun, frac_err = make_nn_funs(layer_sizes)
    N_weights = w_parser.vect.size
    transform_parser = make_transform(N_scripts, script_corr_init)
    script_parser = VectorParser()
    for i_script in range(N_scripts):
        script_parser[i_script] = np.zeros(N_weights)

    def get_layers(vect):
        layers = []
        for i_layer in range(N_layers):
            weights_by_scripts = vect.reshape((N_scripts, N_weights))
            weights_idxs, _ = w_parser.idxs_and_shapes[('weights', i_layer)]
            biases_idxs, _ = w_parser.idxs_and_shapes[('biases', i_layer)]
            assert weights_idxs.stop == biases_idxs.start
            layer_idxs = slice(weights_idxs.start, biases_idxs.stop)
            layers.append(weights_by_scripts[:, layer_idxs])
        return layers

    def transform_weights(z_vect, transform_vect):
        z_layers = get_layers(z_vect)
        transform = transform_parser.new_vect(transform_vect)
        w_layers = [np.dot(transform[i], z) for i, z in enumerate(z_layers)]
        return np.concatenate(w_layers, axis=1).ravel()

    def total_loss(w_vect, data):
        w = script_parser.new_vect(w_vect)
        return sum([loss_fun(w[i], **script_data) for i, script_data in enumerate(data)])

    def regularization(z_vect):
        return np.dot(z_vect, z_vect) * np.exp(log_L2_init)

    results = defaultdict(list)

    def hyperloss(transform_vect, i_hyper, record_results=True):
        RS = RandomState((seed, i_hyper, "hyperloss"))

        def primal_loss(z_vect, transform_vect, i_primal, record_results=False):
            w_vect = transform_weights(z_vect, transform_vect)
            loss = total_loss(w_vect, train_data)
            reg = regularization(z_vect)
            if VERBOSE and record_results and i_primal % N_thin == 0:
                print "Iter {0}: train: {1}, valid: {2}, reg: {3}".format(
                    i_primal,
                    getval(loss) / N_scripts,
                    total_loss(getval(w_vect), valid_data) / N_scripts,
                    getval(reg))
            return loss + reg

        z_vect_0 = RS.randn(script_parser.vect.size) * np.exp(log_initialization_scale)
        z_vect_final = sgd(grad(primal_loss), transform_vect, z_vect_0,
                           alpha, beta, N_iters, callback=None)
        w_vect_final = transform_weights(z_vect_final, transform_vect)
        valid_loss = total_loss(w_vect_final, valid_data)
        if record_results:
            results['valid_loss'].append(getval(valid_loss) / N_scripts)
            results['train_loss'].append(
                total_loss(getval(w_vect_final), train_data) / N_scripts)
            results['tests_loss'].append(
                total_loss(getval(w_vect_final), tests_data) / N_scripts)
        return valid_loss

    all_transforms = []
    hypergrad = grad(hyperloss)
    for i_hyper in range(N_meta_iter):
        grad_transform = hypergrad(transform_parser.vect, i_hyper, record_results=True)
        transform_parser.vect = transform_parser.vect - grad_transform * meta_alpha
        all_transforms.append(transform_parser.as_dict())
        print "Hyper iter {0}".format(i_hyper)
        print "Results", {k: v[-1] for k, v in results.iteritems()}

    return results, all_transforms
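
sgd comes from the surrounding hypergrad code and is itself differentiated through by grad(hyperloss), so its real implementation supports exact reverse-mode hypergradients. As a sketch of the call signature only, assuming alpha is the step size and beta the momentum (one common convention):

import numpy as np

def sgd(gradfun, meta_vect, x0, alpha, beta, N_iters, callback=None):
    # Sketch: plain momentum SGD matching the call sites above, where
    # gradfun(x, meta_vect, i) is grad(primal_loss).
    x, v = x0.copy(), np.zeros_like(x0)
    for i in range(N_iters):
        g = gradfun(x, meta_vect, i)
        v = beta * v - (1.0 - beta) * g
        x = x + alpha * v
        if callback is not None:
            callback(x, i)
    return x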
Example #6
def run():
    """Three different parsers:
    w_parser[('biases', i_layer)] : neural net weights/biases per layer for a single  script
    script_parser[i_script]       : weights vector for each script
    transform_parser[i_layer]     : transform matrix (scripts x scripts) for each alphabet"""
    RS = RandomState((seed, "top_rs"))
    train_data, valid_data, tests_data = omniglot.load_rotated_alphabets([11, 2, 2], RS)
    w_parser, pred_fun, loss_fun, frac_err = make_nn_funs(layer_sizes)
    N_weights = w_parser.vect.size
    transform_parser = make_transform(N_scripts, script_corr_init)
    script_parser = VectorParser()
    for i_script in range(N_scripts):
        script_parser[i_script] = np.zeros(N_weights)

    def get_layers(vect):
        layers = []
        for i_layer in range(N_layers):
            weights_by_scripts = vect.reshape((N_scripts, N_weights))
            weights_idxs, _ = w_parser.idxs_and_shapes[('weights', i_layer)]
            biases_idxs, _  = w_parser.idxs_and_shapes[('biases',  i_layer)]
            assert weights_idxs.stop == biases_idxs.start
            layer_idxs = slice(weights_idxs.start, biases_idxs.stop)
            layers.append(weights_by_scripts[:, layer_idxs])
        return layers

    def transform_weights(z_vect, transform_vect):
        z_layers = get_layers(z_vect)
        transform = transform_parser.new_vect(transform_vect)
        w_layers = [np.dot(transform[i], z) for i, z in enumerate(z_layers)]
        return np.concatenate(w_layers, axis=1).ravel()

    def total_loss(w_vect, data):
        w = script_parser.new_vect(w_vect)
        return sum([loss_fun(w[i], **script_data) for i, script_data in enumerate(data)])

    def regularization(z_vect):
        return np.dot(z_vect, z_vect) * np.exp(log_L2_init)

    results = defaultdict(list)
    def hyperloss(transform_vect, i_hyper, record_results=True):
        RS = RandomState((seed, i_hyper, "hyperloss"))
        def primal_loss(z_vect, transform_vect, i_primal, record_results=False):
            w_vect = transform_weights(z_vect, transform_vect)
            loss = total_loss(w_vect, train_data)
            reg = regularization(z_vect)
            if VERBOSE and record_results and i_primal % N_thin == 0:
                print "Iter {0}: train: {1}, valid: {2}, reg: {3}".format(
                    i_primal,
                    getval(loss) / N_scripts,
                    total_loss(getval(w_vect), valid_data) / N_scripts,
                    getval(reg))
            return loss + reg

        z_vect_0 = RS.randn(script_parser.vect.size) * np.exp(log_initialization_scale)
        z_vect_final = sgd(grad(primal_loss), transform_vect, z_vect_0,
                           alpha, beta, N_iters, callback=None)
        w_vect_final = transform_weights(z_vect_final, transform_vect)
        valid_loss = total_loss(w_vect_final, valid_data)
        if record_results:
            results['valid_loss'].append(getval(valid_loss) / N_scripts)
            results['train_loss'].append(total_loss(getval(w_vect_final), train_data) / N_scripts)
            results['tests_loss'].append(total_loss(getval(w_vect_final), tests_data) / N_scripts)
        return valid_loss

    all_transforms = []
    hypergrad = grad(hyperloss)
    for i_hyper in range(N_meta_iter):
        grad_transform = hypergrad(transform_parser.vect, i_hyper, record_results=True)
        transform_parser.vect = transform_parser.vect - grad_transform * meta_alpha
        all_transforms.append(transform_parser.as_dict())
        print "Hyper iter {0}".format(i_hyper)
        print "Results", {k : v[-1] for k, v in results.iteritems()}

    return results, all_transforms
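
Every parser in these examples is a VectorParser from the surrounding codebase: it packs named arrays into one flat vector and remembers each name's slice, which is what idxs_and_shapes, new_vect, vect, and as_dict rely on. A compact sketch of the assumed behavior, not the library's actual implementation:

import numpy as np

class VectorParser(object):
    # Sketch: maps names to slices of a single flat vector.
    def __init__(self):
        self.idxs_and_shapes = {}
        self.vect = np.zeros(0)

    def __setitem__(self, name, arr):
        arr = np.asarray(arr)
        start = self.vect.size
        self.idxs_and_shapes[name] = (slice(start, start + arr.size), arr.shape)
        self.vect = np.concatenate([self.vect, arr.ravel()])

    def __getitem__(self, name):
        idxs, shape = self.idxs_and_shapes[name]
        return self.vect[idxs].reshape(shape)

    def new_vect(self, vect):
        # View a different flat vector through the same layout.
        assert vect.size == self.vect.size
        new = VectorParser()
        new.idxs_and_shapes = self.idxs_and_shapes
        new.vect = vect
        return new

    def as_dict(self):
        return {name: self[name] for name in self.idxs_and_shapes}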