Example #1
def plot():

    import pickle  # needed for pickle.load below
    import matplotlib.pyplot as plt
    from matplotlib import rc
    rc('font',**{'family':'serif'})

    with open('results.pkl') as f:
        parser, parsed_avg_hypergrad = pickle.load(f)

    #rc('text', usetex=True)

    # ----- Small versions of stepsize schedules for paper -----
    fig = plt.figure(0)
    fig.clf()
    ax = fig.add_subplot(111)
    def layer_name(weight_key):
        return "Layer {num}".format(num=weight_key[1] + 1)
    for cur_results, name in zip(parsed_avg_hypergrad['log_alphas'].T, parser.names):
        if name[0] == 'weights':
            ax.plot(cur_results, 'o-', label=layer_name(name))
    low, high = ax.get_ylim()
    #ax.set_ylim([0, high])
    ax.set_ylabel('Learning rate gradient')
    ax.set_xlabel('Schedule index')
    ax.set_yticks([0,])
    ax.set_yticklabels(['0',])
    fig.set_size_inches((6,2.5))
    #ax.legend(numpoints=1, loc=1, frameon=False, prop={'size':'12'})
    plt.savefig('schedules_small.pdf', pad_inches=0.05, bbox_inches='tight')


    # ----- Alpha and beta initial hypergradients -----
    print "Plotting initial gradients..."
    fig = plt.figure(0)
    fig.clf()
    ax = fig.add_subplot(411)
    for cur_results, name in zip(parsed_avg_hypergrad['log_alphas'].T, parser.names):
        if name[0] == 'weights':
            ax.plot(cur_results, 'o-', label=nice_layer_name(name))
    ax.set_ylabel('Step size Gradient', fontproperties='serif')
    ax.set_xticklabels([])
    ax.legend(numpoints=1, loc=1, frameon=False, bbox_to_anchor=(1.0, 0.5),
              prop={'family':'serif', 'size':'12'})

    ax = fig.add_subplot(412)
    for cur_results, name in zip(parsed_avg_hypergrad['invlogit_betas'].T, parser.names):
        if name[0] == 'weights':
            ax.plot(cur_results, 'o-', label=nice_layer_name(name))
    ax.set_xlabel('Learning Iteration', fontproperties='serif')
    ax.set_ylabel('Momentum Gradient', fontproperties='serif')

    ax = fig.add_subplot(413)
    for cur_results, name in zip(parsed_avg_hypergrad['log_alphas'].T, parser.names):
        if name[0] == 'biases':
            ax.plot(cur_results, 'o-', label=nice_layer_name(name))
    ax.set_ylabel('Step size Gradient', fontproperties='serif')
    ax.set_xticklabels([])
    ax.legend(numpoints=1, loc=1, frameon=False, bbox_to_anchor=(1.0, 0.5),
              prop={'family':'serif', 'size':'12'})

    ax = fig.add_subplot(414)
    for cur_results, name in zip(parsed_avg_hypergrad['invlogit_betas'].T, parser.names):
        if name[0] == 'biases':
            ax.plot(cur_results, 'o-', label=nice_layer_name(name))
    ax.set_xlabel('Learning Iteration', fontproperties='serif')
    ax.set_ylabel('Momentum Gradient', fontproperties='serif')

    fig.set_size_inches((6,8))
    #plt.show()
    plt.savefig('initial_gradient.png')
    plt.savefig('initial_gradient.pdf', pad_inches=0.05, bbox_inches='tight')
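
# plot() above also calls nice_layer_name, which is not defined in this snippet and is
# assumed to be a module-level helper. A minimal sketch of such a helper (hypothetical,
# inferred only from how it is called on keys like ('weights', 0)):
def nice_layer_name(weight_key):
    # e.g. ('weights', 0) -> "Layer 1 Weights"
    return "Layer {num} {name}".format(num=weight_key[1] + 1,
                                       name=weight_key[0].capitalize())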
Example #2
def plot():

    import pickle       # needed for pickle.load below
    import numpy as np  # needed for np.exp / np.sqrt below
    import matplotlib.pyplot as plt
    from matplotlib import rc
    rc('font',**{'family':'serif'})

    with open('results.pkl') as f:
        results, parser, parsed_init_hypergrad = pickle.load(f)

    #rc('text', usetex=True)

    # ----- Small versions of stepsize schedules for paper -----
    fig = plt.figure(0)
    fig.clf()
    ax = fig.add_subplot(111)
    def layer_name(weight_key):
        return "Layer {num}".format(num=weight_key[1] + 1)
    for cur_results, name in zip(results['log_alphas'][-1].T, parser.names):
        if name[0] == 'weights':
            ax.plot(np.exp(cur_results), 'o-', label=layer_name(name))
    low, high = ax.get_ylim()
    ax.set_ylim([0, high])
    ax.set_ylabel('Learning rate')
    ax.set_xlabel('Schedule index')
    fig.set_size_inches((6,2.5))
    ax.legend(numpoints=1, loc=1, frameon=False, prop={'size':'12'})
    plt.savefig('schedules_small.pdf', pad_inches=0.05, bbox_inches='tight')


    # ----- Alpha and beta initial hypergradients -----
    print "Plotting initial gradients..."
    fig = plt.figure(0)
    fig.clf()
    ax = fig.add_subplot(411)
    for cur_results, name in zip(parsed_init_hypergrad['log_alphas'].T, parser.names):
        if name[0] == 'weights':
            ax.plot(cur_results, 'o-', label=nice_layer_name(name))
    ax.set_ylabel('Step size Gradient', fontproperties='serif')
    ax.set_xticklabels([])
    ax.legend(numpoints=1, loc=1, frameon=False, bbox_to_anchor=(1.0, 0.5),
              prop={'family':'serif', 'size':'12'})

    ax = fig.add_subplot(412)
    for cur_results, name in zip(parsed_init_hypergrad['invlogit_betas'].T, parser.names):
        if name[0] == 'weights':
            ax.plot(cur_results, 'o-', label=nice_layer_name(name))
    ax.set_xlabel('Learning Iteration', fontproperties='serif')
    ax.set_ylabel('Momentum Gradient', fontproperties='serif')

    ax = fig.add_subplot(413)
    for cur_results, name in zip(parsed_init_hypergrad['log_alphas'].T, parser.names):
        if name[0] == 'biases':
            ax.plot(cur_results, 'o-', label=nice_layer_name(name))
    ax.set_ylabel('Step size Gradient', fontproperties='serif')
    ax.set_xticklabels([])
    ax.legend(numpoints=1, loc=1, frameon=False, bbox_to_anchor=(1.0, 0.5),
              prop={'family':'serif', 'size':'12'})

    ax = fig.add_subplot(414)
    for cur_results, name in zip(parsed_init_hypergrad['invlogit_betas'].T, parser.names):
        if name[0] == 'biases':
            ax.plot(cur_results, 'o-', label=nice_layer_name(name))
    ax.set_xlabel('Learning Iteration', fontproperties='serif')
    ax.set_ylabel('Momentum Gradient', fontproperties='serif')

    fig.set_size_inches((6,8))
    #plt.show()
    plt.savefig('initial_gradient.png')
    plt.savefig('initial_gradient.pdf', pad_inches=0.05, bbox_inches='tight')


    # ----- Nice versions of Alpha and beta schedules for paper -----
    print "Plotting full alpha and beta schedules curves..."
    fig = plt.figure(0)
    fig.clf()
    ax = fig.add_subplot(411)
    for cur_results, name in zip(results['log_alphas'][-1].T, parser.names):
        if name[0] == 'weights':
            ax.plot(np.exp(cur_results), 'o-', label=nice_layer_name(name))
    low, high = ax.get_ylim()
    ax.set_ylim([0, high])
    ax.set_ylabel('Step size', fontproperties='serif')
    ax.set_xticklabels([])
    ax.legend(numpoints=1, loc=1, frameon=False, bbox_to_anchor=(1.0, 0.5),
              prop={'family':'serif', 'size':'12'})

    ax = fig.add_subplot(412)
    for cur_results, name in zip(results['invlogit_betas'][-1].T, parser.names):
        if name[0] == 'weights':
            ax.plot(logit(cur_results), 'o-', label=nice_layer_name(name))
    ax.set_ylim([0, 1])
    ax.set_xlabel('Learning Iteration', fontproperties='serif')
    ax.set_ylabel('Momentum', fontproperties='serif')

    ax = fig.add_subplot(413)
    for cur_results, name in zip(results['log_alphas'][-1].T, parser.names):
        if name[0] == 'biases':
            ax.plot(np.exp(cur_results), 'o-', label=nice_layer_name(name))
    low, high = ax.get_ylim()
    ax.set_ylim([0, high])
    ax.set_ylabel('Step size', fontproperties='serif')
    ax.set_xticklabels([])
    ax.legend(numpoints=1, loc=1, frameon=False, bbox_to_anchor=(1.0, 0.5),
              prop={'family':'serif', 'size':'12'})

    ax = fig.add_subplot(414)
    for cur_results, name in zip(results['invlogit_betas'][-1].T, parser.names):
        if name[0] == 'biases':
            ax.plot(logit(cur_results), 'o-', label=nice_layer_name(name))
    ax.set_ylim([0, 1])
    ax.set_xlabel('Learning Iteration', fontproperties='serif')
    ax.set_ylabel('Momentum', fontproperties='serif')
    fig.set_size_inches((6,8))
    #plt.show()
    plt.savefig('alpha_beta_paper.png')
    plt.savefig('alpha_beta_paper.pdf', pad_inches=0.05, bbox_inches='tight')



    print "Plotting learning curves..."
    fig.clf()
    fig.set_size_inches((6,8))
    # ----- Primal learning curves -----
    ax = fig.add_subplot(311)
    ax.set_title('Primal learning curves')
    for i, y in enumerate(results['learning_curves']):
        ax.plot(y['learning_curve'], 'o-', label='Meta iter {0}'.format(i))
    ax.set_xlabel('Epoch number')
    ax.set_ylabel('Negative log prob')
    #ax.legend(loc=1, frameon=False)

    ax = fig.add_subplot(312)
    ax.set_title('Meta learning curves')
    losses = ['train_loss', 'valid_loss', 'tests_loss']
    for loss_type in losses:
        ax.plot(results[loss_type], 'o-', label=loss_type)
    ax.set_xlabel('Meta iter number')
    ax.set_ylabel('Negative log prob')
    ax.legend(loc=1, frameon=False)

    ax = fig.add_subplot(313)
    ax.set_title('Meta-gradient magnitude and angle')
    ax.plot(results['meta_grad_magnitude'], 'o-', label='Meta-gradient magnitude')
    ax.plot(results['meta_grad_angle'], 'o-', label='Meta-gradient angle')
    ax.set_xlabel('Meta iter number')
    ax.set_ylabel('Meta-gradient Magnitude')
    ax.legend(loc=1, frameon=False)

    plt.savefig('learning_curves.png')

    # ----- Learning curve info -----
    print "Plotting extra learning curves..."
    fig.clf()
    ax = fig.add_subplot(311)
    ax.set_title('Primal learning curves')
    for i, y in enumerate(results['learning_curves']):
        ax.plot(y['grad_norm'], 'o-', label='Meta iter {0}'.format(i))
    ax.set_xlabel('Epoch number')
    #ax.legend(loc=1, frameon=False)
    ax.set_title('Grad norm')

    ax = fig.add_subplot(312)
    for i, y in enumerate(results['learning_curves']):
        ax.plot(y['weight_norm'], 'o-', label='Meta iter {0}'.format(i))
    ax.set_xlabel('Epoch number')
    ax.legend(loc=1, frameon=False)
    ax.set_title('Weight norm')

    ax = fig.add_subplot(313)
    for i, y in enumerate(results['learning_curves']):
        ax.plot(y['velocity_norm'], 'o-', label='Meta iter {0}'.format(i))
    ax.set_xlabel('Epoch number')
    ax.set_title('Velocity norm')
    ax.legend(loc=1, frameon=False)
    plt.savefig('extra_learning_curves.png')

    # ----- Init scale and L2 reg -----
    print "Plotting initialization distributions and regularization..."
    fig.clf()
    ax = fig.add_subplot(111)
    #ax.set_title('Init scale learning curves')
    for i, y in enumerate(zip(*results['log_param_scale'])):
        if parser.names[i][0] == 'weights':
            ax.plot(np.exp(y), 'o-', label=layer_name(parser.names[i]))
    # Show lines for theoretical optimum.
    y1 = 1.0/np.sqrt(layer_sizes[0])
    y2 = 1.0/np.sqrt(layer_sizes[1])
    ax.plot(ax.get_xlim(), (y1, y1), 'b--')
    ax.plot(ax.get_xlim(), (y2, y2), 'k--')
    ax.set_xlabel('Meta iteration')
    ax.set_ylabel('Initial scale')
    #ax.set_yscale('log')
    #ax.legend(loc=1, frameon=False)

    fig.set_size_inches((2.5,2.5))
    #ax.legend(numpoints=1, loc=1, frameon=False, prop={'size':'12'})
    plt.savefig('init_weight_learning_curve.pdf', pad_inches=0.05, bbox_inches='tight')

    fig.clf()
    ax = fig.add_subplot(111)
    #ax.set_title('Init scale learning curves')
    for i, y in enumerate(zip(*results['log_param_scale'])):
        if parser.names[i][0] == 'biases':
            ax.plot(np.exp(y), 'o-', label=layer_name(parser.names[i]))
    ax.set_xlabel('Meta iteration')
    #ax.set_ylabel('Scale')
    #ax.set_yscale('log')
    #ax.set_ylabel('Log param scale')
    fig.set_size_inches((2.5,2.5))
    ax.legend(numpoints=1, loc=0, frameon=False, prop={'size':'10'})
    plt.savefig('init_bias_learning_curve.pdf', pad_inches=0.05, bbox_inches='tight')
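
# In addition to its local imports, plot() above relies on module-level names not shown in
# this snippet: nice_layer_name (see the sketch after Example #1), logit, and layer_sizes.
# A minimal sketch of the latter two (hypothetical; inferred only from how they are used
# above, where logit maps the unconstrained beta schedule into (0, 1) and layer_sizes
# holds the network's layer widths):
import numpy as np

def logit(x):
    # Squash unconstrained values into (0, 1), matching ax.set_ylim([0, 1]) above.
    return 1.0 / (1.0 + np.exp(-x))

layer_sizes = [784, 50, 50, 10]  # placeholder architecture; the real values come from the experiment

if __name__ == '__main__':
    plot()  # expects results.pkl from the corresponding experiment in the working directory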