def display_results_task_f_4D_simple_ReLu_BT():
    # formulas for the number of model parameters as a function of units (shallow) or filters (bt)
    get_k = lambda a: 7*a + 4*3*2*a**2
    shallow = lambda k: 4*k + k + k
    bt = lambda f: 2*f + f + 2*f*(2*f) + 2*(2*f)
    get_f = lambda a: 3*2*a
    #
    get_errors_from = mtf.get_errors_based_on_train_error
    #get_errors_from = mtf.get_errors_based_on_validation_error
    decider = ns.Namespace(get_errors_from=get_errors_from)

    task_name = 'task_f_4D_simple_ReLu_BT'
    experiment_name = mtf.get_experiment_folder(task_name)

    (X_train, Y_train, X_cv, Y_cv, X_test, Y_test) = mtf.get_data(task_name)
    var_Y_test = np.var(Y_test)
    print('var_Y_test: ', var_Y_test)

    plt.subplot(111)
    path_to_experiments = '../../%s/task_September_23_NN_xavier_relu'%experiment_name
    expts_best_results = mtf.get_best_results_for_experiments(path_to_experiments,decider,verbose=False)
    sorted_units, sorted_train_errors, sorted_validation_errors, sorted_test_errors = mtf.get_errors_for_display(expts_best_results)
    nb_params_shallow = [ shallow(nb_units) for nb_units in sorted_units ]
    print('nb_params_shallow = ', nb_params_shallow)
    print('sorted_train_errors = ', sorted_train_errors)
    print('sorted_test_errors = ', sorted_test_errors)
    #krls.plot_values(nb_params_shallow,sorted_train_errors,xlabel='number of parameters',y_label='squared error (l2 loss)',label='Train errors for Shallow Neural Net (NN)',markersize=3,colour='b')
    #krls.plot_values(nb_params_shallow,sorted_test_errors,xlabel='number of parameters',y_label='squared error (l2 loss)',label='Test error for Shallow NN',markersize=3,colour='b')

    #duration_type = 'minutes'
    duration_type = 'hours'
    expts_best_results = mtf.get_all_simulation_results(path_to_experiments,decider,verbose=False)
    sorted_units, sorted_duration_means, sorted_duration_stds = mtf.get_duration_of_experiments(all_results=expts_best_results, duration_type=duration_type)
    nb_params_shallow = [ shallow(nb_units) for nb_units in sorted_units ]
    krls.plot_errors_and_bars(nb_params_shallow,sorted_duration_means,sorted_duration_stds,xlabel='number of parameters',y_label='duration (%s)'%duration_type,label='Duration for experiments '+duration_type,markersize=3,colour='b')

    path_to_experiments = '../../%s/task_September_23_BTHL_xavier_relu'%experiment_name
    expts_best_results = mtf.get_best_results_for_experiments(path_to_experiments,decider,verbose=False)
    sorted_units, sorted_train_errors, sorted_validation_errors, sorted_test_errors = mtf.get_errors_for_display(expts_best_results)
    nb_params_bt = [ bt(nb_units) for nb_units in sorted_units ]
    print('nb_params_bt = ', nb_params_bt)
    print('sorted_train_errors = ', sorted_train_errors)
    print('sorted_test_errors = ', sorted_test_errors)

    #krls.plot_values(nb_params_bt,sorted_train_errors,xlabel='number of parameters',y_label='squared error (l2 loss)',label='Train errors for Binary Tree (BT) Neural Net',markersize=3,colour='c')
    #krls.plot_values(nb_params_bt,sorted_test_errors,xlabel='number of parameters',y_label='squared error (l2 loss)',label='Test errors for Binary Tree (BT) Neural Net',markersize=3,colour='c')
    #
    plt.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3, ncol=2, mode="expand", borderaxespad=0.)
    #plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
    #plt.legend()
    plt.show()
def main(argv):
    (_, task_name, result_loc, nb_inits, nb_rbf_shapes, units) = argv
    nb_inits, nb_rbf_shapes = int(nb_inits), int(nb_rbf_shapes)
    units_list = units.split(',')
    nb_centers_list = [ int(a) for a in units_list ]

    print('task_name ', task_name)
    (X_train, Y_train, X_cv, Y_cv, X_test, Y_test) = mtf.get_data(task_name)
    data = (X_train, Y_train, X_cv, Y_cv, X_test, Y_test)

    replace = False # with or without replacement
    stddevs = np.linspace(start=0.1, stop=3, num=nb_rbf_shapes)
    print('number of RBF stddev tried:', len(stddevs))
    print('start stddevs: ', stddevs)

    mdl_best_params, mdl_mean_params, errors_best, errors_stats, reconstructions_best, reconstructions_mean = mtf.evalaute_models(data, stddevs, nb_centers_list, replace=replace, nb_inits=nb_inits)
    (C_hat_bests, centers_bests, best_stddevs) = mdl_best_params
    print('best_stddevs: ', best_stddevs)
    (train_errors_bests, _, test_errors_bests) = errors_best
    (train_errors_means,_,test_errors_means, train_error_stds,_,test_error_stds) = errors_stats
    (Y_pred_train_best, _, Y_pred_test_best) = reconstructions_best

    # plot errors
    print('plotting errors')
    plt1d.figure(n=1)
    plt1d.plot_errors(nb_centers_list, train_errors_bests,label='train_Errors_best', markersize=3,colour='b')
    plt1d.plot_errors(nb_centers_list, test_errors_bests,label='test_Errors_best', markersize=3,colour='r')
    plt1d.plot_errors_and_bars(nb_centers_list, train_errors_means, train_error_stds, label='train_Errors_average', markersize=3,colour='b')
    plt1d.plot_errors_and_bars(nb_centers_list, test_errors_means, test_error_stds, label='test_Errors_average', markersize=3,colour='r')

    # get things to reconstruct
    if task_name == 'qianli_func':
        print('plotting reconstruct for task %s'%task_name)
        pass
    elif task_name == 'f_2D_task2':
        print('plotting reconstruct for task %s'%task_name)
        pass

    # plot show
    plt1d.show()

    #result_loc = './tmp_test_experiments/tmp_krls_workspace'
    mtf.save_workspace(filename=result_loc,names_of_spaces_to_save=dir(),dict_of_values_to_save=locals())
    print('\a') # makes beep
def main(argv):
        print('argv: ', argv)
        (_, task_name, folder, experiment_name, nb_inits, nb_rbf_shapes, units) = argv
        nb_inits, nb_rbf_shapes = int(nb_inits), int(nb_rbf_shapes)
        units_list = units.split(',')
        nb_centers_list = [ int(a) for a in units_list ]
        date = datetime.date.today().strftime("%B %d").replace(" ", "_")
        print('task_name ', task_name)

        (X_train, Y_train, X_cv, Y_cv, X_test, Y_test) = mtf.get_data(task_name)
        data = (X_train, Y_train, X_cv, Y_cv, X_test, Y_test)
        print(X_train.shape)

        replace = False # with or without replacement
        stddevs = np.linspace(start=0.1, stop=1250, num=nb_rbf_shapes)
        print('number of RBF stddev tried:', len(stddevs))
        print('start stddevs: ', stddevs)

        mdl_best_params, mdl_mean_params, errors_best, errors_stats, reconstructions_best, reconstructions_mean = mtf.evalaute_models(data, stddevs, nb_centers_list, replace=replace, nb_inits=nb_inits)
        (C_hat_bests, centers_bests, best_stddevs) = mdl_best_params
        print('best_stddevs: ', best_stddevs)
        (train_errors_bests, cv_errors_bests, test_errors_bests) = [ list(err_list) for err_list in errors_best ]
        (train_errors_means,cv_errors_means,test_errors_means, train_error_stds,cv_error_stds,test_error_stds) = [ list(err_list) for err_list in errors_stats ]
        #(Y_pred_train_best, _, Y_pred_test_best) = reconstructions_best
        print('train_errors_means: ', train_errors_means)

        dir_path = '../../om_krls/%s_%s'%(date,experiment_name)
        mtf.make_and_check_dir(dir_path)
        # save rbf
        rbf_params_loc = dir_path+'/rbf_params_%s_%s'%(date,experiment_name)
        np.savez(rbf_params_loc,C_hat_bests=C_hat_bests,centers_bests=centers_bests,best_stddevs=best_stddevs,units_list=np.array(nb_centers_list))
        # save errors
        result_loc = dir_path+'/results_json_%s_%s'%(date,experiment_name)
        results = {'nb_centers_list':nb_centers_list}
        mtf.load_results_dic(results,train_errors_bests=train_errors_bests,cv_errors_bests=cv_errors_bests,test_errors_bests=test_errors_bests, train_errors_means=train_errors_means,cv_errors_means=cv_errors_means,test_errors_means=test_errors_means, train_error_stds=train_error_stds,cv_error_stds=cv_error_stds,test_error_stds=test_error_stds)
        with open(result_loc, 'w+') as f_json:
            json.dump(results,f_json,sort_keys=True, indent=2, separators=(',', ': '))
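        # Sketch of reading the saved results back (same result_loc as above):
        #   with open(result_loc, 'r') as f_json:
        #       results = json.load(f_json)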
def main3(argv):
        (_, task_name, result_loc, nb_inits, nb_rbf_shapes, units) = argv
        nb_inits, nb_rbf_shapes = int(nb_inits), int(nb_rbf_shapes)
        units_list = units.split(',')
        nb_centers_list = [ int(a) for a in units_list ]

        print('task_name ', task_name)
        (X_train, Y_train, X_cv, Y_cv, X_test, Y_test) = mtf.get_data(task_name)
        data = (X_train, Y_train, X_cv, Y_cv, X_test, Y_test)

        replace = False # with or without replacement
        stddevs = np.linspace(start=0.1, stop=3, num=nb_rbf_shapes)
        print('number of RBF stddev tried:', len(stddevs))
        print('start stddevs: ', stddevs)

        mdl_best_params, mdl_mean_params, errors_best, errors_stats, reconstructions_best, reconstructions_mean = mtf.evalaute_models(data, stddevs, nb_centers_list, replace=replace, nb_inits=nb_inits)
        (C_hat_bests, centers_bests, best_stddevs) = mdl_best_params
        print('best_stddevs: ', best_stddevs)
        (train_errors_bests, _, test_errors_bests) = errors_best
        (train_errors_means,_,test_errors_means, train_error_stds,_,test_error_stds) = errors_stats
        #(Y_pred_train_best, _, Y_pred_test_best) = reconstructions_best

        #mtf.save_workspace(filename=result_loc,names_of_spaces_to_save=dir(),dict_of_values_to_save=locals())
        print('\a') # makes beep
import pdb

import tensorflow as tf
import numpy as np
import sklearn
from sklearn.decomposition import PCA
from numpy import linalg as LA

import my_tf_pkg as mtf

from keras.datasets import mnist

task_name='task_MNIST_flat_auto_encoder'
print('----====> TASK NAME: %s' % task_name)
(X_train, Y_train, X_cv, Y_cv, X_test, Y_test) = mtf.get_data(task_name)
data = (X_train, Y_train, X_cv, Y_cv, X_test, Y_test)
#X_train =  np.vstack((X_train,X_cv))

# (x_train, _), (x_test, _) = mnist.load_data()
# x_train = x_train.astype('float32') / 255.
# x_test = x_test.astype('float32') / 255.
# x_train = x_train.reshape((len(x_train), np.prod(x_train.shape[1:])))
# x_test = x_test.reshape((len(x_test), np.prod(x_test.shape[1:])))
# print x_train.shape
# print x_test.shape
#print 'MNIST keras fingerprint: ', np.sum(x_train)
#print 'MNIST keras fingerprint: ', np.sum(X_train)+np.sum(X_train)

def get_reconstruction(X_train,k):
    # fit PCA and reconstruct X_train from its top-k principal components
    pca = PCA(n_components=k)
    pca = pca.fit(X_train)
    X_reconstruct = pca.inverse_transform(pca.transform(X_train))
    return X_reconstruct
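
# Minimal usage sketch (assumes the flat MNIST X_train loaded above; the error
# formula here is illustrative and not necessarily mtf.report_l2_loss):
#   X_rec = get_reconstruction(X_train, k=12)
#   print('PCA reconstruction error: ', np.mean(np.sum((X_train - X_rec)**2, axis=1)))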
def main_gpu(arg):
    print('Running main')
    print('--==>', dict(arg) )
    arg.act_name = arg.act.__name__
    results = {'train_errors':[], 'cv_errors':[],'test_errors':[]}

    path, errors_pretty, mdl_dir, json_file = set_experiment_folders(arg)
    set_tensorboard(arg)

    ## Data sets and task
    print( '----====> TASK NAME: %s' % arg.data_file_name )
    (X_train, Y_train, X_cv, Y_cv, X_test, Y_test) = mtf.get_data(arg)

    N_frac = arg.N_frac
    X_train, Y_train, X_cv, Y_cv, X_test, Y_test = X_train[:N_frac,:], Y_train[:N_frac,:], X_cv[:N_frac,:], Y_cv[:N_frac,:], X_test[:N_frac,:], Y_test[:N_frac,:]

    if arg.data_normalize == 'normalize_input':
        X_train, X_cv, X_test = preprocessing.scale(X_train), preprocessing.scale(X_cv), preprocessing.scale(X_test)

    (N_train,D) = X_train.shape
    (N_test,D_out) = Y_test.shape
    print( '(N_train,D) = ', (N_train,D) )
    print( '(N_test,D_out) = ', (N_test,D_out) )

    ##
    phase_train = tf.placeholder(tf.bool, name='phase_train') if arg.bn else None

    arg.steps = arg.get_steps(arg)
    arg.M = arg.get_batch_size(arg)

    arg.log_learning_rate = arg.get_log_learning_rate(arg)
    arg.starter_learning_rate = arg.get_start_learning_rate(arg)
    print( '++> starter_learning_rate ', arg.starter_learning_rate )

    ## decayed_learning_rate = learning_rate * decay_rate ^ (global_step / decay_steps)
    arg.decay_rate = arg.get_decay_rate(arg)
    arg.decay_steps = arg.get_decay_steps(arg)
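    # Worked example (assumed values, not from any config here): with
    # starter_learning_rate=0.1, decay_rate=0.9, decay_steps=1000 and
    # staircase=True, step 2000 gives 0.1 * 0.9**(2000//1000) = 0.081.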

    if arg.optimization_alg == 'GD':
        pass
    elif arg.optimization_alg=='Momentum':
        arg.use_nesterov = arg.get_use_nesterov()
        arg.momentum = arg.get_momentum(arg)
        print('arg.use_nesterov', arg.use_nesterov)
        print('arg.momentum', arg.momentum)
    elif arg.optimization_alg == 'Adadelta':
        arg.rho = arg.get_rho(arg)
        print('arg.rho', arg.rho)
    elif arg.optimization_alg == 'Adagrad':
        #only has learning rate
        pass
    elif arg.optimization_alg == 'Adam':
        arg.beta1 = arg.get_beta1(arg)
        arg.beta2 = arg.get_beta2(arg)
        print('arg.beta1', arg.beta1)
        print('arg.beta2', arg.beta2)
    elif arg.optimization_alg == 'RMSProp':
        arg.decay = arg.get_decay(arg)
        arg.momentum = arg.get_momentum(arg)
        print('arg.decay', arg.decay)
        print('arg.momentum', arg.momentum)
    else:
        pass

    ##############################
    # if data_file_name == 'task_MNIST_flat_auto_encoder':
    #     PCA_errors = {12:24.8254684915, 48:9.60052317906, 96:4.72118325768}
    #     if len(units_list) == 1:
    #         k = units_list[0]
    #     else:
    #         k = units_list[0] * len(units_list)
    #     if not k in PCA_errors.keys():
    #         print( 'COMPUTING PCA... k = ', k)
    #         X_reconstruct_pca, _, _ = mtf. get_reconstruction_pca(X_train,k=units_list[0])
    #         pca_error = mtf.report_l2_loss(Y=X_train,Y_pred=X_reconstruct_pca)
    #         PCA_errors[k] = pca_error
    #     else:
    #         pca_error = PCA_errors[k]
    #     print( '*************> PCA error: ', pca_error)
    # else:
    #     pca_error = None
    #     rbf_error = None
    #
    # hbf1_error = None
    # if model == 'hbf':
    #     #error, Y_pred, Kern, C, subsampled_data_points = report_RBF_error_from_data(X_train, dims, stddev)
    #     if len(units_list) > 1:
    #         k = units_list[0]*len(units_list)
    #         print( 'RBF units = ', k)
    #         nb_units = [None, k]
    #         rbf_error, _, _, _, _ = mtf.report_RBF_error_from_data(X_train, X_train, nb_units, S_init[1])
    #         print( rbf_error)
    #         hbf1={12:26.7595}
    #         if k in hbf1.keys():
    #             hbf1_error = hbf1[k]
    #     else:
    #         nb_units = dims
    #         rbf_error, _, _, _, _ = mtf.report_RBF_error_from_data(X_train, X_train, nb_units, S_init[1])
    ##

    pca_error = None
    rbf_error = None
    hbf1_error = None
    ## Make Model
    if arg.mdl == 'standard_nn':
        arg.dims = [D]+arg.units+[D_out]
        arg.mu_init_list = arg.get_W_mu_init(arg)
        arg.std_init_list = arg.get_W_std_init(arg)

        arg.b_init = arg.get_b_init(arg)
        float_type = tf.float64
        x = tf.placeholder(float_type, shape=[None, D], name='x-input') # M x D

        nb_layers = len(arg.dims)-1
        nb_hidden_layers = nb_layers-1
        (inits_C,inits_W,inits_b) = mtf.get_initilizations_standard_NN(init_type=arg.init_type,dims=arg.dims,mu=arg.mu_init_list,std=arg.std_init_list,b_init=arg.b_init, X_train=X_train, Y_train=Y_train)
        with tf.name_scope("standardNN") as scope:
            mdl = mtf.build_standard_NN(arg, x,arg.dims,(None,inits_W,inits_b),phase_train,arg.trainable_bn)
            mdl = mtf.get_summation_layer(l=str(nb_layers),x=mdl,init=inits_C[0])
        inits_S = inits_b
    elif arg.mdl == 'hbf':
        arg.dims = [D]+arg.units+[D_out]
        trainable_S = True if (arg.trainable_S=='train_S') else False
        arg.b_init = arg.get_b_init(arg)
        arg.S_init = arg.b_init
        float_type = tf.float64
        #arg.mu , arg.std = arg.get_W_mu_init(arg), arg.get_W_std_init(arg)
        x = tf.placeholder(float_type, shape=[None, D], name='x-input') # M x D
        (inits_C,inits_W,inits_S,rbf_error) = mtf.get_initilizations_HBF(init_type=arg.init_type,dims=arg.dims,mu=arg.mu,std=arg.std,b_init=arg.b_init,S_init=arg.S_init, X_train=X_train, Y_train=Y_train, train_S_type=arg.train_S_type)
        #print(inits_W)
        nb_layers = len(arg.dims)-1
        nb_hidden_layers = nb_layers-1
        with tf.name_scope("HBF") as scope:
            mdl = mtf.build_HBF2(x,arg.dims,(inits_C,inits_W,inits_S),phase_train,arg.trainable_bn,trainable_S)
            mdl = mtf.get_summation_layer(l=str(nb_layers),x=mdl,init=inits_C[0])
    elif arg.mdl == 'binary_tree_4D':
        pass
    elif arg.mdl == 'binary_tree_4D_conv_hidden_layer':
        print( 'binary_tree_4D' )
        inits_S = None
        pca_error, rbf_error = None, None
        float_type = tf.float32
        # Data sizes needed for reshaping
        N_cv, N_test = X_cv.shape[0], X_test.shape[0]
        # reshape data sets
        X_train, X_cv, X_test = X_train.reshape(N_train,1,D,1), X_cv.reshape(N_cv,1,D,1), X_test.reshape(N_test,1,D,1)
        x = tf.placeholder(float_type, shape=[None,1,D,1], name='x-input')
        #
        arg.stride_convd1, arg.filter_size = 2, 2 #fixed for Binary Tree BT
        #arg.mean, arg.stddev = arg.get_W_mu_init(arg), arg.get_W_std_init(arg)
        with tf.name_scope("build_binary_model") as scope:
            mdl = mtf.build_binary_tree_4D_hidden_layer(x,arg,phase_train=phase_train)
        arg.dims = [D]+[arg.nb_filters]+[arg.nb_final_hidden_units]+[D_out]
    elif arg.mdl == 'binary_tree_4D_conv_hidden_layer_automatic':
        print( arg.scope_name )
        inits_S = None
        pca_error, rbf_error = None, None
        float_type = tf.float32
        #
        N_cv, N_test = X_cv.shape[0], X_test.shape[0]
        X_train, X_cv, X_test = X_train.reshape(N_train,1,D,1), X_cv.reshape(N_cv,1,D,1), X_test.reshape(N_test,1,D,1)
        x = tf.placeholder(tf.float32, shape=[None,1,D,1], name='x-input') #[M, 1, D, 1]
        #
        with tf.name_scope("mdl"+arg.scope_name) as scope:
            mdl = mtf.bt_mdl_conv(arg,x)
        arg.dims = [D]+arg.F[1:]+[D_out]
    elif arg.mdl == 'binary_tree_8D_conv_hidden_layer':
        print( arg.scope_name )
        inits_S = None
        pca_error, rbf_error = None, None
        float_type = tf.float32
        #
        N_cv, N_test = X_cv.shape[0], X_test.shape[0]
        X_train, X_cv, X_test = X_train.reshape(N_train,1,D,1), X_cv.reshape(N_cv,1,D,1), X_test.reshape(N_test,1,D,1)
        x = tf.placeholder(tf.float32, shape=[None,1,D,1], name='x-input') #[M, 1, D, 1]
        #
        with tf.name_scope("mdl"+arg.scope_name) as scope:
            mdl = mtf.bt_mdl_conv(arg,x)
        arg.dims = [D]+arg.F[1:]+[D_out]

    ## Output and Loss
    y = mdl
    y_ = tf.placeholder(float_type, shape=[None, D_out]) # (M x D_out)
    with tf.name_scope("L2_loss") as scope:
        l2_loss = tf.reduce_sum( tf.reduce_mean(tf.square(y_-y), 0) )
        #l2_loss = (2.0/N_train)*tf.nn.l2_loss(y_-y)
        #l2_loss = tf.reduce_mean(tf.square(y_-y))
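        # note: this equals the batch-mean of ||y_ - y||^2 (per-dimension mean
        # squared error over the batch, summed over the D_out output dimensions)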

    nb_params = count_number_trainable_params(y)
    results['nb_params'] = nb_params
    print( '---> nb_params ', nb_params )


    ##

    with tf.name_scope("train") as scope:
        # If the argument staircase is True, then global_step / decay_steps is an integer division and the decayed earning rate follows a staircase function.
        ## decayed_learning_rate = learning_rate * decay_rate ^ (global_step / decay_steps)
        global_step = tf.Variable(0, trainable=False)
        learning_rate = tf.train.exponential_decay(learning_rate=arg.starter_learning_rate, global_step=global_step,decay_steps=arg.decay_steps, decay_rate=arg.decay_rate, staircase=arg.staircase)
        # Passing global_step to minimize() will increment it at each step.
        if arg.optimization_alg == 'GD':
            opt = tf.train.GradientDescentOptimizer(learning_rate)
        elif arg.optimization_alg == 'Momentum':
            opt = tf.train.MomentumOptimizer(learning_rate=learning_rate,momentum=arg.momentum,use_nesterov=arg.use_nesterov)
        elif arg.optimization_alg == 'Adadelta':
            opt = tf.train.AdadeltaOptimizer(learning_rate=learning_rate, rho=arg.rho, epsilon=1e-08, use_locking=False, name='Adadelta')
        elif arg.optimization_alg == 'Adam':
            opt = tf.train.AdamOptimizer(learning_rate=learning_rate, beta1=arg.beta1, beta2=arg.beta2, epsilon=1e-08, name='Adam')
        elif arg.optimization_alg == 'Adagrad':
            opt = tf.train.AdagradOptimizer(learning_rate)
        elif arg.optimization_alg == 'RMSProp':
            opt = tf.train.RMSPropOptimizer(learning_rate=learning_rate, decay=arg.decay, momentum=arg.momentum, epsilon=1e-10, name='RMSProp')

    ## TODO
    if arg.re_train == 're_train' and arg.data_file_name == 'hrushikesh':
        print( 'data_file_name: ', arg.data_file_name)
        print( 're_train: ', arg.re_train)
        var_list = [v for v in tf.all_variables() if v.name == 'C:0']
        train_step = opt.minimize(l2_loss, var_list=var_list) # only re-train the output layer C
    else:
        train_step = opt.minimize(l2_loss, global_step=global_step)

    ##
    with tf.name_scope('learning_rate'):
        learning_rate_scalar_summary = tf.scalar_summary("learning_rate", learning_rate)

    with tf.name_scope("l2_loss") as scope:
        ls_scalar_summary = tf.scalar_summary("l2_loss", l2_loss)

    if arg.data_file_name == 'task_MNIST_flat_auto_encoder':
        with tf.name_scope('input_reshape'):
            x_image = tf.to_float(x, name='ToFloat')
            image_shaped_input_x = tf.reshape(x_image, [-1, 28, 28, 1])
            # tf.image_summary(tag, tensor, max_images=3, collections=None, name=None)
            tf.image_summary('input', image_shaped_input_x, 10)

        with tf.name_scope('reconstruct'):
            y_image = tf.to_float(y, name='ToFloat')
            image_shaped_input_y = tf.reshape(y_image, [-1, 28, 28, 1])
            # tf.image_summary(tag, tensor, max_images=3, collections=None, name=None)
            tf.image_summary('reconstruct', image_shaped_input_y, 10)

    def register_all_variables_and_grads(y):
        all_vars = tf.all_variables()
        grad_vars = opt.compute_gradients(y,all_vars) #[ (gradient,variable) ]
        for (dldw,v) in grad_vars:
            if dldw is not None:
                prefix_name = 'derivative_'+v.name
                suffix_text = 'dJd'+v.name
                #mtf.put_summaries(var=tf.sqrt( tf.reduce_sum(tf.square(dldw)) ),prefix_name=prefix_name,suffix_text=suffix_text)
                mtf.put_summaries(var=tf.abs(dldw),prefix_name=prefix_name,suffix_text='_abs_'+suffix_text)
                tf.histogram_summary('hist'+prefix_name, dldw)


    register_all_variables_and_grads(y)
    ## TRAIN
    if phase_train is not None:
        #DO BN
        feed_dict_train = {x:X_train, y_:Y_train, phase_train: False}
        feed_dict_cv = {x:X_cv, y_:Y_cv, phase_train: False}
        feed_dict_test = {x:X_test, y_:Y_test, phase_train: False}
    else:
        #Don't do BN
        feed_dict_train = {x:X_train, y_:Y_train}
        feed_dict_cv = {x:X_cv, y_:Y_cv}
        feed_dict_test = {x:X_test, y_:Y_test}

    def get_batch_feed(X, Y, M, phase_train):
        # sample M indices (with replacement) from the full data set, not just its first M rows
        mini_batch_indices = np.random.randint(X.shape[0], size=M)
        Xminibatch = X[mini_batch_indices,:] # ( M x D^(0) )
        Yminibatch = Y[mini_batch_indices,:] # ( M x D^(L) )
        if phase_train is not None:
            #DO BN
            feed_dict = {x: Xminibatch, y_: Yminibatch, phase_train: True}
        else:
            #Don't do BN
            feed_dict = {x: Xminibatch, y_: Yminibatch}
        return feed_dict

    def print_messages(*args):
        for i, msg in enumerate(args):
            print('>%s'%msg, flush=True)

    if arg.use_tensorboard:
        if tf.gfile.Exists('/tmp/mdl_logs'):
            tf.gfile.DeleteRecursively('/tmp/mdl_logs')
        tf.gfile.MakeDirs('/tmp/mdl_logs')

    tf.add_check_numerics_ops()

    # Add ops to save and restore all the variables.
    if arg.mdl_save:
        saver = tf.train.Saver(max_to_keep=arg.max_to_keep)
    start_time = time.time()
    print()
    #file_for_error = './ray_error_file.txt'
    if arg.save_config_args:
        arg_dict = dict(arg).copy()
        arg_dict = get_remove_functions_from_dict(arg_dict)
        pickle.dump( arg_dict, open( "pickle-slurm-%s_%s.p"%(arg.slurm_jobid,arg.slurm_array_task_id) , "wb" ) )
        #with open('json-slurm-%s_%s.json', 'w+') as f_json:
        #    json.dump(results,f_json,sort_keys=True, indent=2, separators=(',', ': '))
    with open(path+errors_pretty, 'w+') as f_err_msgs:
    #with open(file_for_error, 'w+') as f_err_msgs:
        #with tf.Session() as sess:
        sess = tf.Session()
        ## prepare writers and fetches
        if arg.use_tensorboard:
            merged = tf.merge_all_summaries()
            #writer = tf.train.SummaryWriter(tensorboard_data_dump, sess.graph)
            train_writer = tf.train.SummaryWriter(arg.tensorboard_data_dump_train, sess.graph)
            test_writer = tf.train.SummaryWriter(arg.tensorboard_data_dump_test, sess.graph)
            ##
            fetches_train = [merged, l2_loss]
            fetches_cv = l2_loss
            fetches_test = [merged, l2_loss]
        else:
            fetches_train = l2_loss
            fetches_cv = l2_loss
            fetches_test = l2_loss

        sess.run( tf.initialize_all_variables() )
        for i in range(arg.steps):
            ## Create fake data for y = W.x + b where W = 2, b = 0
            #(batch_xs, batch_ys) = get_batch_feed(X_train, Y_train, M, phase_train)
            feed_dict_batch = get_batch_feed(X_train, Y_train, arg.M, phase_train)
            ## Train
            if i%arg.report_error_freq == 0:
                if arg.use_tensorboard:
                    (summary_str_train,train_error) = sess.run(fetches=fetches_train, feed_dict=feed_dict_train)
                    cv_error = sess.run(fetches=fetches_cv, feed_dict=feed_dict_cv)
                    (summary_str_test,test_error) = sess.run(fetches=fetches_test, feed_dict=feed_dict_test)

                    train_writer.add_summary(summary_str_train, i)
                    test_writer.add_summary(summary_str_test, i)
                else:
                    train_error = sess.run(fetches=fetches_train, feed_dict=feed_dict_train)
                    cv_error = sess.run(fetches=fetches_cv, feed_dict=feed_dict_cv)
                    test_error = sess.run(fetches=fetches_test, feed_dict=feed_dict_test)

                current_learning_rate = sess.run(fetches=learning_rate)
                loss_msg = "=> Mdl*%s*-units%s, task: %s, step %d/%d, train err %g, cv err: %g test err %g"%(arg.mdl,arg.dims,arg.data_file_name,i,arg.steps,train_error,cv_error,test_error)
                mdl_info_msg = "Act: %s, Opt:%s, BN %s, BN_trainable: %s After%d/%d iteration,Init: %s, current_learning_rate %s, M %s, decay_rate %s, decay_steps %s, nb_params %s" % (arg.act.__name__,arg.optimization_alg,arg.bn,arg.trainable_bn,i,arg.steps,arg.init_type,current_learning_rate,arg.M,arg.decay_rate,arg.decay_steps,nb_params)
                errors_to_beat = 'BEAT: hbf1_error: %s RBF error: %s PCA error: %s '%(hbf1_error, rbf_error,pca_error)

                print_messages(loss_msg, mdl_info_msg, errors_to_beat)
                print('S: ', inits_S, flush=True)
                print()

                # store results
                results['train_errors'].append( float(train_error) )
                results['cv_errors'].append( float(cv_error) )
                results['test_errors'].append( float(test_error) )
                # write errors to pretty print
                f_err_msgs.write(loss_msg+'\n')
                f_err_msgs.write(mdl_info_msg+'\n')
                if any_is_NaN(train_error,cv_error,test_error):
                    # if its a nan make sure to stop script
                    print('nan_found')
                    break
                if arg.mdl_save:
                    save_path = saver.save(sess, path+mdl_dir+'/model.ckpt',global_step=i)
            if arg.use_tensorboard:
                sess.run(fetches=[merged,train_step], feed_dict=feed_dict_batch) #sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})
            else:
                sess.run(fetches=train_step, feed_dict=feed_dict_batch) #sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})


    sess.close()
    _, best_train, best_cv, best_test =  arg.get_errors_from(results)
    results['best_train'], results['best_cv'], results['best_test'] = best_train, best_cv, best_test
    print('End of main')

    git_hash = subprocess.check_output(['git', 'rev-parse', 'HEAD'])
    results['git_hash'] = str(git_hash)
    #results['tf_rand_seed'] = tf_rand_seed
    #
    seconds = (time.time() - start_time)
    minutes = seconds/ 60
    hours = minutes/ 60
    print("--- %s seconds ---" % seconds )
    print("--- %s minutes ---" % minutes )
    print("--- %s hours ---" % hours )
    ## dump results to JSON
    results['seconds'] = seconds
    results['minutes'] = minutes
    results['hours'] = hours
    #print results
    #results['arg'] = arg
    arg_dict = dict(arg)
    arg_dict = get_remove_functions_from_dict(arg_dict)
    results['arg_dict'] = arg_dict
    with open(path+json_file, 'w+') as f_json:
        print('Writing Json')
        print('path+json_file', path+json_file)
        json.dump(results,f_json,indent=2, separators=(',', ': '))
    print( '\a') #makes beep
    #print(results)
    print('get_errors_from: ', arg.get_errors_from.__name__)
    print('best results: train, cv, test: ', best_train, best_cv, best_test )
from sklearn.metrics.pairwise import euclidean_distances
import numpy as np

import my_tf_pkg as mtf


def get_pairwise_norm_squared(X, Y):
    return mtf.euclidean_distances(X=X, Y=Y, squared=True)


task_name = 'hrushikesh'
X_train, Y_train, X_cv, Y_cv, X_test, Y_test = mtf.get_data(task_name)

print('X_train.shape', X_train.shape)
print('X_cv.shape', X_cv.shape)
print('X_test.shape', X_test.shape)

pairwise_norm_squared = get_pairwise_norm_squared(X=X_train, Y=X_train)
print('min', np.amin(pairwise_norm_squared))
print('max', np.amax(pairwise_norm_squared))
print('mean', np.mean(pairwise_norm_squared))
print('std', np.std(pairwise_norm_squared))
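
# A common way to turn these pairwise-distance statistics into candidate RBF
# bandwidths is the median heuristic; a minimal sketch (the linspace endpoints
# and num are assumptions, not values from the experiments above):
#   median_sq_dist = np.median(pairwise_norm_squared)
#   stddev_guess = np.sqrt(median_sq_dist / 2.0)
#   candidate_stddevs = np.linspace(0.1*stddev_guess, 10*stddev_guess, num=10)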
def main(arg):
    results = {'train_errors':[], 'cv_errors':[],'test_errors':[]}

    path, errors_pretty, mdl_dir, json_file = set_experiment_folders(arg)

    # try to make directory, if it exists do NOP
    mtf.make_and_check_dir(path=path)
    mtf.make_and_check_dir(path=path+mdl_dir)
    # JSON results structure
    #results_dic = mtf.fill_results_dic_with_np_seed(np_rnd_seed=np.random.get_state(), results=results)

    ## Data sets and task
    print( '----====> TASK NAME: %s' % arg.task_name )
    (X_train, Y_train, X_cv, Y_cv, X_test, Y_test) = mtf.get_data(arg.task_name)
    if arg.data_normalize == 'normalize_input':
        X_train, X_cv, X_test = preprocessing.scale(X_train), preprocessing.scale(X_cv), preprocessing.scale(X_test)

    (N_train,D) = X_train.shape
    (N_test,D_out) = Y_test.shape
    print( '(N_train,D) = ', (N_train,D) )
    print( '(N_test,D_out) = ', (N_test,D_out) )

    ##
    units_list = arg.units_list
    dims = [D]+arg.units_list+[D_out]
    mu = arg.W_mu_init(dims, arg)
    std = arg.W_std_init(dims, arg)

    b_init = arg.b_init()

    phase_train = tf.placeholder(tf.bool, name='phase_train') if arg.bn else None

    steps = np.random.randint(low=arg.steps_low ,high=arg.steps_high)
    M = np.random.randint(low=arg.M_low , high=arg.M_high)
    arg.M = M

    log_learning_rate = np.random.uniform(low=arg.low_log_const_learning_rate, high=arg.high_log_const_learning_rate)
    starter_learning_rate = 10**log_learning_rate
    print( '++> starter_learning_rate ', starter_learning_rate )

    ## decayed_learning_rate = learning_rate * decay_rate ^ (global_step / decay_steps)
    decay_rate = np.random.uniform(low=arg.decay_rate_low, high=arg.decay_rate_high)
    decay_steps = np.random.randint(low=arg.decay_steps_low(arg), high=arg.decay_steps_high(arg) )
    staircase = arg.staircase

    if optimization_alg == 'GD':
        pass
    elif optimization_alg=='Momentum':
        use_nesterov = arg.use_nesterov
        momentum=np.random.uniform(low=0.1, high=0.99)
        results['momentum']=float(momentum)
    elif optimization_alg == 'Adadelta':
        rho=np.random.uniform(low=0.4, high=0.99)
        results['rho']=float(rho)
    elif optimization_alg == 'Adagrad':
        #only has learning rate
        pass
    elif optimization_alg == 'Adam':
        beta1 = arg.get_beta1(arg)
        beta2 = arg.get_beta2(arg)
        results['beta1']=float(beta1)
        results['beta2']=float(beta2)
    elif optimization_alg == 'RMSProp':
        decay = np.random.uniform(low=arg.decay_loc,high=arg.decay_high)
        momentum = np.random.uniform(low=arg.momentum_low,high=arg.momentum_high)
        results['decay']=float(decay)
        results['momentum']=float(momentum)
    else:
        pass
    results['range_learning_rate'] = [arg.low_log_const_learning_rate, arg.high_log_const_learning_rate] # log10 range sampled above

    ##############################
    if task_name == 'task_MNIST_flat_auto_encoder':
        PCA_errors = {12:24.8254684915, 48:9.60052317906, 96:4.72118325768}
        if len(units_list) == 1:
            k = units_list[0]
        else:
            k = units_list[0] * len(units_list)
        if not k in PCA_errors.keys():
            print( 'COMPUTING PCA... k = ', k)
            X_reconstruct_pca, _, _ = mtf.get_reconstruction_pca(X_train,k=units_list[0])
            pca_error = mtf.report_l2_loss(Y=X_train,Y_pred=X_reconstruct_pca)
            PCA_errors[k] = pca_error
        else:
            pca_error = PCA_errors[k]
        print( '*************> PCA error: ', pca_error)
    else:
        pca_error = None
        rbf_error = None

    hbf1_error = None
    if model == 'hbf':
        #error, Y_pred, Kern, C, subsampled_data_points = report_RBF_error_from_data(X_train, dims, stddev)
        if len(units_list) > 1:
            k = units_list[0]*len(units_list)
            print( 'RBF units = ', k)
            nb_units = [None, k]
            rbf_error, _, _, _, _ = mtf.report_RBF_error_from_data(X_train, X_train, nb_units, S_init[1])
            print( rbf_error)
            hbf1={12:26.7595}
            if k in hbf1.keys():
                hbf1_error = hbf1[k]
        else:
            nb_units = dims
            rbf_error, _, _, _, _ = mtf.report_RBF_error_from_data(X_train, X_train, nb_units, S_init[1])

    S_init = b_init
    ##

    ## Make Model
    nb_layers = len(dims)-1
    nb_hidden_layers = nb_layers-1
    print( '-----> Running model: %s. (nb_hidden_layers = %d, nb_layers = %d)' % (model,nb_hidden_layers,nb_layers) )
    print( '-----> Units: %s' % (dims) )
    if model == 'standard_nn':
        rbf_error = None
        #tensorboard_data_dump = '/tmp/standard_nn_logs'
        float_type = tf.float64
        x = tf.placeholder(float_type, shape=[None, D], name='x-input') # M x D
        (inits_C,inits_W,inits_b) = mtf.get_initilizations_standard_NN(init_type=init_type,dims=dims,mu=mu,std=std,b_init=b_init,S_init=S_init, X_train=X_train, Y_train=Y_train)
        with tf.name_scope("standardNN") as scope:
            mdl = mtf.build_standard_NN(x,dims,(inits_C,inits_W,inits_b),phase_train,trainable_bn)
            mdl = mtf.get_summation_layer(l=str(nb_layers),x=mdl,init=inits_C[0])
        inits_S = inits_b
    elif model == 'hbf':
        trainable_S = True if (arg.trainable_S=='train_S') else False
        #tensorboard_data_dump = '/tmp/hbf_logs'
        float_type = tf.float64
        x = tf.placeholder(float_type, shape=[None, D], name='x-input') # M x D
        (inits_C,inits_W,inits_S,rbf_error) = mtf.get_initilizations_HBF(init_type=init_type,dims=dims,mu=mu,std=std,b_init=b_init,S_init=S_init, X_train=X_train, Y_train=Y_train, train_S_type=train_S_type)
        print(inits_W)
        with tf.name_scope("HBF") as scope:
            mdl = mtf.build_HBF2(x,dims,(inits_C,inits_W,inits_S),phase_train,trainable_bn,trainable_S)
            mdl = mtf.get_summation_layer(l=str(nb_layers),x=mdl,init=inits_C[0])
    elif model == 'binary_tree_4D_conv':
        print( 'binary_tree_4D')
        #tensorboard_data_dump = '/tmp/hbf_logs'
        inits_S = None
        pca_error = None
        rbf_error = None
        float_type = tf.float32
        # things that need reshaping
        N_cv = X_cv.shape[0]
        N_test = X_test.shape[0]
        #
        X_train = X_train.reshape(N_train,1,D,1)
        #Y_train = Y_train.reshape(N_train,1,D,1)
        X_cv = X_cv.reshape(N_cv,1,D,1)
        #Y_cv = Y_cv.reshape(N_cv,1,D,1)
        X_test = X_test.reshape(N_test,1,D,1)
        #Y_test = Y_test.reshape(N_test,1,D,1)
        x = tf.placeholder(float_type, shape=[None,1,D,1], name='x-input')
        #
        filter_size = 2 #fixed for Binary Tree BT
        #nb_filters = nb_filters
        mean, stddev = bn_tree_init_stats
        stddev = float( np.random.uniform(low=0.001, high=stddev) )
        print( 'stddev', stddev)
        with tf.name_scope("build_binary_model") as scope:
            mdl = mtf.build_binary_tree(x,filter_size,nb_filters,mean,stddev,stride_convd1=2,phase_train=phase_train,trainable_bn=trainable_bn)
        #
        dims = [D]+[nb_filters]+[D_out]
        results['nb_filters'] = nb_filters
    elif model == 'binary_tree_D8':
        #tensorboard_data_dump = '/tmp/hbf_logs'
        inits_S = None
        pca_error = None
        rbf_error = None
        float_type = tf.float32
        # things that need reshaping
        N_cv = X_cv.shape[0]
        N_test = X_test.shape[0]
        #
        X_train = X_train.reshape(N_train,1,D,1)
        #Y_train = Y_train.reshape(N_train,1,D,1)
        X_cv = X_cv.reshape(N_cv,1,D,1)
        #Y_cv = Y_cv.reshape(N_cv,1,D,1)
        X_test = X_test.reshape(N_test,1,D,1)
        #Y_test = Y_test.reshape(N_test,1,D,1)
        x = tf.placeholder(float_type, shape=[None,1,D,1], name='x-input')
        #
        filter_size = 2 #fixed for Binary Tree BT
        nb_filters1,nb_filters2 = nb_filters
        mean1,stddev1,mean2,stddev2,mean3,stddev3 = bn_tree_init_stats
        with tf.name_scope("binary_tree_D8") as scope:
            mdl = mtf.build_binary_tree_8D(x,nb_filters1,nb_filters2,mean1,stddev1,mean2,stddev2,mean3,stddev3,stride_conv1=2)
        #
        dims = [D]+nb_filters+[D_out]
        results['nb_filters'] = nb_filters

    ## Output and Loss
    y = mdl
    y_ = tf.placeholder(float_type, shape=[None, D_out]) # (M x D_out)
    with tf.name_scope("L2_loss") as scope:
        l2_loss = tf.reduce_sum( tf.reduce_mean(tf.square(y_-y), 0) )
        #l2_loss = (2.0/N_train)*tf.nn.l2_loss(y_-y)
        #l2_loss = tf.reduce_mean(tf.square(y_-y))

    ##

    with tf.name_scope("train") as scope:
        # starter_learning_rate = 0.0000001
        # decay_rate = 0.9
        # decay_steps = 100
        # staircase = True
        # decay_steps = 10000000
        # staircase = False
        # decayed_learning_rate = learning_rate * decay_rate ^ (global_step / decay_steps)
        global_step = tf.Variable(0, trainable=False)
        learning_rate = tf.train.exponential_decay(learning_rate=starter_learning_rate, global_step=global_step,decay_steps=decay_steps, decay_rate=decay_rate, staircase=staircase)

        # Passing global_step to minimize() will increment it at each step.
        if optimization_alg == 'GD':
            opt = tf.train.GradientDescentOptimizer(learning_rate)
        elif optimization_alg == 'Momentum':
            opt = tf.train.MomentumOptimizer(learning_rate=learning_rate,momentum=momentum,use_nesterov=use_nesterov)
        elif optimization_alg == 'Adadelta':
            opt = tf.train.AdadeltaOptimizer(learning_rate=learning_rate, rho=rho, epsilon=1e-08, use_locking=False, name='Adadelta')
        elif optimization_alg == 'Adam':
            opt = tf.train.AdamOptimizer(learning_rate=learning_rate, beta1=beta1, beta2=beta2, epsilon=1e-08, name='Adam')
        elif optimization_alg == 'Adagrad':
            opt = tf.train.AdagradOptimizer(learning_rate)
        elif optimization_alg == 'RMSProp':
            opt = tf.train.RMSPropOptimizer(learning_rate=learning_rate, decay=decay, momentum=momentum, epsilon=1e-10, name='RMSProp')

    ##
    if re_train == 're_train' and task_name == 'hrushikesh':
        print( 'task_name: ', task_name)
        print( 're_train: ', re_train)
        var_list = [v for v in tf.all_variables() if v.name == 'C:0']
        train_step = opt.minimize(l2_loss, var_list=var_list) # only re-train the output layer C
    else:
        train_step = opt.minimize(l2_loss, global_step=global_step)

    ##
    with tf.name_scope("l2_loss") as scope:
        ls_scalar_summary = tf.scalar_summary("l2_loss", l2_loss)

    if task_name == 'task_MNIST_flat_auto_encoder':
        with tf.name_scope('input_reshape'):
            x_image = tf.to_float(x, name='ToFloat')
            image_shaped_input_x = tf.reshape(x_image, [-1, 28, 28, 1])
            # tf.image_summary(tag, tensor, max_images=3, collections=None, name=None)
            tf.image_summary('input', image_shaped_input_x, 10)

        with tf.name_scope('reconstruct'):
            y_image = tf.to_float(y, name='ToFloat')
            image_shaped_input_y = tf.reshape(y_image, [-1, 28, 28, 1])
            # tf.image_summary(tag, tensor, max_images=3, collections=None, name=None)
            tf.image_summary('reconstruct', image_shaped_input_y, 10)

    def register_all_variables_and_grads(y):
        all_vars = tf.all_variables()
        grad_vars = opt.compute_gradients(y,all_vars) #[ (gradient,variable) ]
        for (dldw,v) in grad_vars:
            if dldw is not None:
                prefix_name = 'derivative_'+v.name
                suffix_text = 'dJd'+v.name
                #mtf.put_summaries(var=tf.sqrt( tf.reduce_sum(tf.square(dldw)) ),prefix_name=prefix_name,suffix_text=suffix_text)
                mtf.put_summaries(var=tf.abs(dldw),prefix_name=prefix_name,suffix_text='_abs_'+suffix_text)
                tf.histogram_summary('hist'+prefix_name, dldw)

    register_all_variables_and_grads(y)
    ## TRAIN
    if phase_train is not None:
        #DO BN
        feed_dict_train = {x:X_train, y_:Y_train, phase_train: False}
        feed_dict_cv = {x:X_cv, y_:Y_cv, phase_train: False}
        feed_dict_test = {x:X_test, y_:Y_test, phase_train: False}
    else:
        #Don't do BN
        feed_dict_train = {x:X_train, y_:Y_train}
        feed_dict_cv = {x:X_cv, y_:Y_cv}
        feed_dict_test = {x:X_test, y_:Y_test}

    def get_batch_feed(X, Y, M, phase_train):
        # sample M indices (with replacement) from the full data set, not just its first M rows
        mini_batch_indices = np.random.randint(X.shape[0], size=M)
        Xminibatch = X[mini_batch_indices,:] # ( M x D^(0) )
        Yminibatch = Y[mini_batch_indices,:] # ( M x D^(L) )
        if phase_train is not None:
            #DO BN
            feed_dict = {x: Xminibatch, y_: Yminibatch, phase_train: True}
        else:
            #Don't do BN
            feed_dict = {x: Xminibatch, y_: Yminibatch}
        return feed_dict

    def print_messages(*args):
        for i, msg in enumerate(args):
            print('>',msg)

    if use_tensorboard:
        if tf.gfile.Exists('/tmp/mdl_logs'):
            tf.gfile.DeleteRecursively('/tmp/mdl_logs')
        tf.gfile.MakeDirs('/tmp/mdl_logs')

    tf.add_check_numerics_ops()

    # Add ops to save and restore all the variables.
    if mdl_save:
        saver = tf.train.Saver(max_to_keep=max_to_keep)
    start_time = time.time()
    file_for_error = './ray_error_file.txt'
    #with open(path+errors_pretty, 'w+') as f_err_msgs:
    with open(file_for_error, 'w+') as f_err_msgs:
        with tf.Session() as sess:
            ## prepare writers and fetches
            if use_tensorboard:
                merged = tf.merge_all_summaries()
                #writer = tf.train.SummaryWriter(tensorboard_data_dump, sess.graph)
                train_writer = tf.train.SummaryWriter(tensorboard_data_dump_train, sess.graph)
                test_writer = tf.train.SummaryWriter(tensorboard_data_dump_test, sess.graph)
                ##
                fetches_train = [merged, l2_loss]
                fetches_cv = l2_loss
                fetches_test = [merged, l2_loss]
            else:
                fetches_train = l2_loss
                fetches_cv = l2_loss
                fetches_test = l2_loss

            sess.run( tf.initialize_all_variables() )
            for i in range(steps):
                ## Create fake data for y = W.x + b where W = 2, b = 0
                #(batch_xs, batch_ys) = get_batch_feed(X_train, Y_train, M, phase_train)
                feed_dict_batch = get_batch_feed(X_train, Y_train, M, phase_train)
                ## Train
                if i%report_error_freq == 0:
                    if use_tensorboard:
                        (summary_str_train,train_error) = sess.run(fetches=fetches_train, feed_dict=feed_dict_train)
                        cv_error = sess.run(fetches=fetches_cv, feed_dict=feed_dict_cv)
                        (summary_str_test,test_error) = sess.run(fetches=fetches_test, feed_dict=feed_dict_test)

                        train_writer.add_summary(summary_str_train, i)
                        test_writer.add_summary(summary_str_test, i)
                    else:
                        train_error = sess.run(fetches=fetches_train, feed_dict=feed_dict_train)
                        cv_error = sess.run(fetches=fetches_cv, feed_dict=feed_dict_cv)
                        test_error = sess.run(fetches=fetches_test, feed_dict=feed_dict_test)

                    loss_msg = "Mdl*%s%s*-units%s, task: %s, step %d/%d, train err %g, cv err: %g test err %g"%(model,nb_hidden_layers,dims,task_name,i,steps,train_error,cv_error,test_error)
                    mdl_info_msg = "Opt:%s, BN %s, BN_trainable: %s After%d/%d iteration,Init: %s" % (optimization_alg,bn,trainable_bn,i,steps,init_type)
                    errors_to_beat = 'BEAT: hbf1_error: %s RBF error: %s PCA error: %s '%(hbf1_error, rbf_error,pca_error)
                    print_messages(loss_msg, mdl_info_msg, errors_to_beat)
                    #sys.stdout.flush()
                    loss_msg+="\n"
                    mdl_info_msg+="\n"
                    errors_to_beat+="\n"

                    print( 'S: ', inits_S)
                    # store results
                    #print type(train_error)
                    results['train_errors'].append( float(train_error) )
                    #print type(cv_error)
                    results['cv_errors'].append( float(cv_error) )
                    #print type(test_error)
                    results['test_errors'].append( float(test_error) )
                    # write errors to pretty print
                    f_err_msgs.write(loss_msg)
                    f_err_msgs.write(mdl_info_msg)
                    # save mdl
                    if mdl_save:
                        save_path = saver.save(sess, path+mdl_dir+'/model.ckpt',global_step=i)
                if use_tensorboard:
                    sess.run(fetches=[merged,train_step], feed_dict=feed_dict_batch) #sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})
                else:
                    sess.run(fetches=train_step, feed_dict=feed_dict_batch) #sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})

    git_hash = subprocess.check_output(['git', 'rev-parse', 'HEAD'])
    mtf.load_results_dic(results,git_hash=git_hash,dims=dims,mu=mu,std=std,init_constant=init_constant,b_init=b_init,S_init=S_init,\
        init_type=init_type,model=model,bn=bn,path=path,\
        tensorboard_data_dump_test=tensorboard_data_dump_test,tensorboard_data_dump_train=tensorboard_data_dump_train,\
        report_error_freq=report_error_freq,steps=steps,M=M,optimization_alg=optimization_alg,\
        starter_learning_rate=starter_learning_rate,decay_rate=decay_rate,staircase=staircase)

    ##
    results['job_name'] = job_name
    results['slurm_jobid'] = slurm_jobid
    results['slurm_array_task_id'] = slurm_array_task_id
    #results['tf_rand_seed'] = tf_rand_seed
    results['date'] = date
    results['bn'] = bn
    results['trainable_bn'] = trainable_bn

    seconds = (time.time() - start_time)
    minutes = seconds/ 60
    hours = minutes/ 60
    print("--- %s seconds ---" % seconds )
    print("--- %s minutes ---" % minutes )
    print("--- %s hours ---" % hours )
    ## dump results to JSON
    results['seconds'] = seconds
    results['minutes'] = minutes
    results['hours'] = hours
    #print results
    with open(path+json_file, 'w+') as f_json:
        json.dump(results,f_json,sort_keys=True, indent=2, separators=(',', ': '))
    print( '\a') #makes beep
def main(argv):
    (_, task_name, result_loc, nb_inits, nb_rbf_shapes, units)  = argv
    nb_inits, nb_rbf_shapes = int(nb_inits), int(nb_rbf_shapes)
    units_list =  units.split(',')
    nb_centers_list = [ int(a) for a in units_list ]

    print('task_name ', task_name)
    #pdb.set_trace()
    #(X_train, Y_train, X_cv, Y_cv, X_test, Y_test) = mtf.get_data_from_file(file_name='./f_1d_cos_no_noise_data.npz')
    #task_name = 'qianli_func'
    #task_name = 'hrushikesh'
    #task_name = 'f_2D_task2'
    (X_train, Y_train, X_cv, Y_cv, X_test, Y_test) = mtf.get_data(task_name)
    data = (X_train, Y_train, X_cv, Y_cv, X_test, Y_test)

    replace = False # with or without replacement
    #nb_rbf_shapes = 2 #<--
    stddevs = np.linspace(start=0.1, stop=3, num=nb_rbf_shapes)
    print('number of RBF stddev tried:', len(stddevs))
    print('start stddevs: ', stddevs)
    #nb_centers_list = [3, 6, 9, 12, 16, 24, 30, 39, 48, 55]
    #nb_centers_list = [2, 4, 6, 8, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]
    #nb_centers_list = [2, 4]
    #nb_centers_list = [4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096]
    #centers_to_reconstruct_index = [1, 3, 5, 7, 9]
    centers_to_reconstruct_index = [1, 4, 7] # corresponds to centers 4, 12, 18; used below for 'qianli_func'
    colours = ['g','r','c','m','y']

    #nb_inits = 2 #<--

    mdl_best_params, mdl_mean_params, errors_best, errors_stats, reconstructions_best, reconstructions_mean = mtf.evalaute_models(data, stddevs, nb_centers_list, replace=replace, nb_inits=nb_inits)
    (C_hat_bests, centers_bests, best_stddevs) = mdl_best_params
    print('best_stddevs: ', best_stddevs)
    (train_errors_bests, _, test_errors_bests) = errors_best
    (train_errors_means,_,test_errors_means, train_error_stds,_,test_error_stds) = errors_stats
    (Y_pred_train_best, _, Y_pred_test_best) = reconstructions_best

    # plot errors
    print('plotting errors')
    plt.figure(3)
    plot_errors(nb_centers_list, train_errors_bests,label='train_Errors_best', markersize=3,colour='b')
    plot_errors(nb_centers_list, test_errors_bests,label='test_Errors_best', markersize=3,colour='r')
    plot_errors_and_bars(nb_centers_list, train_errors_means, train_error_stds, label='train_Errors_average', markersize=3,colour='b')
    plot_errors_and_bars(nb_centers_list, test_errors_means, test_error_stds, label='test_Errors_average', markersize=3,colour='r')

    # get things to reconstruct
    if task_name == 'qianli_func':
        plot_one_func(fig_num=1, X_original=X_train,Y_original=Y_train, markersize=3, title_name='Reconstruction')
        print('plotting reconstruct for task %s'%task_name)
        nb_centers_reconstruct = [nb_centers_list[i] for i in centers_to_reconstruct_index]
        rbf_predictions_reconstruct_train = [Y_pred_train_best[i] for i in centers_to_reconstruct_index]
        rbf_predictions_reconstruct_test = [Y_pred_test_best[i] for i in centers_to_reconstruct_index]
        colours = colours[0:len(centers_to_reconstruct_index)]
        #colours = [colours[i] for i in centers_to_reconstruct_index]
        # plot reconstructions
        print('plotting reconstructions')
        plot_reconstruction(fig_num=1, X_original=X_train,Y_original=Y_train, nb_centers=nb_centers_reconstruct, \
        rbf_predictions=rbf_predictions_reconstruct_train, colours=colours, markersize=3,title_name='Reconstruction_train')
        plot_reconstruction(fig_num=2, X_original=X_test,Y_original=Y_test, nb_centers=nb_centers_reconstruct, \
        rbf_predictions=rbf_predictions_reconstruct_test, colours=colours, markersize=3,title_name='Reconstruction_test')
    elif task_name == 'f_2D_task2':
        print('plotting reconstruct for task %s'%task_name)
        pass
    # plot show
    plt.legend()
    plt.show()

    #result_loc = './tmp_test_experiments/tmp_krls_workspace'
    mtf.save_workspace(filename=result_loc,names_of_spaces_to_save=dir(),dict_of_values_to_save=locals())
    print('\a') # makes beep
import numpy as np
import json
#from sklearn.cross_validation import train_test_split

from tensorflow.examples.tutorials.mnist import input_data

import my_tf_pkg as mtf
import namespaces as ns

import pdb

#task_name = 'task_MNIST_flat_auto_encoder'
#task_name = 'task_f_4D_conv_2nd'
arg = ns.Namespace()
arg.data_dirpath = './data/'
arg.data_file_name = 'f_8D_conv_cos_poly1_poly1'
X_train, Y_train, X_cv, Y_cv, X_test, Y_test = mtf.get_data(arg)

print('max: ', np.max(Y_train))
print('min: ', np.min(Y_train))

print('mean: ',np.mean(Y_train))
print('std: ',np.std(Y_train))
def main(argv):
    (_, task_name, prefix, experiment_name, nb_inits, nb_rbf_shapes,
     units) = argv
    nb_inits, nb_rbf_shapes = int(nb_inits), int(nb_rbf_shapes)
    units_list = units.split(',')
    nb_centers_list = [int(a) for a in units_list]
    date = datetime.date.today().strftime("%B %d").replace(" ", "_")
    print('task_name ', task_name)

    (X_train, Y_train, X_cv, Y_cv, X_test, Y_test) = mtf.get_data(task_name)
    data = (X_train, Y_train, X_cv, Y_cv, X_test, Y_test)
    print(X_train.shape)

    replace = False  # with or without replacement
    stddevs = np.linspace(start=0.01, stop=10, num=nb_rbf_shapes)
    print('number of RBF stddev tried:', len(stddevs))
    print('start stddevs: ', stddevs)

    mdl_best_params, mdl_mean_params, errors_best, errors_stats, reconstructions_best, reconstructions_mean = mtf.evalaute_models(
        data, stddevs, nb_centers_list, replace=replace, nb_inits=nb_inits)
    (C_hat_bests, centers_bests, best_stddevs) = mdl_best_params
    print('best_stddevs: ', best_stddevs)
    (train_errors_bests, cv_errors_bests,
     test_errors_bests) = [list(err_list) for err_list in errors_best]
    (train_errors_means, cv_errors_means, test_errors_means, train_error_stds,
     cv_error_stds,
     test_error_stds) = [list(err_list) for err_list in errors_stats]
    #(Y_pred_train_best, _, Y_pred_test_best) = reconstructions_best
    print('train_errors_means: ', train_errors_means)

    dir_path = './%s_experiments/%s_%s' % (prefix, date, experiment_name)
    mtf.make_and_check_dir(dir_path)
    # save rbf
    rbf_params_loc = dir_path + '/rbf_params_%s_%s' % (date, experiment_name)
    np.savez(rbf_params_loc,
             C_hat_bests=C_hat_bests,
             centers_bests=centers_bests,
             best_stddevs=best_stddevs,
             units_list=np.array(nb_centers_list))
    # save errors
    result_loc = dir_path + '/results_json_%s_%s' % (date, experiment_name)
    results = {'nb_centers_list': nb_centers_list}
    mtf.load_results_dic(results,
                         train_errors_bests=train_errors_bests,
                         cv_errors_bests=cv_errors_bests,
                         test_errors_bests=test_errors_bests,
                         train_errors_means=train_errors_means,
                         cv_errors_means=cv_errors_means,
                         test_errors_means=test_errors_means,
                         train_error_stds=train_error_stds,
                         cv_error_stds=cv_error_stds,
                         test_error_stds=test_error_stds)
    with open(result_loc, 'w+') as f_json:
        json.dump(results,
                  f_json,
                  sort_keys=True,
                  indent=2,
                  separators=(',', ': '))
def main_hp(arg):
    '''
    Executes the current hp (hyper param) slurm array task. Usually this means
    either continuing to train a model that wasn't finished training or
    starting one from scratch.
    '''
    #arg.rand_x = int.from_bytes(os.urandom(4), sys.byteorder)
    if arg.rand_x is not None:
        np.random.seed(arg.rand_x)
        random.seed(arg.rand_x)
        tf.set_random_seed(arg.rand_x)
    # default: print without forced flushing (overridden below when needed)
    print = print_func_flush_false
    if arg.slurm_array_task_id == '1':
        #arg.display_training = True
        print = print_func_flush_true
    if arg.flush:
        print = print_func_flush_true
    print(print)  # show which print variant (flush vs no-flush) is in use
    print('>>> arg.restore = ', arg.restore)
    arg.date = datetime.date.today().strftime("%B %d").replace(" ", "_")
    # tensorboard
    if arg.use_tb:
        if tf.gfile.Exists(arg.tb_data_dump):
            tf.gfile.DeleteRecursively(arg.tb_data_dump)
        tf.gfile.MakeDirs(arg.tb_data_dump)
    #
    current_job_mdl_folder = 'job_mdl_folder_%s/'%arg.job_name
    arg.path_to_hp = arg.get_path_root(arg)+current_job_mdl_folder
    arg.path_to_ckpt = arg.get_path_root_ckpts(arg)+current_job_mdl_folder
    arg.hp_folder_for_ckpt = 'hp_stid_%s/'%str(arg.slurm_array_task_id)
    ### get folder structure for experiment
    mtf.make_and_check_dir(path=arg.path_to_hp)
    mtf.make_and_check_dir(path=arg.path_to_ckpt)
    #
    #errors_pretty = '/errors_file_%s_slurm_sj%s.txt'%(arg.date,arg.slurm_array_task_id)
    arg.json_hp_filename = 'json_hp_stid%s'%(arg.slurm_array_task_id)
    arg.csv_errors_filename = 'csv_errors_slurm_array_id%s'%(arg.slurm_array_task_id)
    ##
    # if arg.restore: TODO
    #     arg = restore_hps(arg)
    #     arg.float_type = tf.float32
    ## get data set
    X_train, Y_train, X_cv, Y_cv, X_test, Y_test = mtf.get_data(arg,arg.N_frac)
    print( '(N_train,D) = (%d,%d) \n (N_test,D_out) = (%d,%d) ' % (arg.N_train,arg.D, arg.N_test,arg.D_out) )
    ## if (preprocess_data, then preprocess) else (do nothing to the data)
    if arg.type_preprocess_data:
        X_train, Y_train, X_cv, Y_cv, X_test, Y_test = preprocess_data(arg, X_train, Y_train, X_cv, Y_cv, X_test, Y_test)
    #### build graph
    graph = tf.Graph()
    with graph.as_default():
        ### get mdl
        x = tf.placeholder(arg.float_type, arg.get_x_shape(arg), name='x-input')
        y_ = tf.placeholder(arg.float_type, arg.get_y_shape(arg))
        phase_train = tf.placeholder(tf.bool, name='phase_train')  # only consumed by the mdl when arg.bn (batch norm) is set
        arg.phase_train = phase_train
        y = get_mdl(arg,x)
        ### get loss and accuracy
        loss, accuracy = get_accuracy_loss(arg,x,y,y_)
        ### get optimizer variables
        opt = get_optimizer(arg)
        train_step = opt.minimize(loss, global_step=arg.global_step)
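        # note: because global_step is passed, opt.minimize increments it on
        # every update, keeping lr schedules and ckpt bookkeeping in sync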
        # step for optimizer (useful for ckpts)
        step, nb_iterations = tf.Variable(0, name='step'), tf.Variable(arg.nb_steps, name='nb_iterations')
        batch_size = tf.Variable(arg.batch_size, name='batch_size')
        # saver for all graph variables (the weights plus the step/iteration bookkeeping above)
        saver = tf.train.Saver()
    #### run session
    arg.save_ckpt_freq = arg.get_save_ckpt_freq(arg)
    start_time = time.time()
    #pdb.set_trace()
    with tf.Session(graph=graph) as sess:
        with open(arg.path_to_hp+arg.csv_errors_filename, mode='a') as errors_csv_f:  # 'a': append, creating the file if it does not exist
            #writer = csv.Writer(errors_csv_f)
            writer = csv.DictWriter(errors_csv_f, ['train_error', 'cv_error', 'test_error', 'train_acc', 'cv_acc', 'test_acc'])
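            # note: DictWriter raises ValueError if writerow gets a key not in
            # these fieldnames; missing keys are written as empty fields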
            # if (there is a restore ckpt mdl restore it) else (create a structure to save ckpt files)
            if arg.restore:
                arg.restore = False  # once restored, continue training normally until all hp's finish
                saver.restore(sess=sess, save_path=arg.save_path_to_ckpt2restore)  # e.g. save_path='./tmp/my-model'
                arg = restore_hps(arg)
                print('arg ', arg)
                #print('arg.save_path_to_ckpt2restore: ',arg.save_path_to_ckpt2restore)
                print('restored model trained up to, STEP: ', step.eval())
                print('restored model, ACCURACY:', sess.run(fetches=accuracy, feed_dict={x: X_train, y_: Y_train, phase_train: False}))
            else:
                # not restored, so this hp starts a fresh run from scratch
                deleteContent(pfile=errors_csv_f)  # clear any stale csv contents before writing the header
                writer.writeheader()
                #
                save_hps(arg) # save current hyper params
                if arg.save_checkpoints or arg.save_last_mdl:
                    mtf.make_and_check_dir(path=arg.path_to_ckpt+arg.hp_folder_for_ckpt) # creates ./all_ckpts/exp_task_name/mdl_nn10/hp_stid_N
                sess.run(tf.global_variables_initializer())
            start_iteration = step.eval()  # resume from the last trained iteration (0 on a fresh run)
            batch_size_eval = batch_size.eval()
            # Setup TensorBoard (or not)
            if arg.use_tb:
                merged = tf.summary.merge_all()
                train_writer = tf.summary.FileWriter(arg.tb_data_dump+'/train', graph)
                cv_writer = tf.summary.FileWriter(arg.tb_data_dump+'/cv', graph)
                test_writer = tf.summary.FileWriter(arg.tb_data_dump+'/test', graph)
                fetches_loss = maps.NamedDict({'loss': loss, 'merged': merged})
                fetches_acc = maps.NamedDict({'acc': accuracy, 'merged': merged})
            else:
                fetches_loss, fetches_acc = maps.NamedDict({'loss':loss}), maps.NamedDict({'acc':accuracy})
            # train
            train_accs = []
            train_errs = []
            #Y_train_c, Y_cv_c, Y_test_c = mtf.radamacher_to_one_hot(Y_train), mtf.radamacher_to_one_hot(Y_cv), mtf.radamacher_to_one_hot(Y_test)
            for i in range(start_iteration,nb_iterations.eval()):
                batch_xs, batch_ys = get_batch_feed(X_train, Y_train, batch_size_eval)
                sess.run(fetches=train_step, feed_dict={x: batch_xs, y_: batch_ys})
                #pdb.set_trace()
                if i % arg.report_error_freq == 0:
                    sess.run(step.assign(i))
                    # train evaluate
                    fetched_loss_train = sess.run(fetches=fetches_loss, feed_dict={x: X_train, y_: Y_train})
                    fetched_loss_train.acc = -1  # placeholder acc for regression runs
                    fetched_acc_train = fetched_loss_train  # fallback so train_acc is always defined
                    if arg.classification:
                        fetched_acc_train = sess.run(fetches=fetches_acc, feed_dict={x: X_train, y_: Y_train})
                    # cv, test evaluate
                    if arg.collect_generalization:
                        fetched_loss_cv = sess.run(fetches=fetches_loss, feed_dict={x: X_cv, y_: Y_cv})
                        fetched_loss_test = sess.run(fetches=fetches_loss, feed_dict={x: X_test, y_: Y_test})
                        fetched_loss_cv.acc, fetched_loss_test.acc = -1, -1  # placeholder accs for regression runs
                        fetched_acc_cv, fetched_acc_test = fetched_loss_cv, fetched_loss_test  # fallbacks, as above
                        if arg.classification:
                            fetched_acc_cv = sess.run(fetches=fetches_acc, feed_dict={x: X_cv, y_: Y_cv})
                            fetched_acc_test = sess.run(fetches=fetches_acc, feed_dict={x: X_test, y_: Y_test})
                    # set variables
                    train_error, train_acc = fetched_loss_train.loss, fetched_acc_train.acc
                    if arg.collect_generalization:
                        cv_error, test_error = fetched_loss_cv.loss, fetched_loss_test.loss
                        cv_acc, test_acc = fetched_acc_cv.acc, fetched_acc_test.acc
                    # display training stuff
                    if arg.display_training:
                        #y_pred_val = sess.run(fetches=tf.nn.softmax(y), feed_dict={x: X_train, y_: Y_train})
                        #print('y_val: %s \n Y_train: %s '%(y_pred_val, Y_train))
                        print('step %d, train error: %s | train acc %s | batch_size(batch_size.eval(),arg.batch_size): %s,%s | log_learning_rate: %s | mdl %s' % (i,train_error,train_acc,batch_size_eval,arg.batch_size,arg.log_learning_rate,arg.mdl))
                    # write files
                    if not arg.collect_generalization:
                        dict_errs = {'train_error':train_error,'train_acc':train_acc}
                    else:
                        dict_errs = {'train_error':train_error,'cv_error':cv_error,'test_error':test_error,'train_acc':train_acc,'cv_acc':cv_acc,'test_acc':test_acc}
                    writer.writerow(dict_errs)
                    # append quick checks
                    train_accs.append(train_acc)
                    train_errs.append(train_error)
                    # write tensorboard stats
                    if arg.use_tb:
                        train_writer.add_summary(fetched_loss_train.merged, i)
                        train_writer.add_summary(fetched_acc_train.merged, i)
                        if arg.collect_generalization:
                            cv_writer.add_summary(fetched_loss_cv.merged, i)
                            cv_writer.add_summary(fetched_acc_cv.merged, i)
                            test_writer.add_summary(fetched_loss_test.merged, i)
                            test_writer.add_summary(fetched_acc_test.merged, i)
                    # save checkpoint
                    if arg.save_checkpoints:
                        if i % arg.save_ckpt_freq == 0:
                            #print('>>>>>>>>>>CKPT',i,arg.save_ckpt_freq)
                            saver.save(sess=sess,save_path=arg.path_to_ckpt+arg.hp_folder_for_ckpt+arg.prefix_ckpt)
            # save last model
            if arg.save_last_mdl:
                saver.save(sess=sess,save_path=arg.path_to_ckpt+arg.hp_folder_for_ckpt+arg.prefix_ckpt)
            # evaluate
            #print('Final Test Acc/error: ', sess.run(fetches=accuracy, feed_dict={x: X_test, y_: Y_test}))
            print('Final -> | best train error: %s | best train acc: %s' % (min(train_errs), max(train_accs)))
            seconds = time.time() - start_time
            minutes = seconds / 60
            hours = minutes / 60
            print("--- %s seconds ---" % seconds)
            print("--- %s minutes ---" % minutes)
            print("--- %s hours ---" % hours)