예제 #1
0
파일: bart.py 프로젝트: mazphilip/pgbart
def main():
    """Run the BART MCMC sampler end-to-end.

    Parses command-line settings, loads (and optionally centers) the data,
    then runs ``settings.n_iterations`` MCMC sweeps over the
    ``settings.m_bart`` trees, recording per-tree and per-iteration
    statistics.  If ``settings.save == 1``, cumulative predictions and
    results are pickled to disk.
    """
    settings = process_command_line()
    print('Current settings:')
    pp.pprint(vars(settings))

    # Resetting random seed (derived from init_id so distinct init_ids give
    # independent chains while a fixed init_id is reproducible)
    np.random.seed(settings.init_id * 1000)
    random.seed(settings.init_id * 1000)

    # load data
    print('Loading data ...')
    data = load_data(settings)
    print('Loading data ... completed')
    if settings.center_y:
        print('center_y = True; centering the y variables at mean(data[y_train])')
        center_labels(data, settings)
    backup_target(data, settings)

    # pre-compute & initialize
    # NOTE: time.clock() was removed in Python 3.8; perf_counter() is the
    # recommended monotonic replacement for interval timing
    time_start = time.perf_counter()
    param, cache, cache_tmp = precompute(data, settings)
    bart = BART(data, param, settings, cache, cache_tmp)
    time_initialization = time.perf_counter() - time_start

    # initialize stuff for results
    # mcmc_stats[tree, itr, stat]; stat columns follow mcmc_stats_bart_desc
    mcmc_stats = np.zeros((settings.m_bart, settings.n_iterations, 10))
    mcmc_stats_bart = np.zeros((settings.n_iterations, 10))
    mcmc_stats_bart_desc = ['loglik', 'logprior', 'logprob', \
                            'mean depth', 'mean num_leaves', 'mean num_nonleaves', 'mean change', \
                            'mse_train', 'lambda_bart', 'time_itr']
    mcmc_counts = None
    mcmc_tree_predictions = init_performance_storage(data, settings)
    n_burn_in = 0
    # NOTE: predictions are stored without discarding burn-in
    assert n_burn_in == 0

    time_init = time.perf_counter()
    time_init_run_avg = time.perf_counter()
    itr_run_avg = 0
    change = True
    # BUG FIX: random.shuffle requires a mutable sequence; in Python 3
    # range() is immutable, so materialize a list first
    tree_order = list(range(settings.m_bart))

    print('initial settings:')
    print('lambda_bart value = %.3f' % param.lambda_bart)
    loglik_train, mse_train = bart.compute_train_loglik(data, settings, param)
    print('mse train = %.3f, loglik_train = %.3f' % (mse_train, loglik_train))

    for itr in range(settings.n_iterations):
        time_init_current = time.perf_counter()
        if settings.verbose >= 1:
            print('\n%s BART iteration = %7d %s' % ('*'*30, itr, '*'*30))

        logprior = 0.

        if settings.sample_y == 1 and settings.mcmc_type != 'prior':    # Successive conditional simulator
            bart.sample_labels(data, settings, param)

        # sampling lambda_bart (its log prior contributes to the model logprior)
        bart.sample_lambda_bart(param, data, settings)
        time_sample_lambda = time.perf_counter() - time_init_current
        logprior += bart.lambda_logprior

        # sweep the trees in random order
        random.shuffle(tree_order)
        for i_t in tree_order:
            if settings.debug == 1:
                print('\ntree_id = %3d' % i_t)
            time_init_current_tree = time.perf_counter()
            # update data['y_train'] (residual w.r.t. the other trees)
            bart.update_residual(i_t, data)
            update_cache_tmp(cache_tmp, data, param, settings)

            # MCMC for i_t'th tree
            bart.trees[i_t].update_loglik_node_all(data, param, cache, settings)
            (bart.trees[i_t], change) = run_mcmc_single_tree(bart.trees[i_t], settings, data, param, \
                                        cache, change, mcmc_counts, cache_tmp, bart.pmcmc_objects[i_t])

            # update parameters (leaf predictions)
            sample_param(bart.trees[i_t], settings, param)
            logprior += bart.trees[i_t].pred_val_logprior

            # update pred_val
            bart.update_pred_val(i_t, data, param, settings)

            # update stats
            # 'change' indicates whether MCMC move was accepted
            bart.trees[i_t].update_depth()
            mcmc_stats[i_t, itr, [3,6,7,8,9]] = np.array([bart.trees[i_t].depth, \
                    len(bart.trees[i_t].leaf_nodes), len(bart.trees[i_t].non_leaf_nodes), \
                    change, time.perf_counter() - time_init_current_tree])
            # NOTE: this logprior computation does not affect timing
            if settings.mcmc_type == 'cgm' or settings.mcmc_type == 'growprune':
                mcmc_stats[i_t, itr, 1] = bart.trees[i_t].compute_logprior()
            else:
                mcmc_stats[i_t, itr, 1] = -np.inf
                #NOTE: compute_logprior could be incorrect for PG
                # (prior over feature_ids is 1/D rather than 1/numValidDimensions)

        if settings.sample_y == 1 and settings.mcmc_type == 'prior':    # Marginal conditional simulator
            bart.sample_labels(data, settings, param)

        if settings.mcmc_type == 'cgm' or settings.mcmc_type == 'growprune':
            logprior += float(np.sum(mcmc_stats[:, itr, 1]))
        else:
            logprior = -np.inf
        loglik_train, mse_train = bart.compute_train_loglik(data, settings, param)
        logprob_bart = logprior + loglik_train
#        mcmc_stats_bart_desc = 0: loglik, 1: logprior, 2: logprob,
#                    3: mean depth, 4: mean num_leaves, 5: mean num_nonleaves, 6: mean change,
#                    7: mse_train, 8: lambda_bart, 9: time_itr
        mcmc_stats_bart[itr, :3] = [loglik_train, logprior, logprob_bart]
        mcmc_stats_bart[itr, 3:7] = np.mean(mcmc_stats[:, itr, [3,6,7,8]], 0)     # depth, #leaf, #nonleaf, change
        mcmc_stats_bart[itr, -3:-1] = [mse_train, param.lambda_bart]
        mcmc_stats_bart[itr, -1] = np.sum(mcmc_stats[:, itr, -1]) + time_sample_lambda  # total time per iteration
        if itr == 0:
            # charge the one-off initialization cost to the first iteration
            mcmc_stats_bart[itr, -1] += time_initialization
        if settings.verbose >= 2:
            print('fraction of trees in which MCMC move was accepted = %.3f' % mcmc_stats_bart[itr, 6])
        if (settings.save == 1):
            for tree in bart.trees:
                tree.gen_rules_tree()
            pred_tmp = {'train': bart.predict_train(data, param, settings), \
                        'test': bart.predict(data['x_test'], data['y_test_orig'], param, settings)}
            for k_data in settings.perf_dataset_keys:
                for k_store in settings.perf_store_keys:
                    mcmc_tree_predictions[k_data]['accum'][k_store] += pred_tmp[k_data][k_store]
            if itr == 0 and settings.verbose >= 1:
                print('Cumulative: itr, itr_run_avg, [mse train, logprob_train, mse test, ' \
                    'logprob_test, time_mcmc, time_mcmc_prediction], time_mcmc_cumulative')
                print('itr, [mse train, logprob_train, mse test, ' \
                    'logprob_test, time_mcmc, time_mcmc+time_prediction]')
            if settings.store_every_iteration == 1:
                store_every_iteration(mcmc_tree_predictions, data, settings, param, itr, \
                                        pred_tmp, mcmc_stats_bart[itr, -1], time_init_current)
            # every n_run_avg iterations, snapshot running-average predictions
            if (itr > 0) and (itr % settings.n_run_avg == (settings.n_run_avg - 1)):
                metrics = {}
                for k_data in settings.perf_dataset_keys:
                    k_data_tmp, k_data_n = get_k_data_names(settings, k_data)
                    for k_store in settings.perf_store_keys:
                        mcmc_tree_predictions[k_data][k_store][itr_run_avg] = \
                            mcmc_tree_predictions[k_data]['accum'][k_store] / (itr + 1)
                    metrics[k_data] = compute_metrics_regression(data[k_data_tmp], \
                            mcmc_tree_predictions[k_data]['pred_mean'][itr_run_avg], \
                            mcmc_tree_predictions[k_data]['pred_prob'][itr_run_avg])
                itr_range = range(itr_run_avg * settings.n_run_avg, (itr_run_avg + 1) * settings.n_run_avg)
                if settings.debug == 1:
                    # list() so the debug output shows actual indices, not the range repr
                    print('itr_range = %s' % list(itr_range))
                time_mcmc_train = np.sum(mcmc_stats_bart[itr_range, -1])
                mcmc_tree_predictions['run_avg_stats'][:, itr_run_avg] = \
                        [ metrics['train']['mse'], metrics['train']['log_prob'], \
                        metrics['test']['mse'], metrics['test']['log_prob'], \
                        time_mcmc_train, time.perf_counter() - time_init_run_avg ]
                if settings.verbose >= 1:
                    # BUG FIX: the original mixed %-style placeholders with
                    # str.format(), which printed the template verbatim
                    print('Cumulative: %7d, %7d, %s, %.2f' % \
                        (itr, itr_run_avg, mcmc_tree_predictions['run_avg_stats'][:, itr_run_avg].T, \
                        np.sum(mcmc_tree_predictions['run_avg_stats'][-2, :itr_run_avg+1])))
                itr_run_avg += 1
                time_init_run_avg = time.perf_counter()

    # print results
    print('\nTotal time (seconds) = %f' % (time.perf_counter() - time_init))
    if settings.verbose >= 2:
        print('mcmc_stats_bart[:, 3:] (not cumulative) = ')
        print('mean depth, mean num_leaves, mean num_nonleaves, ' + \
                'mean change, mse_train, lambda_bart, time_itr')
        print(mcmc_stats_bart[:, 3:])
    if settings.verbose >= 1:
        print('mean of mcmc_stats_bart (discarding first 50% of the chain)')
        # BUG FIX: '/' yields a float in Python 3, which is not a valid
        # array index; use floor division
        itr_start = mcmc_stats_bart.shape[0] // 2
        for k, s in enumerate(mcmc_stats_bart_desc):
            # BUG FIX: %-style placeholders require the % operator, not .format()
            print('%20s\t%.2f' % (s, np.mean(mcmc_stats_bart[itr_start:, k])))

    if settings.save == 1:
        print('predictions averaged across all previous additive trees:')
        print('mse train, mean log_prob_train, mse test, mean log_prob_test')
        print(mcmc_tree_predictions['run_avg_stats'][:4,:].T)

    # Write results to disk
    if settings.save == 1:
        filename = get_filename_bart(settings)
        print('filename = ' + filename)
        results = {}
        results['mcmc_stats_bart'] = mcmc_stats_bart
        results['mcmc_stats_bart_desc'] = mcmc_stats_bart_desc
        if settings.store_all_stats:
            results['mcmc_stats'] = mcmc_stats
        results['settings'] = settings
        if settings.dataset[:8] == 'friedman' or settings.dataset[:3] == 'toy':
            results['data'] = data
        pickle.dump(results, open(filename, "wb"), protocol=pickle.HIGHEST_PROTOCOL)
        filename2 = filename[:-1] + 'tree_predictions.p'
        print('predictions stored in file: %s' % filename2)
        pickle.dump(mcmc_tree_predictions, open(filename2, "wb"), protocol=pickle.HIGHEST_PROTOCOL)
예제 #2
0
파일: bart.py 프로젝트: Sandy4321/pgbart
def main():
    """Run the BART MCMC sampler end-to-end (Python 2 variant).

    Parses command-line settings, loads and optionally centers the data,
    runs `settings.n_iterations` MCMC sweeps over `settings.m_bart` trees,
    records per-tree/per-iteration statistics, and pickles results and
    predictions when `settings.save == 1`.

    NOTE(review): this variant uses Python 2 `print` statements, true
    integer division, and `time.clock()` -- it only runs on Python 2
    (`time.clock()` was removed in Python 3.8).
    """
    settings = process_command_line()
    print 'Current settings:'
    pp.pprint(vars(settings))

    # Resetting random seed
    # (seed derived from init_id: distinct ids -> independent chains,
    # fixed id -> reproducible run)
    np.random.seed(settings.init_id * 1000)
    random.seed(settings.init_id * 1000)

    # load data
    print 'Loading data ...'
    data = load_data(settings)
    print 'Loading data ... completed'
    if settings.center_y:
        print 'center_y = True; centering the y variables at mean(data[y_train])'
        center_labels(data, settings)
    backup_target(data, settings)

    #pre-compute & initialize
    # one-off precomputation is timed so it can be charged to iteration 0
    time_start = time.clock()
    param, cache, cache_tmp = precompute(data, settings)
    bart = BART(data, param, settings, cache, cache_tmp)
    time_initialization = time.clock() - time_start

    # initialize stuff for results
    # mcmc_stats[tree, itr, stat]; stat columns follow mcmc_stats_bart_desc
    mcmc_stats = np.zeros((settings.m_bart, settings.n_iterations, 10))
    mcmc_stats_bart = np.zeros((settings.n_iterations, 10))
    mcmc_stats_bart_desc = ['loglik', 'logprior', 'logprob', \
                            'mean depth', 'mean num_leaves', 'mean num_nonleaves', 'mean change', \
                            'mse_train', 'lambda_bart', 'time_itr']
    mcmc_counts = None
    mcmc_tree_predictions = init_performance_storage(data, settings)
    n_burn_in = 0
    # NOTE: predictions are stored without discarding burn-in
    assert n_burn_in == 0

    time_init = time.clock()
    time_init_run_avg = time.clock()
    itr_run_avg = 0
    change = True
    # Python 2: range() returns a list, so random.shuffle below is valid
    tree_order = range(settings.m_bart)

    print 'initial settings:'
    print 'lambda_bart value = %.3f' % param.lambda_bart
    loglik_train, mse_train = bart.compute_train_loglik(data, settings, param)
    print 'mse train = %.3f, loglik_train = %.3f' % (mse_train, loglik_train)

    for itr in range(settings.n_iterations):
        time_init_current = time.clock()
        if settings.verbose >= 1:
            print '\n%s BART iteration = %7d %s' % ('*'*30, itr, '*'*30)

        logprior = 0.

        if settings.sample_y == 1 and settings.mcmc_type != 'prior':    # Successive conditional simulator
            bart.sample_labels(data, settings, param)

        # sampling lambda_bart (its log prior contributes to the model logprior)
        bart.sample_lambda_bart(param, data, settings)
        time_sample_lambda = time.clock() - time_init_current
        logprior += bart.lambda_logprior

        # sweep the trees in random order
        random.shuffle(tree_order)
        for i_t in tree_order:
            if settings.debug == 1:
                print '\ntree_id = %3d' % i_t
            time_init_current_tree = time.clock()
            # update data['y_train'] (residual w.r.t. the other trees)
            bart.update_residual(i_t, data)
            update_cache_tmp(cache_tmp, data, param, settings)

            # MCMC for i_t'th tree
            bart.trees[i_t].update_loglik_node_all(data, param, cache, settings)
            (bart.trees[i_t], change) = run_mcmc_single_tree(bart.trees[i_t], settings, data, param, \
                                        cache, change, mcmc_counts, cache_tmp, bart.pmcmc_objects[i_t])

            # update parameters (leaf predictions)
            sample_param(bart.trees[i_t], settings, param)
            logprior += bart.trees[i_t].pred_val_logprior

            # update pred_val
            bart.update_pred_val(i_t, data, param, settings)

            # update stats
            # 'change' indicates whether MCMC move was accepted 
            bart.trees[i_t].update_depth()
            mcmc_stats[i_t, itr, [3,6,7,8,9]] = np.array([bart.trees[i_t].depth, \
                    len(bart.trees[i_t].leaf_nodes), len(bart.trees[i_t].non_leaf_nodes), \
                    change, time.clock() - time_init_current_tree])
            # NOTE: this logprior computation does not affect timing
            if settings.mcmc_type == 'cgm' or settings.mcmc_type == 'growprune':
                mcmc_stats[i_t, itr, 1] = bart.trees[i_t].compute_logprior()
            else:
                mcmc_stats[i_t, itr, 1] = -np.inf
                #NOTE: compute_logprior could be incorrect for PG 
                # (prior over feature_ids is 1/D rather than 1/numValidDimensions)
        
        if settings.sample_y == 1 and settings.mcmc_type == 'prior':    # Marginal conditional simulator
            bart.sample_labels(data, settings, param)

        if settings.mcmc_type == 'cgm' or settings.mcmc_type == 'growprune':
            logprior += float(np.sum(mcmc_stats[:, itr, 1]))
        else:
            logprior = -np.inf
        loglik_train, mse_train = bart.compute_train_loglik(data, settings, param)
        logprob_bart = logprior + loglik_train
#        mcmc_stats_bart_desc = 0: loglik, 1: logprior, 2: logprob, 
#                    3: mean depth, 4: mean num_leaves, 5: mean num_nonleaves, 6: mean change, 
#                    7: mse_train, 8: lambda_bart, 9: time_itr
        mcmc_stats_bart[itr, :3] = [loglik_train, logprior, logprob_bart]
        mcmc_stats_bart[itr, 3:7] = np.mean(mcmc_stats[:, itr, [3,6,7,8]], 0)     # depth, #leaf, #nonleaf, change
        mcmc_stats_bart[itr, -3:-1] = [mse_train, param.lambda_bart]
        mcmc_stats_bart[itr, -1] = np.sum(mcmc_stats[:, itr, -1]) + time_sample_lambda  # total time per iteration
        if itr == 0:
            # charge the one-off initialization cost to the first iteration
            mcmc_stats_bart[itr, -1] += time_initialization
        if settings.verbose >=2 :
            print 'fraction of trees in which MCMC move was accepted = %.3f' % mcmc_stats_bart[itr, 6]
        if (settings.save == 1):
            for tree in bart.trees:
                tree.gen_rules_tree()
            pred_tmp = {'train': bart.predict_train(data, param, settings), \
                        'test': bart.predict(data['x_test'], data['y_test_orig'], param, settings)}
            for k_data in settings.perf_dataset_keys:
                for k_store in settings.perf_store_keys:
                    mcmc_tree_predictions[k_data]['accum'][k_store] += pred_tmp[k_data][k_store]
            if itr == 0 and settings.verbose >= 1:
                print 'Cumulative: itr, itr_run_avg, [mse train, logprob_train, mse test, ' \
                    'logprob_test, time_mcmc, time_mcmc_prediction], time_mcmc_cumulative'
                print 'itr, [mse train, logprob_train, mse test, ' \
                    'logprob_test, time_mcmc, time_mcmc+time_prediction]'
            if settings.store_every_iteration == 1:
                store_every_iteration(mcmc_tree_predictions, data, settings, param, itr, \
                                        pred_tmp, mcmc_stats_bart[itr, -1], time_init_current)
            # every n_run_avg iterations, snapshot running-average predictions
            if (itr > 0) and (itr % settings.n_run_avg == (settings.n_run_avg - 1)):
                metrics = {}
                for k_data in settings.perf_dataset_keys:
                    k_data_tmp, k_data_n = get_k_data_names(settings, k_data)
                    for k_store in settings.perf_store_keys:
                        # running average = accumulated predictions / number of iterations so far
                        mcmc_tree_predictions[k_data][k_store][itr_run_avg] = \
                            mcmc_tree_predictions[k_data]['accum'][k_store] / (itr + 1)
                    metrics[k_data] = compute_metrics_regression(data[k_data_tmp], \
                            mcmc_tree_predictions[k_data]['pred_mean'][itr_run_avg], \
                            mcmc_tree_predictions[k_data]['pred_prob'][itr_run_avg])
                itr_range = range(itr_run_avg * settings.n_run_avg, (itr_run_avg + 1) * settings.n_run_avg)
                if settings.debug == 1:
                    print 'itr_range = %s' % itr_range
                time_mcmc_train = np.sum(mcmc_stats_bart[itr_range, -1])
                mcmc_tree_predictions['run_avg_stats'][:, itr_run_avg] = \
                        [ metrics['train']['mse'], metrics['train']['log_prob'], \
                        metrics['test']['mse'], metrics['test']['log_prob'], \
                        time_mcmc_train, time.clock() - time_init_run_avg ]
                if settings.verbose >= 1:
                    print 'Cumulative: %7d, %7d, %s, %.2f' % \
                        (itr, itr_run_avg, mcmc_tree_predictions['run_avg_stats'][:, itr_run_avg].T, \
                        np.sum(mcmc_tree_predictions['run_avg_stats'][-2, :itr_run_avg+1]))
                itr_run_avg += 1
                time_init_run_avg = time.clock()
    
    # print results
    print '\nTotal time (seconds) = %f' % (time.clock() - time_init)
    if settings.verbose >=2:
        print 'mcmc_stats_bart[:, 3:] (not cumulative) = '
        print 'mean depth, mean num_leaves, mean num_nonleaves, ' + \
                'mean change, mse_train, lambda_bart, time_itr'
        print mcmc_stats_bart[:, 3:]
    if settings.verbose >=1:
        print 'mean of mcmc_stats_bart (discarding first 50% of the chain)'
        # Python 2 '/' is integer division for ints, so this is a valid index
        itr_start = mcmc_stats_bart.shape[0] / 2
        for k, s in enumerate(mcmc_stats_bart_desc):
            print '%20s\t%.2f' % (s, np.mean(mcmc_stats_bart[itr_start:, k]))

    if settings.save == 1:
        print 'predictions averaged across all previous additive trees:'
        print 'mse train, mean log_prob_train, mse test, mean log_prob_test'
        print mcmc_tree_predictions['run_avg_stats'][:4,:].T

    # Write results to disk
    if settings.save == 1:
        filename = get_filename_bart(settings)
        print 'filename = ' + filename
        results = {}
        results['mcmc_stats_bart'] = mcmc_stats_bart
        results['mcmc_stats_bart_desc'] = mcmc_stats_bart_desc
        if settings.store_all_stats:
            results['mcmc_stats'] = mcmc_stats
        results['settings'] = settings
        if settings.dataset[:8] == 'friedman' or settings.dataset[:3] == 'toy':
            # keep the (small) synthetic datasets alongside the results
            results['data'] = data
        pickle.dump(results, open(filename, "wb"), protocol=pickle.HIGHEST_PROTOCOL)
        filename2 = filename[:-1] + 'tree_predictions.p'
        print 'predictions stored in file: %s' % filename2
        pickle.dump(mcmc_tree_predictions, open(filename2, "wb"), protocol=pickle.HIGHEST_PROTOCOL)
예제 #3
0
def main():
    """Run the BART MCMC sampler end-to-end and optionally save results.

    Seeds the RNGs from ``settings.init_id``, loads (and optionally centers)
    the data, runs ``settings.n_iterations`` MCMC sweeps over the
    ``settings.m_bart`` trees, accumulates running-average predictions, and
    pickles statistics/predictions when ``settings.save == 1``.
    """
    settings = process_command_line()
    print('Current Settings:')
    # FIX: 'ppt' was an undefined name; the pretty-printer alias used by the
    # sibling variants of this driver is 'pp' -- confirm against imports
    pp.pprint(vars(settings))

    # seed both RNGs from init_id so each chain is reproducible
    np.random.seed(settings.init_id*1000)
    random.seed(settings.init_id*1000)

    print("Loading data....")
    data = load_data(settings)
    # FIX: typo in user-facing message ("Dating" -> "Data")
    print("Data loading completed")

    if settings.center_y:
        print('center_y = True; center the y variables at mean(data[y_train])')
        center_labels(data, settings)
    backup_target(data,settings)

    # time the one-off precomputation so it can be charged to iteration 0
    # NOTE: time.clock() was removed in Python 3.8 -- use perf_counter()
    time_start = time.perf_counter()
    param , cache , cache_tmp = precompute(data,settings)
    bart = BART(data,param,settings,cache,cache_tmp)
    time_initialization = time.perf_counter() - time_start

    # per-tree and aggregate statistics; columns follow mcmc_stats_bart_desc
    mcmc_stats = np.zeros((settings.m_bart,settings.n_iterations,10))
    mcmc_stats_bart = np.zeros((settings.n_iterations, 10))
    mcmc_stats_bart_desc = ['loglik','logprior','logprob','mean_depth','mean num_leaves','mean num_nonleaves','mean change','mse_train','lambda_bart','time_itr']
    mcmc_counts = None
    mcmc_tree_predictions = init_performance_storage(data, settings)

    # predictions are stored without discarding burn-in
    burn_in_number = 0
    assert burn_in_number == 0

    init_time = time.perf_counter()
    init_time_run_average = time.perf_counter()
    iteration_run_average = 0
    change = True
    # FIX: random.shuffle needs a mutable sequence; Python 3 range() is immutable
    tree_order = list(range(settings.m_bart))

    print('Initial settings')
    print('lambda_bart value = %.3f' % param.lambda_bart)
    loglikelihood_training , mse_training = bart.compute_train_loglikelihood(data,settings,param)
    print('mse train =%.3f, loglik_train= %.3f' %(mse_training,loglikelihood_training))

    for iterator in range(settings.n_iterations):
        init_current_time = time.perf_counter()
        if settings.verbose >= 1:
            print('\n%s BART ITERATION = %7d %s' % ('*'*30,iterator, '*'*30))

        logarithmic_prior = 0.

        # successive conditional simulator
        if settings.sample_y == 1 and settings.mcmc_type != 'prior':
            bart.sample_labels(data,settings,param)

        # sample lambda_bart and add its log prior to the model logprior
        bart.sample_lambda_bart(param,data,settings)
        time_sample_lambda = time.perf_counter() - init_current_time
        logarithmic_prior += bart.lambda_logprior

        # sweep the trees in random order
        random.shuffle(tree_order)
        for ele in tree_order:
            if settings.debug == 1:
                print('\ntree_id = %3d' % ele)
            init_current_tree_time = time.perf_counter()

            #set data['y_train'] to the residual w.r.t. the other trees
            bart.update_residual(ele,data)
            update_cache_tmp(cache_tmp, data , param , settings)

            #Get the MCMC for i_t'th tree
            bart.trees[ele].update_loglik_node_all(data,param, cache, settings)
            (bart.trees[ele],change) = run_mcmc_single_tree(bart.trees[ele],settings, data, param,cache,change,mcmc_counts,cache_tmp,bart.pmcmc_objects[ele])

            # update to new parameters (leaf predictions)
            sample_param(bart.trees[ele],settings,param)
            logarithmic_prior += bart.trees[ele].pred_val_logprior

            # update predicted value
            bart.update_predicted_value(ele,data,param,settings)

            # 'change' records whether the MCMC move was accepted
            bart.trees[ele].update_depth()
            mcmc_stats[ele,iterator,[3,6,7,8,9]] = np.array([bart.trees[ele].depth,len(bart.trees[ele].leaf_nodes),len(bart.trees[ele].non_leaf_nodes),change,time.perf_counter()-init_current_tree_time])

            # FIX: the original checked 'grow_prune' here but 'growprune'
            # below, which forced logprior to -inf for that sampler
            if settings.mcmc_type == 'cgm' or settings.mcmc_type == 'growprune':
                mcmc_stats[ele,iterator,1] = bart.trees[ele].compute_logprior()
            else:
                mcmc_stats[ele,iterator,1] = -np.inf


        # marginal conditional simulator
        if settings.sample_y == 1 and settings.mcmc_type == 'prior':
            bart.sample_labels(data,settings,param)

        if settings.mcmc_type == 'cgm' or settings.mcmc_type == 'growprune':
            logarithmic_prior +=float(np.sum(mcmc_stats[:,iterator,1]))
        else:
            logarithmic_prior = -np.inf
        loglikelihood_training,mse_training = bart.compute_train_loglikelihood(data,settings,param)
        bart_log_probability = logarithmic_prior + loglikelihood_training

        mcmc_stats_bart[iterator,:3]=[loglikelihood_training,logarithmic_prior,bart_log_probability]
        mcmc_stats_bart[iterator,3:7]= np.mean(mcmc_stats[:,iterator,[3,6,7,8]],0)
        mcmc_stats_bart[iterator,-3:-1]=[mse_training,param.lambda_bart]
        mcmc_stats_bart[iterator,-1] = np.sum(mcmc_stats[:,iterator,-1]) + time_sample_lambda

        # charge the one-off initialization cost to the first iteration
        if iterator == 0:
            mcmc_stats_bart[iterator,-1] += time_initialization
        if (settings.verbose >= 2):
            print('Fraction of trees where MCMC moves were accepted = %.3f' % mcmc_stats_bart[iterator,6])
        if (settings.save == 1):
            for tree_ele in bart.trees:
                tree_ele.gen_rules_tree()
            pred_tmp = {'train':bart.predict_training(data,param,settings),'test':bart.predict(data['x_test'],data['y_test_orig'],param,settings)}
            for data_of_keys in settings.perf_dataset_keys:
                for stored_keys in settings.perf_store_keys:
                    # FIX: 'mcmc_predict_predictions' was an undefined name
                    mcmc_tree_predictions[data_of_keys]['accum'][stored_keys] += pred_tmp[data_of_keys][stored_keys]
            if iterator == 0 and settings.verbose >= 1:
                print('Cumulative: itr, itr_run_avg, [mse train, logprob_train, mse test, ' 'logprob_test, time_mcmc, time_mcmc_prediction], time_mcmc_cumulative')
                print('itr, [mse train, logprob_train, mse test, ' 'logprob_test, time_mcmc, time_mcmc+time_prediction]')
            if settings.store_every_iteration == 1:
                store_every_iteration(mcmc_tree_predictions,data,settings,param,iterator,pred_tmp,mcmc_stats_bart[iterator,-1],init_current_time)
            # every n_run_avg iterations, snapshot running-average predictions
            if iterator > 0 and iterator % settings.n_run_avg == settings.n_run_avg - 1 :
                metrics={}
                for data_of_keys in settings.perf_dataset_keys:
                    k_temp,k_data_n = get_k_data_names(settings,data_of_keys)
                    for stored_keys in settings.perf_store_keys:
                        # FIX: divide the accumulator by (iterator + 1) so this
                        # stores the running *average*, not the running sum
                        mcmc_tree_predictions[data_of_keys][stored_keys][iteration_run_average] = mcmc_tree_predictions[data_of_keys]['accum'][stored_keys] / (iterator + 1)
                    # FIX: the third argument is the predictive probability
                    # ('pred_prob'), not 'pred_mean' twice
                    metrics[data_of_keys] = compute_metrics_regression(data[k_temp],mcmc_tree_predictions[data_of_keys]['pred_mean'][iteration_run_average],mcmc_tree_predictions[data_of_keys]['pred_prob'][iteration_run_average])

                # FIX: unified the name (the original defined 'iterator_range'
                # but read the undefined 'iteration_range' below)
                iteration_range = range(iteration_run_average*settings.n_run_avg,(iteration_run_average+1)*settings.n_run_avg)
                if settings.debug == 1:
                    print('Iteration range = %s' % list(iteration_range))
                mcmc_train_timing = np.sum(mcmc_stats_bart[iteration_range,-1])
                # FIX: the storage key is 'run_avg_stats' (as read a few lines
                # below), not 'run_avg_tests'
                mcmc_tree_predictions['run_avg_stats'][:,iteration_run_average] = [metrics['train']['mse'],metrics['train']['log_prob'],metrics['test']['mse'],metrics['test']['log_prob'],mcmc_train_timing,time.perf_counter()-init_time_run_average]
                if settings.verbose >= 1:
                    print('Cumulative: %7d, %7d, %s, %.2f' % (iterator,iteration_run_average,mcmc_tree_predictions['run_avg_stats'][:,iteration_run_average].T,np.sum(mcmc_tree_predictions['run_avg_stats'][-2,:iteration_run_average+1])))
                iteration_run_average += 1
                init_time_run_average = time.perf_counter()


    print('\nTotal time in seconds =%f' % (time.perf_counter()-init_time))
    if settings.verbose >= 2:
        print('mcmc_stats_bart[:,3:] (non cummulative) =')
        print('mean_depth,mean num_leaves ,mean num_nonleaves,mean change,mse_training,lambda_bart,time_iterations')
        print(mcmc_stats_bart[:,3:])
    if settings.verbose >=1:
        print('mean of mcmc_stats_bart discarding first 50% of the chain')
        # FIX: '/' yields a float in Python 3, which is not a valid array index
        iteration_start = mcmc_stats_bart.shape[0]//2
        for k_ele,s_ele in enumerate(mcmc_stats_bart_desc):
            print('%20s\t%.2f' %(s_ele,np.mean(mcmc_stats_bart[iteration_start:,k_ele])))


    if settings.save == 1:
        print('Averaged predictions across all previous additive trees:')
        print('mse training,mean log_prob_train, mse test,mean log_prob_test')
        # FIX: consistent 'run_avg_stats' key
        print(mcmc_tree_predictions['run_avg_stats'][:4,:].T)

    if settings.save == 1:
        filename_to_use = get_filename_bart(settings)
        print('filename = '+filename_to_use)
        prediction_results={}
        prediction_results['mcmc_stats_bart'] = mcmc_stats_bart
        prediction_results['mcmc_stats_bart_desc'] = mcmc_stats_bart_desc
        if settings.store_all_stats:
            prediction_results['mcmc_stats'] = mcmc_stats
        prediction_results['settings'] = settings
        if settings.dataset[:8] == 'friedman' or settings.dataset[:3] == 'toy':
            # FIX: 'results' was an undefined name here; the dict being built
            # is 'prediction_results'
            prediction_results['data'] = data
        pickle.dump(prediction_results,open(filename_to_use,"wb"),protocol=pickle.HIGHEST_PROTOCOL)
        second_filename_to_use = filename_to_use[:-1]+ 'tree_predictions.p'
        print('predictions stored in file: %s' % second_filename_to_use)
        pickle.dump(mcmc_tree_predictions,open(second_filename_to_use,'wb'),protocol=pickle.HIGHEST_PROTOCOL)