def launcher_do_nontarget_bootstrap(args):
    ''' Compute a bootstrap estimate, using outputs from the model run earlier.

    args: argparse.Namespace or dict; must provide 'output_directory',
    'label', 'subaction' and 'num_repetitions'.
    Returns locals() so the caller / result-collection machinery can pick up
    every intermediate result array.
    '''
    print "Doing a piece of work for launcher_do_nontarget_bootstrap"

    try:
        # Convert Argparse.Namespace to dict
        all_parameters = vars(args)
    except TypeError:
        # Assume it's already done
        assert type(args) is dict, "args is neither Namespace nor dict, WHY?"
        all_parameters = args

    print all_parameters

    # Create DataIO
    # (complete label with current variable state)
    dataio = DataIO.DataIO(output_folder=all_parameters['output_directory'], label=all_parameters['label'].format(**all_parameters))

    save_every = 1
    run_counter = 0

    # Load the data.
    # NOTE(review): os.getenv("WORKDIR_DROP", None) returns None when the env
    # var is unset, which would make os.path.join raise — presumably
    # WORKDIR_DROP is always defined where this runs; confirm.
    if all_parameters['subaction'] == 'mixed':
        # Mixed runs
        model_outputs = utils.load_npy(os.path.join(os.getenv("WORKDIR_DROP", None), 'Experiments', 'bootstrap_nontargets', 'global_plots_errors_distribution-plots_errors_distribution-d977e237-cfce-473b-a292-00695e725259.npy'))
    else:
        # Conjunctive runs
        model_outputs = utils.load_npy(os.path.join(os.getenv("WORKDIR_DROP", None), 'Experiments', 'bootstrap_nontargets', 'global_plots_errors_distribution-plots_errors_distribution-cc1a49b0-f5f0-4e82-9f0f-5a16a2bfd4e8.npy'))

    # [..., 0] keeps only the first slice of the trailing axis — presumably
    # the first stored repetition; confirm against the generator run.
    data_responses_all = model_outputs['result_responses_all'][..., 0]
    data_target_all = model_outputs['result_target_all'][..., 0]
    data_nontargets_all = model_outputs['result_nontargets_all'][..., 0]
    T_space = model_outputs['T_space']
    sigmax_space = model_outputs['sigmax_space']
    K = data_nontargets_all.shape[-1]

    # Result arrays, indexed [sigmax, T, bootstrap sample]; NaN = not computed.
    result_bootstrap_samples_allitems = np.nan*np.ones((sigmax_space.size, T_space.size, all_parameters['num_repetitions']))
    result_bootstrap_samples = np.nan*np.ones((sigmax_space.size, T_space.size, all_parameters['num_repetitions']))
    result_bootstrap_samples_allitems_uniquekappa_sumnontarget = np.nan*np.ones((sigmax_space.size, T_space.size, all_parameters['num_repetitions']))
    # Per-nontarget samples: trailing axis sized for up to K nontargets per repetition.
    result_bootstrap_samples_allitems_uniquekappa_allnontarget = np.nan*np.ones((sigmax_space.size, T_space.size, K*all_parameters['num_repetitions']))

    # T_space[0] is skipped below (single item => no nontargets), hence the -1.
    search_progress = progress.Progress(sigmax_space.size*(T_space.size-1))

    for sigmax_i, sigmax in enumerate(sigmax_space):
        for T_i, T in enumerate(T_space[1:]):
            T_i += 1  # shift back to the index into the full T_space

            print "%.2f%%, %s left - %s" % (search_progress.percentage(), search_progress.time_remaining_str(), search_progress.eta_str())
            print "Bootstrap for T=%d, sigmax=%.2f, %d bootstrap samples" % (T, sigmax, all_parameters['num_repetitions'])

            # Update parameter

            ### WORK WORK WORK work? ###

            # Get some bootstrap samples
            bootstrap_allitems_nontargets_allitems_uniquekappa = em_circularmixture_allitems_uniquekappa.bootstrap_nontarget_stat(
                data_responses_all[sigmax_i, T_i],
                data_target_all[sigmax_i, T_i],
                data_nontargets_all[sigmax_i, T_i, :, :T_i],
                nb_bootstrap_samples=all_parameters['num_repetitions'],
                resample_targets=False)
            # bootstrap_allitems_nontargets_allitems = em_circularmixture_allitems.bootstrap_nontarget_stat(
            #     data_responses_all[sigmax_i, T_i],
            #     data_target_all[sigmax_i, T_i],
            #     data_nontargets_all[sigmax_i, T_i, :, :T_i],
            #     nb_bootstrap_samples=all_parameters['num_repetitions'],
            #     resample_targets=False)
            bootstrap_allitems_nontargets = em_circularmixture.bootstrap_nontarget_stat(
                data_responses_all[sigmax_i, T_i],
                data_target_all[sigmax_i, T_i],
                data_nontargets_all[sigmax_i, T_i, :, :T_i],
                nb_bootstrap_samples=all_parameters['num_repetitions'],
                resample_targets=False)

            # Collect and store responses
            # result_bootstrap_samples_allitems[sigmax_i, T_i] = bootstrap_allitems_nontargets_allitems['nontarget_bootstrap_samples']
            result_bootstrap_samples[sigmax_i, T_i] = bootstrap_allitems_nontargets['nontarget_bootstrap_samples']
            result_bootstrap_samples_allitems_uniquekappa_sumnontarget[sigmax_i, T_i] = bootstrap_allitems_nontargets_allitems_uniquekappa['nontarget_bootstrap_samples']
            # Only the first num_repetitions*T_i slots are valid for T_i nontargets.
            result_bootstrap_samples_allitems_uniquekappa_allnontarget[sigmax_i, T_i, :all_parameters['num_repetitions']*T_i] = bootstrap_allitems_nontargets_allitems_uniquekappa['allnontarget_bootstrap_samples']

            # NOTE(review): result_bootstrap_samples_allitems is never assigned
            # (its assignment above is commented out), so this prints NaNs.
            print result_bootstrap_samples_allitems[sigmax_i, T_i]
            print result_bootstrap_samples[sigmax_i, T_i]
            print result_bootstrap_samples_allitems_uniquekappa_sumnontarget[sigmax_i, T_i]
            print result_bootstrap_samples_allitems_uniquekappa_allnontarget[sigmax_i, T_i]

            ### /Work ###

            search_progress.increment()
            # Checkpoint intermediate state every save_every iterations.
            if run_counter % save_every == 0 or search_progress.done():
                dataio.save_variables_default(locals())
            run_counter += 1

    # Finished
    dataio.save_variables_default(locals())

    print "All finished"
    return locals()
def load(self):
    """Reload the replay-memory arrays from ``self.model_dir``.

    Each attribute ('actions', 'rewards', 'screens', 'terminals',
    'prestates', 'poststates') is loaded from a file of the same name
    under ``self.model_dir`` and stored back onto the instance.

    Bug fix: the original body did ``array = load_npy(...)`` inside the
    loop, which only rebound the local name ``array`` — the loaded data
    was silently discarded and the instance attributes kept their old
    values. We now assign with ``setattr`` so the load takes effect.
    """
    for name in ['actions', 'rewards', 'screens',
                 'terminals', 'prestates', 'poststates']:
        # setattr so the loaded array actually replaces the attribute.
        setattr(self, name, load_npy(os.path.join(self.model_dir, name)))
def launcher_do_nontarget_bootstrap_misbindingruns(args):
    ''' Compute a bootstrap estimate, using outputs from a Misbinding generator run.

    args: argparse.Namespace or dict; must provide 'output_directory',
    'label', 'subaction' and 'num_repetitions'.
    Returns locals() for the result-collection machinery.
    '''
    print "Doing a piece of work for launcher_do_nontarget_bootstrap"

    try:
        # Convert Argparse.Namespace to dict
        all_parameters = vars(args)
    except TypeError:
        # Assume it's already done
        assert type(args) is dict, "args is neither Namespace nor dict, WHY?"
        all_parameters = args

    print all_parameters

    # Create DataIO
    # (complete label with current variable state)
    dataio = DataIO.DataIO(output_folder=all_parameters['output_directory'], label=all_parameters['label'].format(**all_parameters))

    save_every = 1
    run_counter = 0

    # Load the data.
    # NOTE(review): if subaction is neither 'mixed' nor '', model_outputs is
    # never bound and the next line raises NameError — presumably only those
    # two subactions are ever used; confirm.
    if all_parameters['subaction'] == 'mixed' or all_parameters['subaction'] == '':
        # Mixed runs
        model_outputs = utils.load_npy(os.path.join(os.getenv("WORKDIR_DROP", None), 'Experiments', 'bootstrap_nontargets', 'SAVE_global_plots_misbinding_logposterior-plots_misbinding_logposterior-36eb41e9-6370-453e-995e-3876d5105388.npy'))

    data_responses_all = model_outputs['result_all_thetas']
    data_target = model_outputs['target_angle']
    data_nontargets = model_outputs['nontarget_angles']
    ratio_space = model_outputs['ratio_space']

    # Result arrays, indexed [ratio_conj, bootstrap sample]; NaN = not computed.
    result_bootstrap_samples_allitems = np.nan*np.ones((ratio_space.size, all_parameters['num_repetitions']))
    result_bootstrap_samples = np.nan*np.ones((ratio_space.size, all_parameters['num_repetitions']))
    result_bootstrap_samples_allitems_uniquekappa_sumnontarget = np.nan*np.ones((ratio_space.size, all_parameters['num_repetitions']))
    result_bootstrap_samples_allitems_uniquekappa_allnontarget = np.nan*np.ones((ratio_space.size, all_parameters['num_repetitions']))

    search_progress = progress.Progress(ratio_space.size)

    for ratio_conj_i, ratio_conj in enumerate(ratio_space):
        print "%.2f%%, %s left - %s" % (search_progress.percentage(), search_progress.time_remaining_str(), search_progress.eta_str())
        print "Bootstrap for ratio=%.2f, %d bootstrap samples" % (ratio_conj, all_parameters['num_repetitions'])

        ### WORK WORK WORK work? ###

        # Get some bootstrap samples
        bootstrap_allitems_nontargets_allitems_uniquekappa = em_circularmixture_allitems_uniquekappa.bootstrap_nontarget_stat(
            data_responses_all[ratio_conj_i],
            data_target,
            data_nontargets,
            nb_bootstrap_samples=all_parameters['num_repetitions'],
            resample_targets=False)
        bootstrap_allitems_nontargets = em_circularmixture.bootstrap_nontarget_stat(
            data_responses_all[ratio_conj_i],
            data_target,
            data_nontargets,
            nb_bootstrap_samples=all_parameters['num_repetitions'],
            resample_targets=False)

        # Collect and store responses
        result_bootstrap_samples[ratio_conj_i] = bootstrap_allitems_nontargets['nontarget_bootstrap_samples']
        result_bootstrap_samples_allitems_uniquekappa_sumnontarget[ratio_conj_i] = bootstrap_allitems_nontargets_allitems_uniquekappa['nontarget_bootstrap_samples']
        result_bootstrap_samples_allitems_uniquekappa_allnontarget[ratio_conj_i] = bootstrap_allitems_nontargets_allitems_uniquekappa['allnontarget_bootstrap_samples']

        # NOTE(review): result_bootstrap_samples_allitems is never assigned
        # anywhere in this function, so this prints NaNs.
        print result_bootstrap_samples_allitems[ratio_conj_i]
        print result_bootstrap_samples[ratio_conj_i]
        print result_bootstrap_samples_allitems_uniquekappa_sumnontarget[ratio_conj_i]
        print result_bootstrap_samples_allitems_uniquekappa_allnontarget[ratio_conj_i]

        ### /Work ###

        search_progress.increment()
        # Checkpoint intermediate state every save_every iterations.
        if run_counter % save_every == 0 or search_progress.done():
            dataio.save_variables_default(locals())
        run_counter += 1

    # Finished
    dataio.save_variables_default(locals())

    print "All finished"
    return locals()
def plots_fitting_experiments_random(data_pbs, generator_module=None):
    ''' Reload 2D volume runs from PBS and plot them.

    data_pbs: reloaded PBS result container (dict_arrays / loaded_data /
    dataset_infos). generator_module: the submission-generator module whose
    attributes (num_repetitions, pbs_submission_infos, ...) drive the plots.
    Returns locals() so everything can be saved/inspected afterwards.
    '''

    #### SETUP
    #
    savefigs = True
    savedata = True
    savemovies = False

    do_bays09 = True
    do_gorgo11 = True

    scatter3d_sumT = False
    plots_flat_sorted_performance = False
    plots_memorycurves_fits_best = True

    nb_best_points = 20
    # NOTE(review): Python 2 integer division; also appears unused in this
    # function — confirm before removing.
    nb_best_points_per_T = nb_best_points/6
    size_normal_points = 8
    size_best_points = 50
    # NOTE(review): shadowed by plot_scatter's own downsampling=1 parameter.
    downsampling = 2
    # do_relaunch_bestparams_pbs = True

    colormap = None  # or 'cubehelix'
    plt.rcParams['font.size'] = 16
    #
    #### /SETUP

    print "Order parameters: ", generator_module.dict_parameters_range.keys()
    # parameters: ratio_conj, sigmax, T

    # Extract data
    result_fitexperiments_flat = np.array(data_pbs.dict_arrays['result_fitexperiments']['results_flat'])
    result_fitexperiments_all_flat = np.array(data_pbs.dict_arrays['result_fitexperiments_all']['results_flat'])
    result_fitexperiments_noiseconv_flat = np.array(data_pbs.dict_arrays['result_fitexperiments_noiseconv']['results_flat'])
    result_fitexperiments_noiseconv_all_flat = np.array(data_pbs.dict_arrays['result_fitexperiments_noiseconv_all']['results_flat'])
    result_parameters_flat = np.array(data_pbs.dict_arrays['result_fitexperiments']['parameters_flat'])
    all_repeats_completed = data_pbs.dict_arrays['result_fitexperiments']['repeats_completed']

    all_args = data_pbs.loaded_data['args_list']
    all_args_arr = np.array(all_args)
    num_repetitions = generator_module.num_repetitions

    # Extract order of datasets
    experiment_ids = data_pbs.loaded_data['datasets_list'][0]['fitexperiment_parameters']['experiment_ids']
    parameter_names_sorted = data_pbs.dataset_infos['parameters']

    T_space = data_pbs.loaded_data['datasets_list'][0]['T_space']

    # NOTE(review): dataset_infos is not defined in this function — presumably
    # a module-level global; confirm.
    dataio = DataIO(output_folder=generator_module.pbs_submission_infos['simul_out_dir'] + '/outputs/', label='global_' + dataset_infos['save_output_filename'])

    # Disabled data filtering. NOTE(review): 'filter_data' is listed in
    # variables_to_save at the bottom while its assignment is commented out —
    # either re-enable one of these lines or drop it from variables_to_save.
    # filter_data = (result_parameters_flat[:, -1] < 1.0) & (all_repeats_completed == num_repetitions - 1)
    # filter_data = (all_repeats_completed == num_repetitions - 1)
    # result_fitexperiments_flat = result_fitexperiments_flat[filter_data]
    # result_fitexperiments_all_flat = result_fitexperiments_all_flat[filter_data]
    # result_fitexperiments_noiseconv_flat = result_fitexperiments_noiseconv_flat[filter_data]
    # result_fitexperiments_noiseconv_all_flat = result_fitexperiments_noiseconv_all_flat[filter_data]
    # result_parameters_flat = result_parameters_flat[filter_data]

    # Compute some stuff
    # Data is summed over all experiments for _flat, contains bic, ll and ll90.
    # for _all_flat, contains bic, ll and ll90 per experiment. Given that Gorgo11 and Bays09 are incompatible, shouldn't really use the combined version directly!
    result_fitexperiments_noiseconv_bic_avg_allT = utils.nanmean(result_fitexperiments_noiseconv_flat, axis=-1)[..., 0]
    result_fitexperiments_noiseconv_allexp_bic_avg_allT = utils.nanmean(result_fitexperiments_noiseconv_all_flat, axis=-1)[:, :, 0]
    # Negated so that smaller = better, matching the BIC convention below.
    result_fitexperiments_noiseconv_allexp_ll90_avg_allT = -utils.nanmean(result_fitexperiments_noiseconv_all_flat, axis=-1)[:, :, -1]

    ### BIC
    # result_fitexperiments_noiseconv_allexp_bic_avg_allT: N x T x exp
    result_fitexperiments_noiseconv_bays09_bic_avg_allT = result_fitexperiments_noiseconv_allexp_bic_avg_allT[..., 0]
    result_fitexperiments_noiseconv_gorgo11_bic_avg_allT = result_fitexperiments_noiseconv_allexp_bic_avg_allT[..., 1]
    result_fitexperiments_noiseconv_dualrecall_bic_avg_allT = result_fitexperiments_noiseconv_allexp_bic_avg_allT[..., 2]

    # Summed T
    result_fitexperiments_noiseconv_bays09_bic_avg_sumT = np.nansum(result_fitexperiments_noiseconv_bays09_bic_avg_allT, axis=-1)
    result_fitexperiments_noiseconv_gorgo11_bic_avg_sumT = np.nansum(result_fitexperiments_noiseconv_gorgo11_bic_avg_allT, axis=-1)
    result_fitexperiments_noiseconv_dualrecall_bic_avg_sumT = np.nansum(result_fitexperiments_noiseconv_dualrecall_bic_avg_allT, axis=-1)

    ### LL90
    # N x T x exp
    result_fitexperiments_noiseconv_bays09_ll90_avg_allT = result_fitexperiments_noiseconv_allexp_ll90_avg_allT[..., 0]
    result_fitexperiments_noiseconv_gorgo11_ll90_avg_allT = result_fitexperiments_noiseconv_allexp_ll90_avg_allT[..., 1]
    result_fitexperiments_noiseconv_dualrecall_ll90_avg_allT = result_fitexperiments_noiseconv_allexp_ll90_avg_allT[..., 2]

    # Summed T
    result_fitexperiments_noiseconv_bays09_ll90_avg_sumT = np.nansum(result_fitexperiments_noiseconv_bays09_ll90_avg_allT, axis=-1)
    result_fitexperiments_noiseconv_gorgo11_ll90_avg_sumT = np.nansum(result_fitexperiments_noiseconv_gorgo11_ll90_avg_allT, axis=-1)
    result_fitexperiments_noiseconv_dualrecall_ll90_avg_sumT = np.nansum(result_fitexperiments_noiseconv_dualrecall_ll90_avg_allT, axis=-1)

    def mask_outliers_array(result_dist_to_use, sigma_outlier=3):
        ''' Mask outlier datapoints. Compute the mean of the results and assume that points with: result > mean + sigma_outlier*std are outliers. As we want the minimum values, do not mask small values '''
        return np.ma.masked_greater(result_dist_to_use, np.mean(result_dist_to_use) + sigma_outlier*np.std(result_dist_to_use))

    def best_points_allT(result_dist_to_use):
        ''' Best points for all T '''
        # NOTE(review): defined but not called in the visible code.
        return np.argsort(result_dist_to_use)[:nb_best_points]

    def str_best_params(best_i, result_dist_to_use):
        # One-line "name value ..." summary of the parameters at index best_i,
        # suffixed with the distance value itself.
        return ' '.join(["%s %.4f" % (parameter_names_sorted[param_i], result_parameters_flat[best_i, param_i]) for param_i in xrange(len(parameter_names_sorted))]) + ' >> %f' % result_dist_to_use[best_i]

    def plot_scatter(all_vars, result_dist_to_use_name, title='', log_color=True, downsampling=1, label_file='', mask_outliers=True):
        # all_vars: a locals()-style dict used to look names up by string.
        result_dist_to_use = all_vars[result_dist_to_use_name]
        result_parameters_flat = all_vars['result_parameters_flat']

        # Filter if downsampling
        filter_downsampling = np.arange(0, result_dist_to_use.size, downsampling)
        result_dist_to_use = result_dist_to_use[filter_downsampling]
        result_parameters_flat = result_parameters_flat[filter_downsampling]

        if mask_outliers:
            result_dist_to_use = mask_outliers_array(result_dist_to_use)

        best_points_result_dist_to_use = np.argsort(result_dist_to_use)[:nb_best_points]

        # Construct all permutations of 3 parameters, for 3D scatters.
        # Random sampling + set-dedup rather than itertools.combinations.
        params_permutations = set([tuple(np.sort(np.random.choice(result_parameters_flat.shape[-1], 3, replace=False)).tolist()) for i in xrange(1000)])

        for param_permut in params_permutations:
            fig = plt.figure()
            ax = Axes3D(fig)

            # One plot per parameter permutation
            if log_color:
                color_points = np.log(result_dist_to_use)
            else:
                color_points = result_dist_to_use

            # All points, colored by distance value.
            utils.scatter3d(result_parameters_flat[:, param_permut[0]], result_parameters_flat[:, param_permut[1]], result_parameters_flat[:, param_permut[2]], s=size_normal_points, c=color_points, xlabel=parameter_names_sorted[param_permut[0]], ylabel=parameter_names_sorted[param_permut[1]], zlabel=parameter_names_sorted[param_permut[2]], title=title, ax_handle=ax)

            # Best points highlighted in red on the same axes.
            utils.scatter3d(result_parameters_flat[best_points_result_dist_to_use, param_permut[0]], result_parameters_flat[best_points_result_dist_to_use, param_permut[1]], result_parameters_flat[best_points_result_dist_to_use, param_permut[2]], c='r', s=size_best_points, ax_handle=ax)

            if savefigs:
                dataio.save_current_figure('scatter3d_%s_%s%s_{label}_{unique_id}.pdf' % (result_dist_to_use_name, '_'.join([parameter_names_sorted[i] for i in param_permut]), label_file))

                if savemovies:
                    try:
                        utils.rotate_plot3d(ax, dataio.create_formatted_filename('scatter3d_%s_%s%s_{label}_{unique_id}.mp4' % (result_dist_to_use_name, '_'.join([parameter_names_sorted[i] for i in param_permut]), label_file)), bitrate=8000, min_duration=8)
                        utils.rotate_plot3d(ax, dataio.create_formatted_filename('scatter3d_%s_%s%s_{label}_{unique_id}.gif' % (result_dist_to_use_name, '_'.join([parameter_names_sorted[i] for i in param_permut]), label_file)), nb_frames=30, min_duration=8)
                    except Exception:
                        # Most likely wrong aggregator...
                        print "failed when creating movies for ", result_dist_to_use_name

                # Disabled second viewpoint.
                if False and savefigs:
                    ax.view_init(azim=90, elev=10)
                    dataio.save_current_figure('scatter3d_view2_%s_%s%s_{label}_{unique_id}.pdf' % (result_dist_to_use_name, '_'.join([parameter_names_sorted[i] for i in param_permut]), label_file))

            # plt.close('all')

        print "Parameters: %s" % ', '.join(parameter_names_sorted)
        print "Best points, %s:" % title
        print '\n'.join([str_best_params(best_i, result_dist_to_use) for best_i in best_points_result_dist_to_use])

    if scatter3d_sumT:
        plot_scatter(locals(), 'result_fitexperiments_noiseconv_bays09_bic_avg_sumT', 'BIC Bays09')
        plot_scatter(locals(), 'result_fitexperiments_noiseconv_bays09_ll90_avg_sumT', 'LL90 Bays09')
        plot_scatter(locals(), 'result_fitexperiments_noiseconv_gorgo11_bic_avg_sumT', 'BIC Gorgo11')
        plot_scatter(locals(), 'result_fitexperiments_noiseconv_gorgo11_ll90_avg_sumT', 'LL90 Gorgo11')
        plot_scatter(locals(), 'result_fitexperiments_noiseconv_dualrecall_bic_avg_sumT', 'BIC Dual recall')
        plot_scatter(locals(), 'result_fitexperiments_noiseconv_dualrecall_ll90_avg_sumT', 'LL90 Dual recall')

    if plots_flat_sorted_performance:
        result_dist_to_try = []
        if do_bays09:
            result_dist_to_try.extend(['result_fitexperiments_noiseconv_bays09_bic_avg_sumT', 'result_fitexperiments_noiseconv_bays09_ll90_avg_sumT'])
        if do_gorgo11:
            result_dist_to_try.extend(['result_fitexperiments_noiseconv_gorgo11_bic_avg_sumT', 'result_fitexperiments_noiseconv_gorgo11_ll90_avg_sumT'])

        for result_dist in result_dist_to_try:
            # Worst-to-best ordering of all parameter sets for this distance.
            order_indices = np.argsort(locals()[result_dist])[::-1]

            f, axes = plt.subplots(2, 1)
            # Parameters normalised per-column and offset so they stack visually.
            axes[0].plot(np.arange(4) + result_parameters_flat[order_indices]/np.max(result_parameters_flat[order_indices], axis=0))
            axes[0].legend(parameter_names_sorted, loc='upper left')
            axes[0].set_ylabel('Parameters')
            axes[1].plot(locals()[result_dist][order_indices])
            axes[1].set_ylabel(result_dist.split('result_dist_')[-1])
            axes[0].set_title('Distance ordered ' + result_dist.split('result_dist_')[-1])
            f.canvas.draw()

            if savefigs:
                dataio.save_current_figure('plot_sortedperf_full_%s_{label}_{unique_id}.pdf' % (result_dist))

    if plots_memorycurves_fits_best:
        # Alright, will actually reload the data from another set of runs, and find the closest parameter set to the ones found here.
        data = utils.load_npy('normalisedsigmaxsigmaoutput_random_fitmixturemodels_sigmaxMratiosigmaoutput_repetitions3_280814/outputs/global_plots_fitmixtmodel_random_sigmaoutsigmaxnormMratio-plots_fit_mixturemodels_random-75eb9c74-72e0-4165-8014-92c1ef446f0a.npy')
        result_em_fits_flat_fitmixture = data['result_em_fits_flat']
        result_parameters_flat_fitmixture = data['result_parameters_flat']
        all_args_arr_fitmixture = data['all_args_arr']

        # NOTE(review): data_dir is set but not used in the visible code.
        data_dir = None
        if not os.environ.get('WORKDIR_DROP'):
            data_dir = '../experimental_data/'

        plotting_parameters = launchers_memorycurves_marginal_fi.load_prepare_datasets()

        def plot_memorycurves_fits_fromexternal(all_vars, result_dist_to_use_name, nb_best_points=10):
            # For each of the nb_best_points best parameter sets under the given
            # distance, find the closest parameter set in the external fitmixture
            # run and re-plot its memory curves.
            result_dist_to_use = all_vars[result_dist_to_use_name]
            result_em_fits_flat_fitmixture = all_vars['result_em_fits_flat_fitmixture']
            result_parameters_flat_fitmixture = all_vars['result_parameters_flat_fitmixture']
            all_args_arr_fitmixture = all_vars['all_args_arr_fitmixture']

            best_point_indices_result_dist = np.argsort(result_dist_to_use)[:nb_best_points]

            for best_point_index in best_point_indices_result_dist:
                print "extended plot desired for: " + str_best_params(best_point_index, result_dist_to_use)

                # Per-dimension distances, rescaled to [0, 1] columnwise.
                dist_best_points_fitmixture = np.abs(result_parameters_flat_fitmixture - result_parameters_flat[best_point_index])
                dist_best_points_fitmixture -= np.min(dist_best_points_fitmixture, axis=0)
                dist_best_points_fitmixture /= np.max(dist_best_points_fitmixture, axis=0)

                # Product of (1 - distance) across dimensions: maximal when all
                # dimensions are simultaneously close.
                best_point_index_fitmixture = np.argmax(np.prod(1-dist_best_points_fitmixture, axis=-1))

                print "found closest: " + ' '.join(["%s %.4f" % (parameter_names_sorted[param_i], result_parameters_flat_fitmixture[best_point_index_fitmixture, param_i]) for param_i in xrange(len(parameter_names_sorted))])

                # Update arguments
                all_args_arr_fitmixture[best_point_index_fitmixture].update(dict(zip(parameter_names_sorted, result_parameters_flat_fitmixture[best_point_index_fitmixture])))
                packed_data = dict(T_space=T_space, result_em_fits=result_em_fits_flat_fitmixture[best_point_index_fitmixture], all_parameters=all_args_arr_fitmixture[best_point_index_fitmixture])

                plotting_parameters['suptitle'] = result_dist_to_use_name
                plotting_parameters['reuse_axes'] = False
                if savefigs:
                    packed_data['dataio'] = dataio

                launchers_memorycurves_marginal_fi.do_memory_plots(packed_data, plotting_parameters)

        # NOTE(review): no 'result_external_*' names exist in locals() — these
        # lookups would raise KeyError; the intended keys look like the
        # 'result_fitexperiments_noiseconv_*_ll90_avg_sumT' ones. Confirm.
        plot_memorycurves_fits_fromexternal(locals(), 'result_external_fitexperiments_noiseconv_bays09_ll90_avg_sumT', nb_best_points=3)
        plot_memorycurves_fits_fromexternal(locals(), 'result_external_fitexperiments_noiseconv_gorgo11_ll90_avg_sumT', nb_best_points=3)
        plot_memorycurves_fits_fromexternal(locals(), 'result_external_fitexperiments_noiseconv_dualrecall_ll90_avg_sumT', nb_best_points=3)

    all_args = data_pbs.loaded_data['args_list']

    # NOTE(review): 'filter_data' is never assigned (see commented block above).
    variables_to_save = ['experiment_ids', 'parameter_names_sorted', 'T_space', 'all_args_arr', 'all_repeats_completed', 'filter_data']

    if savedata:
        dataio.save_variables_default(locals(), variables_to_save)

        dataio.make_link_output_to_dropbox(dropbox_current_experiment_folder='sigmaoutput_normalisedsigmax_random')

    plt.show()

    return locals()
def model_fn(features, labels, mode, params):
    """Estimator model_fn building the AdaBERT student for train/eval/predict.

    Standard `tf.estimator` model_fn signature; `labels` and `params` are
    unused here — all configuration comes from the global FLAGS.
    Returns a `tf.estimator.EstimatorSpec` for the given mode.
    """
    # Input tensor order matters; the distillation logits ("prob_logits")
    # are only present in the training input pipeline.
    inputs = []
    if FLAGS.is_training:
        for feature_name in ["ids", "mask", "seg_ids", "prob_logits", "labels"]:
            inputs.append(features[feature_name])
    else:
        for feature_name in ["ids", "mask", "seg_ids", "labels"]:
            inputs.append(features[feature_name])

    # Optional pre-trained word/position embeddings, loaded only for training.
    if FLAGS.emb_pathes and FLAGS.is_training:
        pathes = FLAGS.emb_pathes.split(',')
        pretrained_word_embeddings = load_npy(pathes[0])
        pretrained_pos_embeddings = load_npy(pathes[1])
    else:
        pretrained_word_embeddings, pretrained_pos_embeddings = None, None

    # Kmax defaults to 8; overridden (along with the fixed architecture)
    # when a previously searched architecture file is given.
    Kmax = 8
    given_arch = None
    if FLAGS.arch_path:
        Kmax, given_arch = load_arch(FLAGS.arch_path)

    model = AdaBERTStudent(
        inputs, (mode == tf.estimator.ModeKeys.TRAIN),
        vocab_size=FLAGS.num_token,
        is_pair_task=bool(FLAGS.is_pair_task),
        num_classes=FLAGS.num_classes,
        Kmax=Kmax,
        emb_size=FLAGS.embed_size,
        seq_len=FLAGS.seq_length,
        # Dropout only during training.
        keep_prob=0.9 if mode == tf.estimator.ModeKeys.TRAIN else 1.0,
        temp_decay_steps=FLAGS.temp_decay_steps,
        model_opt_lr=FLAGS.model_opt_lr,
        arch_opt_lr=FLAGS.arch_opt_lr,
        model_l2_reg=FLAGS.model_l2_reg,
        arch_l2_reg=FLAGS.arch_l2_reg,
        loss_gamma=FLAGS.loss_gamma,
        loss_beta=FLAGS.loss_beta,
        pretrained_word_embeddings=pretrained_word_embeddings,
        pretrained_pos_embeddings=pretrained_pos_embeddings,
        given_arch=given_arch)

    if mode == tf.estimator.ModeKeys.TRAIN:
        # Log architecture parameters, step and loss every 50 iterations
        # (chief worker only).
        logging_tensors = dict([(var.name, var) for var in model.arch_params])
        logging_tensors['step'] = model.global_step
        logging_tensors['loss'] = model.loss
        logging_hook = tf.train.LoggingTensorHook(logging_tensors, every_n_iter=50)
        chief_only_hooks = [logging_hook]
        if given_arch is None:
            # Architecture search mode: periodically save search results.
            search_result_hook = SearchResultsSaver(model.global_step, model.arch_params, model.ld_embs, FLAGS.model_dir, FLAGS.save_steps)
            chief_only_hooks.append(search_result_hook)

        # handle the save/restore related issues
        if FLAGS.searched_model:
            # has pretrained
            tvars = tf.trainable_variables()
            initialized_variable_names = {}
            init_checkpoint = os.path.join(FLAGS.searched_model)
            tf.logging.info("Init from %s" % init_checkpoint)
            # "wemb"/"pemb" are excluded from the checkpoint assignment map.
            (assignment_map, initialized_variable_names) = get_assignment_map_from_checkpoint(tvars, init_checkpoint, ["wemb", "pemb"])
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
            tf.logging.info("**** Trainable Variables ****")
            for var in tvars:
                init_string = ""
                if var.name in initialized_variable_names:
                    init_string = ", *INIT_FROM_CKPT*"
                tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape, init_string)

        return tf.estimator.EstimatorSpec(
            mode=mode,
            loss=model.loss,
            train_op=model.update,
            training_chief_hooks=chief_only_hooks)

    elif mode == tf.estimator.ModeKeys.EVAL:
        # Define the metrics:
        metrics_dict = {
            'Acc': model.acc,
            #'AUC': tf.metrics.auc(train_labels, probabilities, num_thresholds=2000)
        }
        return tf.estimator.EstimatorSpec(mode, loss=model.loss, eval_metric_ops=metrics_dict)

    else:
        # PREDICT mode.
        if FLAGS.is_training:
            predictions = dict()
            predictions["predicted"] = model.predictions
            predictions["labels"] = features["labels"]
        else:
            # Pass through all input features alongside the model outputs.
            predictions = features.copy()
            predictions["logits"] = model.logits
            predictions["predicted"] = model.predictions
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
def plots_logposterior_mixed_autoset(data_pbs, generator_module=None):
    ''' Reload 2D volume runs from PBS and plot them.

    data_pbs: reloaded PBS result container. generator_module: the
    submission-generator module (its __dict__ is mutated below when
    relaunching best-parameter jobs). Returns locals().
    '''

    #### SETUP
    #
    savefigs = True
    plot_per_ratio = False
    plot_2d_pcolor = False
    do_relaunch_bestparams_pbs = True

    colormap = None  # or 'cubehelix'
    plt.rcParams['font.size'] = 16
    #
    #### /SETUP

    print "Order parameters: ", generator_module.dict_parameters_range.keys()

    result_log_posterior_mean = np.squeeze(data_pbs.dict_arrays['result_log_posterior_mean']['results'])
    result_log_posterior_std = np.squeeze(data_pbs.dict_arrays['result_log_posterior_std']['results'])

    ratio_space = data_pbs.loaded_data['parameters_uniques']['ratio_conj']
    sigmax_space = data_pbs.loaded_data['parameters_uniques']['sigmax']

    exp_dataset = data_pbs.loaded_data['args_list'][0]['experiment_id']

    print ratio_space
    print sigmax_space
    print result_log_posterior_mean.shape, result_log_posterior_std.shape

    # NOTE(review): dataset_infos is not defined in this function — presumably
    # a module-level global; confirm.
    dataio = DataIO(output_folder=generator_module.pbs_submission_infos['simul_out_dir'] + '/outputs/', label='global_' + dataset_infos['save_output_filename'])

    if plot_per_ratio:
        # Plot the evolution of loglike as a function of sigmax, with std shown
        for ratio_conj_i, ratio_conj in enumerate(ratio_space):
            ax = utils.plot_mean_std_area(sigmax_space, result_log_posterior_mean[ratio_conj_i], result_log_posterior_std[ratio_conj_i])

            ax.get_figure().canvas.draw()

            if savefigs:
                dataio.save_current_figure('results_fitexp_%s_loglike_ratioconj%.2f_{label}_global_{unique_id}.pdf' % (exp_dataset, ratio_conj))

    if plot_2d_pcolor:
        # Plot the mean loglikelihood as a 2d surface
        utils.pcolor_2d_data(result_log_posterior_mean, x=ratio_space, y=sigmax_space, xlabel="Ratio conj", ylabel="Sigma x", title="Loglikelihood of experimental data, \n3 items dualrecall, rcscale automatically set", ticks_interpolate=5, cmap=colormap)
        # plt.tight_layout()

        if savefigs:
            dataio.save_current_figure('results_fitexp_%s_loglike_2d_ratiosigmax_{label}_global_{unique_id}.pdf' % exp_dataset)

    if do_relaunch_bestparams_pbs:
        # Will check the best fitting parameters, and relaunch simulations for them, in order to get new cool plots.

        # We can actually use the original dictionary for submission informations, and just change the launcher to one produces the appropriate results.
        # NOTE(review): this mutates generator_module.__dict__ in place — the
        # changes persist in the module after this function returns.
        generator_parameters_dict = generator_module.__dict__
        generator_parameters_dict['pbs_submission_infos']['other_options'].update(dict(
            action_to_do='launcher_do_memory_curve_marginal_fi',
            subaction='collect_responses',
            inference_method='sample',
            N=300,
            T=6,
            num_samples=500,
            selection_method='last',
            num_repetitions=3,
            burn_samples=500,
            stimuli_generation='random',
            stimuli_generation_recall='random',
            session_id='fitting_experiments_relaunchs',
            result_computation='filenameoutput'))
        generator_parameters_dict['pbs_submission_infos']['other_options']['label'] = 'fitting_experiment_rerun_nitems{T}M{M}_090414'.format(T = generator_parameters_dict['n_items_to_fit'], M = generator_parameters_dict['M'])
        generator_parameters_dict['sleeping_period'] = dict(min=5, max=10)

        submit_pbs = submitpbs.SubmitPBS(pbs_submission_infos=generator_parameters_dict['pbs_submission_infos'], debug=True)

        # Now run a series of Jobs to obtain the required data
        # the "Result" of them are the filename of their output. Should then save those in a dictionary.
        # For later processing, if the exact same parameters are used, then everything will be reloaded automatically, MAAAGIC.

        # Extract the parameters to try: for each ratio_conj, the sigmax with
        # the highest mean log-posterior.
        best_params_to_try = []
        best_axis2_i_all = np.argmax(result_log_posterior_mean, axis=1)
        for axis1_i, best_axis2_i in enumerate(best_axis2_i_all):
            parameter_dict = dict(ratio_conj=ratio_space[axis1_i], sigmax=sigmax_space[best_axis2_i])
            best_params_to_try.append(parameter_dict)

        # Submit them, waiting on them in the process. Should obtain a list of filenames back
        utils.chdir_safe(generator_parameters_dict['pbs_submission_infos']['simul_out_dir'])
        result_minibatch_filenames = submit_pbs.submit_minibatch_jobswrapper(best_params_to_try, generator_parameters_dict)

        result_reloaded_datasets = []
        for filename_i, result_filename in enumerate(result_minibatch_filenames):
            curr_reloaded_dataset = utils.load_npy(result_filename + '.npy')
            result_reloaded_datasets.append(curr_reloaded_dataset)

        # Back to the original working directory.
        utils.chdir_safe('../')

    all_args = data_pbs.loaded_data['args_list']

    variables_to_save = ['exp_dataset']

    if savefigs:
        dataio.save_variables_default(locals(), variables_to_save)

    plt.show()

    return locals()
def plots_boostrap(data_pbs, generator_module=None): ''' Reload bootstrap samples, plot its histogram, fit empirical CDF and save it for quicker later use. ''' #### SETUP # savefigs = True savedata = True load_fit_bootstrap = True plots_hist_cdf = True estimate_bootstrap = True should_fit_bootstrap = True # caching_bootstrap_filename = None caching_bootstrap_filename = os.path.join(generator_module.pbs_submission_infos['simul_out_dir'], 'outputs', 'cache_bootstrap.pickle') plt.rcParams['font.size'] = 16 # #### /SETUP print "Order parameters: ", generator_module.dict_parameters_range.keys() result_bootstrap_samples_allitems_uniquekappa_sumnontarget = np.squeeze(data_pbs.dict_arrays['result_bootstrap_samples_allitems_uniquekappa_sumnontarget']['results']) result_bootstrap_samples = np.squeeze(data_pbs.dict_arrays['result_bootstrap_samples']['results']) result_bootstrap_samples_allitems_uniquekappa_allnontarget = np.squeeze(data_pbs.dict_arrays['result_bootstrap_samples_allitems_uniquekappa_allnontarget']['results']) sigmax_space = data_pbs.loaded_data['datasets_list'][0]['sigmax_space'] T_space = data_pbs.loaded_data['datasets_list'][0]['T_space'] print result_bootstrap_samples_allitems_uniquekappa_sumnontarget.shape print result_bootstrap_samples.shape print result_bootstrap_samples_allitems_uniquekappa_allnontarget.shape dataio = DataIO(output_folder=generator_module.pbs_submission_infos['simul_out_dir'] + '/outputs/', label='global_' + dataset_infos['save_output_filename']) if load_fit_bootstrap: if caching_bootstrap_filename is not None: if os.path.exists(caching_bootstrap_filename): # Got file, open it and try to use its contents try: with open(caching_bootstrap_filename, 'r') as file_in: # Load and assign values cached_data = pickle.load(file_in) bootstrap_ecdf_bays_sigmax_T = cached_data['bootstrap_ecdf_bays_sigmax_T'] bootstrap_ecdf_allitems_sum_sigmax_T = cached_data['bootstrap_ecdf_allitems_sum_sigmax_T'] bootstrap_ecdf_allitems_all_sigmax_T = 
cached_data['bootstrap_ecdf_allitems_all_sigmax_T'] should_fit_bootstrap = False except IOError: print "Error while loading ", caching_bootstrap_filename, "falling back to computing the EM fits" if should_fit_bootstrap: bootstrap_ecdf_bays_sigmax_T = dict() bootstrap_ecdf_allitems_sum_sigmax_T = dict() bootstrap_ecdf_allitems_all_sigmax_T = dict() # Fit bootstrap for sigmax_i, sigmax in enumerate(sigmax_space): for T_i, T in enumerate(T_space): if T>1: # One bootstrap CDF per condition bootstrap_ecdf_bays = stmodsdist.empirical_distribution.ECDF(utils.dropnan(result_bootstrap_samples[sigmax_i, T_i])) bootstrap_ecdf_allitems_sum = stmodsdist.empirical_distribution.ECDF(utils.dropnan(result_bootstrap_samples_allitems_uniquekappa_sumnontarget[sigmax_i, T_i])) bootstrap_ecdf_allitems_all = stmodsdist.empirical_distribution.ECDF(utils.dropnan(result_bootstrap_samples_allitems_uniquekappa_allnontarget[sigmax_i, T_i])) # Store in a dict(sigmax) -> dict(T) -> ECDF object bootstrap_ecdf_bays_sigmax_T.setdefault(sigmax_i, dict())[T_i] = dict(ecdf=bootstrap_ecdf_bays, T=T, sigmax=sigmax) bootstrap_ecdf_allitems_sum_sigmax_T.setdefault(sigmax_i, dict())[T_i] = dict(ecdf=bootstrap_ecdf_allitems_sum, T=T, sigmax=sigmax) bootstrap_ecdf_allitems_all_sigmax_T.setdefault(sigmax_i, dict())[T_i] = dict(ecdf=bootstrap_ecdf_allitems_all, T=T, sigmax=sigmax) # Save everything to a file, for faster later plotting if caching_bootstrap_filename is not None: try: with open(caching_bootstrap_filename, 'w') as filecache_out: data_bootstrap = dict(bootstrap_ecdf_allitems_sum_sigmax_T=bootstrap_ecdf_allitems_sum_sigmax_T, bootstrap_ecdf_allitems_all_sigmax_T=bootstrap_ecdf_allitems_all_sigmax_T, bootstrap_ecdf_bays_sigmax_T=bootstrap_ecdf_bays_sigmax_T) pickle.dump(data_bootstrap, filecache_out, protocol=2) except IOError: print "Error writing out to caching file ", caching_bootstrap_filename if plots_hist_cdf: ## Plots now for sigmax_i, sigmax in enumerate(sigmax_space): for T_i, T in 
enumerate(T_space): if T > 1: # Histogram of samples _, axes = plt.subplots(ncols=3, figsize=(18, 6)) axes[0].hist(utils.dropnan(result_bootstrap_samples[sigmax_i, T_i]), bins=100, normed='density') axes[0].set_xlim([0.0, 1.0]) axes[1].hist(utils.dropnan(result_bootstrap_samples_allitems_uniquekappa_sumnontarget[sigmax_i, T_i]), bins=100, normed='density') axes[1].set_xlim([0.0, 1.0]) axes[2].hist(utils.dropnan(result_bootstrap_samples_allitems_uniquekappa_allnontarget[sigmax_i, T_i]), bins=100, normed='density') axes[2].set_xlim([0.0, 1.0]) if savefigs: dataio.save_current_figure('hist_bootstrap_sigmax%.2f_T%d_{label}_{unique_id}.pdf' % (sigmax, T)) # ECDF now _, axes = plt.subplots(ncols=3, sharey=True, figsize=(18, 6)) axes[0].plot(bootstrap_ecdf_bays_sigmax_T[sigmax_i][T_i]['ecdf'].x, bootstrap_ecdf_bays_sigmax_T[sigmax_i][T_i]['ecdf'].y, linewidth=2) axes[0].set_xlim([0.0, 1.0]) axes[1].plot(bootstrap_ecdf_allitems_sum_sigmax_T[sigmax_i][T_i]['ecdf'].x, bootstrap_ecdf_allitems_sum_sigmax_T[sigmax_i][T_i]['ecdf'].y, linewidth=2) axes[1].set_xlim([0.0, 1.0]) axes[2].plot(bootstrap_ecdf_allitems_all_sigmax_T[sigmax_i][T_i]['ecdf'].x, bootstrap_ecdf_allitems_all_sigmax_T[sigmax_i][T_i]['ecdf'].y, linewidth=2) axes[2].set_xlim([0.0, 1.0]) if savefigs: dataio.save_current_figure('ecdf_bootstrap_sigmax%.2f_T%d_{label}_{unique_id}.pdf' % (sigmax, T)) if estimate_bootstrap: ## Should be in reloader_error_distribution_mixed_121113 instead model_outputs = utils.load_npy( os.path.join(os.getenv("WORKDIR_DROP", None), 'Experiments', 'bootstrap_nontargets', 'global_plots_errors_distribution-plots_errors_distribution-d977e237-cfce-473b-a292-00695e725259.npy')) data_responses_all = model_outputs['result_responses_all'][..., 0] data_target_all = model_outputs['result_target_all'][..., 0] data_nontargets_all = model_outputs['result_nontargets_all'][..., 0] # Compute bootstrap p-value result_pvalue_bootstrap_sum = np.empty((sigmax_space.size, T_space.size-1))*np.nan 
result_pvalue_bootstrap_all = np.empty((sigmax_space.size, T_space.size-1, T_space.size-1))*np.nan for sigmax_i, sigmax in enumerate(sigmax_space): for T in T_space[1:]: bootstrap_allitems_nontargets_allitems_uniquekappa = em_circularmixture_allitems_uniquekappa.bootstrap_nontarget_stat(data_responses_all[sigmax_i, (T-1)], data_target_all[sigmax_i, (T-1)], data_nontargets_all[sigmax_i, (T-1), :, :(T-1)], sumnontargets_bootstrap_ecdf=bootstrap_ecdf_allitems_sum_sigmax_T[sigmax_i][T-1]['ecdf'], allnontargets_bootstrap_ecdf=bootstrap_ecdf_allitems_all_sigmax_T[sigmax_i][T-1]['ecdf']) result_pvalue_bootstrap_sum[sigmax_i, T-2] = bootstrap_allitems_nontargets_allitems_uniquekappa['p_value'] result_pvalue_bootstrap_all[sigmax_i, T-2, :(T-1)] = bootstrap_allitems_nontargets_allitems_uniquekappa['allnontarget_p_value'] print sigmax, T, result_pvalue_bootstrap_sum[sigmax_i, T-2], result_pvalue_bootstrap_all[sigmax_i, T-2, :(T-1)], np.sum(result_pvalue_bootstrap_all[sigmax_i, T-2, :(T-1)] < 0.05) all_args = data_pbs.loaded_data['args_list'] variables_to_save = ['nb_repetitions'] if savedata: dataio.save_variables_default(locals(), variables_to_save) dataio.make_link_output_to_dropbox(dropbox_current_experiment_folder='bootstrap_nontargets') plt.show() return locals()
def plots_boostrap(data_pbs, generator_module=None): """ Reload bootstrap samples, plot its histogram, fit empirical CDF and save it for quicker later use. """ #### SETUP # savefigs = True savedata = True load_fit_bootstrap = True plots_hist_cdf = False estimate_bootstrap = False should_fit_bootstrap = True # caching_bootstrap_filename = None caching_bootstrap_filename = os.path.join( generator_module.pbs_submission_infos["simul_out_dir"], "outputs", "cache_bootstrap.pickle" ) plt.rcParams["font.size"] = 16 # #### /SETUP print "Order parameters: ", generator_module.dict_parameters_range.keys() result_bootstrap_samples_allitems_uniquekappa_sumnontarget = np.squeeze( data_pbs.dict_arrays["result_bootstrap_samples_allitems_uniquekappa_sumnontarget"]["results"] ) result_bootstrap_samples = np.squeeze(data_pbs.dict_arrays["result_bootstrap_samples"]["results"]) result_bootstrap_samples_allitems_uniquekappa_allnontarget = np.squeeze( data_pbs.dict_arrays["result_bootstrap_samples_allitems_uniquekappa_allnontarget"]["results"] ) sigmax_space = data_pbs.loaded_data["datasets_list"][0]["sigmax_space"] T_space = data_pbs.loaded_data["datasets_list"][0]["T_space"] print result_bootstrap_samples_allitems_uniquekappa_sumnontarget.shape print result_bootstrap_samples.shape print result_bootstrap_samples_allitems_uniquekappa_allnontarget.shape dataio = DataIO( output_folder=generator_module.pbs_submission_infos["simul_out_dir"] + "/outputs/", label="global_" + dataset_infos["save_output_filename"], ) if load_fit_bootstrap: if caching_bootstrap_filename is not None: if os.path.exists(caching_bootstrap_filename): # Got file, open it and try to use its contents try: with open(caching_bootstrap_filename, "r") as file_in: # Load and assign values cached_data = pickle.load(file_in) bootstrap_ecdf_bays_sigmax_T = cached_data["bootstrap_ecdf_bays_sigmax_T"] bootstrap_ecdf_allitems_sum_sigmax_T = cached_data["bootstrap_ecdf_allitems_sum_sigmax_T"] bootstrap_ecdf_allitems_all_sigmax_T = 
cached_data["bootstrap_ecdf_allitems_all_sigmax_T"] should_fit_bootstrap = False except IOError: print "Error while loading ", caching_bootstrap_filename, "falling back to computing the EM fits" if should_fit_bootstrap: bootstrap_ecdf_bays_sigmax_T = dict() bootstrap_ecdf_allitems_sum_sigmax_T = dict() bootstrap_ecdf_allitems_all_sigmax_T = dict() # Fit bootstrap for sigmax_i, sigmax in enumerate(sigmax_space): for T_i, T in enumerate(T_space): if T > 1: # One bootstrap CDF per condition bootstrap_ecdf_bays = stmodsdist.empirical_distribution.ECDF( utils.dropnan(result_bootstrap_samples[sigmax_i, T_i]) ) bootstrap_ecdf_allitems_sum = stmodsdist.empirical_distribution.ECDF( utils.dropnan(result_bootstrap_samples_allitems_uniquekappa_sumnontarget[sigmax_i, T_i]) ) bootstrap_ecdf_allitems_all = stmodsdist.empirical_distribution.ECDF( utils.dropnan(result_bootstrap_samples_allitems_uniquekappa_allnontarget[sigmax_i, T_i]) ) # Store in a dict(sigmax) -> dict(T) -> ECDF object bootstrap_ecdf_bays_sigmax_T.setdefault(sigmax_i, dict())[T_i] = dict( ecdf=bootstrap_ecdf_bays, T=T, sigmax=sigmax ) bootstrap_ecdf_allitems_sum_sigmax_T.setdefault(sigmax_i, dict())[T_i] = dict( ecdf=bootstrap_ecdf_allitems_sum, T=T, sigmax=sigmax ) bootstrap_ecdf_allitems_all_sigmax_T.setdefault(sigmax_i, dict())[T_i] = dict( ecdf=bootstrap_ecdf_allitems_all, T=T, sigmax=sigmax ) # Save everything to a file, for faster later plotting if caching_bootstrap_filename is not None: try: with open(caching_bootstrap_filename, "w") as filecache_out: data_bootstrap = dict( bootstrap_ecdf_allitems_sum_sigmax_T=bootstrap_ecdf_allitems_sum_sigmax_T, bootstrap_ecdf_allitems_all_sigmax_T=bootstrap_ecdf_allitems_all_sigmax_T, bootstrap_ecdf_bays_sigmax_T=bootstrap_ecdf_bays_sigmax_T, ) pickle.dump(data_bootstrap, filecache_out, protocol=2) except IOError: print "Error writing out to caching file ", caching_bootstrap_filename if plots_hist_cdf: ## Plots now for sigmax_i, sigmax in enumerate(sigmax_space): for 
T_i, T in enumerate(T_space): if T > 1: # Histogram of samples _, axes = plt.subplots(ncols=3, figsize=(18, 6)) axes[0].hist(utils.dropnan(result_bootstrap_samples[sigmax_i, T_i]), bins=100, normed="density") axes[0].set_xlim([0.0, 1.0]) axes[1].hist( utils.dropnan(result_bootstrap_samples_allitems_uniquekappa_sumnontarget[sigmax_i, T_i]), bins=100, normed="density", ) axes[1].set_xlim([0.0, 1.0]) axes[2].hist( utils.dropnan(result_bootstrap_samples_allitems_uniquekappa_allnontarget[sigmax_i, T_i]), bins=100, normed="density", ) axes[2].set_xlim([0.0, 1.0]) if savefigs: dataio.save_current_figure( "hist_bootstrap_sigmax%.2f_T%d_{label}_{unique_id}.pdf" % (sigmax, T) ) # ECDF now _, axes = plt.subplots(ncols=3, sharey=True, figsize=(18, 6)) axes[0].plot( bootstrap_ecdf_bays_sigmax_T[sigmax_i][T_i]["ecdf"].x, bootstrap_ecdf_bays_sigmax_T[sigmax_i][T_i]["ecdf"].y, linewidth=2, ) axes[0].set_xlim([0.0, 1.0]) axes[1].plot( bootstrap_ecdf_allitems_sum_sigmax_T[sigmax_i][T_i]["ecdf"].x, bootstrap_ecdf_allitems_sum_sigmax_T[sigmax_i][T_i]["ecdf"].y, linewidth=2, ) axes[1].set_xlim([0.0, 1.0]) axes[2].plot( bootstrap_ecdf_allitems_all_sigmax_T[sigmax_i][T_i]["ecdf"].x, bootstrap_ecdf_allitems_all_sigmax_T[sigmax_i][T_i]["ecdf"].y, linewidth=2, ) axes[2].set_xlim([0.0, 1.0]) if savefigs: dataio.save_current_figure( "ecdf_bootstrap_sigmax%.2f_T%d_{label}_{unique_id}.pdf" % (sigmax, T) ) if estimate_bootstrap: model_outputs = utils.load_npy( os.path.join( os.environ["WORKDIR_DROP"], "Experiments/error_distribution/error_distribution_conj_M100T6repetitions5_121113_outputs/global_plots_errors_distribution-plots_errors_distribution-cc1a49b0-f5f0-4e82-9f0f-5a16a2bfd4e8.npy", ) ) data_responses_all = model_outputs["result_responses_all"][..., 0] data_target_all = model_outputs["result_target_all"][..., 0] data_nontargets_all = model_outputs["result_nontargets_all"][..., 0] # Compute bootstrap p-value for sigmax_i, sigmax in enumerate(sigmax_space): for T in T_space[1:]: 
bootstrap_allitems_nontargets_allitems_uniquekappa = em_circularmixture_allitems_uniquekappa.bootstrap_nontarget_stat( data_responses_all[sigmax_i, (T - 1)], data_target_all[sigmax_i, (T - 1)], data_nontargets_all[sigmax_i, (T - 1), :, : (T - 1)], sumnontargets_bootstrap_ecdf=bootstrap_ecdf_allitems_sum_sigmax_T[sigmax_i][T - 1]["ecdf"], allnontargets_bootstrap_ecdf=bootstrap_ecdf_allitems_all_sigmax_T[sigmax_i][T - 1]["ecdf"], ) # TODO finish here! all_args = data_pbs.loaded_data["args_list"] variables_to_save = ["nb_repetitions"] if savedata: dataio.save_variables_default(locals(), variables_to_save) dataio.make_link_output_to_dropbox(dropbox_current_experiment_folder="bootstrap_nontargets") plt.show() return locals()
train and save """ import tensorflow as tf import subprocess, os from utils import load_npy, iterate_arr from params import TRAIN_DATA, VAL_DATA, LOG_DIR, SAVE_DIR, VAL_LOG, TRAIN_LOG from params import num_frames, num_sequences, batch_size, num_iterations # import model and graph from build_graph import X, train_op, summary_op from build_graph import writer_train, writer_val, saver # load training/validation data train_arr = load_npy(TRAIN_DATA, num_frames=num_frames, limit=num_sequences) if num_sequences is None: num_sequences = train_arr.shape[0] train_iter = iterate_arr(train_arr, batch_size=batch_size) val_arr = load_npy(VAL_DATA) # clear old log if os.path.exists(LOG_DIR): cmd = "rm -r {}".format(LOG_DIR) subprocess.call(cmd.split()) cmd = "mkdir {}".format(LOG_DIR) subprocess.call(cmd.split()) cmd = "mkdir {}".format(TRAIN_LOG) subprocess.call(cmd.split()) cmd = "mkdir {}".format(VAL_LOG)
def main(in_path, out_path): predictions = utils.load_npy(in_path) write_predictions_to_csv(predictions, out_path) print 'Generated predictions file %s' % out_path
n_categories = 6 + 1 #Params dropout = 0.7 n_feats = 600 n_hidden = 64 #hyper-params lr = 0.001 epochs = 80 batchsize = 8 use_ctc = True #Data extraction and preparation X_train_path = data_folder + "X_train.npy" X_train = np.array(utils.load_npy(X_train_path)) X_test_path = data_folder + "X_test.npy" X_test = np.array(utils.load_npy(X_test_path)) y_train_path = data_folder + "y_train.npy" y_train = utils.load_npy(y_train_path) y_train = np.array([utils.categories_to_int(utils.labels_num_to_category(utils.collapse_num_labels(y))) for y in y_train]) y_test_path = data_folder + "y_test.npy" y_test = utils.load_npy(y_test_path) y_test = np.array([utils.categories_to_int(utils.labels_num_to_category(utils.collapse_num_labels(y))) for y in y_test]) #SAVE MEMORY?? # X_train, y_train = utils.get_mini_dataset(X_train, y_train, 500) # X_test, y_test = utils.get_mini_dataset(X_test, y_test, 100)
p_err_mean = epoch_loss_train_mean self.learning_rate = self.lr_scheduler(learning_rate, e, delta_err) self.dropout = self.dropout_scheduler(dropout, e, delta_err) if self.logsdir is not None: self.save_weights() return if __name__ == "__main__": epochs = 10 lr = 0.01 rnn = RNN_framewise_base(n_feats=600, n_classes=41, logsdir="./RNN_test/") X_train = utils.load_npy('../../data/RBM_hann_v2/X_train.npy') X_test = utils.load_npy('../../data/RBM_hann_v2/X_test.npy') y_train = utils.load_npy('../../data/RBM_hann_v2/y_train.npy') y_test = utils.load_npy('../../data/RBM_hann_v2/y_test.npy') y_train = [utils.collapse_num_labels(y) for y in y_train] y_test = [utils.collapse_num_labels(y) for y in y_test] y_train = utils.to_ctc(y_train) y_test = utils.to_ctc(y_test) rnn.set_data(X_train, X_test, y_train, y_test) del (X_train, y_train, X_test, y_test) rnn.fit(n_epochs=epochs,