def _run_synth_exp(self):
    """Run all configured methods over every synthetic parameter setting and data set.

    For each parameter setting and each generated data set, this loads the
    generated annotations, runs ``self.methods`` through an ``Experiment``,
    and collects the scores into a 4-D array indexed as
    ``(param_setting, score, method, run)``. The array is optionally dumped
    to disk and plotted.

    Returns:
        np.ndarray: scores with shape
            ``(num_param_settings, len(SCORE_NAMES), len(self.methods), self.num_runs)``.
    """
    # initialise result array
    results = np.zeros((self.param_values.shape[0], len(SCORE_NAMES), len(self.methods),
                        self.num_runs))

    # iterate through parameter settings
    for param_idx in range(self.param_values.shape[0]):
        print('parameter setting: {0}'.format(param_idx))

        # directory pattern for this parameter setting's data sets
        path_pattern = self.output_dir + 'data/param{0}/set{1}/'

        # iterate through data sets
        for run_idx in range(self.num_runs):
            print('data set number: {0}'.format(run_idx))

            # was format(*(param_idx, run_idx)) -- the tuple unpacking was redundant
            data_path = path_pattern.format(param_idx, run_idx)

            # remove stale predictions from any previous run so they get recomputed
            for stale_name in ('pred_.csv', 'probs_.csv'):
                stale_path = os.path.join(data_path, stale_name)
                if os.path.exists(stale_path):
                    os.remove(stale_path)

            # read data
            doc_start, gt, annos = self.generator.read_data_file(data_path + 'full_data.csv')

            exp = Experiment(data_path, self.generator.num_labels, annos, gt, doc_start, None,
                             alpha0_factor=self.alpha0_factor, alpha0_diags=self.alpha0_diags,
                             beta0_factor=self.beta0_factor, begin_factor=self.begin_factor,
                             max_iter=self.max_iter, crf_probs=self.crf_probs,
                             bootstrapping=False)
            exp.methods = self.methods

            # run methods; only the score array is kept here, per-run predictions
            # and probabilities are not used further
            results[param_idx, :, :, run_idx], preds, probabilities, _, _, _ = exp.run_methods(
                new_data=True, save_with_timestamp=False)

    if self.save_results:
        # race-free equivalent of the exists()+makedirs() pair
        os.makedirs(self.output_dir, exist_ok=True)
        print('Saving results...')
        # np.savetxt(self.output_dir + 'results.csv', np.mean(results, 3)[:, :, 0])
        results.dump(self.output_dir + 'results')

    if self.show_plots or self.save_plots:
        # NOTE(review): this passes self.param_idx (an attribute), not the loop
        # variable param_idx -- looks intentional but confirm against plot_results().
        plot_results(self.param_values, self.methods, self.param_idx, results,
                     self.show_plots, self.save_plots, self.output_dir)

    return results
# NOTE(review): this chunk assumes `exp` and `best_nu0factor` are defined
# earlier in the file (outside this view), and the final run_methods(...) call
# is truncated at the end of the chunk -- its remaining arguments continue
# beyond this view.
best_diags = 1
best_factor = 1
best_acc_bias = 0

# apply the tuned hyperparameters to the experiment
exp.alpha0_diags = best_diags
exp.alpha0_factor = best_factor
exp.nu0_factor = best_nu0factor
exp.alpha0_acc_bias = best_acc_bias

# only the uncommented methods are run; the rest are kept for reference
exp.methods = [
    'majority',
    # 'mace',
    # 'ds',
    # 'ibcc',
    # 'bac_vec_integrateIF',
    # 'bac_ibcc_integrateIF',
    # 'bac_mace_integrateIF',
    # 'HMM_crowd',
    # 'best',
    # 'worst',
    # best_bac_wm,
    'bac_seq_integrateIF',
]

exp.run_methods(annos, gt, doc_start, output_dir, text,
                ground_truth_val=gt_val, doc_start_val=doc_start_val, text_val=text_val,
# Repeat the NER active-learning experiment for each repetition index.
# NOTE(review): `num_reps`, `annos`, `gt`, `doc_start`, `text`, the *_val and
# *_nocrowd arrays, `batch_frac` and `AL_iters` are all defined outside this
# chunk -- confirm against the surrounding file.
for rep in range(1, num_reps):
    output_dir = '../data/bayesian_sequence_combination/output/ner_al_super_new/'
    if not os.path.isdir(output_dir):
        os.mkdir(output_dir)

    # NOTE(review): 9 presumably is the number of NER label classes -- confirm
    # against the Experiment constructor.
    exp = Experiment(None, 9, annos.shape[1], None, max_iter=20, crf_probs=True, rep=rep)
    exp.save_results = True
    exp.opt_hyper = False#True

    # hyperparameters chosen previously; rejected alternatives kept in comments
    exp.nu0_factor = 0.1
    exp.alpha0_diags = 1 # best_diags
    exp.alpha0_factor = 1#9 # best_factor

    exp.methods = [
        'bac_seq_integrateIF',
        'HMM_crowd',
    ]

    # run with active learning enabled; validation and no-crowd splits are
    # passed through to the experiment
    results, preds, probs, results_nocrowd, preds_nocrowd, probs_nocrowd = exp.run_methods(
        annos, gt, doc_start, output_dir, text,
        ground_truth_val=gt_val, doc_start_val=doc_start_val, text_val=text_val,
        ground_truth_nocrowd=gt_nocrowd, doc_start_nocrowd=doc_start_nocrowd,
        text_nocrowd=text_nocrowd,
        active_learning=True, AL_batch_fraction=batch_frac, max_AL_iters=AL_iters
    )

    # exp = Experiment(None, 9, annos.shape[1], None, max_iter=10, crf_probs=True, rep=rep)
    # exp.save_results = True
    # exp.opt_hyper = False#True
    # exp.nu0_factor = 0.1
    # exp.alpha0_diags = 100 # best_diags
# NOTE(review): this chunk starts inside an Experiment(...) call whose opening
# lies before this view, and ends inside another Experiment(...) call whose
# remaining arguments follow after it.
gt, doc_start, features, annos,
gt_val, doc_start, features,
alpha0_factor=alpha0_factor, alpha0_diags=alpha0_diags,
beta0_factor=beta0_factor, max_iter=20, begin_factor=best_begin_factor)

# # run all the methods that don't require tuning here
exp.methods = [
    # 'bac_seq_integrateIF',
    'bac_seq_integrateIF_thenLSTM',
    'bac_seq_integrateIF_integrateLSTM_atEnd',
]

# this will run task 1 -- train on all crowdsourced data, test on the labelled portion thereof
exp.run_methods(new_data=regen_data)

# ------------------------------------------------------------------------------------------------

# hyperparameters for the next (pico3) run; earlier values kept in the comments
beta0_factor = 0.1 # 100
alpha0_diags = 0.1 # 1
alpha0_factor = 0.1 # 1

# output directory name encodes the hyperparameter values
output_dir = os.path.join(
    evaluation.experiment.output_root_dir,
    'pico3_%f_%f_%f' % (beta0_factor, alpha0_diags, alpha0_factor))

exp = Experiment(output_dir, 3, annos,
# NOTE(review): this chunk starts inside an Experiment(...) call whose opening
# lies before this view.
gt, doc_start, features, annos,
gt_val, doc_start, features,
alpha0_factor=alpha0_factor, alpha0_diags=alpha0_diags,
beta0_factor=beta0_factor, max_iter=20)

exp.methods = [
    'best',  # does not use the hyperparameters
    'worst',  # does not use the hyperparameters
    'majority',  # does not use the hyperparameters
    'mace',  # worked best with its own default hyperparameters, smoothing=0.001, alpha=0.5, beta=0.5
    'ds',  # does not use the hyperparameters
    # NOTE(review): spelled 'HMM_crowd' in other chunks of this file -- confirm
    # the method key is case-insensitive or this entry is intentional.
    'HMM_Crowd',  # does not use alpha0_diags; default values happened to work best
]

# should run both task 1 and 2.
exp.run_methods(new_data=regen_data)

# ----------------------------------------------------------------------------

# hyperparameters for the next (ner3) run
beta0_factor = 0.1
alpha0_diags = 0.1
alpha0_factor = 1

# output directory name encodes the hyperparameter values
output_dir = os.path.join(
    evaluation.experiment.output_root_dir,
    'ner3_%f_%f_%f' % (beta0_factor, alpha0_diags, alpha0_factor))
# Configure and run the experiment with hyperparameters tuned on the dev set.
# NOTE(review): `exp`, `annos`, `gt`, `doc_start`, `output_dir` and `text` are
# defined earlier in the file, outside this chunk.
exp.save_results = True
exp.opt_hyper = False #True

# values obtained from tuning on dev:
best_nu0factor = 1000 #1
best_diags = 1000
best_factor = 0.1

exp.nu0_factor = best_nu0factor
exp.alpha0_diags = best_diags
exp.alpha0_factor = best_factor

exp.methods = [
    # 'bac_seq_integrateIF',
    # 'bac_seq',
    'bac_seq_integrateIF_noHMM',
]

# rerun_all=True presumably forces recomputation of cached results, and
# test_no_crowd=False skips the no-crowd evaluation -- confirm against
# Experiment.run_methods().
exp.run_methods(annos, gt, doc_start, output_dir, text, rerun_all=True, test_no_crowd=False)

# # # values obtained from tuning on dev:
# best_nu0factor = 0.1
# best_diags = 10
# best_factor = 0.1
# doc_start_dev = doc_start_dev[:s] # text_dev = text_dev[:s] # ------------------------- exp = Experiment(None, 3, annos.shape[1], None) exp.alpha0_factor = 1 exp.alpha0_diags = 100 exp.save_results = True exp.opt_hyper = False # True # run all the methods that don't require tuning here exp.methods = [ 'ibcc', 'majority', 'best', 'worst', ] seeds = [10] #np.arange(100) annos = annos[gt.flatten() != -1] text = text[gt.flatten() != -1] doc_start = doc_start[gt.flatten() != -1] gt = gt[gt.flatten() != -1] annos_sample = annos[:, :252] print('Mean annotations per token: %f' % np.mean(np.sum(annos_sample != -1, axis=1)))
# NOTE(review): this chunk starts inside an Experiment(...) call whose opening
# lies before this view, and ends inside another Experiment(...) call whose
# remaining arguments follow after it.
9, annos,
gt, doc_start, features, annos,
gt_val, doc_start, features,
alpha0_factor=alpha0_factor, alpha0_diags=alpha0_diags,
beta0_factor=beta0_factor, max_iter=20)

exp.methods = [
    # 'bac_seq_integrateIF',
    'bac_seq_integrateIF_thenLSTM',
    'bac_seq_integrateIF_integrateLSTM_atEnd',
]

# should run both task 1 and 2.
exp.run_methods(new_data=regen_data)

# -------------------------------------------------------------------------------------

# hyperparameters for the next (ner3) run
beta0_factor = 0.1
alpha0_diags = 0.1
alpha0_factor = 0.1

# output directory name encodes the hyperparameter values
output_dir = os.path.join(
    evaluation.experiment.output_root_dir,
    'ner3_%f_%f_%f' % (beta0_factor, alpha0_diags, alpha0_factor))

exp = Experiment(output_dir,
# NOTE(review): this chunk starts inside an Experiment(...) call whose opening
# lies before this view, and ends inside another Experiment(...) call whose
# remaining arguments follow after it.
gt, doc_start, features, annos,
gt_val, doc_start, features,
alpha0_factor=alpha0_factor, alpha0_diags=alpha0_diags,
beta0_factor=beta0_factor, max_iter=20)

# # run all the methods that don't require tuning here
exp.methods = [
    'best',
    'worst',
    'majority',
    'ds',
    'mace',
    'HMM_crowd',
]

# this will run task 1 -- train on all crowdsourced data, test on the labelled portion thereof
exp.run_methods(new_data=regen_data)

# ------------------------------------------------------------------------------------------------

# hyperparameters for the next (pico3) run
beta0_factor = 0.1
alpha0_diags = 10
alpha0_factor = 1

# output directory name encodes the hyperparameter values
output_dir = os.path.join(
    evaluation.experiment.output_root_dir,
    'pico3_%f_%f_%f' % (beta0_factor, alpha0_diags, alpha0_factor))

exp = Experiment(output_dir,