def evaluate_model(results, dataset, all_predictions, all_y_true, stats_graph_folder,
                   epoch_number, epoch_start_time, output_filepaths):
    results['epoch'][epoch_number] = []
    assess_and_save(results, dataset, None, all_predictions, all_y_true,
                    stats_graph_folder, epoch_number, 0, epoch_start_time)
    plot_f1_vs_epoch(results, stats_graph_folder, 'f1_score')
    plot_f1_vs_epoch(results, stats_graph_folder, 'accuracy_score')

    # Run the official CoNLL evaluation script on each dataset split.
    for dataset_type in ['train', 'valid', 'test']:
        conll_evaluation_script = os.path.join('.', 'conlleval')
        conll_output_filepath = '{0}_conll_evaluation.txt'.format(output_filepaths[dataset_type])
        shell_command = 'perl {0} < {1} > {2}'.format(conll_evaluation_script,
                                                      output_filepaths[dataset_type],
                                                      conll_output_filepath)
        print('shell_command: {0}'.format(shell_command))
        os.system(shell_command)
        conll_parsed_output = utils_nlp.get_parsed_conll_output(conll_output_filepath)
        results['epoch'][epoch_number][0][dataset_type]['conll'] = conll_parsed_output
        results['epoch'][epoch_number][0][dataset_type]['f1_conll'] = {}
        results['epoch'][epoch_number][0][dataset_type]['f1_conll']['micro'] = \
            results['epoch'][epoch_number][0][dataset_type]['conll']['all']['f1']
    plot_f1_vs_epoch(results, stats_graph_folder, 'f1_conll', from_json=False)
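# Note: os.system with shell redirection works but silently ignores failures.
# Below is a minimal sketch of a safer invocation via the standard subprocess
# module; run_conll_evaluation is an illustrative helper name, not part of the
# original codebase.
import subprocess

def run_conll_evaluation(script_filepath, prediction_filepath, output_filepath):
    # conlleval reads the predictions on stdin and writes its report to stdout,
    # so we wire both streams to file handles instead of building a shell string.
    with open(prediction_filepath) as prediction_file, \
         open(output_filepath, 'w') as output_file:
        subprocess.run(['perl', script_filepath], stdin=prediction_file,
                       stdout=output_file, check=True)  # check=True raises on a non-zero exit code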
def evaluate_model(results, dataset, y_pred_all, y_true_all, stats_graph_folder,
                   epoch_number, epoch_start_time, output_filepaths, parameters,
                   verbose=False):
    results['epoch'][epoch_number] = []
    result_update = {}
    for dataset_type in ['train', 'valid', 'test']:
        print('Generating plots for the {0} set'.format(dataset_type))
        result_update[dataset_type] = {}
        y_pred_original = y_pred_all[dataset_type]
        y_true_original = y_true_all[dataset_type]
        for evaluation_mode in ['BIO', 'token', 'binary']:
            y_pred, y_true, label_indices, label_names = remap_labels(
                y_pred_original, y_true_original, dataset, evaluation_mode=evaluation_mode)
            result_update[dataset_type][evaluation_mode] = assess_model(
                y_pred, y_true, label_indices, label_names, dataset_type,
                stats_graph_folder, epoch_number, evaluation_mode=evaluation_mode,
                verbose=verbose)
            if parameters['main_evaluation_mode'] == evaluation_mode:
                # Promote the scores of the main evaluation mode to the top level.
                result_update[dataset_type].update(result_update[dataset_type][evaluation_mode])

    result_update['time_elapsed_since_epoch_start'] = time.time() - epoch_start_time
    result_update['time_elapsed_since_train_start'] = time.time() - results['execution_details']['train_start']
    results['epoch'][epoch_number].append(result_update)

    plot_f1_vs_epoch(results, stats_graph_folder, 'f1_score')
    plot_f1_vs_epoch(results, stats_graph_folder, 'accuracy_score')

    # Run the official CoNLL evaluation script on each dataset split.
    for dataset_type in ['train', 'valid', 'test']:
        conll_evaluation_script = os.path.join('.', 'conlleval')
        conll_output_filepath = '{0}_conll_evaluation.txt'.format(output_filepaths[dataset_type])
        shell_command = 'perl {0} < {1} > {2}'.format(conll_evaluation_script,
                                                      output_filepaths[dataset_type],
                                                      conll_output_filepath)
        print('shell_command: {0}'.format(shell_command))
        os.system(shell_command)
        conll_parsed_output = utils_nlp.get_parsed_conll_output(conll_output_filepath)
        results['epoch'][epoch_number][0][dataset_type]['conll'] = conll_parsed_output
        results['epoch'][epoch_number][0][dataset_type]['f1_conll'] = {}
        results['epoch'][epoch_number][0][dataset_type]['f1_conll']['micro'] = \
            results['epoch'][epoch_number][0][dataset_type]['conll']['all']['f1']
    plot_f1_vs_epoch(results, stats_graph_folder, 'f1_conll')

    results['execution_details']['train_duration'] = time.time() - results['execution_details']['train_start']
    save_results(results, stats_graph_folder)
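# remap_labels is not shown here. As an illustration only (an assumption about
# its behavior, not the original implementation): 'bio' evaluation keeps the
# B-/I- prefixes, 'token' strips them so that B-PER and I-PER count as the same
# class, and 'binary' collapses every entity label into a single class versus 'O'.
def remap_label_illustration(label, evaluation_mode):
    # Hypothetical helper showing the three modes on a single BIO label.
    mode = evaluation_mode.lower()
    if mode == 'bio' or label == 'O':
        return label                       # e.g. 'B-PER' -> 'B-PER'
    if mode == 'token':
        return label.split('-', 1)[-1]     # e.g. 'B-PER' -> 'PER'
    if mode == 'binary':
        return 'NAMED_ENTITY'              # e.g. 'B-PER' -> 'NAMED_ENTITY'
    raise ValueError('Unknown evaluation mode: {0}'.format(evaluation_mode))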
def evaluate_model(results, dataset, y_pred_all, y_true_all, stats_graph_folder,
                   epoch_number, epoch_start_time, output_filepaths, parameters,
                   verbose=False):
    results['execution_details']['num_epochs'] = epoch_number
    results['epoch'][epoch_number] = []
    result_update = {}
    for dataset_type in ['train', 'valid', 'test']:
        if dataset_type not in output_filepaths.keys():
            continue
        print('Generating plots for the {0} set'.format(dataset_type))
        result_update[dataset_type] = {}
        y_pred_original = y_pred_all[dataset_type]
        y_true_original = y_true_all[dataset_type]
        for evaluation_mode in ['bio', 'token', 'binary']:
            y_pred, y_true, label_indices, label_names, label_indices_with_o, label_names_with_o = remap_labels(
                y_pred_original, y_true_original, dataset, evaluation_mode=evaluation_mode)
            result_update[dataset_type][evaluation_mode] = assess_model(
                y_pred, y_true, label_indices, label_names, label_indices_with_o,
                label_names_with_o, dataset_type, stats_graph_folder, epoch_number,
                parameters, evaluation_mode=evaluation_mode, verbose=verbose)
            if parameters['main_evaluation_mode'] == evaluation_mode:
                # Promote the scores of the main evaluation mode to the top level.
                result_update[dataset_type].update(result_update[dataset_type][evaluation_mode])

    result_update['time_elapsed_since_epoch_start'] = time.time() - epoch_start_time
    result_update['time_elapsed_since_train_start'] = time.time() - results['execution_details']['train_start']
    results['epoch'][epoch_number].append(result_update)

    # Run the official CoNLL evaluation script on each dataset split.
    for dataset_type in ['train', 'valid', 'test']:
        if dataset_type not in output_filepaths.keys():
            continue
        conll_evaluation_script = os.path.join('.', 'conlleval')
        conll_output_filepath = '{0}_conll_evaluation.txt'.format(output_filepaths[dataset_type])
        shell_command = 'perl {0} < {1} > {2}'.format(conll_evaluation_script,
                                                      output_filepaths[dataset_type],
                                                      conll_output_filepath)
        print('shell_command: {0}'.format(shell_command))
        os.system(shell_command)
        conll_parsed_output = utils_nlp.get_parsed_conll_output(conll_output_filepath)
        results['epoch'][epoch_number][0][dataset_type]['conll'] = conll_parsed_output
        results['epoch'][epoch_number][0][dataset_type]['f1_conll'] = {}
        results['epoch'][epoch_number][0][dataset_type]['f1_conll']['micro'] = \
            results['epoch'][epoch_number][0][dataset_type]['conll']['all']['f1']

        if parameters['main_evaluation_mode'] == 'conll':
            results['epoch'][epoch_number][0][dataset_type]['f1_score'] = {}
            results['epoch'][epoch_number][0][dataset_type]['f1_score']['micro'] = \
                results['epoch'][epoch_number][0][dataset_type]['conll']['all']['f1']
            results['epoch'][epoch_number][0][dataset_type]['accuracy_score'] = \
                results['epoch'][epoch_number][0][dataset_type]['conll']['all']['accuracy']

        utils_plots.plot_classification_report(
            results['epoch'][epoch_number][0][dataset_type]['conll'],
            title='Classification report for epoch {0} in {1} ({2} evaluation)\n'.format(
                epoch_number, dataset_type, 'conll'),
            cmap='RdBu', from_conll_json=True)
        # Note: the original format string used index {3} and passed a stale
        # evaluation_mode variable left over from the earlier loop; fixed here.
        plt.savefig(os.path.join(
            stats_graph_folder,
            'classification_report_for_epoch_{0:04d}_in_{1}_conll_evaluation.{2}'.format(
                epoch_number, dataset_type, parameters['plot_format'])),
            dpi=300, format=parameters['plot_format'], bbox_inches='tight')
        plt.close()

    if parameters['train_model'] and 'train' in output_filepaths.keys() and 'valid' in output_filepaths.keys():
        plot_f1_vs_epoch(results, stats_graph_folder, 'f1_score', parameters)
        plot_f1_vs_epoch(results, stats_graph_folder, 'accuracy_score', parameters)
        plot_f1_vs_epoch(results, stats_graph_folder, 'f1_conll', parameters)
    results['execution_details']['train_duration'] = time.time() - results['execution_details']['train_start']
    save_results(results, stats_graph_folder)
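# utils_nlp.get_parsed_conll_output is not shown above. The code indexes its
# result as parsed['all']['f1'] and parsed['all']['accuracy'], so it presumably
# turns conlleval's plain-text report into a nested dict. A minimal sketch of
# such a parser, assuming the standard conlleval report format (the original
# implementation may differ):
import re

def parse_conll_output_sketch(conll_output_filepath):
    with open(conll_output_filepath) as f:
        lines = f.read().splitlines()
    parsed = {'all': {}}
    # Second line, e.g.: "accuracy:  97.32%; precision:  90.75%; recall:  90.30%; FB1:  90.52"
    overall = [float(v) for v in re.findall(r'\d+\.\d+', lines[1])]
    parsed['all']['accuracy'], parsed['all']['precision'], \
        parsed['all']['recall'], parsed['all']['f1'] = overall
    # Remaining lines, e.g.: "LOC: precision:  92.41%; recall:  91.02%; FB1:  91.71  1668"
    # (the trailing integer count has no decimal point, so the regex skips it).
    for line in lines[2:]:
        if not line.strip():
            continue
        label = line.split(':', 1)[0].strip()
        precision, recall, f1 = [float(v) for v in re.findall(r'\d+\.\d+', line)]
        parsed[label] = {'precision': precision, 'recall': recall, 'f1': f1}
    return parsed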
def main():
    #### Parameters - start
    conf_parameters = configparser.ConfigParser()
    conf_parameters.read(os.path.join('.', 'parameters.ini'))
    nested_parameters = utils.convert_configparser_to_dictionary(conf_parameters)
    parameters = {}
    for k, v in nested_parameters.items():
        parameters.update(v)
    # configparser returns every value as a string; cast the numeric and boolean parameters.
    for k, v in parameters.items():
        if k in ['remove_unknown_tokens', 'character_embedding_dimension',
                 'character_lstm_hidden_state_dimension', 'token_embedding_dimension',
                 'token_lstm_hidden_state_dimension', 'patience',
                 'maximum_number_of_epochs', 'maximum_training_time',
                 'number_of_cpu_threads', 'number_of_gpus']:
            parameters[k] = int(v)
        if k in ['dropout_rate']:
            parameters[k] = float(v)
        if k in ['use_character_lstm', 'is_character_lstm_bidirect',
                 'is_token_lstm_bidirect', 'use_crf']:
            parameters[k] = distutils.util.strtobool(v)
    pprint(parameters)

    # Load dataset
    dataset_filepaths = {}
    dataset_filepaths['train'] = os.path.join(parameters['dataset_text_folder'], 'train.txt')
    dataset_filepaths['valid'] = os.path.join(parameters['dataset_text_folder'], 'valid.txt')
    dataset_filepaths['test'] = os.path.join(parameters['dataset_text_folder'], 'test.txt')
    dataset = ds.Dataset()
    dataset.load_dataset(dataset_filepaths, parameters)

    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            device_count={'CPU': 1, 'GPU': 1},
            # Automatically fall back to an existing, supported device if the
            # specified one doesn't exist.
            allow_soft_placement=True,
            log_device_placement=False)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            model = EntityLSTM(dataset, parameters)

            # Define the training procedure.
            global_step = tf.Variable(0, name="global_step", trainable=False)
            if parameters['optimizer'] == 'adam':
                optimizer = tf.train.AdamOptimizer(1e-3)
            elif parameters['optimizer'] == 'sgd':
                optimizer = tf.train.GradientDescentOptimizer(0.005)
            else:
                raise ValueError("The optimizer parameter must be either 'adam' or 'sgd'.")
            grads_and_vars = optimizer.compute_gradients(model.loss)
            # By defining a global_step variable and passing it to the optimizer, we let
            # TensorFlow handle the counting of training steps: global_step is
            # automatically incremented by one every time train_op is executed.
            train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)

            # Initialize all variables
            sess.run(tf.global_variables_initializer())

            # Load pretrained token embeddings
            if not parameters['token_pretrained_embedding_filepath'] == '':
                load_token_embeddings(sess, model.W, dataset, parameters)

            estop = False  # early stop
            start_time = time.time()
            experiment_timestamp = utils.get_current_time_in_miliseconds()
            results = {}
            results['epoch'] = {}
            # Save/initialize execution details
            results['execution_details'] = {}
            results['execution_details']['train_start'] = start_time
            results['execution_details']['time_stamp'] = experiment_timestamp
            results['execution_details']['early_stop'] = False
            results['execution_details']['keyboard_interrupt'] = False
            results['execution_details']['num_epochs'] = 0
            results['model_options'] = copy.copy(parameters)

            dataset_name = utils.get_basename_without_extension(parameters['dataset_text_folder'])  # e.g. 'conll2003en'
            model_name = '{0}_{1}'.format(dataset_name, results['execution_details']['time_stamp'])
            output_folder = os.path.join('..', 'output')
            stats_graph_folder = os.path.join(output_folder, model_name)  # Folder where to save graphs
            utils.create_folder_if_not_exists(output_folder)
            print('stats_graph_folder: {0}'.format(stats_graph_folder))
            utils.create_folder_if_not_exists(stats_graph_folder)
            model_folder = os.path.join(stats_graph_folder, 'model')
            utils.create_folder_if_not_exists(model_folder)

            step = 0
            bad_counter = 0
            previous_best_valid_f1_score = 0
            transition_params_trained = np.random.rand(len(dataset.unique_labels), len(dataset.unique_labels))
            try:
                while True:
                    epoch_number = math.floor(step / len(dataset.token_indices['train']))
                    print('epoch_number: {0}'.format(epoch_number))
                    epoch_start_time = time.time()

                    # Train model: loop over all sequences of the training set, with shuffling
                    sequence_numbers = list(range(len(dataset.token_indices['train'])))
                    random.shuffle(sequence_numbers)
                    for sequence_number in sequence_numbers:
                        transition_params_trained = train_step(sess, dataset, sequence_number,
                                                               train_op, global_step, model,
                                                               transition_params_trained, parameters)
                        step += 1
                        if sequence_number % 100 == 0:
                            print('.', end='', flush=True)

                    # Evaluate model
                    print('step: {0}'.format(step))
                    all_predictions = {}
                    all_y_true = {}
                    output_filepaths = {}
                    for dataset_type in ['train', 'valid', 'test']:
                        print('dataset_type: {0}'.format(dataset_type))
                        all_predictions[dataset_type], all_y_true[dataset_type], output_filepaths[dataset_type] = \
                            evaluate_model(sess, dataset, dataset_type, model, transition_params_trained,
                                           step, stats_graph_folder, epoch_number, parameters)
                    model_options = None

                    # Save and plot results
                    # TODO: remove uidx
                    uidx = 0
                    results['epoch'][epoch_number] = []
                    results['execution_details']['num_epochs'] = epoch_number
                    epoch_elapsed_training_time = time.time() - epoch_start_time
                    print('epoch_elapsed_training_time: {0:02f} seconds'.format(epoch_elapsed_training_time))
                    assess_model.assess_and_save(results, dataset, model_options, all_predictions,
                                                 all_y_true, stats_graph_folder, epoch_number,
                                                 uidx, epoch_start_time)
                    assess_model.plot_f1_vs_epoch(results, stats_graph_folder, 'f1_score')
                    assess_model.plot_f1_vs_epoch(results, stats_graph_folder, 'accuracy_score')

                    # CoNLL evaluation script
                    for dataset_type in ['train', 'valid', 'test']:
                        conll_evaluation_script = os.path.join('.', 'conlleval')
                        conll_output_filepath = '{0}_conll_evaluation.txt'.format(output_filepaths[dataset_type])
                        shell_command = 'perl {0} < {1} > {2}'.format(conll_evaluation_script,
                                                                      output_filepaths[dataset_type],
                                                                      conll_output_filepath)
                        print('shell_command: {0}'.format(shell_command))
                        os.system(shell_command)
                        conll_parsed_output = utils_nlp.get_parsed_conll_output(conll_output_filepath)
                        print('conll_parsed_output: {0}'.format(conll_parsed_output))
                        results['epoch'][epoch_number][0][dataset_type]['conll'] = conll_parsed_output
                        results['epoch'][epoch_number][0][dataset_type]['f1_conll'] = {}
                        results['epoch'][epoch_number][0][dataset_type]['f1_conll']['micro'] = \
                            results['epoch'][epoch_number][0][dataset_type]['conll']['all']['f1']
                    assess_model.plot_f1_vs_epoch(results, stats_graph_folder, 'f1_conll', from_json=False)

                    # Early stop
                    valid_f1_score = results['epoch'][epoch_number][0]['valid']['f1_score']['micro']
                    if valid_f1_score > previous_best_valid_f1_score:
                        bad_counter = 0
                        previous_best_valid_f1_score = valid_f1_score
                    else:
                        bad_counter += 1
                    if bad_counter > parameters['patience']:
                        print('Early Stop!')
                        results['execution_details']['early_stop'] = True
                        break

                    if epoch_number > parameters['maximum_number_of_epochs']:
                        break
            except KeyboardInterrupt:
                results['execution_details']['keyboard_interrupt'] = True
                print('Training interrupted')

            print('Finishing the experiment')
            end_time = time.time()
            results['execution_details']['train_duration'] = end_time - start_time
            results['execution_details']['train_end'] = end_time
            assess_model.save_results(results, stats_graph_folder)
        sess.close()  # release the session's resources
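# An illustrative parameters.ini, consistent with the casts performed in main()
# above. Since main() flattens all sections into a single dictionary, the
# section names below are arbitrary placeholders, and the paths and
# hyperparameter values are examples, not the original configuration:
#
#   [dataset]
#   dataset_text_folder = ../data/conll2003/en
#   token_pretrained_embedding_filepath = ../data/word_vectors/glove.6B.100d.txt
#
#   [model]
#   use_character_lstm = True
#   character_embedding_dimension = 25
#   character_lstm_hidden_state_dimension = 25
#   is_character_lstm_bidirect = True
#   token_embedding_dimension = 100
#   token_lstm_hidden_state_dimension = 100
#   is_token_lstm_bidirect = True
#   use_crf = True
#   dropout_rate = 0.5
#
#   [training]
#   optimizer = adam
#   patience = 10
#   maximum_number_of_epochs = 100
#   maximum_training_time = 10
#   remove_unknown_tokens = 1
#   number_of_cpu_threads = 8
#   number_of_gpus = 1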