def test_parser(config, best_model, data_types):
    """Run the graph parser in test mode for every data type and export the greedy result.

    For each entry of ``data_types`` this builds one shell command starting
    with ``python graph_parser_main.py test --get_accuracy``, runs it, and
    then calls ``output_conllu`` with the greedy prediction files.

    Args:
        config: Nested mapping. The keys read here are
            ``config['data']['base_dir']``,
            ``config['parser']['scores']['metrics']`` and
            ``config['data']['split'][data_type]``.
        best_model: Value substituted after ``--model`` on the command line.
        data_types: Iterable of data-type names; each one is used as the
            ``<name>.txt`` file stem under ``base_dir``.

    Raises:
        subprocess.CalledProcessError: If the parser command exits with a
            non-zero status.
    """
    base_dir = config['data']['base_dir']
    features = ['sents', 'predicted_pos', 'predicted_stag', 'arcs', 'rels']
    base_command = 'python graph_parser_main.py test --get_accuracy'

    model_flag = ' --model {}'.format(best_model)
    metrics_flag = ' --metrics {}'.format(config['parser']['scores']['metrics'])
    model_info = model_flag + metrics_flag

    # (dict key, output sub-directory, command-line flag template), listed in
    # the exact order in which the flags are appended to the command.
    standard_outputs = (
        (6, 'predicted_arcs', ' --predicted_arcs_file {}'),
        (7, 'predicted_rels', ' --predicted_rels_file {}'),
    )
    greedy_outputs = (
        (6, 'predicted_arcs_greedy', ' --predicted_arcs_file_greedy {}'),
        (7, 'predicted_rels_greedy', ' --predicted_rels_file_greedy {}'),
    )

    for data_type in data_types:
        file_name = '{}.txt'.format(data_type)

        # `inputs` is only consumed by the disabled non-greedy export noted
        # below; it is still populated so that call can be re-enabled as is.
        inputs = {}
        inputs_greedy = {}
        flag_parts = []
        for collected, specs in ((inputs, standard_outputs),
                                 (inputs_greedy, greedy_outputs)):
            for key, dir_name, flag_template in specs:
                target = os.path.join(base_dir, dir_name, file_name)
                collected[key] = target
                # The parser writes into this directory, so create it first.
                target_dir = os.path.dirname(target)
                if not os.path.isdir(target_dir):
                    os.makedirs(target_dir)
                flag_parts.append(flag_template.format(target))
        output_info = ''.join(flag_parts)

        # One input file per feature, in the order the flags below expect.
        test_data_paths = [
            os.path.join(base_dir, feature, file_name) for feature in features
        ]
        test_data_info = ' --text_test {} --jk_test {} --tag_test {} --arc_test {} --rel_test {}'.format(*test_data_paths)

        complete_command = base_command + model_info + output_info + test_data_info
        subprocess.check_call(complete_command, shell=True)

        split_name = config['data']['split'][data_type]
        # Disabled in the original: the same export with suffix '_arc_rel'
        # and the non-greedy `inputs` mapping.
        output_conllu(
            os.path.join(base_dir, split_name),
            os.path.join(base_dir, split_name + '_arc_rel_greedy'),
            inputs_greedy,
        )
def test_stagger(config, best_model, data_types):
    """Run the BiLSTM supertagger in test mode for every data type and export its tags.

    For each entry of ``data_types`` this runs
    ``python bilstm_stagger_main.py test`` through the shell, asking it to
    save its tags under ``<base_dir>/predicted_stag/<data_type>.txt``, and
    then passes that file to ``output_conllu``.

    Args:
        config: Nested mapping. The keys read here are
            ``config['data']['base_dir']`` and
            ``config['data']['split'][data_type]``.
        best_model: Value substituted after ``--model`` on the command line.
        data_types: Iterable of data-type names; each one is used as the
            ``<name>.txt`` file stem under ``base_dir``.

    Raises:
        subprocess.CalledProcessError: If the tagger command exits with a
            non-zero status.
    """
    base_dir = config['data']['base_dir']
    base_command = 'python bilstm_stagger_main.py test'
    model_info = ' --model {}'.format(best_model)
    command_prefix = base_command + model_info

    for data_type in data_types:
        file_name = '{}.txt'.format(data_type)

        output_file = os.path.join(base_dir, 'predicted_stag', file_name)
        # The tagger writes into this directory, so create it first.
        output_dir = os.path.dirname(output_file)
        if not os.path.isdir(output_dir):
            os.makedirs(output_dir)
        output_info = ' --save_tags {} --get_accuracy'.format(output_file)

        sents_path = os.path.join(base_dir, 'sents', file_name)
        gold_stag_path = os.path.join(base_dir, 'gold_stag', file_name)
        # NOTE(review): --jk_test receives the same gold_stag file as
        # --tag_test; confirm this is intended rather than a POS file.
        test_data_info = ' --text_test {} --jk_test {} --tag_test {}'.format(
            sents_path, gold_stag_path, gold_stag_path)

        subprocess.check_call(command_prefix + output_info + test_data_info, shell=True)

        split_name = config['data']['split'][data_type]
        output_conllu(
            output_file,
            os.path.join(base_dir, split_name),
            os.path.join(base_dir, split_name + '_stag'),
        )