def get_test_examples(self, data_dir):
    """Load the DSTC2 test split as examples.

    Uses the first ASR hypothesis (use_asr_hyp=1) and keeps
    unpointable values (exclude_unpointable=False).
    """
    test_path = os.path.join(data_dir, 'dstc2_test_en.json')
    return dataset_dstc2.create_examples(
        test_path,
        self.slot_list,
        'test',
        use_asr_hyp=1,
        exclude_unpointable=False)
def get_dev_examples(self, data_dir):
    """Load the validation split as examples.

    Reads transcripts only (use_asr_hyp=0) and keeps unpointable
    values (exclude_unpointable=False).
    """
    dev_path = os.path.join(data_dir, 'woz_validate_en.json')
    return dataset_dstc2.create_examples(
        dev_path,
        self.slot_list,
        'dev',
        use_asr_hyp=0,
        exclude_unpointable=False)
def get_train_examples(self, data_dir):
    """Load the DSTC2 training split as examples.

    Relies on create_examples' defaults for ASR-hypothesis and
    unpointable-value handling.
    """
    train_path = os.path.join(data_dir, 'dstc2_train_en.json')
    return dataset_dstc2.create_examples(train_path, self.slot_list, 'train')
# NOTE(review): this chunk was recovered from a single whitespace-mangled
# line; the indentation below is reconstructed and must be verified against
# the original file. The first statements are the tail of a prediction-
# writing loop whose enclosing function/`for`/file-handle headers lie
# outside this chunk — their nesting here is a best-effort guess.

        # Map predicted and ground-truth span indices back to surface text.
        # Slices use an inclusive end index, hence the +1.
        prediction["slot_prediction_%s" % slot] = ' '.join(
            input_tokens[start_pd:end_pd + 1])
        prediction["slot_groundtruth_%s" % slot] = ' '.join(
            input_tokens[start_gt:end_gt + 1])
        list_prediction.append(prediction)
        # Stop once the number of real (non-padding) predict examples is
        # reached; presumably the remainder are padding — TODO confirm.
        if i >= num_actual_predict_examples:
            break
        num_written_ex += 1
    # Dump all collected predictions to the open output file `f`.
    json.dump(list_prediction, f, indent=2)
    # Sanity check: every real predict example produced one record.
    assert num_written_ex == num_actual_predict_examples


if __name__ == "__main__":
    # Original TF-flags entry point, kept disabled for reference:
    #flags.mark_flag_as_required("data_dir")
    #flags.mark_flag_as_required("task_name")
    #flags.mark_flag_as_required("vocab_file")
    #flags.mark_flag_as_required("bert_config_file")
    #flags.mark_flag_as_required("output_dir")
    #tf.app.run()

    # Ad-hoc script path with hard-coded local paths: build a WordPiece
    # tokenizer from a local BERT vocab and convert the WOZ training set
    # into TFRecord features (max sequence length 128).
    tokenizer = tokenization.FullTokenizer(
        vocab_file='/ml/uncased_L-12_H-768_A-12/vocab.txt',
        do_lower_case=True)
    class_types = ['none', 'dontcare', 'copy_value', 'unpointable']
    slot_list = ['area', 'food', 'price range']
    examples = dataset_dstc2.create_examples(
        '/ml/woz/woz_train_en.json', slot_list, 'train')
    file_based_convert_examples_to_features(
        examples, slot_list, class_types, 128, tokenizer, 'train.tfr')