def get_training_data_and_feature(opt, data_loader, preprocessor):
    """Return train and dev features, label maps and transition matrices.

    When ``opt.load_feature`` is set, both splits are read with
    ``load_feature`` from a cache file whose path is the data path with
    ``'.json'`` replaced by ``'.saved.pk'``. If a cache file is missing, the
    features are rebuilt from the raw data and saved, and the option flags
    are put back to the values they had on entry.

    When ``opt.load_feature`` is not set, examples are read through
    ``data_loader.load_data``, label dictionaries are built with
    ``make_dict`` and features with ``preprocessor.construct_feature``; the
    results are written with ``save_feature`` only if ``opt.save_feature``
    is set.

    Args:
        opt: options object. Reads ``load_feature``, ``save_feature``,
            ``train_path`` and ``dev_path``; ``load_feature`` and
            ``save_feature`` are toggled temporarily on a cache miss.
        data_loader: object whose ``load_data(path=...)`` returns
            ``(examples, max_len, max_support_size, trans_mat)``.
        preprocessor: object whose ``construct_feature(examples,
            max_support_size, label2id, id2label)`` returns the features.

    Returns:
        The 8-tuple ``(train_features, train_label2id, train_id2label,
        train_trans_mat, dev_features, dev_label2id, dev_id2label,
        dev_trans_mat)``.
    """
    # NOTE(review): str.replace leaves a path without '.json' unchanged, so
    # the cache path would equal the data path - confirm inputs are *.json.
    if opt.load_feature:
        try:
            train_features, train_label2id, train_id2label, train_trans_mat = \
                load_feature(opt.train_path.replace('.json', '.saved.pk'))
            dev_features, dev_label2id, dev_id2label, dev_trans_mat = \
                load_feature(opt.dev_path.replace('.json', '.saved.pk'))
        except FileNotFoundError:
            # Not a saved feature file yet: rebuild from raw data and save it.
            previous_flags = (opt.load_feature, opt.save_feature)
            opt.load_feature, opt.save_feature = False, True
            try:
                return get_training_data_and_feature(
                    opt, data_loader, preprocessor)
            finally:
                # Restore the caller's options even if the rebuild fails.
                opt.load_feature, opt.save_feature = previous_flags
    else:
        # The loader also returns a max length, which is not needed here.
        train_examples, _, train_max_support_size, train_trans_mat = \
            data_loader.load_data(path=opt.train_path)
        dev_examples, _, dev_max_support_size, dev_trans_mat = \
            data_loader.load_data(path=opt.dev_path)
        train_label2id, train_id2label = make_dict(train_examples)
        dev_label2id, dev_id2label = make_dict(dev_examples)
        logger.info(' Finish train dev prepare dict ')
        train_features = preprocessor.construct_feature(
            train_examples, train_max_support_size,
            train_label2id, train_id2label)
        dev_features = preprocessor.construct_feature(
            dev_examples, dev_max_support_size,
            dev_label2id, dev_id2label)
        logger.info(' Finish prepare train dev features ')
        if opt.save_feature:
            save_feature(
                opt.train_path.replace('.json', '.saved.pk'),
                train_features, train_label2id, train_id2label,
                train_trans_mat)
            save_feature(
                opt.dev_path.replace('.json', '.saved.pk'),
                dev_features, dev_label2id, dev_id2label,
                dev_trans_mat)
    return train_features, train_label2id, train_id2label, train_trans_mat, \
        dev_features, dev_label2id, dev_id2label, dev_trans_mat
def get_training_data_and_feature(opt, data_loader, preprocessor):
    """Return train and dev features with their slot and intent label maps.

    When ``opt.load_feature`` is set, both splits are read with
    ``load_feature`` from a cache file whose path is the data path with
    ``'.json'`` replaced by ``'.saved.pk'``. If a cache file is missing, the
    features are rebuilt from the raw data and saved, and the option flags
    are put back to the values they had on entry.

    When ``opt.load_feature`` is not set, examples are read through
    ``data_loader.load_data``, label dictionaries are built with
    ``make_dict`` and features with ``preprocessor.construct_feature``.
    Debug information is printed if ``opt.do_debug`` is set, and the results
    are written with ``save_feature`` if ``opt.save_feature`` is set.

    Args:
        opt: options object. Reads ``load_feature``, ``save_feature``,
            ``do_debug``, ``train_path`` and ``dev_path``; ``load_feature``
            and ``save_feature`` are toggled temporarily on a cache miss.
        data_loader: object whose ``load_data(path=...)`` returns a 3-tuple
            whose first item is the examples and last item is the maximum
            support size (the middle item is ignored here).
        preprocessor: object whose ``construct_feature(examples,
            max_support_size, slot_label2id, slot_id2label,
            intent_label2id, intent_id2label)`` returns the features.

    Returns:
        The 6-tuple ``(train_features, (train_slot_label2id,
        train_slot_id2label), (train_intent_label2id,
        train_intent_id2label), dev_features, (dev_slot_label2id,
        dev_slot_id2label), (dev_intent_label2id, dev_intent_id2label))``.
    """
    # NOTE(review): str.replace leaves a path without '.json' unchanged, so
    # the cache path would equal the data path - confirm inputs are *.json.
    if opt.load_feature:
        try:
            train_features, (train_slot_label2id, train_slot_id2label), \
                (train_intent_label2id, train_intent_id2label) = \
                load_feature(opt.train_path.replace('.json', '.saved.pk'))
            dev_features, (dev_slot_label2id, dev_slot_id2label), \
                (dev_intent_label2id, dev_intent_id2label) = \
                load_feature(opt.dev_path.replace('.json', '.saved.pk'))
        except FileNotFoundError:
            # Not a saved feature file yet: rebuild from raw data and save it.
            previous_flags = (opt.load_feature, opt.save_feature)
            opt.load_feature, opt.save_feature = False, True
            try:
                return get_training_data_and_feature(
                    opt, data_loader, preprocessor)
            finally:
                # Restore the caller's options even if the rebuild fails.
                opt.load_feature, opt.save_feature = previous_flags
    else:
        train_examples, _, train_max_support_size = data_loader.load_data(
            path=opt.train_path)
        dev_examples, _, dev_max_support_size = data_loader.load_data(
            path=opt.dev_path)
        (train_slot_label2id, train_slot_id2label), \
            (train_intent_label2id, train_intent_id2label) = \
            make_dict(opt, train_examples)
        (dev_slot_label2id, dev_slot_id2label), \
            (dev_intent_label2id, dev_intent_id2label) = \
            make_dict(opt, dev_examples)
        logger.info(' Finish train dev prepare dict ')
        train_features = preprocessor.construct_feature(
            train_examples, train_max_support_size,
            train_slot_label2id, train_slot_id2label,
            train_intent_label2id, train_intent_id2label)
        dev_features = preprocessor.construct_feature(
            dev_examples, dev_max_support_size,
            dev_slot_label2id, dev_slot_id2label,
            dev_intent_label2id, dev_intent_id2label)
        logger.info(' Finish prepare train dev features ')
        if opt.do_debug:
            print('train_slot_label2id: {}'.format(train_slot_label2id))
            print('train_intent_label2id: {}'.format(train_intent_label2id))
            print('dev_slot_label2id: {}'.format(dev_slot_label2id))
            print('dev_intent_label2id: {}'.format(dev_intent_label2id))
            # Only the counts are printed; the single placeholder never
            # consumed the collections themselves.
            print('train_examples: {}'.format(len(train_examples)))
            print('train_features: {}'.format(len(train_features)))
        if opt.save_feature:
            save_feature(
                opt.train_path.replace('.json', '.saved.pk'),
                train_features, train_slot_label2id, train_slot_id2label,
                train_intent_label2id, train_intent_id2label)
            save_feature(
                opt.dev_path.replace('.json', '.saved.pk'),
                dev_features, dev_slot_label2id, dev_slot_id2label,
                dev_intent_label2id, dev_intent_id2label)
    return train_features, \
        (train_slot_label2id, train_slot_id2label), \
        (train_intent_label2id, train_intent_id2label), \
        dev_features, \
        (dev_slot_label2id, dev_slot_id2label), \
        (dev_intent_label2id, dev_intent_id2label)
def get_testing_data_feature(opt, data_loader, preprocessor):
    """Return test features, label maps and transition matrix.

    When ``opt.load_feature`` is set, the features are read with
    ``load_feature`` from a cache file whose path is ``opt.test_path`` with
    ``'.json'`` replaced by ``'.saved.pk'``. If that file is missing, the
    features are rebuilt from the raw data and saved, and the option flags
    are put back to the values they had on entry.

    When ``opt.load_feature`` is not set, examples are read through
    ``data_loader.load_data``, the label dictionary is built with
    ``make_dict`` and features with ``preprocessor.construct_feature``; the
    results are written with ``save_feature`` only if ``opt.save_feature``
    is set.

    Args:
        opt: options object. Reads ``load_feature``, ``save_feature`` and
            ``test_path``; ``load_feature`` and ``save_feature`` are toggled
            temporarily on a cache miss.
        data_loader: object whose ``load_data(path=...)`` returns
            ``(examples, max_len, max_support_size, trans_mat)``.
        preprocessor: object whose ``construct_feature(examples,
            max_support_size, label2id, id2label)`` returns the features.

    Returns:
        The 4-tuple ``(test_features, test_label2id, test_id2label,
        test_trans_mat)``.
    """
    # NOTE(review): str.replace leaves a path without '.json' unchanged, so
    # the cache path would equal the data path - confirm inputs are *.json.
    if opt.load_feature:
        try:
            test_features, test_label2id, test_id2label, test_trans_mat = \
                load_feature(opt.test_path.replace('.json', '.saved.pk'))
        except FileNotFoundError:
            # Not a saved feature file yet: rebuild from raw data and save it.
            previous_flags = (opt.load_feature, opt.save_feature)
            opt.load_feature, opt.save_feature = False, True
            try:
                return get_testing_data_feature(
                    opt, data_loader, preprocessor)
            finally:
                # Restore the caller's options even if the rebuild fails.
                opt.load_feature, opt.save_feature = previous_flags
    else:
        # The loader also returns a max length, which is not needed here.
        test_examples, _, test_max_support_size, test_trans_mat = \
            data_loader.load_data(path=opt.test_path)
        test_label2id, test_id2label = make_dict(test_examples)
        logger.info(' Finish prepare test dict')
        test_features = preprocessor.construct_feature(
            test_examples, test_max_support_size,
            test_label2id, test_id2label)
        logger.info(' Finish prepare test feature')
        if opt.save_feature:
            save_feature(
                opt.test_path.replace('.json', '.saved.pk'),
                test_features, test_label2id, test_id2label,
                test_trans_mat)
    return test_features, test_label2id, test_id2label, test_trans_mat
def get_testing_data_feature(opt, data_loader, preprocessor):
    """Return test features with their slot and intent label maps.

    When ``opt.load_feature`` is set, the features are read with
    ``load_feature`` from a cache file whose path is ``opt.test_path`` with
    ``'.json'`` replaced by ``'.saved.pk'``. If that file is missing, the
    features are rebuilt from the raw data and saved, and the option flags
    are put back to the values they had on entry.

    When ``opt.load_feature`` is not set, examples are read through
    ``data_loader.load_data``, label dictionaries are built with
    ``make_dict`` and features with ``preprocessor.construct_feature``.
    The slot label maps are printed if ``opt.do_debug`` is set, and the
    results are written with ``save_feature`` if ``opt.save_feature`` is
    set.

    Args:
        opt: options object. Reads ``load_feature``, ``save_feature``,
            ``do_debug`` and ``test_path``; ``load_feature`` and
            ``save_feature`` are toggled temporarily on a cache miss.
        data_loader: object whose ``load_data(path=...)`` returns a 3-tuple
            whose first item is the examples and last item is the maximum
            support size (the middle item is ignored here).
        preprocessor: object whose ``construct_feature(examples,
            max_support_size, slot_label2id, slot_id2label,
            intent_label2id, intent_id2label)`` returns the features.

    Returns:
        The 3-tuple ``(test_features, (test_slot_label2id,
        test_slot_id2label), (test_intent_label2id,
        test_intent_id2label))``.
    """
    # NOTE(review): str.replace leaves a path without '.json' unchanged, so
    # the cache path would equal the data path - confirm inputs are *.json.
    if opt.load_feature:
        try:
            test_features, (test_slot_label2id, test_slot_id2label), \
                (test_intent_label2id, test_intent_id2label) = \
                load_feature(opt.test_path.replace('.json', '.saved.pk'))
        except FileNotFoundError:
            # Not a saved feature file yet: rebuild from raw data and save it.
            previous_flags = (opt.load_feature, opt.save_feature)
            opt.load_feature, opt.save_feature = False, True
            try:
                return get_testing_data_feature(
                    opt, data_loader, preprocessor)
            finally:
                # Restore the caller's options even if the rebuild fails.
                opt.load_feature, opt.save_feature = previous_flags
    else:
        test_examples, _, test_max_support_size = data_loader.load_data(
            path=opt.test_path)
        (test_slot_label2id, test_slot_id2label), \
            (test_intent_label2id, test_intent_id2label) = \
            make_dict(opt, test_examples)
        logger.info(' Finish prepare test dict')
        test_features = preprocessor.construct_feature(
            test_examples, test_max_support_size,
            test_slot_label2id, test_slot_id2label,
            test_intent_label2id, test_intent_id2label)
        logger.info(' Finish prepare test feature')
        if opt.do_debug:
            print('test_slot_label2id: {}'.format(test_slot_label2id))
            print('test_slot_id2label: {}'.format(test_slot_id2label))
        if opt.save_feature:
            save_feature(
                opt.test_path.replace('.json', '.saved.pk'),
                test_features, test_slot_label2id, test_slot_id2label,
                test_intent_label2id, test_intent_id2label)
    return test_features, \
        (test_slot_label2id, test_slot_id2label), \
        (test_intent_label2id, test_intent_id2label)