def main(_):
    """Train the slot-labeling model.

    Reads the training file from ``sys.argv[1]`` and the dev file from
    ``sys.argv[2]``, builds the vocabulary converter, and trains ``Model``,
    resuming from the latest checkpoint under ``models/<Config.file_name>``
    if one exists.
    """
    model_path = os.path.join('models', Config.file_name)
    # Idempotent and race-free, unlike an exists()-then-makedirs() check.
    os.makedirs(model_path, exist_ok=True)

    train_file = sys.argv[1]
    dev_file = sys.argv[2]
    save_file = os.path.join(model_path, 'vocab_tuples.pkl')

    # Load (sentence, tags) samples for training and validation.
    sens_tags_train = get_sens_tags(train_file)
    sens_tags_val = get_sens_tags(dev_file)

    # Build the vocabulary / tag converter from the training file.
    converter = TextConverter(train_file, save_file,
                              max_vocab=Config.vocab_max_size)
    print('vocab size:', converter.vocab_size)
    # +1 presumably reserves an extra class (e.g. padding) — TODO confirm.
    Config.num_classes = converter.tag_size + 1

    # Produce training batches.
    train_QA_arrs = converter.QAs_to_arr(sens_tags_train, Config.seq_length)
    train_g = converter.batch_generator(train_QA_arrs, Config.batch_size)

    # Produce validation batches.
    val_QA_arrs = converter.QAs_to_arr(sens_tags_val, Config.seq_length)
    val_g = converter.val_samples_generator(val_QA_arrs, Config.batch_size)

    # Restore the most recently saved model, if any.
    model = Model(Config, converter.vocab_size)
    checkpoint_path = tf.train.latest_checkpoint(model_path)
    if checkpoint_path:
        model.load(checkpoint_path)

    print('start to training...')
    model.train(train_g, model_path, val_g)
def main(_):
    """Train the QA model from an Excel source file.

    Loads QA pairs from the fixed Excel input, splits them 80/20 into
    train/validation sets, builds the vocabulary converter, and trains
    ``Model``, resuming from the latest checkpoint under
    ``models/<Config.file_name>`` if one exists.
    """
    model_path = os.path.join('models', Config.file_name)
    # Idempotent and race-free, unlike an exists()-then-makedirs() check.
    os.makedirs(model_path, exist_ok=True)

    input_file = 'data/去除2和null.xlsx'
    vocab_file = os.path.join(model_path, 'vocab_label.pkl')

    # Load raw Excel data. Expected layout: first sheet, column 1 = id,
    # column 2 = query, column 3 = response.
    QAs = get_excel_QAs(input_file)

    # 80/20 split into training and validation sets.
    thres = int(0.8 * len(QAs))
    train_QAs = QAs[:thres]
    val_QAs = QAs[thres:]

    # Build the vocabulary converter from the training text.
    text = get_QAs_text(train_QAs)
    converter = TextConverter(text, vocab_file,
                              max_vocab=Config.vocab_max_size,
                              seq_length=Config.seq_length)
    print('vocab size:', converter.vocab_size)

    # Produce training batches.
    train_QA_arrs = converter.QAs_to_arr(train_QAs)
    train_g = converter.batch_generator(train_QA_arrs, Config.batch_size)

    # Produce validation batches.
    val_QA_arrs = converter.QAs_to_arr(val_QAs)
    val_g = converter.val_samples_generator(val_QA_arrs, Config.batch_size)

    # Restore the most recently saved model, if any.
    model = Model(Config, converter.vocab_size)
    checkpoint_path = tf.train.latest_checkpoint(model_path)
    if checkpoint_path:
        model.load(checkpoint_path)

    print('start to training...')
    model.train(train_g, model_path, val_g)
# 数据处理 converter = TextConverter(train_files, save_file, max_vocab=Config.vocab_size, seq_length=Config.seq_length) print('vocab size:', converter.vocab_size) print('labels:', converter.label) train_texts, train_labels = converter.load_data(train_files) train_x, train_x_len, train_y = converter.texts_to_arr( train_texts, train_labels) val_texts, val_labels = converter.load_data(val_files) val_x, val_x_len, val_y = converter.texts_to_arr(val_texts, val_labels) # 产生训练样本 train_g = converter.batch_generator(train_x, train_x_len, train_y, Config.batch_size) val_g = converter.val_samples_generator(val_x, val_x_len, val_y, Config.batch_size) model = Model(Config) # 加载上一次保存的模型 checkpoint_path = tf.train.latest_checkpoint(model_path) if checkpoint_path: model.load(checkpoint_path) print('start to training...') model.train(train_g, model_path, val_g)
# Evaluation script: load the trained classifier and run it on the test set.
train_files = '../data/cnews.train.txt'
val_files = '../data/cnews.val.txt'
test_files = '../data/cnews.test.txt'
save_file = 'cnews.vocab_label.pkl'

# Data processing: rebuild the vocabulary/label converter from the training
# file so test ids match the ones used during training.
converter = TextConverter(train_files, save_file, max_vocab=Config.vocab_size, seq_length=Config.seq_length)
print('vocab size:', converter.vocab_size)
print('labels:', converter.label)

# Convert test texts/labels into padded arrays (+ per-sample lengths).
test_texts, test_labels = converter.load_data(test_files)
test_x, test_x_len, test_y = converter.texts_to_arr(
    test_texts, test_labels)
test_g = converter.val_samples_generator(test_x, test_x_len, test_y,
                                         Config.batch_size)

model = Model(Config)

# Load the most recently saved checkpoint, if any.
# NOTE(review): `model_path` is defined elsewhere in this file — confirm.
checkpoint_path = tf.train.latest_checkpoint(model_path)
if checkpoint_path:
    model.load(checkpoint_path)

print('start to testing...')
model.test(test_g)