def main():
    """Entry point: wire up file logging, run the Instructor over the
    configured dataset, and log total elapsed time.

    Relies on module-level globals: ``opt`` (parsed options), ``dataset_files``
    (name -> file-path dict), ``setup_seed``, ``logger``, ``Instructor``, and
    ``get_time_dif`` — all defined elsewhere in this project.
    """
    opt.dataset_file = dataset_files[opt.dataset]
    # Set random seed for reproducibility.
    setup_seed(opt.seed)
    # exist_ok=True avoids the check-then-create race of the original
    # `if not os.path.exists(...): os.mkdir(...)` pattern.
    os.makedirs('log', exist_ok=True)
    # NOTE(review): ':' in the timestamp makes this filename invalid on
    # Windows — confirm the target OS, or switch to a ':'-free format.
    log_file = '{}-{}-{}.log'.format(
        opt.model_name, opt.dataset,
        strftime("%Y-%m-%d_%H:%M:%S", localtime()))
    # os.path.join instead of "%s/%s" for portable path construction.
    logger.addHandler(logging.FileHandler(os.path.join('./log', log_file)))

    start_time = time.time()
    # Instructor is constructed with the test split and trained on the train split.
    ins = Instructor(opt.dataset_file['test_query'],
                     opt.dataset_file['test_reply'])
    ins.run(opt.dataset_file['train_query'], opt.dataset_file['train_reply'])
    time_dif = get_time_dif(start_time)
    logger.info("Time usage: {}".format(time_dif))
# NOTE(review): collapsed/whitespace-mangled top-level script fragment that is
# TRUNCATED mid-statement (`train_dataset.shuffle(len(train_X)).batch(` has no
# closing argument/paren in view). It loads a model module dynamically via
# importlib, builds its Config, loads the NER-style dataset (word/tag vocab
# maps), loads word2vec embeddings, converts splits to (X, Y) tensors, and
# begins constructing a shuffled/batched tf.data pipeline. Left byte-identical:
# reformatting cannot be done safely without the missing tail of the statement.
if args.embedding == 'random': embedding = 'random' model_name = args.model # 'TextRCNN' # TextCNN x = import_module('models.' + model_name) #一个函数运行需要根据不同项目的配置,动态导入对应的配置文件运行。 config = x.Config(dataset) #进入到对应模型的__init__方法进行参数初始化 start_time = time.time() print("Loading data...") train_data, dev_data, test_data, train_sentences, test_sentences, dev_sentences, word_to_id, id_to_word, tag_to_id, id_to_tag = load_model_dataset( config) config.n_vocab = len(word_to_id) time_dif = data_utils.get_time_dif(start_time) print("Time usage:", time_dif) embedding_pretrained = data_utils.load_word2vec(config, id_to_word) train_X, train_Y = data_utils.get_X_and_Y_data(train_data, config.max_len, len(tag_to_id)) dev_X, dev_Y = data_utils.get_X_and_Y_data(dev_data, config.max_len, len(tag_to_id)) test_X, test_Y = data_utils.get_X_and_Y_data(test_data, config.max_len, len(tag_to_id)) train_dataset = tf.data.Dataset.from_tensor_slices((train_X, train_Y)) train_dataset = train_dataset.shuffle(len(train_X)).batch(