def __init__(self, args, embeddings, tag2label, vocab, paths, config):
    """Capture hyperparameters, data mappings and output locations.

    Args:
        args: parsed command-line namespace; fields read here are
            batch_size, epoch, hidden_dim, CRF, update_embedding,
            dropout, optimizer, lr, clip and shuffle.
        embeddings: word embedding matrix used by the model.
        tag2label: mapping from tag string to integer label id; its
            length defines the number of output tags.
        vocab: word-to-id vocabulary mapping.
        paths: dict providing 'model_path', 'summary_path', 'log_path'
            and 'result_path' entries.
        config: session/runtime configuration object passed through
            unchanged (presumably a tf.ConfigProto — confirm at caller).
    """
    # Attribute name on self -> field name on ``args`` it is copied from.
    _arg_fields = {
        "batch_size": "batch_size",
        "epoch_num": "epoch",
        "hidden_dim": "hidden_dim",
        "CRF": "CRF",
        "update_embedding": "update_embedding",
        "dropout_keep_prob": "dropout",
        "optimizer": "optimizer",
        "lr": "lr",
        "clip_grad": "clip",
        "shuffle": "shuffle",
    }
    for attr, field in _arg_fields.items():
        setattr(self, attr, getattr(args, field))

    # Data / label bookkeeping.
    self.embeddings = embeddings
    self.tag2label = tag2label
    self.num_tags = len(tag2label)
    self.vocab = vocab

    # Output locations and logging.
    self.model_path = paths['model_path']
    self.summary_path = paths['summary_path']
    self.logger = get_logger(paths['log_path'])
    self.result_path = paths['result_path']

    self.config = config
## paths setup: every output location hangs off a per-run directory named by
## the start timestamp (training) or by the saved model chosen for demo mode.
timestamp = str(int(time.time())) if args.mode == 'train' else args.demo_model
output_path = os.path.join('.', args.train_data + "_save", timestamp)
# exist_ok=True replaces the racy exists()-then-makedirs() pattern: the
# directory is created atomically and re-running with an existing dir is fine.
os.makedirs(output_path, exist_ok=True)

summary_path = os.path.join(output_path, "summaries")
paths['summary_path'] = summary_path
os.makedirs(summary_path, exist_ok=True)

model_path = os.path.join(output_path, "checkpoints/")
os.makedirs(model_path, exist_ok=True)
# 'model_path' is the checkpoint *prefix* (dir + "model"), not the directory.
ckpt_prefix = os.path.join(model_path, "model")
paths['model_path'] = ckpt_prefix

result_path = os.path.join(output_path, "results")
paths['result_path'] = result_path
os.makedirs(result_path, exist_ok=True)

log_path = os.path.join(result_path, "log.txt")
paths['log_path'] = log_path
# Record the full run configuration at the top of the log for reproducibility.
get_logger(log_path).info(str(args))

## training Entity Extraction model
if args.mode == 'train':
    model = BiLSTM_CRF(args, embeddings, tag2label, word2id, paths, config=config)
    model.build_graph()

    ## hyperparameters-tuning, split train/dev
    # dev_data = train_data[:5000]; dev_size = len(dev_data)
    # train_data = train_data[5000:]; train_size = len(train_data)
    # print("train data: {0}\ndev data: {1}".format(train_size, dev_size))
    # model.train(train=train_data, dev=dev_data)

    ## train model on the whole training data
    print("train data: {}".format(len(train_data)))