exit(-1)

# Assumes `import numpy as np` and `import tensorflow as tf` above, plus the
# project-local Config, Dataset, FeedForward, and validate_config helpers.
validate_config(dataMap)
config = Config(**dataMap)

# Load MNIST: 28x28 uint8 images with integer class labels.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# Could be replaced with tf.one_hot.
# For data preprocessing, fall back to the Keras API for the moment.
y_train = tf.keras.utils.to_categorical(y_train, 10)
y_test = tf.keras.utils.to_categorical(y_test, 10)

# Flatten each image to a 784-dim vector and scale pixel values to [0, 1].
x_test = np.reshape(x_test, (-1, 784))
x_train = np.reshape(x_train, (-1, 784))
x_train = x_train / 255
x_test = x_test / 255

dataSet = Dataset()
dataSet.set_test_data((x_test, y_test))
dataSet.set_train_data((x_train, y_train))
dataSet.prepare_data(shuffle_all=True)

feed_forward = FeedForward(config, dataSet)
feed_forward.build()
feed_forward.compile()
feed_forward.train()

# from utils.proto_maker import proto_maker
# proto_maker(model, config)
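# The comment above suggests tf.one_hot as an alternative to
# tf.keras.utils.to_categorical. A minimal sketch of that swap (note that
# tf.one_hot returns a tf.Tensor rather than a NumPy array, so any later
# NumPy manipulation of the labels would need tf ops or .numpy()):
#
#     y_train = tf.one_hot(y_train, depth=10)
#     y_test = tf.one_hot(y_test, depth=10)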
# Assumed module-level imports for train(): json, math, os; pickle as pkl;
# torch; tqdm from tqdm; AdamW and get_linear_schedule_with_warmup from
# transformers. Dataset, Model, write_predictions, evaluate, and args are
# project-local.
def train():
    # Print the run configuration.
    options = vars(args)
    for opt, val in options.items():
        print("[{}] = {}".format(opt, val))

    dataset = Dataset(options)
    device = "cuda" if args.use_cuda else "cpu"
    model = Model(options)
    model.to(device)

    dev_batches = dataset.get_batches(dataset.dev_data,
                                      args.batch_size,
                                      training=False)

    # Evaluation-only mode: load a checkpoint, write predictions on the dev
    # set, score them, and exit.
    if args.eval:
        load_model_name = os.path.join(args.model_path, args.model_name)
        state_dict = torch.load(load_model_name, map_location=device)
        model.load_state_dict(state_dict)
        model.eval()
        all_results = model.get_results(dev_batches, dataset.dev_features)

        model_name = args.model_name
        output_prediction_file = os.path.join(
            args.result_path, "{}_predictions.json".format(model_name))
        output_nbest_file = os.path.join(
            args.result_path, "{}_nbest_predictions.json".format(model_name))
        write_predictions(dataset.dev_examples, dataset.dev_features,
                          all_results, args.n_best_size,
                          args.max_answer_length, output_prediction_file,
                          output_nbest_file)

        with open(os.path.join(args.data_path, "KorQuAD_v1.0_dev.json"),
                  "r") as fp:
            eval_dataset = json.load(fp)["data"]
        with open(output_prediction_file) as prediction_file:
            predictions = json.load(prediction_file)
        eval_info = evaluate(eval_dataset, predictions)
        print("[Dev] EM = {:.2f}%, F1 = {:.2f}%".format(
            eval_info["exact_match"], eval_info["f1"]))
        exit()

    # Count training batches; when the training features are split across
    # pickled shards, sum the feature counts without keeping them in memory.
    if not args.split_train_data:
        train_batches = dataset.get_batches(dataset.train_data,
                                            args.batch_size)
        total_train_size = len(train_batches)
    else:
        total_train_size = 0
        for i in range(1, 3):
            with open(
                    os.path.join(args.data_path,
                                 "train_features_{}.pkl".format(i)),
                    "rb") as fp:
                train_features = pkl.load(fp)
            total_train_size += len(train_features)
            del train_features
        total_train_size = math.ceil(total_train_size / args.batch_size)

    # Split trainable parameters: BERT weights are handled by AdamW,
    # everything else by Adamax.
    bert_parameters = []
    parameters = []
    for name, param in model.named_parameters():
        if param.requires_grad:
            if "bert" in name:
                bert_parameters.append((name, param))
            else:
                parameters.append(param)

    # Exclude biases and LayerNorm weights from weight decay.
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [{
        'params': [p for n, p in bert_parameters
                   if not any(nd in n for nd in no_decay)],
        'weight_decay': 0.01
    }, {
        'params': [p for n, p in bert_parameters
                   if any(nd in n for nd in no_decay)],
        'weight_decay': 0.0
    }]

    bert_lrate = args.bert_lrate
    warmup_proportion = args.warmup_proportion
    num_train_steps = total_train_size * args.epochs
    bert_optimizer = AdamW(optimizer_grouped_parameters, lr=bert_lrate)

    lrate = args.lrate
    warmup_step = math.ceil(warmup_proportion * num_train_steps)
    optimizer = torch.optim.Adamax(parameters, lr=lrate, betas=(0.9, 0.999))
    # Linear warmup then linear decay over all training steps, applied to
    # the BERT optimizer only.
    bert_scheduler = get_linear_schedule_with_warmup(bert_optimizer,
                                                     warmup_step,
                                                     num_train_steps)

    best_f1 = 0.0
    dev_batches = dataset.get_batches(dataset.dev_data,
                                      args.batch_size,
                                      training=False)

    for epoch in range(1, args.epochs + 1):
        model.train()
        if not args.split_train_data:
            pbar = tqdm(train_batches, total=total_train_size)
            for i, batch_data in enumerate(pbar):
                loss = model(batch_data)
                loss.backward()
                optimizer.step()
                bert_optimizer.step()
                bert_scheduler.step()
                optimizer.zero_grad()
                bert_optimizer.zero_grad()
                model.zero_grad()
                if i % 100 == 0:
                    pbar.set_description(
                        "[Epoch {}] Step = {} / {}, Loss = {:.5f}".format(
                            epoch, i, total_train_size, loss.item()))
        else:
            # Stream the pickled feature shards one at a time to bound
            # memory use.
            for num_feat in range(1, 3):
                with open(
                        os.path.join(
                            args.data_path,
                            "train_features_{}.pkl".format(num_feat)),
                        "rb") as fp:
                    train_features = pkl.load(fp)
                train_data = dataset.prepare_data(train_features)
                train_batches = dataset.get_batches(train_data,
                                                    args.batch_size)
                del train_features, train_data
                train_size = len(train_batches)
                pbar = tqdm(train_batches, total=train_size)
                for i, batch_data in enumerate(pbar):
                    loss = model(batch_data)
                    loss.backward()
                    optimizer.step()
                    bert_optimizer.step()
                    bert_scheduler.step()
                    optimizer.zero_grad()
                    bert_optimizer.zero_grad()
                    model.zero_grad()
                    if i % 100 == 0:
                        pbar.set_description(
                            "[Epoch {}] Step = {} / {}, Loss = {:.5f}".format(
                                epoch, i, train_size, loss.item()))

        # Evaluate on the dev set after each epoch.
        model.eval()
        all_results = model.get_results(dev_batches, dataset.dev_features)

        model_name = args.model_name
        output_prediction_file = os.path.join(
            args.result_path, "{}_predictions.json".format(model_name))
        output_nbest_file = os.path.join(
            args.result_path, "{}_nbest_predictions.json".format(model_name))
        write_predictions(dataset.dev_examples, dataset.dev_features,
                          all_results, args.n_best_size,
                          args.max_answer_length, output_prediction_file,
                          output_nbest_file)

        with open(os.path.join(args.data_path, "KorQuAD_v1.0_dev.json"),
                  "r") as fp:
            eval_dataset = json.load(fp)["data"]
        with open(output_prediction_file) as prediction_file:
            predictions = json.load(prediction_file)
        eval_info = evaluate(eval_dataset, predictions)
        print("[Epoch {}] EM = {:.2f}%, F1 = {:.2f}%".format(
            epoch, eval_info["exact_match"], eval_info["f1"]))

        # Keep the checkpoint with the best dev F1 so far.
        if best_f1 < eval_info["f1"]:
            best_f1 = eval_info["f1"]
            save_model_name = os.path.join(args.model_path, args.model_name)
            state_dict = model.state_dict()
            torch.save(state_dict, save_model_name)

        # Periodic step decay for the non-BERT learning rate.
        if epoch % args.decay_period == 0:
            lrate = lrate * args.decay
            for group in optimizer.param_groups:
                group['lr'] = lrate
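# train() reads every hyperparameter from a module-level `args` namespace.
# A minimal argparse sketch covering the flags the function references; the
# flag names match the code, but the default values below are illustrative
# assumptions, not values from the original project:
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--eval", action="store_true")
parser.add_argument("--use_cuda", action="store_true")
parser.add_argument("--split_train_data", action="store_true")
parser.add_argument("--data_path", default="data")
parser.add_argument("--model_path", default="model")
parser.add_argument("--result_path", default="result")
parser.add_argument("--model_name", default="korquad_model")
parser.add_argument("--batch_size", type=int, default=32)
parser.add_argument("--epochs", type=int, default=3)
parser.add_argument("--lrate", type=float, default=1e-3)
parser.add_argument("--bert_lrate", type=float, default=5e-5)
parser.add_argument("--warmup_proportion", type=float, default=0.1)
parser.add_argument("--n_best_size", type=int, default=20)
parser.add_argument("--max_answer_length", type=int, default=30)
parser.add_argument("--decay_period", type=int, default=1)
parser.add_argument("--decay", type=float, default=0.5)
args = parser.parse_args()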