Example 1
import numpy as np
import tensorflow as tf

# (truncated above: the original loads a configuration dict into dataMap)
if dataMap is None:  # hypothetical guard restoring the truncated context
    exit(-1)

# Validate the configuration and build a Config object from it.
validate_config(dataMap)
config = Config(**dataMap)

(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# One-hot encode the labels. For data preprocessing we fall back to the
# Keras utilities for now; this could be replaced with tf.one_hot.
y_train = tf.keras.utils.to_categorical(y_train, 10)
y_test = tf.keras.utils.to_categorical(y_test, 10)

# Flatten the 28x28 images into 784-dimensional vectors.
x_train = np.reshape(x_train, (-1, 784))
x_test = np.reshape(x_test, (-1, 784))

# Normalize pixel values to [0, 1].
x_train = x_train / 255
x_test = x_test / 255

# Wrap the arrays in the project's Dataset helper and shuffle everything.
dataSet = Dataset()
dataSet.set_test_data((x_test, y_test))
dataSet.set_train_data((x_train, y_train))
dataSet.prepare_data(shuffle_all=True)

# Build, compile, and train the feed-forward model.
feed_forward = FeedForward(config, dataSet)
feed_forward.build()
feed_forward.compile()
feed_forward.train()

# from utils.proto_maker import proto_maker
# proto_maker(model,config)
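
FeedForward, Config, and Dataset above are project-specific wrappers, so the snippet is not runnable on its own. For reference, here is a minimal self-contained Keras sketch of the same pipeline; the two-layer architecture and hyperparameters are illustrative assumptions, not the project's actual config:

import numpy as np
import tensorflow as tf

(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
x_train = np.reshape(x_train, (-1, 784)) / 255.0
x_test = np.reshape(x_test, (-1, 784)) / 255.0
y_train = tf.keras.utils.to_categorical(y_train, 10)
y_test = tf.keras.utils.to_categorical(y_test, 10)

# Illustrative stand-in for FeedForward.build(): a plain dense classifier.
model = tf.keras.Sequential([
    tf.keras.layers.Dense(128, activation="relu", input_shape=(784,)),
    tf.keras.layers.Dense(10, activation="softmax"),
])
model.compile(optimizer="adam",
              loss="categorical_crossentropy",
              metrics=["accuracy"])
model.fit(x_train, y_train, epochs=5, batch_size=32,
          validation_data=(x_test, y_test))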
Example 2
import json
import math
import os
import pickle as pkl

import torch
from tqdm import tqdm
# AdamW and the warmup scheduler are assumed to come from Hugging Face
# transformers; args, Dataset, Model, write_predictions, and evaluate are
# project-specific and defined elsewhere.
from transformers import AdamW, get_linear_schedule_with_warmup


def train():
    options = vars(args)
    for opt, val in options.items():
        print("[{}] = {}".format(opt, val))
    dataset = Dataset(options)

    device = "cuda" if args.use_cuda else "cpu"
    model = Model(options)
    model.to(device)
    dev_batches = dataset.get_batches(dataset.dev_data,
                                      args.batch_size,
                                      training=False)

    # Evaluation-only mode: load a checkpoint, score the dev set, and exit.
    if args.eval:
        load_model_name = os.path.join(args.model_path, args.model_name)
        state_dict = torch.load(load_model_name, map_location=device)
        model.load_state_dict(state_dict)
        model.eval()

        all_results = model.get_results(dev_batches, dataset.dev_features)
        model_name = args.model_name
        output_prediction_file = os.path.join(
            args.result_path, "{}_predictions.json".format(model_name))
        output_nbest_file = os.path.join(
            args.result_path, "{}_nbest_predictions.json".format(model_name))

        write_predictions(dataset.dev_examples, dataset.dev_features,
                          all_results, args.n_best_size,
                          args.max_answer_length, output_prediction_file,
                          output_nbest_file)
        with open(os.path.join(args.data_path, "KorQuAD_v1.0_dev.json"),
                  "r") as fp:
            eval_dataset = json.load(fp)["data"]

        with open(output_prediction_file) as prediction_file:
            predictions = json.load(prediction_file)

        eval_info = evaluate(eval_dataset, predictions)
        print("[Dev] EM = {:.2f}%, F1 = {:.2f}%".format(
            eval_info["exact_match"], eval_info["f1"]))
        exit()

    # Either batch the full training set up front, or (in the split setting)
    # just count the examples in the two feature shards to size the schedule.
    if not args.split_train_data:
        train_batches = dataset.get_batches(dataset.train_data,
                                            args.batch_size)
        total_train_size = len(train_batches)
    else:
        total_train_size = 0
        for i in range(1, 3):
            with open(
                    os.path.join(args.data_path,
                                 "train_features_{}.pkl".format(i)),
                    "rb") as fp:
                train_features = pkl.load(fp)
                total_train_size += len(train_features)
                del train_features
        total_train_size = math.ceil(total_train_size / args.batch_size)

    # Split trainable parameters: the BERT encoder gets AdamW with weight
    # decay and warmup, everything else goes to a separate task optimizer.
    bert_parameters = []
    parameters = []
    for name, param in model.named_parameters():
        if param.requires_grad:
            if "bert" in name:
                bert_parameters.append((name, param))
            else:
                parameters.append(param)

    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [
        {
            'params': [p for n, p in bert_parameters
                       if not any(nd in n for nd in no_decay)],
            'weight_decay': 0.01,
        },
        {
            'params': [p for n, p in bert_parameters
                       if any(nd in n for nd in no_decay)],
            'weight_decay': 0.0,
        },
    ]

    # Total optimization steps and the linear-warmup schedule for BERT.
    num_train_steps = total_train_size * args.epochs
    warmup_step = math.ceil(args.warmup_proportion * num_train_steps)

    bert_optimizer = AdamW(optimizer_grouped_parameters, lr=args.bert_lrate)
    bert_scheduler = get_linear_schedule_with_warmup(bert_optimizer,
                                                     warmup_step,
                                                     num_train_steps)

    # Separate Adamax optimizer for the task-specific parameters.
    lrate = args.lrate
    optimizer = torch.optim.Adamax(parameters, lr=lrate, betas=(0.9, 0.999))

    best_f1 = 0.0
    dev_batches = dataset.get_batches(dataset.dev_data,
                                      args.batch_size,
                                      training=False)

    for epoch in range(1, args.epochs + 1):
        model.train()
        if not args.split_train_data:
            pbar = tqdm(train_batches, total=total_train_size)
            for i, batch_data in enumerate(pbar):
                loss = model(batch_data)
                loss.backward()

                optimizer.step()

                bert_optimizer.step()
                bert_scheduler.step()

                optimizer.zero_grad()
                bert_optimizer.zero_grad()
                model.zero_grad()

                if i % 100 == 0:
                    pbar.set_description(
                        "[Epoch {}] Step = {} / {}, Loss = {:.5f}".format(
                            epoch, i, total_train_size, loss))

        else:
            for num_feat in range(1, 3):
                with open(
                        os.path.join(args.data_path,
                                     "train_features_{}.pkl".format(num_feat)),
                        "rb") as fp:
                    train_features = pkl.load(fp)
                    train_data = dataset.prepare_data(train_features)
                    train_batches = dataset.get_batches(
                        train_data, args.batch_size)
                    del train_features, train_data

                train_size = len(train_batches)
                pbar = tqdm(train_batches, total=train_size)
                for i, batch_data in enumerate(pbar):
                    loss = model(batch_data)
                    loss.backward()

                    optimizer.step()

                    bert_optimizer.step()
                    bert_scheduler.step()

                    optimizer.zero_grad()
                    bert_optimizer.zero_grad()
                    model.zero_grad()
                    if i % 100 == 0:
                        pbar.set_description(
                            "[Epoch {}] Step = {} / {}, Loss = {:.5f}".format(
                                epoch, i, train_size, loss))

        model.eval()
        all_results = model.get_results(dev_batches, dataset.dev_features)
        model_name = args.model_name
        output_prediction_file = os.path.join(
            args.result_path, "{}_predictions.json".format(model_name))
        output_nbest_file = os.path.join(
            args.result_path, "{}_nbest_predictions.json".format(model_name))

        write_predictions(dataset.dev_examples, dataset.dev_features,
                          all_results, args.n_best_size,
                          args.max_answer_length, output_prediction_file,
                          output_nbest_file)

        with open(os.path.join(args.data_path, "KorQuAD_v1.0_dev.json"),
                  "r") as fp:
            eval_dataset = json.load(fp)["data"]

        with open(output_prediction_file) as prediction_file:
            predictions = json.load(prediction_file)

        eval_info = evaluate(eval_dataset, predictions)
        print("[Epoch {}] EM = {:.2f}%, F1 = {:.2f}%".format(
            epoch, eval_info["exact_match"], eval_info["f1"]))

        if best_f1 < eval_info["f1"]:
            best_f1 = eval_info["f1"]
            save_model_name = os.path.join(args.model_path, args.model_name)
            state_dict = model.state_dict()
            torch.save(state_dict, save_model_name)

        # Manual step decay for the task optimizer; the BERT learning rate
        # is already decayed by its linear schedule.
        if epoch % args.decay_period == 0:
            lrate = lrate * args.decay
            for group in optimizer.param_groups:
                group['lr'] = lrate
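
The core pattern in train() is worth isolating: AdamW with a linear-warmup schedule drives the BERT parameters (with weight decay disabled for biases and LayerNorm), a separate Adamax optimizer drives the task head, and both are stepped every batch. Below is a minimal sketch of that pattern with a dummy model; the module names, sizes, and hyperparameters are placeholders, not the project's actual Model:

import torch
from transformers import AdamW, get_linear_schedule_with_warmup

# Dummy stand-in: the "bert" prefix marks encoder parameters, the rest
# belongs to the task head.
model = torch.nn.ModuleDict({
    "bert": torch.nn.Linear(16, 16),
    "head": torch.nn.Linear(16, 2),
})

bert_params = [(n, p) for n, p in model.named_parameters() if "bert" in n]
head_params = [p for n, p in model.named_parameters() if "bert" not in n]

no_decay = ["bias"]
grouped = [
    {"params": [p for n, p in bert_params
                if not any(nd in n for nd in no_decay)],
     "weight_decay": 0.01},
    {"params": [p for n, p in bert_params
                if any(nd in n for nd in no_decay)],
     "weight_decay": 0.0},
]

num_train_steps, warmup_step = 100, 10
bert_optimizer = AdamW(grouped, lr=5e-5)
bert_scheduler = get_linear_schedule_with_warmup(bert_optimizer,
                                                 warmup_step, num_train_steps)
optimizer = torch.optim.Adamax(head_params, lr=1e-3)

for step in range(num_train_steps):
    x = torch.randn(8, 16)
    loss = model["head"](model["bert"](x)).pow(2).mean()  # dummy loss
    loss.backward()
    optimizer.step()
    bert_optimizer.step()
    bert_scheduler.step()  # warmup, then linear decay of the BERT lr
    optimizer.zero_grad()
    bert_optimizer.zero_grad()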