Ejemplo n.º 1
0
def predict():
    """Handle a prediction request whose JSON body carries a ``prep`` field.

    The ``prep`` value is converted to a string, passed to ``do_predict``,
    and the resulting commands are returned as a JSON response.

    Returns:
        A 200 response built from ``jsonify(commands)`` on success, or a
        ``(message, 400)`` tuple when the request is not JSON, the payload
        is not a JSON object, ``prep`` is missing/null/empty, or
        ``do_predict`` returns a falsy result.
    """
    if not request.is_json:
        return 'bad request!', 400

    # Parse the JSON into a Python dictionary
    req = request.get_json()
    if not isinstance(req, dict):
        # Valid JSON that is not an object (list, number, null, ...) cannot
        # carry a 'prep' field; reject it instead of failing on `.get`.
        return 'bad request!', 400

    prep = req.get('prep')
    # Print the received value
    print(prep)

    # A missing or null 'prep' must count as empty: str(None) would yield the
    # truthy string 'None' and slip past the check below.
    text = '' if prep is None else str(prep)
    if not text:
        return 'Empty strings are not allowed', 400

    commands = do_predict(text)
    print(commands)
    if not commands:
        return 'You should provide a command containing an action, a house element and a location', 400

    # Return the commands as JSON along with an HTTP status code
    return make_response(jsonify(commands), 200)
Ejemplo n.º 2
0
import paddle
import paddle.fluid as fluid

from eval import do_eval
from train import do_train
from predict import do_predict
from inference_model import do_save_inference_model

from dgu.utils.configure import PDConfig

if __name__ == "__main__":

    # Load the run configuration from the YAML file and echo it.
    args = PDConfig(yaml_file="./data/config/dgu.yaml")
    args.build()
    args.Print()

    # Each stage runs only when the configuration flag of the same name is
    # truthy. Stages execute in this fixed order, and each flag is read just
    # before its stage would run.
    stages = (
        ("do_train", do_train),
        ("do_predict", do_predict),
        ("do_eval", do_eval),
        ("do_save_inference_model", do_save_inference_model),
    )
    for flag, run_stage in stages:
        if getattr(args, flag):
            run_stage(args)

# vim: set ts=4 sw=4 sts=4 tw=100:
Ejemplo n.º 3
0
def do_train():
    """Fine-tune an ERNIE sequence classifier on a FewCLUE task.

    All settings are read from the module-level ``args`` object (device,
    seed, task name, batch size, learning-rate schedule, output locations).

    For every epoch the function trains over the ``train_0`` split, evaluates
    on ``test_public``, writes predictions for ``test`` into
    ``args.output_dir`` and, on rank 0, saves a checkpoint. Rank 0 also saves
    a checkpoint every ``args.save_steps`` global steps.
    """
    paddle.set_device(args.device)
    rank = paddle.distributed.get_rank()
    if paddle.distributed.get_world_size() > 1:
        paddle.distributed.init_parallel_env()

    set_seed(args.seed)

    train_ds, public_test_ds, test_ds = load_dataset("fewclue",
                                                     name=args.task_name,
                                                     splits=("train_0",
                                                             "test_public",
                                                             "test"))

    model = ppnlp.transformers.ErnieForSequenceClassification.from_pretrained(
        'ernie-1.0', num_classes=2)
    tokenizer = ppnlp.transformers.ErnieTokenizer.from_pretrained('ernie-1.0')

    def _save_checkpoint(step):
        # Write model weights and tokenizer files to <args.save_dir>/model_<step>.
        save_dir = os.path.join(args.save_dir, "model_%d" % step)
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(save_dir, exist_ok=True)
        save_param_path = os.path.join(save_dir, 'model_state.pdparams')
        paddle.save(model.state_dict(), save_param_path)
        tokenizer.save_pretrained(save_dir)

    # Task-specific processor that rewrites each split using the task's
    # label descriptions.
    processor = processor_dict[args.task_name](args.negative_num)
    train_ds = processor.get_train_datasets(train_ds,
                                            TASK_LABELS_DESC[args.task_name])

    public_test_ds = processor.get_dev_datasets(
        public_test_ds, TASK_LABELS_DESC[args.task_name])
    test_ds = processor.get_test_datasets(test_ds,
                                          TASK_LABELS_DESC[args.task_name])

    # [src_ids, token_type_ids, labels]
    batchify_fn = lambda samples, fn=Tuple(
        Pad(axis=0, pad_val=tokenizer.pad_token_id),  # src_ids
        Pad(axis=0, pad_val=tokenizer.pad_token_type_id),  # token_type_ids
        Stack(dtype="int64"),  # labels
    ): [data for data in fn(samples)]

    # [src_ids, token_type_ids]
    predict_batchify_fn = lambda samples, fn=Tuple(
        Pad(axis=0, pad_val=tokenizer.pad_token_id),  # src_ids
        Pad(axis=0, pad_val=tokenizer.pad_token_type_id),  # token_type_ids
    ): [data for data in fn(samples)]

    trans_func = partial(convert_example,
                         tokenizer=tokenizer,
                         max_seq_length=args.max_seq_length)

    # The unlabeled test split is converted with is_test=True.
    predict_trans_func = partial(convert_example,
                                 tokenizer=tokenizer,
                                 max_seq_length=args.max_seq_length,
                                 is_test=True)

    train_data_loader = create_dataloader(train_ds,
                                          mode='train',
                                          batch_size=args.batch_size,
                                          batchify_fn=batchify_fn,
                                          trans_fn=trans_func)

    public_test_data_loader = create_dataloader(public_test_ds,
                                                mode='eval',
                                                batch_size=args.batch_size,
                                                batchify_fn=batchify_fn,
                                                trans_fn=trans_func)

    test_data_loader = create_dataloader(test_ds,
                                         mode='eval',
                                         batch_size=args.batch_size,
                                         batchify_fn=predict_batchify_fn,
                                         trans_fn=predict_trans_func)

    # Optionally start from an existing checkpoint file.
    if args.init_from_ckpt and os.path.isfile(args.init_from_ckpt):
        state_dict = paddle.load(args.init_from_ckpt)
        model.set_dict(state_dict)
        print("warmup from:{}".format(args.init_from_ckpt))

    num_training_steps = len(train_data_loader) * args.epochs

    lr_scheduler = LinearDecayWithWarmup(args.learning_rate,
                                         num_training_steps,
                                         args.warmup_proportion)

    # Generate parameter names needed to perform weight decay.
    # All bias and LayerNorm parameters are excluded.
    # Stored as a set because the optimizer callback below tests membership.
    decay_params = {
        p.name for n, p in model.named_parameters()
        if not any(nd in n for nd in ["bias", "norm"])
    }
    optimizer = paddle.optimizer.AdamW(
        learning_rate=lr_scheduler,
        parameters=model.parameters(),
        weight_decay=args.weight_decay,
        apply_decay_param_fun=lambda x: x in decay_params)

    criterion = paddle.nn.loss.CrossEntropyLoss()

    global_step = 0
    tic_train = time.time()
    for epoch in range(1, args.epochs + 1):
        model.train()
        for step, batch in enumerate(train_data_loader, start=1):

            src_ids, token_type_ids, labels = batch

            prediction_scores = model(input_ids=src_ids,
                                      token_type_ids=token_type_ids)

            loss = criterion(prediction_scores, labels)

            global_step += 1
            if global_step % 10 == 0 and rank == 0:
                # Speed is averaged over the last 10 steps.
                print(
                    "global step %d, epoch: %d, batch: %d, loss: %.5f, speed: %.2f step/s"
                    % (global_step, epoch, step, loss, 10 /
                       (time.time() - tic_train)))
                tic_train = time.time()

            # NOTE: this periodic checkpoint is written before the current
            # step's backward pass and optimizer update, so it holds the
            # weights as they were at the start of this step.
            if global_step % args.save_steps == 0 and rank == 0:
                _save_checkpoint(global_step)

            loss.backward()
            optimizer.step()
            lr_scheduler.step()
            optimizer.clear_grad()

        # End of epoch: evaluate on the public test split.
        test_public_accuracy, total_num = do_evaluate(
            model,
            tokenizer,
            public_test_data_loader,
            task_label_description=TASK_LABELS_DESC[args.task_name])

        print("epoch:{}, dev_accuracy:{:.3f}, total_num:{}".format(
            epoch, test_public_accuracy, total_num))

        y_pred_labels = do_predict(
            model,
            tokenizer,
            test_data_loader,
            task_label_description=TASK_LABELS_DESC[args.task_name])

        # This runs on every rank, so the directory may be created
        # concurrently by another process; exist_ok makes that harmless.
        os.makedirs(args.output_dir, exist_ok=True)

        # One prediction file per epoch, prefixed with the epoch number.
        output_file = os.path.join(args.output_dir,
                                   str(epoch) + predict_file[args.task_name])

        write_fn[args.task_name](args.task_name, output_file, y_pred_labels)

        if rank == 0:
            _save_checkpoint(global_step)
Ejemplo n.º 4
0
import sys
import numpy as np
import paddle
import paddle.fluid as fluid

from arg_config import ArgConfig, print_arguments

from train import do_train
from predict import do_predict
from eval import do_eval
from inference_model import do_save_inference_model

if __name__ == "__main__":

    # Build the run configuration and echo it before running any stage.
    args = ArgConfig().build_conf()
    print_arguments(args)

    if args.do_train:
        do_train(args)

    # Evaluation consumes the predictions, so it only runs when prediction
    # is enabled as well.
    if args.do_predict:
        predictions = do_predict(args)
        if args.do_eval:
            accuracy = do_eval(args, predictions)
            percent = accuracy * 100
            print("evaluation accuaracy %.3f percent" % percent)

    if args.do_save_inference_model:
        do_save_inference_model(args)
from predict import do_predict
import time
from sys import stdin

# Interactive entry point: asks whether to train (and over which dates) or to
# load existing weights, then runs the prediction.
print("<<<< DuoCloud Crypto Currency Market Predict >>>>")
print("* Use LSTM Neural Network to Predict BTC and ETH Tomorrow Price *")
print("Hello, and Welcome Use!")
print('First, did you want train model or load weights?(y or anything)')
# readline() keeps the trailing newline, so the raw line never equals 'y';
# strip it (and ignore case) before comparing.
need_train = stdin.readline().strip().lower() == 'y'
need_save = True

if need_train:
    # An empty answer is read as '\n', which is truthy; strip it first so the
    # defaults below actually apply.
    # NOTE(review): the prompt advertises yyyy-MM-dd while the defaults use
    # yyyymmdd, and neither date is passed to do_predict below — confirm how
    # they are meant to be used.
    print("Training Begin date (yyyy-MM-dd):")
    begin_date = stdin.readline().strip() or '20150101'

    print("Training End date (default now):")
    end_date = stdin.readline().strip() or time.strftime("%Y%m%d")

    print("Did you want save parameters and weight?")
    need_save = stdin.readline().strip().lower() == 'y'

print("===== Preparing to Predict =====")

do_predict(need_train, need_save)