Beispiel #1
0
def check_version():
    """
     Log error and exit when the installed version of paddlepaddle is
     not satisfied.
     """
    err = "PaddlePaddle version 1.6 or higher is required, " \
          "or a suitable develop version is satisfied as well. \n" \
          "Please make sure the version is good with your code." \

    try:
        fluid.require_version('1.6.0')
    except Exception as e:
        logger.error(err)
        sys.exit(1)
Beispiel #2
0
    def check_version(self):
        """
        Log error and exit when the installed version of paddlepaddle is
        not satisfied.
        """
        err = "TDM-GPU need Paddle version 1.8 or higher is required, " \
            "or a suitable develop version is satisfied as well. \n" \
            "Please make sure the version is good with your code." \

        try:
            fluid.require_version('1.8.0')
            return True
        except Exception as e:
            print(err)
            return False
Beispiel #3
0
def best_strategy_compiled(args,
                           program,
                           loss,
                           exe,
                           mode="train",
                           share_prog=None):
    """make a program which wrapped by a compiled program
    """

    if os.getenv('FLAGS_use_ngraph'):
        return program
    else:
        build_strategy = fluid.compiler.BuildStrategy()
        try:
            fluid.require_version(min_version='1.7.0')
            build_strategy.fuse_bn_act_ops = args.fuse_bn_act_ops
        except Exception as e:
            logger.info(
                "PaddlePaddle version 1.7.0 or higher is "
                "required when you want to fuse batch_norm and activation_op.")
        build_strategy.fuse_elewise_add_act_ops = args.fuse_elewise_add_act_ops

        exec_strategy = fluid.ExecutionStrategy()

        if args.use_gpu:
            exec_strategy.num_threads = fluid.core.get_cuda_device_count()

        exec_strategy.num_iteration_per_drop_scope = 10

        num_trainers = int(os.environ.get('PADDLE_TRAINERS_NUM', 1))
        if num_trainers > 1 and args.use_gpu:
            dist_utils.prepare_for_multi_process(exe, build_strategy, program)
            # NOTE: the process is fast when num_threads is 1
            # for multi-process training.
            exec_strategy.num_threads = 1

        compiled_program = fluid.CompiledProgram(program).with_data_parallel(
            loss_name=loss.name if mode == "train" else None,
            share_vars_from=share_prog if mode == "val" else None,
            build_strategy=build_strategy,
            exec_strategy=exec_strategy)

        return compiled_program
Beispiel #4
0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import division
import os
import argparse
import ruamel.yaml
import numpy as np
import soundfile as sf

from paddle import fluid
fluid.require_version('1.8.0')
import paddle.fluid.layers as F
import paddle.fluid.dygraph as dg
from tensorboardX import SummaryWriter

from parakeet.g2p import en
from parakeet.modules.weight_norm import WeightNormWrapper
from parakeet.utils.layer_tools import summary
from parakeet.utils import io

from model import make_model
from utils import make_evaluator

if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Synthsize waveform with a checkpoint.")
Beispiel #5
0
def main():
    args = parse_args()

    # check if set use_gpu=True in paddlepaddle cpu version
    check_cuda(args.use_gpu)
    # check if paddlepaddle version is satisfied
    check_version()

    logger = logging.getLogger("lm")
    logger.setLevel(logging.INFO)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    if args.log_path:
        file_handler = logging.FileHandler(args.log_path)
        file_handler.setLevel(logging.INFO)
        file_handler.setFormatter(formatter)
        logger.addHandler(file_handler)
    else:
        console_handler = logging.StreamHandler()
        console_handler.setLevel(logging.INFO)
        console_handler.setFormatter(formatter)
        logger.addHandler(console_handler)
    logger.info('Running with args : {}'.format(args))

    config = RNNConfig(args)

    if not os.path.exists(args.save_model_dir):
        mkpath(args.save_model_dir)

    # define train program
    main_program = fluid.Program()
    startup_program = fluid.Program()
    if args.enable_ce:
        startup_program.random_seed = SEED
    with fluid.program_guard(main_program, startup_program):
        with fluid.unique_name.guard():
            res_vars = lm_model.lm_model(config.hidden_size,
                                         config.vocab_size,
                                         num_layers=config.num_layers,
                                         num_steps=config.num_steps,
                                         init_scale=config.init_scale,
                                         dropout=config.dropout,
                                         rnn_model=config.rnn_model,
                                         use_dataloader=args.use_dataloader)

            if args.use_dataloader:
                dataloader = res_vars[-1]
                res_vars = res_vars[:-1]
            loss, last_hidden, last_cell, feed_order = res_vars

            fluid.clip.set_gradient_clip(
                clip=fluid.clip.GradientClipByGlobalNorm(
                    clip_norm=config.max_grad_norm))

            learning_rate = fluid.layers.create_global_var(
                name="learning_rate",
                shape=[1],
                value=1.0,
                dtype='float32',
                persistable=True)

            optimizer = fluid.optimizer.SGD(learning_rate=learning_rate)
            optimizer.minimize(loss)

    # define inference program
    inference_program = fluid.Program()
    inference_startup_program = fluid.Program()
    with fluid.program_guard(inference_program, inference_startup_program):
        with fluid.unique_name.guard():
            lm_model.lm_model(config.hidden_size,
                              config.vocab_size,
                              num_layers=config.num_layers,
                              num_steps=config.num_steps,
                              init_scale=config.init_scale,
                              dropout=config.dropout,
                              rnn_model=config.rnn_model,
                              use_dataloader=False)
    # Some op behaves differently for train and inference, we need to call
    # this clone function to ensure every op is right for inference.
    inference_program = inference_program.clone(for_test=True)

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = Executor(place)
    exe.run(startup_program)

    if args.init_from_pretrain_model:
        if not os.path.exists(args.init_from_pretrain_model + '.pdparams'):
            print(args.init_from_pretrain_model)
            raise Warning("The pretrained params do not exist.")
            return
        fluid.load(main_program, args.init_from_pretrain_model)
        print("finish initing model from pretrained params from %s" %
              (args.init_from_pretrain_model))

    device_count = len(fluid.cuda_places()) if args.use_gpu else len(
        fluid.cpu_places())

    exec_strategy = fluid.ExecutionStrategy()
    exec_strategy.num_threads = device_count
    exec_strategy.num_iteration_per_drop_scope = 100

    build_strategy = fluid.BuildStrategy()
    build_strategy.fuse_all_optimizer_ops = True
    try:
        fluid.require_version(min_version='1.7.0')
        build_strategy.enable_auto_fusion = args.enable_auto_fusion
    except Exception as e:
        logger.info("PaddlePaddle version 1.7.0 or higher is "
                    "required when you want to enable fusion_group.")

    if args.parallel:
        train_program = fluid.compiler.CompiledProgram(
            main_program).with_data_parallel(loss_name=loss.name,
                                             build_strategy=build_strategy,
                                             exec_strategy=exec_strategy)
    else:
        train_program = fluid.compiler.CompiledProgram(main_program)

    data_path = args.data_path
    print("begin to load data")
    ptb_data = reader.get_ptb_data(data_path)
    print("finished load data")
    train_data, valid_data, test_data = ptb_data

    def generate_init_data():
        batch_size = config.batch_size * device_count
        init_hidden = np.zeros(
            (batch_size, config.num_layers, config.hidden_size),
            dtype='float32')
        init_cell = np.zeros(
            (batch_size, config.num_layers, config.hidden_size),
            dtype='float32')
        return init_hidden, init_cell

    def generate_new_lr(epoch_id=0, device_count=1):
        new_lr = config.base_learning_rate * (config.lr_decay**max(
            epoch_id + 1 - config.epoch_start_decay, 0.0))
        lr = np.ones((device_count), dtype='float32') * new_lr
        return lr

    def prepare_input(batch,
                      init_hidden=None,
                      init_cell=None,
                      epoch_id=0,
                      with_lr=True,
                      device_count=1):
        x, y = batch
        x = x.reshape((-1, config.num_steps, 1))
        y = y.reshape((-1, 1))

        res = {}
        res['x'] = x
        res['y'] = y
        if init_hidden is not None:
            res['init_hidden'] = init_hidden
        if init_cell is not None:
            res['init_cell'] = init_cell
        if with_lr:
            res['learning_rate'] = generate_new_lr(epoch_id, device_count)

        return res

    def eval(data):
        # when eval the batch_size set to 1
        eval_data_iter = reader.get_data_iter(data,
                                              config.batch_size * device_count,
                                              config.num_steps)
        total_loss = 0.0
        iters = 0
        init_hidden, init_cell = generate_init_data()
        for batch_id, batch in enumerate(eval_data_iter):
            input_data_feed = prepare_input(batch,
                                            init_hidden,
                                            init_cell,
                                            epoch_id=0,
                                            with_lr=False)
            fetch_outs = exe.run(
                program=inference_program,
                feed=input_data_feed,
                fetch_list=[loss.name, last_hidden.name, last_cell.name],
                use_program_cache=False)

            cost_eval = np.array(fetch_outs[0])
            init_hidden = np.array(fetch_outs[1])
            init_cell = np.array(fetch_outs[2])

            total_loss += cost_eval
            iters += config.num_steps

        ppl = np.exp(total_loss / iters)
        return ppl

    def get_log_interval(data_len):
        num_batchs = data_len // config.batch_size
        epoch_size = (num_batchs - 1) // config.num_steps
        log_interval = max(1, epoch_size // 10)
        return log_interval

    def train_an_epoch(epoch_id, batch_times):
        # get train epoch size
        log_interval = get_log_interval(len(train_data))
        train_data_iter = reader.get_data_iter(
            train_data, config.batch_size * device_count, config.num_steps)

        total_loss = 0
        iters = 0

        init_hidden, init_cell = generate_init_data()
        for batch_id, batch in enumerate(train_data_iter):
            input_data_feed = prepare_input(batch,
                                            init_hidden=init_hidden,
                                            init_cell=init_cell,
                                            epoch_id=epoch_id,
                                            with_lr=True,
                                            device_count=device_count)
            batch_start_time = time.time()
            fetch_outs = exe.run(train_program,
                                 feed=input_data_feed,
                                 fetch_list=[
                                     loss.name, "learning_rate",
                                     last_hidden.name, last_cell.name
                                 ],
                                 use_program_cache=True)
            batch_time = time.time() - batch_start_time
            batch_times.append(batch_time)

            cost_train = np.array(fetch_outs[0])
            lr = np.array(fetch_outs[1])
            init_hidden = np.array(fetch_outs[2])
            init_cell = np.array(fetch_outs[3])
            total_loss += cost_train
            iters += config.num_steps
            if batch_id > 0 and batch_id % log_interval == 0:
                ppl = np.exp(total_loss / iters)
                print(
                    "-- Epoch:[%d]; Batch:[%d]; Time: %.5f s; ppl: %.5f, lr: %.5f"
                    % (epoch_id, batch_id, batch_time, ppl[0], lr[0]))

            # profiler tools for benchmark
            if args.profile and batch_id == log_interval:
                profiler.reset_profiler()
            elif args.profile and batch_id == (log_interval + 5):
                break
        ppl = np.exp(total_loss / iters)
        return ppl

    def train_an_epoch_dataloader(epoch_id, batch_times):
        # get train epoch size
        log_interval = get_log_interval(len(train_data))

        init_hidden, init_cell = generate_init_data()

        total_loss = 0
        iters = 0

        dataloader.start()
        batch_id = 0
        try:
            while True:
                data_feeds = {}
                if batch_id == 0:
                    batch_time = 0
                    batch_start_time = time.time()
                else:
                    batch_time = time.time() - batch_start_time
                    batch_times.append(batch_time)
                    batch_start_time = time.time()

                new_lr = generate_new_lr(epoch_id, device_count)
                data_feeds['learning_rate'] = new_lr
                data_feeds["init_hidden"] = init_hidden
                data_feeds["init_cell"] = init_cell

                fetch_outs = exe.run(train_program,
                                     feed=data_feeds,
                                     fetch_list=[
                                         loss.name, "learning_rate",
                                         last_hidden.name, last_cell.name
                                     ],
                                     use_program_cache=True)

                cost_train = np.array(fetch_outs[0])
                lr = np.array(fetch_outs[1])
                init_hidden = np.array(fetch_outs[2])
                init_cell = np.array(fetch_outs[3])

                total_loss += cost_train
                iters += config.num_steps
                if batch_id > 0 and (log_interval == 0
                                     or batch_id % log_interval == 0):
                    ppl = np.exp(total_loss / iters)
                    print(
                        "-- Epoch:[%d]; Batch:[%d]; Time: %.5f s; ppl: %.5f, lr: %.5f"
                        % (epoch_id, batch_id, batch_time, ppl[0], lr[0]))

                batch_id += 1
                # profiler tools for benchmark
                if args.profile and batch_id == log_interval:
                    profiler.reset_profiler()
                elif args.profile and batch_id == (log_interval + 5):
                    break
        except fluid.core.EOFException:
            dataloader.reset()

        batch_times.append(time.time() - batch_start_time)
        ppl = np.exp(total_loss / iters)
        return ppl

    def train():
        if args.use_dataloader:

            def data_gen():
                data_iter_size = config.batch_size
                train_batches = reader.get_data_iter(train_data,
                                                     data_iter_size,
                                                     config.num_steps)
                for batch in train_batches:
                    x, y = batch
                    x = x.reshape((-1, config.num_steps, 1))
                    y = y.reshape((-1, 1))
                    yield x, y

            dataloader.set_batch_generator(data_gen)

        total_time = 0.0
        for epoch_id in range(config.max_epoch):
            batch_times = []
            epoch_start_time = time.time()
            if args.use_dataloader:
                train_ppl = train_an_epoch_dataloader(epoch_id, batch_times)
            else:
                train_ppl = train_an_epoch(epoch_id, batch_times)
            epoch_time = time.time() - epoch_start_time
            total_time += epoch_time
            print(
                "\nTrain epoch:[%d]; epoch Time: %.5f; ppl: %.5f; avg_time: %.5f steps/s \n"
                % (epoch_id, epoch_time, train_ppl[0],
                   len(batch_times) / sum(batch_times)))

            # FIXME(zjl): ppl[0] increases as batch_size increases.
            # We should find a better way to calculate ppl by normalizing batch_size.
            if device_count == 1 and config.batch_size <= 20 and epoch_id == 0 and train_ppl[
                    0] > 1000:
                # for bad init, after first epoch, the loss is over 1000
                # no more need to continue
                print(
                    "Parameters are randomly initialized and not good this time because the loss is over 1000 after the first epoch."
                )
                print("Abort this training process and please start again.")
                return

            if epoch_id == config.max_epoch - 1 and args.enable_ce:
                # kpis
                print("ptblm\tlstm_language_model_%s_duration_card%d\t%s" %
                      (args.rnn_model, device_count,
                       total_time / config.max_epoch))
                print("ptblm\tlstm_language_model_%s_loss_card%d\t%s" %
                      (args.rnn_model, device_count, train_ppl[0]))

            if not args.profile:
                # NOTE(zjl): sometimes we have not enough data for eval if batch_size is large, i.e., 2100
                # Just skip to avoid error
                def is_valid_data(data, batch_size, num_steps):
                    data_len = len(data)
                    batch_len = data_len // batch_size
                    epoch_size = (batch_len - 1) // num_steps
                    return epoch_size >= 1

                valid_data_valid = is_valid_data(valid_data, config.batch_size,
                                                 config.num_steps)
                if valid_data_valid:
                    valid_ppl = eval(valid_data)
                    print("Valid ppl: %.5f" % valid_ppl[0])
                else:
                    print(
                        'WARNING: length of valid_data is {}, which is not enough for batch_size {} and num_steps {}'
                        .format(len(valid_data), config.batch_size,
                                config.num_steps))

                save_model_dir = os.path.join(args.save_model_dir,
                                              str(epoch_id))
                if not os.path.exists(save_model_dir):
                    mkpath(save_model_dir)
                save_model_dir = os.path.join(save_model_dir, 'params')

                fluid.save(main_program, save_model_dir)
                print("Saved model to: %s.\n" % save_model_dir)

    with profile_context(args.profile, args.profiler_path):
        train()

    test_ppl = eval(test_data)
    print("Test ppl:", test_ppl[0])
Beispiel #6
0
 def test_input_value_2_1():
     fluid.require_version('1.5', '1.5.2.0.0')
Beispiel #7
0
 def test_input_value_1_1():
     fluid.require_version('1.5', 'string')
Beispiel #8
0
 def test_input_type_1():
     fluid.require_version('0', 200)
Beispiel #9
0
 def test_input_type():
     fluid.require_version(100)
Beispiel #10
0
    def test_check_output(self):
        warnings.warn(
            "paddle.__version__: %s, fluid_version.full_version: %s, fluid_version.major: %s, fluid_version.minor: %s, fluid_version.patch: %s, fluid_version.rc: %s."
            % (paddle.__version__, fluid_version.full_version,
               fluid_version.major, fluid_version.minor, fluid_version.patch,
               fluid_version.rc))
        ori_full_version = fluid_version.full_version
        ori_sep_version = [
            fluid_version.major, fluid_version.minor, fluid_version.patch,
            fluid_version.rc
        ]
        [
            fluid_version.major, fluid_version.minor, fluid_version.patch,
            fluid_version.rc
        ] = ['1', '4', '1', '0']

        fluid.require_version('1')
        fluid.require_version('1.4')
        fluid.require_version('1.4.1.0')

        # any version >= 1.4.1 is acceptable.
        fluid.require_version('1.4.1')

        # if 1.4.1 <= version <= 1.6.0, it is acceptable.
        fluid.require_version(min_version='1.4.1', max_version='1.6.0')

        # only version 1.4.1 is acceptable.
        fluid.require_version(min_version='1.4.1', max_version='1.4.1')

        # if installed version is 0.0.0.0, throw warning and skip the checking.
        [
            fluid_version.major, fluid_version.minor, fluid_version.patch,
            fluid_version.rc
        ] = ['0', '0', '0', '0']
        fluid.require_version('0.0.0')

        fluid_version.full_version = ori_full_version
        [
            fluid_version.major, fluid_version.minor, fluid_version.patch,
            fluid_version.rc
        ] = ori_sep_version
Beispiel #11
0
 def test_version_2():
     fluid.require_version('1.4.0', '1.2')
Beispiel #12
0
 def test_version_1():
     fluid.require_version('0.0.0', '1.4')
Beispiel #13
0
 def test_version():
     fluid.require_version('100')
Beispiel #14
0
 def test_input_value_3():
     fluid.require_version('1.5.2a.0')
Beispiel #15
0
#
# Licensed under the Apache License, Version 2.0 (the "License"
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging

import paddle.fluid as fluid
from ..common import get_logger

_logger = get_logger(__name__, level=logging.INFO)

try:
    fluid.require_version('2.0.0')
    from .quanter import quant_aware, convert, quant_post_static, quant_post_dynamic
    from .quanter import quant_post, quant_post_only_weight
except Exception as e:
    _logger.warning(
        "If you want to use training-aware and post-training quantization, "
        "please use Paddle >= 2.0.0 or develop version")

from .quant_embedding import quant_embedding