Esempio n. 1
0
def main():
    """
    Launches translation (inference) for every available checkpoint epoch.

    Starting at epoch 0, the loop runs for as long as the stage-0 checkpoint
    file ``checkpoint.0.pth.tar.epoch.<epoch>`` exists under
    ``args.checkpoint_path``. For each epoch the per-stage checkpoints are
    loaded on the CPU, their per-module state dicts are stitched into one
    state dict for a single GNMT model, and that model is evaluated with the
    Translator. One tab-separated "epoch, BLEU" line is printed per epoch
    (the printed epoch counter starts at 1).

    Inference is executed on a single GPU, implementation supports beam search
    with length normalization and coverage penalty.

    Raises:
        RuntimeError: if fp16 math is requested without cuda, or if none of
            an epoch's stage checkpoints contributed any module state.
        ValueError: if ``args.math`` is neither 'fp32' nor 'fp16'.
    """
    args = parse_args()
    # this script always runs with batch_first disabled
    args.batch_first = False

    if args.cuda:
        torch.cuda.set_device(0)
    elif torch.cuda.is_available():
        warnings.warn('cuda is available but not enabled')
    if args.math == 'fp16' and not args.cuda:
        raise RuntimeError('fp16 requires cuda')
    if not args.cudnn:
        torch.backends.cudnn.enabled = False

    # resolve the tensor type once; fail loudly instead of hitting an
    # unbound local later on
    if args.math == 'fp32':
        dtype = torch.FloatTensor
    elif args.math == 'fp16':
        dtype = torch.HalfTensor
    else:
        raise ValueError(f'unsupported math mode: {args.math!r}')

    num_stages = args.num_stages
    # compute BLEU score for every epoch
    print("Epoch\tBLEU score")
    epoch = 0
    while True:
        # no more epochs to run, since desired file not available
        first_stage_checkpoint = os.path.join(
            args.checkpoint_path, f"checkpoint.0.pth.tar.epoch.{epoch}")
        if not os.path.isfile(first_stage_checkpoint):
            break

        # sequence of (module, _, _) entries describing the partitioned model
        stage_model = importlib.import_module(args.module).model(None)
        num_modules = len(stage_model)

        key_to_module_mapping = OrderedDict()
        all_stages_state_dict = OrderedDict()
        # reset per epoch so a tokenizer from a previous epoch is never reused
        tokenizer = None
        module_id = 0
        for stage_id in range(num_stages):
            # load the checkpoint associated with a stage
            full_checkpoint_path = os.path.join(
                args.checkpoint_path,
                f"checkpoint.{stage_id}.pth.tar.epoch.{epoch}")
            checkpoint = torch.load(full_checkpoint_path,
                                    map_location=torch.device('cpu'))

            # iterate through all modules in stage_id's checkpoint
            local_module_id = 0

            # quit when checkpoints for all modules in full model are loaded
            while module_id < num_modules:
                # load checkpoint corresponding to different modules in our
                # runtime
                stage_state_dicts = checkpoint["state_dict"]
                state_dict_key = "module%d" % local_module_id
                if state_dict_key not in stage_state_dicts:
                    break
                state_dict = stage_state_dicts[state_dict_key]

                # remove mask buffer (collect keys first: the dict must not
                # change size while it is being iterated)
                for key in [k for k in state_dict if "mask" in k]:
                    del state_dict[key]

                if checkpoint_from_distributed(state_dict):
                    state_dict = unwrap_distributed(state_dict)

                stage_module, _, _ = stage_model[module_id]
                for key, value in state_dict.items():
                    full_key = (stage_id, local_module_id, key)
                    # collect all state_dicts in a single OrderedDict
                    all_stages_state_dict[full_key] = value
                    # key_to_module_mapping maps key (in state_dict) to the
                    # torch.nn.Module wrapping the parameter and the name
                    # of parameter (weight, bias, etc.)
                    key_to_module_mapping[full_key] = \
                        get_submodule_and_parameter_name(stage_module, key)

                local_module_id += 1
                module_id += 1

            # load tokenizer state once per contributing stage checkpoint
            # (the last contributing stage wins, as before)
            if local_module_id > 0:
                tokenizer = Tokenizer()
                tokenizer.set_state(checkpoint['tokenizer'])

        if tokenizer is None:
            raise RuntimeError(
                f'no module state found in checkpoints for epoch {epoch}')
        vocab_size = tokenizer.vocab_size

        # build model, and load state dict
        model_config = {
            'vocab_size': vocab_size,
            'batch_first': args.batch_first,
            'hidden_size': 1024,
            'num_layers': args.num_layers,
            'dropout': 0.2,
            'share_embedding': False
        }
        model = GNMT(**model_config)
        model_state_dict = OrderedDict()
        for real_key in model.state_dict():
            submodule, parameter_name = get_submodule_and_parameter_name(
                model, real_key)
            submodule_repr = str(submodule)
            # find key in all_stages_state_dict that corresponds to real_key
            # in model's state_dict: same parameter name and same textual
            # representation of the owning module; first match wins
            for key, (candidate, candidate_name) in \
                    key_to_module_mapping.items():
                if (parameter_name == candidate_name
                        and submodule_repr == str(candidate)):
                    break
            else:
                # nothing matched (or nothing left to match); leave the key
                # out and let load_state_dict report it
                continue
            # consume the matched entry so it cannot be matched twice
            model_state_dict[real_key] = all_stages_state_dict.pop(key)
            del key_to_module_mapping[key]

        # load state_dict into model, and perform inference
        model.load_state_dict(model_state_dict)

        model.type(dtype)
        # only move to the GPU when cuda was requested, matching the
        # cuda flag handed to the Translator below
        if args.cuda:
            model = model.cuda()
        model.eval()

        # construct the dataset
        test_data = TextDataset(src_fname=args.input,
                                tokenizer=tokenizer,
                                sort=False)

        # build the data loader
        test_loader = test_data.get_loader(world_size=1,
                                           rank=0,
                                           batch_size=args.batch_size,
                                           batch_first=args.batch_first,
                                           shuffle=False,
                                           pad=True,
                                           num_workers=0)

        # build the translator object
        translator = Translator(model=model,
                                tokenizer=tokenizer,
                                loader=test_loader,
                                beam_size=args.beam_size,
                                max_seq_len=args.max_seq_len,
                                len_norm_factor=args.len_norm_factor,
                                len_norm_const=args.len_norm_const,
                                cov_penalty_factor=args.cov_penalty_factor,
                                cuda=args.cuda,
                                print_freq=args.print_freq,
                                dataset_dir=args.dataset_dir)

        # execute the inference
        test_bleu, _ = translator.run(calc_bleu=args.bleu,
                                      eval_path=args.output,
                                      reference_path=args.reference,
                                      summary=True)

        # advance before printing: the reported epoch counter is 1-based
        epoch += 1
        print(f'{epoch}\t{test_bleu:.2f}')
Esempio n. 2
0
def main():
    """
    Launches translation (inference) over a sweep of run configurations.

    A single checkpoint (``args.model``) is loaded once; the model is then
    evaluated for every combination of the values in ``args.math``,
    ``args.batch_size`` and ``args.beam_size``. The implementation supports
    beam search with length normalization and coverage penalty.
    """
    args = parse_args()
    utils.set_device(args.cuda, args.local_rank)
    utils.init_distributed(args.cuda)
    setup_logging()

    if args.env:
        utils.log_env_info()

    logging.info(f'Run arguments: {args}')

    if not args.cuda:
        if torch.cuda.is_available():
            warnings.warn('cuda is available but not enabled')
    if not args.cudnn:
        torch.backends.cudnn.enabled = False

    # deserialize the checkpoint onto the CPU (to save GPU memory)
    cpu_remap = {'cuda:0': 'cpu'}
    checkpoint = torch.load(args.model, map_location=cpu_remap)

    # restore the tokenizer, then build the GNMT model from the stored config
    tokenizer = Tokenizer()
    tokenizer.set_state(checkpoint['tokenizer'])
    model_config = checkpoint['model_config']
    model_config['batch_first'] = args.batch_first
    model = GNMT(vocab_size=tokenizer.vocab_size, **model_config)
    model.load_state_dict(checkpoint['state_dict'])

    sweep = product(args.math, args.batch_size, args.beam_size)
    for precision, batch_size, beam_size in sweep:
        logging.info(f'math: {precision}, batch size: {batch_size}, '
                     f'beam size: {beam_size}')

        # pick the tensor type for this run's arithmetic precision
        if precision == 'fp32':
            dtype = torch.FloatTensor
        elif precision == 'fp16':
            dtype = torch.HalfTensor
        model.type(dtype)

        if args.cuda:
            model = model.cuda()
        model.eval()

        # dataset over the input file
        test_data = TextDataset(src_fname=args.input,
                                tokenizer=tokenizer,
                                sort=args.sort)

        # data loader for the current batch size
        loader_options = {
            'batch_size': batch_size,
            'batch_first': args.batch_first,
            'shuffle': False,
            'pad': True,
            'num_workers': 0,
        }
        test_loader = test_data.get_loader(**loader_options)

        # translator for the current beam size
        translator_options = {
            'model': model,
            'tokenizer': tokenizer,
            'loader': test_loader,
            'beam_size': beam_size,
            'max_seq_len': args.max_seq_len,
            'len_norm_factor': args.len_norm_factor,
            'len_norm_const': args.len_norm_const,
            'cov_penalty_factor': args.cov_penalty_factor,
            'cuda': args.cuda,
            'print_freq': args.print_freq,
            'dataset_dir': args.dataset_dir,
        }
        translator = Translator(**translator_options)

        # run the inference for this configuration
        translator.run(calc_bleu=args.bleu,
                       eval_path=args.output,
                       reference_path=args.reference,
                       summary=True)
Esempio n. 3
0
def main():
    """
    Launches translation (inference) from a single checkpoint.

    When ``args.world_size`` is greater than one the distributed process
    group is initialized first. The checkpoint at ``args.model`` is then
    loaded, a GNMT model is rebuilt from its stored configuration, and the
    input file is translated. The implementation supports beam search with
    length normalization and coverage penalty.

    Raises:
        RuntimeError: if fp16 math is requested without cuda.
    """
    args = parse_args()

    # bring up the distributed backend only for multi-process runs
    if args.world_size > 1:
        dist.init_process_group(backend='nccl' if args.cuda else 'gloo',
                                rank=args.rank,
                                init_method=args.dist_url,
                                world_size=args.world_size)
    setup_logging()
    logging.info(f'Run arguments: {args}')

    if args.cuda:
        torch.cuda.set_device(args.rank)
    elif torch.cuda.is_available():
        warnings.warn('cuda is available but not enabled')
    if not args.cuda and args.math == 'fp16':
        raise RuntimeError('fp16 requires cuda')
    if not args.cudnn:
        torch.backends.cudnn.enabled = False

    # deserialize the checkpoint onto the CPU (to save GPU memory)
    cpu_remap = {'cuda:0': 'cpu'}
    checkpoint = torch.load(args.model, map_location=cpu_remap)

    # restore the tokenizer saved with the checkpoint
    tokenizer = Tokenizer()
    tokenizer.set_state(checkpoint['tokenizer'])

    # rebuild the GNMT model from the configuration stored at training time;
    # batch_first is taken from this run's arguments
    saved_config = checkpoint['config']
    model_config = dict(vocab_size=tokenizer.vocab_size,
                        math=saved_config.math,
                        **literal_eval(saved_config.model_config))
    model_config['batch_first'] = args.batch_first
    model = GNMT(**model_config)

    # unwrap the weights if the checkpoint came from a distributed run
    weights = checkpoint['state_dict']
    if checkpoint_from_distributed(weights):
        weights = unwrap_distributed(weights)
    model.load_state_dict(weights)

    # cast to the requested precision, then move to the GPU if enabled
    if args.math == 'fp32':
        dtype = torch.FloatTensor
    elif args.math == 'fp16':
        dtype = torch.HalfTensor
    model.type(dtype)
    if args.cuda:
        model = model.cuda()
    model.eval()

    # dataset over the input file
    test_data = TextDataset(src_fname=args.input,
                            tokenizer=tokenizer,
                            sort=False)

    # data loader
    loader_options = {
        'batch_size': args.batch_size,
        'batch_first': args.batch_first,
        'shuffle': False,
        'pad': True,
        'num_workers': 0,
        'drop_last': False,
    }
    test_loader = test_data.get_loader(**loader_options)

    # translator object
    translator_options = {
        'model': model,
        'tokenizer': tokenizer,
        'loader': test_loader,
        'beam_size': args.beam_size,
        'max_seq_len': args.max_seq_len,
        'len_norm_factor': args.len_norm_factor,
        'len_norm_const': args.len_norm_const,
        'cov_penalty_factor': args.cov_penalty_factor,
        'cuda': args.cuda,
        'print_freq': args.print_freq,
        'dataset_dir': args.dataset_dir,
    }
    translator = Translator(**translator_options)

    # run the inference
    translator.run(calc_bleu=args.bleu,
                   eval_path=args.output,
                   reference_path=args.reference,
                   summary=True)
Esempio n. 4
0
def main():
    """
    Launches translation (inference) from a single checkpoint.

    Logging is configured inline: DEBUG and above go to ``log.log``
    (overwritten on each run), INFO and above are echoed to the console as
    bare messages. The checkpoint at ``args.model`` is then loaded, a GNMT
    model is rebuilt from its stored configuration, and the input file is
    translated. The implementation supports beam search with length
    normalization and coverage penalty.

    Raises:
        RuntimeError: if fp16 math is requested without cuda.
    """
    args = parse_args()

    # file log: everything from DEBUG up, with timestamps
    logging.basicConfig(filename='log.log',
                        filemode='w',
                        level=logging.DEBUG,
                        format="%(asctime)s - %(levelname)s - %(message)s",
                        datefmt="%Y-%m-%d %H:%M:%S")
    # console log: INFO and above, message text only
    stream_handler = logging.StreamHandler()
    stream_handler.setLevel(logging.INFO)
    stream_handler.setFormatter(logging.Formatter('%(message)s'))
    logging.getLogger('').addHandler(stream_handler)

    logging.info(args)

    if args.cuda:
        torch.cuda.set_device(0)
    elif torch.cuda.is_available():
        warnings.warn('cuda is available but not enabled')
    if not args.cuda and args.math == 'fp16':
        raise RuntimeError('fp16 requires cuda')
    if not args.cudnn:
        torch.backends.cudnn.enabled = False

    # deserialize the checkpoint onto the CPU (to save GPU memory)
    cpu_remap = {'cuda:0': 'cpu'}
    checkpoint = torch.load(args.model, map_location=cpu_remap)

    # the tokenizer object is stored directly in the checkpoint
    tokenizer = checkpoint['tokenizer']

    # rebuild the GNMT model from the configuration stored at training time;
    # batch_first is taken from this run's arguments
    saved_config = checkpoint['config']
    model_config = dict(vocab_size=tokenizer.vocab_size,
                        math=saved_config.math,
                        **literal_eval(saved_config.model_config))
    model_config['batch_first'] = args.batch_first
    model = GNMT(**model_config)

    # unwrap the weights if the checkpoint came from a distributed run
    weights = checkpoint['state_dict']
    if checkpoint_from_distributed(weights):
        weights = unwrap_distributed(weights)
    model.load_state_dict(weights)

    # cast to the requested precision, then move to the GPU if enabled
    if args.math == 'fp32':
        dtype = torch.FloatTensor
    elif args.math == 'fp16':
        dtype = torch.HalfTensor
    model.type(dtype)
    if args.cuda:
        model = model.cuda()
    model.eval()

    # dataset over the input file
    test_data = TextDataset(src_fname=args.input,
                            tokenizer=tokenizer,
                            sort=False)

    # data loader
    loader_options = {
        'batch_size': args.batch_size,
        'batch_first': args.batch_first,
        'shuffle': False,
        'num_workers': 0,
        'drop_last': False,
    }
    test_loader = test_data.get_loader(**loader_options)

    # translator object
    translator_options = {
        'model': model,
        'tokenizer': tokenizer,
        'loader': test_loader,
        'beam_size': args.beam_size,
        'max_seq_len': args.max_seq_len,
        'len_norm_factor': args.len_norm_factor,
        'len_norm_const': args.len_norm_const,
        'cov_penalty_factor': args.cov_penalty_factor,
        'cuda': args.cuda,
        'print_freq': args.print_freq,
        'dataset_dir': args.dataset_dir,
    }
    translator = Translator(**translator_options)

    # run the inference
    translator.run(calc_bleu=args.bleu,
                   eval_path=args.output,
                   reference_path=args.reference,
                   summary=True)