Example 1
    def _prepare_sample(self, sample, is_dummy=False):
        if sample == "DUMMY":
            raise Exception(
                "Trying to use an uninitialized 'dummy' batch. This usually indicates "
                "that the total number of batches is smaller than the number of "
                "participating GPUs. Try reducing the batch size or using fewer GPUs."
            )

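        # an empty sample is replaced by the cached dummy batch so this rank still runs a
        # forward/backward pass; the second return value flags the batch as a dummy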
        if sample is None or len(sample) == 0:
            assert (self._dummy_batch is not None
                    and len(self._dummy_batch) > 0
                    ), "Invalid dummy batch: {}".format(self._dummy_batch)
            sample, _ = self._prepare_sample(self._dummy_batch, is_dummy=True)
            return sample, True

        if self.args['common'].get('on_cpu_convert_precision', False):
            sample = self._fp_convert_sample(sample)

        if self.cuda:
            sample = move_to_cuda(sample)

        if not self.args['common'].get('on_cpu_convert_precision', False):
            sample = self._fp_convert_sample(sample)

        if self._dummy_batch == "DUMMY":
            self._dummy_batch = sample

        return sample, False
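All of the examples in this section funnel batches through a `move_to_cuda` helper (imported directly or as `utils.move_to_cuda`). As a rough sketch of what such a helper typically does in fairseq/ncc-style code (an assumption, not the library's actual implementation), it recursively walks a nested sample and moves every tensor to the current GPU:

import torch

def move_to_cuda(sample):
    # hypothetical sketch: recursively move tensors inside nested dicts/lists/tuples to the GPU
    if torch.is_tensor(sample):
        return sample.cuda()
    if isinstance(sample, dict):
        return {key: move_to_cuda(value) for key, value in sample.items()}
    if isinstance(sample, (list, tuple)):
        return type(sample)(move_to_cuda(x) for x in sample)
    return sample

Non-tensor leaves (ids, strings, None) are returned unchanged, which is why the snippets below can pass entire batch dictionaries through it.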
Example 2
 def train_step(self, samples, raise_oom=False):
     """Do forward, backward and parameter update."""
     # self._set_seed()
     # seed = self.args['common']['seed'] + self.get_num_updates()
     # torch.manual_seed(seed)
     # if self.cuda:
     #     torch.cuda.manual_seed(seed)
     # self.model.train()
     # self.criterion.train()
     # self.zero_grad()
     # forward and backward pass
     # logging_outputs, sample_size, ooms = [], 0, 0
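     # simplified loop: move each sample to GPU (if available) and run a forward/backward pass
     # plus an optimizer step per sample, with no gradient accumulation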
     for i, sample in enumerate(samples):
         if self.cuda:
             sample = utils.move_to_cuda(sample)
         loss, sample_size_i, logging_output = self.task.train_step(
             sample=sample,
             model=self.model,
             criterion=self.criterion,
             optimizer=self.optimizer,
             update_num=self.get_num_updates(),
             ignore_grad=False,
         )
         loss.backward()
         self.optimizer.step()
         self.optimizer.zero_grad()
     return loss.item()
Example 3
        def get_prev_item(prev_model, task_idx, index):
            src_item = self.prev_tgts[task_idx][index][:-1]
            tgt_item = self.prev_tgts[task_idx][index][1:]
            model.load_state_dict(prev_model)
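            # run the previous task's model on the padded source to extract frozen hidden representations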
            with torch.no_grad():
                tmp_src = torch.cat([src_item, torch.LongTensor([self.pad] * (MAX_SENTENCE_LENGTH - len(src_item)))],
                                    dim=-1)
                tmp_src = move_to_cuda(tmp_src[None, ...])
                hidden_reprs = model.extract_features(tmp_src).squeeze(dim=0)

            extend = 0 if self.extends is None else self.extends[index].item()
            if self.attrs_mapping:
                # do not move attr_masks into cuda
                attr_masks = {attr: [] for attr in self.attrs}
                for idx, attr_idx in enumerate(self.attr_indices[index].tolist()[1:][extend:], start=extend):
                    if attr_idx in self.reversed_attrs_mapping:
                        attr_masks[self.reversed_attrs_mapping[attr_idx]].append(idx)
                for attr in self.attrs:
                    attr_masks[attr] = np.array(attr_masks[attr])
            else:
                attr_masks = None

            example = {
                'id': index,
                'source': src_item,
                'target': tgt_item,

                'hidden_reprs': hidden_reprs,
                'attr_masks': attr_masks,
                'extend': extend,
            }
            return example
Example 4
def gen_outputs(args, task, trainer):
    # because some data might have been filtered by max_source/target_positions
    tmp_cache = [
        [8 * [0] for _ in range(6)],  # topk idx
        [8 * [0] for _ in range(6)],  # topk prob
    ]

    trainer.model.eval()
    itr = task.get_batch_iterator(
        dataset=task.dataset('train'),
        max_tokens=args['dataset']['max_tokens'],
        max_sentences=args['dataset']['max_sentences_valid'],
        # max_sentences=16,
        max_positions=utils.resolve_max_positions(
            task.max_positions(),
            trainer.get_model().max_positions(),
        ),
        ignore_invalid_inputs=args['dataset']
        ['skip_invalid_size_inputs_valid_test'],
        # required_batch_size_multiple=8,
        seed=args['common']['seed'],
        num_shards=args['distributed_training']['distributed_world_size'],
        shard_id=args['distributed_training']['distributed_rank'],
    ).next_epoch_itr(shuffle=False)

    # outputs = [None for _ in range(len(task.dataset('train')))]
    outputs = {}
    for sample in tqdm(itr, mininterval=5):
        with torch.no_grad():
            if sample is None or len(sample) == 0:
                continue
            sample = utils.move_to_cuda(sample)

            bs, srclen = sample['net_input']['src_tokens'].shape
            output = trainer.model(**sample['net_input'])[0].detach()
            non_padding_mask = sample['target'].ne(
                task.target_dictionary.pad()).cpu()
            _, tgtlen = sample['target'].shape
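            # keep the teacher's top-k token ids and values at every target position;
            # padding positions are filtered out below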
            topk_idx, topk_v = output2topk(output, args['kd']['distill_topk'])
            topk_x_shape = (bs, tgtlen, args['kd']['distill_topk'])
            topk_idx, topk_v = topk_idx.view(
                *topk_x_shape).cpu().numpy(), topk_v.view(
                    *topk_x_shape).cpu().numpy()
            non_padding_mask = non_padding_mask.view(
                *topk_x_shape[:2]).cpu().numpy().astype(bool)
            for b in range(bs):
                outputs[sample['id'][b].item()] = \
                    topk_idx[b, non_padding_mask[b]].tolist(), \
                    topk_v[b, non_padding_mask[b]].tolist()

    return [
        outputs.get(idx, tmp_cache)
        for idx in range(len(task.dataset('train')))
    ]
Example 5
    def valid_step(self, sample, raise_oom=False):
        """Do forward pass in evaluation mode."""

        with torch.no_grad():
            self.model.eval()
            self.criterion.eval()

            if self.cuda:
                sample = utils.move_to_cuda(sample)

            # _loss, sample_size, logging_output
            predictions, loss, sample_size, logging_output = self.task.valid_step(
                sample, self.model, self.criterion)
        return predictions, loss, sample_size, logging_output
Example 6
def main(model_path, input):
    args, task, model, use_cuda = load_state(model_path)
    generator = task.build_generator(args)
    # encode input (and feed into gpu)
    input = task.encode_input(input)
    if use_cuda:
        input = utils.move_to_cuda(input)
    # feed input into model
    output = generator.generate(models=[model], sample=input)
    # decode
    # from ipdb import set_trace
    # set_trace()
    output = task.decode_output(output)
    del task, model  # to release memory in cpu/gpu
    return output
Example 7
def summarization_task(args, task, model, use_cuda, input, **kwargs):
    from ncc.tokenizers import tokenization
    generator = task.build_generator([model], args)
    # encode input (and feed into gpu)
    # input = task.encode_input(input, tokenizer=tokenization._space_dpu_sub_tokenizer)
    input = task.encode_input(input,
                              tokenizer=tokenization._space_dpu_sub_tokenizer)
    if use_cuda:
        input = utils.move_to_cuda(input)
    # feed input into model
    output = generator.generate(models=[model], sample=input)
    # decode
    # from ipdb import set_trace
    # set_trace()
    output = task.decode_output(output)
    del task, model  # to release memory in cpu/gpu
    return output
Example 8
def completion_task(args, task, model, use_cuda, input, **kwargs):
    from ncc.tokenizers import tokenization  # assumed local import, mirroring summarization_task above
    generator = task.build_generator([model], args)
    # encode input (and feed into gpu)
    input = task.encode_input(input, tokenizer=tokenization._space_tokenizer)
    if use_cuda:
        input = utils.move_to_cuda(input)
    # feed input into model
    output = generator.generate(models=[model], sample=input)
    # decode
    # from ipdb import set_trace
    # set_trace()
    output = task.decode_output(output)
    del task, model  # to release memory in cpu/gpu
    top_tokens, probabilities = zip(*output)
    return {
        'top_tokens': top_tokens,
        'probabilities': probabilities,
    }
Example 9
def main(model_path, input):
    LOGGER.info('Load model from {}'.format(model_path))
    state = load_checkpoint_to_cpu(model_path, arg_overrides={})
    args = state["args"]
    task = tasks.setup_task(args)  # load src/tgt dicts
    model = task.build_model(args)
    model.load_state_dict(state["model"])
    use_cuda = torch.cuda.is_available() and not args['common']['cpu']
    if use_cuda:
        torch.cuda.empty_cache()
        torch.cuda.set_device(torch.cuda.device_count() - 1)
        model.cuda()
    model.eval()
    if args['common']['fp16'] and use_cuda:
        model.half()

    # TODO: source tensor should be handled in corresponding task scripts. here we only use seq2seq pipeline for instance.

    input_ids = task.target_dictionary.encode_string(input, line_tokenizer=None, add_if_not_exist=False)
    src_input_ids = input_ids.long().unsqueeze(dim=0)

    sample = {
        'net_input': {
            'src_tokens': src_input_ids,
        },
    }
    sample = utils.move_to_cuda(sample) if use_cuda else sample
    generator = task.sequence_completor
    net_output = generator.complete(models=[model], sample=sample)

    # from ipdb import set_trace
    # set_trace()

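    # distribution over the vocabulary for the next token: softmax over the logits at the last position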
    pred_prob = torch.softmax(net_output[0][0, -1, :], dim=-1)
    topk_prob, topk_idx = pred_prob.topk(k=10, dim=-1)
    # remove unk/eos/bos/pad
    topk_info = [(round(prob.item(), 6), idx.item()) for prob, idx in zip(topk_prob, topk_idx)][:5]
    topk_info = [(task.target_dictionary[idx], prob) for prob, idx in topk_info]
    pred_sentence = [
        (input[:-1] + [topk_token], topk_prob)
        for topk_token, topk_prob in topk_info
    ]
    return topk_info, pred_sentence
Example 10
def main(model_path, input):
    state = load_checkpoint_to_cpu(model_path, arg_overrides={})
    args = state["args"]
    task = tasks.setup_task(args)  # load src/tgt dicts
    model = task.build_model(args)
    model.load_state_dict(state["model"])
    use_cuda = torch.cuda.is_available() and not args['common']['cpu']
    if use_cuda:
        torch.cuda.empty_cache()
        torch.cuda.set_device(torch.cuda.device_count() - 1)
        model.cuda()
    if args['common']['fp16'] and use_cuda:
        model.half()
    model.eval()

    # TODO: source tensor should be handled in corresponding task scripts. here we only use seq2seq pipeline for instance.
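    # encode the raw input, truncate to max_source_positions - 1, append EOS, then right-pad with PAD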
    src_input_ids = task.src_dict.encode_line(input,
                                              line_tokenizer=None,
                                              add_if_not_exist=False)
    src_input_ids = torch.cat([
        src_input_ids[:args['task']['max_source_positions'] - 1],
        torch.Tensor([task.src_dict.eos()]).long()
    ])
    padding_size = args['task']['max_source_positions'] - len(src_input_ids)
    if padding_size > 0:
        src_input_ids = torch.cat([
            src_input_ids,
            torch.Tensor([task.src_dict.pad()] * padding_size).long()
        ])
    # add the batch dimension; move_to_cuda below handles device placement
    src_input_ids = src_input_ids.unsqueeze(dim=0)
    sample = {
        'net_input': {
            'src_tokens': src_input_ids,
            'src_lengths':
            torch.LongTensor([s.numel() for s in src_input_ids]),
        },
    }
    sample = utils.move_to_cuda(sample) if use_cuda else sample
    generator = task.build_generator(args)
    pred_sentence_ids = generator.generate(models=[model], sample=sample)
    pred_sentence = task.tgt_dict.string(pred_sentence_ids[0][0]['tokens'])
    return pred_sentence
Example 11
    def _prepare_sample(self, sample):
        if sample == "DUMMY":
            raise Exception(
                "Trying to use an uninitialized 'dummy' batch. This usually indicates "
                "that the total number of batches is smaller than the number of "
                "participating GPUs. Try reducing the batch size or using fewer GPUs."
            )

        if sample is None or len(sample) == 0:
            return None

        if self.cuda:
            sample = utils.move_to_cuda(sample)

        def apply_half(t):
            if t.dtype is torch.float32:
                return t.half()
            return t

        if self.args['common']['fp16']:
            sample = utils.apply_to_sample(apply_half, sample)

        return sample
Example 12
def main(model_path, input):
    LOGGER.info('Load model from {}'.format(model_path))
    state = load_checkpoint_to_cpu(model_path, arg_overrides={})
    args = state["args"]
    args = recursive_contractuser(args, old_cache_name='.ncc')
    args = recursive_expanduser(args)
    task = tasks.setup_task(args)  # load src/tgt dicts
    model = task.build_model(args)
    model.load_state_dict(state["model"])
    use_cuda = torch.cuda.is_available() and not args['common']['cpu']
    if use_cuda:
        torch.cuda.empty_cache()
        torch.cuda.set_device(torch.cuda.device_count() - 1)
        model.cuda()
    model.eval()
    if args['common']['fp16'] and use_cuda:
        model.half()

    sample = task.encode_input(input)
    sample = utils.move_to_cuda(sample) if use_cuda else sample
    generator = task.sequence_completor
    net_output = generator.complete(models=[model], sample=sample)
    out = task.decode_output(net_output)
    return out
Example 13
def main(args, out_file=None):
    use_cuda = torch.cuda.is_available() and not args['common']['cpu']

    # Load dataset splits
    task = tasks.setup_task(args)
    task.load_dataset(args['dataset']['gen_subset'])

    # Set dictionaries
    src_dict = task.source_dictionary
    tgt_dict = task.target_dictionary

    # Load ensemble
    LOGGER.info('loading model(s) from {}'.format(args['eval']['path']))
    models, _ = checkpoint_utils.load_model_ensemble(
        utils.split_paths(args['eval']['path']),
        arg_overrides=eval(args['eval']['model_overrides']),
        task=task,
    )

    # Optimize ensemble for generation
    for model in models:
        model.make_generation_fast_(
            beamable_mm_beam_size=None
            if args['eval']['no_beamable_mm'] else args['eval']['beam'],
            need_attn=args['eval']['print_alignment'],
        )

        if use_cuda:
            device = os.environ.get('CUDA_VISIBLE_DEVICES', '0').split(',')[0]  # get first visible device as default
            torch.cuda.set_device(f'cuda:{device}')
            model = model.cuda()
        if args['common']['fp16'] and use_cuda:
            model.half()

    # Load dataset (possibly sharded)
    itr = task.get_batch_iterator(
        dataset=task.dataset(args['dataset']['gen_subset']),
        max_tokens=args['dataset']['max_tokens'],
        max_sentences=args['eval']['max_sentences'],
        max_positions=utils.resolve_max_positions(
            task.max_positions(),
            *[model.max_positions() for model in models]),
        ignore_invalid_inputs=args['dataset']
        ['skip_invalid_size_inputs_valid_test'],
        required_batch_size_multiple=args['dataset']
        ['required_batch_size_multiple'],
        num_shards=args['dataset']['num_shards'],
        shard_id=args['dataset']['shard_id'],
        num_workers=args['dataset']['num_workers'],
    ).next_epoch_itr(shuffle=False)
    progress = progress_bar.progress_bar(
        itr,
        log_format=args['common']['log_format'],
        log_interval=args['common']['log_interval'],
        default_log_format=('tqdm' if not args['common']['no_progress_bar']
                            else 'none'),
    )

    # Initialize generator
    gen_timer = StopwatchMeter()
    generator = task.build_generator(models, args)

    sources, hypotheses, references = dict(), dict(), dict()

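    # generate hypotheses batch by batch; collect sources, hypotheses and references keyed by sample id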
    for sample in progress:
        torch.cuda.empty_cache()

        sample = move_to_cuda(sample) if use_cuda else sample
        if 'net_input' not in sample:
            continue

        gen_timer.start()
        hypos = task.inference_step(generator,
                                    models,
                                    sample,
                                    bos_token=tgt_dict.bos())
        num_generated_tokens = sum(len(h[0]['tokens'])
                                   for h in hypos)  # TODO: warning
        gen_timer.stop(num_generated_tokens)

        for i, sample_id in enumerate(sample['id'].tolist()):
            has_target = sample['target'] is not None

            # Remove padding
            src_tokens = utils.strip_pad(
                sample['net_input']['src_tokens'][i, :], tgt_dict.pad())
            target_tokens = None
            if has_target:
                target_tokens = utils.strip_pad(sample['target'][i, :],
                                                tgt_dict.pad()).int().cpu()

            hypos_tokens = utils.strip_eos(hypos[i][0]['tokens'],
                                           tgt_dict.eos()).int().cpu()
            # Either retrieve the original sentences or regenerate them from tokens.
            if src_dict is not None:
                src_str = src_dict.string(src_tokens,
                                          args['eval']['remove_bpe'])
            else:
                src_str = ""
            target_str = ""
            if has_target:
                target_str = tgt_dict.string(target_tokens,
                                             args['eval']['remove_bpe'],
                                             escape_unk=True)

            hypo_str = tgt_dict.string(hypos_tokens,
                                       args['eval']['remove_bpe'])

            sources[sample_id] = [src_str]
            hypotheses[sample_id] = [hypo_str]
            references[sample_id] = [target_str]

    bleu, rouge_l, meteor = \
        summarization_metrics.eval_accuracies(hypotheses, references, filename=out_file, mode='test')
    LOGGER.info('BLEU: {:.2f}\t ROUGE-L: {:.2f}\t METEOR: {:.2f}'.format(
        bleu, rouge_l, meteor))
Example 14
def _main(args, output_file):
    if args['dataset']['max_tokens'] is None and args['dataset'][
            'max_sentences'] is None:
        args['dataset']['max_tokens'] = 12000
    LOGGER.info(args)

    use_cuda = torch.cuda.is_available() and not args['common']['cpu']

    # Load dataset splits
    task = tasks.setup_task(args)
    task.load_dataset(args['dataset']['gen_subset'])

    # Set dictionaries
    try:
        src_dict = getattr(task, 'source_dictionary', None)
    except NotImplementedError:
        src_dict = None
    tgt_dict = task.target_dictionary

    # Load ensemble
    LOGGER.info('loading model(s) from {}'.format(args['eval']['path']))
    models, _model_args = checkpoint_utils.load_model_ensemble(
        utils.split_paths(args['eval']['path']),
        arg_overrides=eval(args['eval']['model_overrides']),
        task=task,
    )

    # Optimize ensemble for generation
    for model in models:
        model.make_generation_fast_(
            beamable_mm_beam_size=None
            if args['eval']['no_beamable_mm'] else args['eval']['beam'],
            need_attn=args['eval']['print_alignment'],
        )
        if _model_args['common']['fp16']:
            model.half()
        if use_cuda:
            model.cuda()

    # Load alignment dictionary for unknown word replacement
    # (None if no unknown word replacement, empty if no path to align dictionary)
    align_dict = utils.load_align_dict(args['eval']['replace_unk'])

    # Load dataset (possibly sharded)
    itr = task.get_batch_iterator(
        dataset=task.dataset(args['dataset']['gen_subset']),
        max_tokens=args['dataset']['max_tokens'],
        max_sentences=args['eval']['max_sentences'],
        max_positions=utils.resolve_max_positions(
            task.max_positions(),
            *[model.max_positions() for model in models]),
        ignore_invalid_inputs=_model_args['dataset']
        ['skip_invalid_size_inputs_valid_test'],
        required_batch_size_multiple=_model_args['dataset']
        ['required_batch_size_multiple'],
        num_shards=_model_args['dataset']['num_shards'],
        shard_id=_model_args['dataset']['shard_id'],
        num_workers=_model_args['dataset']['num_workers'],
    ).next_epoch_itr(shuffle=False)
    progress = progress_bar.progress_bar(
        itr,
        log_format=_model_args['common']['log_format'],
        log_interval=_model_args['common']['log_interval'],
        default_log_format=('tqdm'
                            if not _model_args['common']['no_progress_bar']
                            else 'none'),
    )

    # Initialize generator
    gen_timer = StopwatchMeter()
    generator = task.build_generator(args)

    # Generate and compute BLEU score
    scorer = OrderedDict()
    if args['eval']['sacrebleu']:
        scorer['bleu'] = bleu_scorer.SacrebleuScorer()
    elif args['eval']['nltk_bleu']:
        scorer['bleu'] = bleu_scorer.NLTKBleuScorer()
    else:
        scorer['bleu'] = bleu_scorer.Scorer(tgt_dict.pad(), tgt_dict.eos(),
                                            tgt_dict.unk())
    # Optionally also compute ROUGE
    if args['eval']['rouge']:
        scorer['rouge'] = rouge_scorer.RougeScorer()
    num_sentences = 0
    has_target = True
    wps_meter = TimeMeter()
    # for sample in tqdm(progress, total=len(progress)):
    for sample in progress:
        torch.cuda.empty_cache()
        sample = utils.move_to_cuda(sample) if use_cuda else sample
        if 'net_input' not in sample:
            continue

        prefix_tokens = None
        if args['eval']['prefix_size'] > 0:
            prefix_tokens = sample['target'][:, :args['eval']['prefix_size']]

        gen_timer.start()
        hypos = task.inference_step(generator, models, sample, prefix_tokens)
        num_generated_tokens = sum(len(h[0]['tokens']) for h in hypos)
        gen_timer.stop(num_generated_tokens)

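        # post-process each sample: strip padding, detokenize source/target/hypotheses,
        # optionally print them, then feed the scorers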
        for i, sample_id in enumerate(sample['id'].tolist()):
            has_target = sample['target'] is not None

            # Remove padding
            src_tokens = utils.strip_pad(
                sample['net_input']['src_tokens'][i, :], tgt_dict.pad())
            target_tokens = None
            if has_target:
                target_tokens = utils.strip_pad(sample['target'][i, :],
                                                tgt_dict.pad()).int().cpu()

            # Either retrieve the original sentences or regenerate them from tokens.
            if align_dict is not None:
                src_str = task.dataset(
                    args['dataset']['gen_subset']).src.get_original_text(
                        sample_id)
                target_str = task.dataset(
                    args['dataset']['gen_subset']).tgt.get_original_text(
                        sample_id)
            else:
                if src_dict is not None:
                    src_str = src_dict.string(src_tokens,
                                              args['eval']['remove_bpe'])
                else:
                    src_str = ""
                if has_target:
                    target_str = tgt_dict.string(target_tokens,
                                                 args['eval']['remove_bpe'],
                                                 escape_unk=True)

            if not args['eval']['quiet']:
                if src_dict is not None:
                    print('S-{}\t{}'.format(sample_id, src_str),
                          file=output_file)
                if has_target:
                    print('T-{}\t{}'.format(sample_id, target_str),
                          file=output_file)

            # Process top predictions
            for j, hypo in enumerate(hypos[i][:args['eval']['nbest']]):
                hypo_tokens, hypo_str, alignment = utils.post_process_prediction(
                    hypo_tokens=hypo['tokens'].int().cpu(),
                    src_str=src_str,
                    alignment=hypo['alignment'],
                    align_dict=align_dict,
                    tgt_dict=tgt_dict,
                    remove_bpe=args['eval']['remove_bpe'],
                )

                if hypo_str == '.':
                    # ROUGE cannot handle a hypothesis that is just '.'
                    continue

                if not args['eval']['quiet']:
                    score = hypo['score'] / math.log(2)  # convert to base 2
                    print('H-{}\t{}\t{}'.format(sample_id, score, hypo_str),
                          file=output_file)
                    print(
                        'P-{}\t{}'.format(
                            sample_id,
                            ' '.join(
                                map(
                                    lambda x: '{:.4f}'.format(x),
                                    # convert from base e to base 2
                                    hypo['positional_scores'].div_(math.log(2)
                                                                   ).tolist(),
                                ))),
                        file=output_file)

                    if args['eval']['print_alignment']:
                        print('A-{}\t{}'.format(
                            sample_id, ' '.join([
                                '{}-{}'.format(src_idx, tgt_idx)
                                for src_idx, tgt_idx in alignment
                            ])),
                              file=output_file)

                    if args['eval']['print_step']:
                        print('I-{}\t{}'.format(sample_id, hypo['steps']),
                              file=output_file)

                    # if getattr(args, 'retain_iter_history', False):
                    if args['eval']['retain_iter_history']:
                        for step, h in enumerate(hypo['history']):
                            _, h_str, _ = utils.post_process_prediction(
                                hypo_tokens=h['tokens'].int().cpu(),
                                src_str=src_str,
                                alignment=None,
                                align_dict=None,
                                tgt_dict=tgt_dict,
                                remove_bpe=None,
                            )
                            print('E-{}_{}\t{}'.format(sample_id, step, h_str),
                                  file=output_file)

                # Score only the top hypothesis
                if has_target and j == 0:
                    # print('Ref>> {}'.format(target_str), file=output_file)
                    # print('Hyp>> {}'.format(hypo_str), file=output_file)
                    if align_dict is not None or args['eval'][
                            'remove_bpe'] is not None:
                        # Convert back to tokens for evaluation with unk replacement and/or without BPE
                        target_tokens = tgt_dict.encode_line(
                            target_str, add_if_not_exist=True)
                    for metric in scorer:
                        if hasattr(scorer[metric], 'add_string'):
                            scorer[metric].add_string(target_str, hypo_str)
                        else:
                            scorer[metric].add(target_tokens, hypo_tokens)

        wps_meter.update(num_generated_tokens)
        progress.log({'wps': round(wps_meter.avg)})
        num_sentences += sample['nsentences']

    LOGGER.info('NOTE: hypothesis and token scores are output in base 2')
    LOGGER.info(
        'Translated {} sentences ({} tokens) in {:.1f}s ({:.2f} sentences/s, {:.2f} tokens/s)'
        .format(num_sentences, gen_timer.n, gen_timer.sum,
                num_sentences / gen_timer.sum, 1. / gen_timer.avg))
    if has_target:
        LOGGER.info('Generate {} with beam={}: {}'.format(
            args['dataset']['gen_subset'], args['eval']['beam'], {
                '\n{}:\n{}'.format(str.upper(metric), value.score())
                for metric, value in scorer.items()
            }))

    return scorer
Example 15
def _main(args, output_file):
    if args['dataset']['max_tokens'] is None and args['dataset']['max_sentences'] is None:
        args['dataset']['max_tokens'] = 12000
    LOGGER.info(args)

    use_cuda = torch.cuda.is_available() and not args['common']['cpu']

    # Load dataset splits
    task = tasks.setup_task(args)
    task.load_dataset(args['dataset']['gen_subset'])

    # Set dictionaries
    try:
        src_dict = getattr(task, 'source_dictionary', None)
    except NotImplementedError:
        src_dict = None
    tgt_dict = task.target_dictionary

    # Load ensemble
    LOGGER.info('loading model(s) from {}'.format(args['eval']['path']))
    models, _model_args = checkpoint_utils.load_model_ensemble(
        utils.split_paths(args['eval']['path']),
        arg_overrides=eval(args['eval']['model_overrides']),
        task=task,
    )

    # Optimize ensemble for generation
    for model in models:
        if _model_args['common']['fp16']:
            model.half()
        if use_cuda:
            model.cuda()

    # Load dataset (possibly sharded)
    itr = task.get_batch_iterator(
        dataset=task.dataset(args['dataset']['gen_subset']),
        max_tokens=args['dataset']['max_tokens'],
        max_sentences=args['dataset']['max_sentences'],
        max_positions=utils.resolve_max_positions(
            task.max_positions(),
            *[model.max_positions() for model in models]
        ),
        ignore_invalid_inputs=_model_args['dataset']['skip_invalid_size_inputs_valid_test'],
        required_batch_size_multiple=_model_args['dataset']['required_batch_size_multiple'],
        num_shards=_model_args['dataset']['num_shards'],
        shard_id=_model_args['dataset']['shard_id'],
        num_workers=_model_args['dataset']['num_workers'],
    ).next_epoch_itr(shuffle=False)
    progress = progress_bar.progress_bar(
        itr,
        log_format=_model_args['common']['log_format'],
        log_interval=_model_args['common']['log_interval'],
        default_log_format=('tqdm' if not _model_args['common']['no_progress_bar'] else 'none'),
    )

    """
    nohup python -m run.completion.seqrnn.eval > run/completion/seqrnn/case.log 2>&1 &
    """
    sequence_completor = task.build_completor([model], args)
    for sample in progress:
        torch.cuda.empty_cache()
        sample = utils.move_to_cuda(sample) if use_cuda else sample
        if 'net_input' not in sample:
            continue

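        # positions whose token index is greater than pad() are treated as valid
        # (assumes pad sits among the lowest dictionary indices)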
        non_pad_idx = sample['net_input']['src_tokens'] > task.target_dictionary.pad()

        with torch.no_grad():
            net_output = sequence_completor.generate([model], sample, prefix_tokens=None)
        lprobs = model.get_normalized_probs(net_output, log_probs=True)

        # from ipdb import set_trace
        # set_trace()

        rank = torch.argmax(lprobs, dim=-1)
        target = model.get_targets(sample, net_output)
        accuracy = 1.0 * ((rank == target) & non_pad_idx).sum(dim=-1) / non_pad_idx.sum(dim=-1)
        for idx, (data_idx, acc) in enumerate(zip(sample['id'], accuracy)):
            if acc > 0.9:
                LOGGER.info(f"{data_idx}: {task.target_dictionary.string(sample['net_input']['src_tokens'][idx, :])}")
Example 16
def _main(args, output_file):
    if args['dataset']['max_tokens'] is None and args['dataset'][
            'max_sentences'] is None:
        args['dataset']['max_tokens'] = 12000
    LOGGER.info(args)

    use_cuda = torch.cuda.is_available() and not args['common']['cpu']

    # Load dataset splits
    task = tasks.setup_task(args)
    task.load_dataset(args['dataset']['gen_subset'])

    # Set dictionaries
    try:
        src_dict = getattr(task, 'source_dictionary', None)
    except NotImplementedError:
        src_dict = None
    tgt_dict = task.target_dictionary

    # Load ensemble
    LOGGER.info('loading model(s) from {}'.format(args['eval']['path']))
    models, _model_args = checkpoint_utils.load_model_ensemble(
        utils.split_paths(args['eval']['path']),
        arg_overrides=eval(args['eval']['model_overrides']),
        task=task,
    )

    # Optimize ensemble for generation
    for model in models:
        model.make_generation_fast_(
            beamable_mm_beam_size=None
            if args['eval']['no_beamable_mm'] else args['eval']['beam'],
            need_attn=args['eval']['print_alignment'],
        )
        if _model_args['common']['fp16']:
            model.half()
        if use_cuda:
            model.cuda()

    # Load alignment dictionary for unknown word replacement
    # (None if no unknown word replacement, empty if no path to align dictionary)
    align_dict = utils.load_align_dict(args['eval']['replace_unk'])

    # Load dataset (possibly sharded)
    itr = task.get_batch_iterator(
        dataset=task.dataset(args['dataset']['gen_subset']),
        max_tokens=args['dataset']['max_tokens'],
        max_sentences=args['eval']['max_sentences'],
        max_positions=utils.resolve_max_positions(
            task.max_positions(),
            *[model.max_positions() for model in models]),
        ignore_invalid_inputs=_model_args['dataset']
        ['skip_invalid_size_inputs_valid_test'],
        required_batch_size_multiple=_model_args['dataset']
        ['required_batch_size_multiple'],
        num_shards=_model_args['dataset']['num_shards'],
        shard_id=_model_args['dataset']['shard_id'],
        num_workers=_model_args['dataset']['num_workers'],
    ).next_epoch_itr(shuffle=False)
    progress = progress_bar.progress_bar(
        itr,
        log_format=_model_args['common']['log_format'],
        log_interval=_model_args['common']['log_interval'],
        default_log_format=('tqdm'
                            if not _model_args['common']['no_progress_bar']
                            else 'none'),
    )

    # Initialize generator
    gen_timer = StopwatchMeter()
    generator = task.build_generator(args)

    num_sentences = 0
    has_target = True
    wps_meter = TimeMeter()
    # for sample in tqdm(progress, total=len(progress)):
    sources, hypotheses, references = dict(), dict(), dict()

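    # decode each batch and collect hypotheses/references; summary metrics are computed after the loop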
    for sample in progress:
        torch.cuda.empty_cache()
        sample = utils.move_to_cuda(sample) if use_cuda else sample
        if 'net_input' not in sample:
            continue

        # prefix_tokens = None
        # if args['eval']['prefix_size'] > 0:
        #     prefix_tokens = sample['target'][:, :args['eval']['prefix_size']]

        gen_timer.start()
        hypos = task.inference_step(generator, models, sample)
        # gen_out = task.sequence_generator.generate(model, sample)
        num_generated_tokens = sum(len(h[0]['tokens'])
                                   for h in hypos)  # TODO: warning
        gen_timer.stop(num_generated_tokens)

        for i, sample_id in enumerate(sample['id'].tolist()):
            has_target = sample['target'] is not None

            # Remove padding
            src_tokens = utils.strip_pad(
                sample['net_input']['src_tokens'][i, :], tgt_dict.pad())
            target_tokens = None
            if has_target:
                target_tokens = utils.strip_pad(sample['target'][i, :],
                                                tgt_dict.pad()).int().cpu()

            hypos_tokens = utils.strip_eos(hypos[i][0]['tokens'],
                                           tgt_dict.eos()).int().cpu()
            # Either retrieve the original sentences or regenerate them from tokens.
            # if align_dict is not None:
            #     src_str = task.dataset(args['dataset']['gen_subset']).src.get_original_text(sample_id)
            #     target_str = task.dataset(args['dataset']['gen_subset']).tgt.get_original_text(sample_id)
            # else:
            if src_dict is not None:
                src_str = src_dict.string(src_tokens,
                                          args['eval']['remove_bpe'])
            else:
                src_str = ""
            target_str = ""
            if has_target:
                target_str = tgt_dict.string(target_tokens,
                                             args['eval']['remove_bpe'],
                                             escape_unk=True)

            # hypo_tokens = tgt_dict.encode_line(hypo_str, add_if_not_exist=True)
            hypo_str = tgt_dict.string(hypos_tokens,
                                       args['eval']['remove_bpe'])

            sources[sample_id] = [src_str]
            hypotheses[sample_id] = [hypo_str]
            references[sample_id] = [target_str]

            if not args['eval']['quiet']:
                if src_dict is not None:
                    print('S-{}\t{}'.format(sample_id, src_str),
                          file=output_file)
                if has_target:
                    print('T-{}\t{}'.format(sample_id, target_str),
                          file=output_file)

                print('H-{}\t{}'.format(sample_id, hypo_str), file=output_file)

    filename = os.path.join(os.path.dirname(__file__), 'config',
                            'predict.json')
    LOGGER.info('write predicted file at {}'.format(filename))
    bleu, rouge_l, meteor = eval_utils.eval_accuracies(hypotheses,
                                                       references,
                                                       filename=filename,
                                                       mode='test')
    LOGGER.info('BLEU: {:.2f}\t ROUGE-L: {:.2f}\t METEOR: {:.2f}'.format(
        bleu, rouge_l, meteor))
Example 17
def _main(args, output_file):
    if args['dataset']['max_tokens'] is None and args['dataset'][
            'max_sentences'] is None:
        args['dataset']['max_tokens'] = 12000

    use_cuda = torch.cuda.is_available() and not args['common']['cpu']
    if use_cuda:
        device = os.environ.get('CUDA_VISIBLE_DEVICES', '0').split(',')[0]  # get first visible device as default
        torch.cuda.set_device(f'cuda:{device}')

    # Load dataset splits
    task = tasks.setup_task(args)

    # Load ensemble
    LOGGER.info('loading model(s) from {}'.format(args['eval']['path']))
    models, _model_args = checkpoint_utils.load_model_ensemble(
        utils.split_paths(args['eval']['path']),
        arg_overrides=eval(args['eval']['model_overrides']),
        task=task,
    )

    # Optimize ensemble for generation
    for model in models:
        if _model_args['common']['fp16']:
            model.half()
        if use_cuda:
            model.cuda()

    sequence_completor = task.build_completor(models, args)

    subsets = [
        args['dataset']['train_subset'],
        args['dataset']['valid_subset'],
        args['dataset']['gen_subset'],
    ]
    for subset in subsets:
        task.load_dataset(subset, shuffle=False)
        task.dataset(subset).shuffle = False

        # Load dataset (possibly sharded)
        itr = task.get_batch_iterator(
            dataset=task.dataset(subset),
            max_tokens=args['dataset']['max_tokens'],
            max_sentences=args['eval']['max_sentences_eval'],
            max_positions=utils.resolve_max_positions(
                task.max_positions(),
                *[model.max_positions() for model in models]),
            ignore_invalid_inputs=_model_args['dataset']
            ['skip_invalid_size_inputs_valid_test'],
            required_batch_size_multiple=_model_args['dataset']
            ['required_batch_size_multiple'],
            num_shards=_model_args['dataset']['num_shards'],
            shard_id=_model_args['dataset']['shard_id'],
            num_workers=_model_args['dataset']['num_workers'],
        ).next_epoch_itr(shuffle=False)
        progress = progress_bar.progress_bar(
            itr,
            log_format=_model_args['common']['log_format'],
            log_interval=_model_args['common']['log_interval'],
            default_log_format=('tqdm'
                                if not _model_args['common']['no_progress_bar']
                                else 'none'),
        )

        topk = args['kd']['gen_topk']
        out_idx, out_prob = [], []
        with torch.no_grad():
            for sample in progress:
                torch.cuda.empty_cache()
                sample = move_to_cuda(sample) if use_cuda else sample
                if 'net_input' not in sample:
                    continue
                net_output = sequence_completor.generate([model],
                                                         sample,
                                                         prefix_tokens=None)
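                # keep the teacher's top-k output values per position as distillation targets
                # (padding positions are dropped below)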
                topk_prob, topk_ids = torch.topk(net_output[0], topk, dim=-1)
                # ignore pad
                non_padding_mask = sample['net_input'][
                    'src_tokens'] != task.target_dictionary.pad()
                if use_cuda:
                    topk_prob, topk_ids = topk_prob.cpu(), topk_ids.cpu()
                    non_padding_mask = non_padding_mask.cpu()
                for idx in range(topk_prob.size(0)):
                    out_idx.append(
                        topk_ids[idx,
                                 ...][non_padding_mask[idx,
                                                       ...]].view(-1).tolist())
                    out_prob.append(topk_prob[idx, ...][non_padding_mask[
                        idx, ...]].view(-1).tolist())
        assert len(out_idx) == len(out_prob) == len(task.dataset(subset)), \
            Exception(len(out_idx), len(out_prob), len(task.dataset(subset)))
        TeacherOutDataset.save_bin(
            prefix=os.path.join(args['checkpoint']['save_dir'],
                                f'{subset}.top{topk}_idx'),
            data_list=out_idx,
            dtype=np.int32,
        )
        TeacherOutDataset.save_bin(
            prefix=os.path.join(args['checkpoint']['save_dir'],
                                f'{subset}.top{topk}_prob'),
            data_list=out_prob,
            dtype=np.float64,  # 'np.float' is removed in recent NumPy; float64 matches the old alias
        )
Example 18
def main(args, **unused_kwargs):
    assert args['eval']['path'] is not None, '--path required for evaluation!'

    if torch.cuda.is_available() and not args['common']['cpu']:
        torch.cuda.set_device(args['distributed_training']['device_id'])

    LOGGER.info(args)
    # while evaluation, set fraction_using_func_name = 0, namely, not sample from func_name
    args['task']['fraction_using_func_name'] = 0.
    use_cuda = torch.cuda.is_available() and not args['common']['cpu']
    if use_cuda:
        device = os.environ.get('CUDA_VISIBLE_DEVICES', '0').split(',')[0]  # get first visible device as default
        torch.cuda.set_device(f'cuda:{device}')
    task = tasks.setup_task(args)

    # Load ensemble
    LOGGER.info('loading model(s) from {}'.format(args['eval']['path']))
    models, _model_args = checkpoint_utils.load_model_ensemble(
        utils.split_paths(args['eval']['path']),
        arg_overrides=eval(args['eval']['model_overrides']),
        task=task,
    )

    for lang in deepcopy(args['dataset']['langs']):
        args['dataset']['langs'] = [lang]
        # Load dataset splits
        LOGGER.info(f'Evaluating {lang} dataset')
        task.load_dataset(args['dataset']['gen_subset'])
        dataset = task.dataset(args['dataset']['gen_subset'])

        # Optimize ensemble for generation and set the source and dest dicts on the model (required by scorer)
        for model in models:
            model.make_generation_fast_()
            if args['common']['fp16']:
                model.half()
            if use_cuda:
                model.cuda()

        assert len(models) > 0

        LOGGER.info('num. model params: {}'.format(
            sum(p.numel() for p in models[0].parameters())))

        itr = task.get_batch_iterator(
            dataset=dataset,
            max_tokens=args['dataset']['max_tokens'] or 36000,
            max_sentences=args['eval']['max_sentences'],
            max_positions=utils.resolve_max_positions(
                *[model.max_positions() for model in models]),
            ignore_invalid_inputs=True,
            num_shards=args['dataset']['num_shards'],
            shard_id=args['dataset']['shard_id'],
            num_workers=args['dataset']['num_workers'],
        ).next_epoch_itr(shuffle=False)
        progress = progress_bar.progress_bar(
            itr,
            log_format=args['common']['log_format'],
            log_interval=args['common']['log_interval'],
            default_log_format=('tqdm' if not args['common']['no_progress_bar']
                                else 'none'),
        )

        code_reprs, query_reprs = [], []
        for sample in progress:
            if 'net_input' not in sample:
                continue
            sample = move_to_cuda(sample) if use_cuda else sample
            batch_code_reprs, batch_query_reprs = models[0](
                **sample['net_input'])

            if use_cuda:
                batch_code_reprs = batch_code_reprs.cpu().detach()
                batch_query_reprs = batch_query_reprs.cpu().detach()

            code_reprs.append(batch_code_reprs)
            query_reprs.append(batch_query_reprs)
        code_reprs = torch.cat(code_reprs, dim=0)
        query_reprs = torch.cat(query_reprs, dim=0)

        assert code_reprs.shape == query_reprs.shape, (code_reprs.shape,
                                                       query_reprs.shape)
        eval_size = len(
            code_reprs
        ) if args['eval']['eval_size'] == -1 else args['eval']['eval_size']

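        # evaluate retrieval in disjoint chunks of eval_size: within each chunk, rank each query's
        # own code snippet against the others and accumulate reciprocal ranks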
        k, MRR, topk_idx, topk_prob = 3, [], [], []
        for idx in range(len(dataset) // eval_size):
            code_emb = code_reprs[idx * eval_size:(idx + 1) * eval_size, :]
            query_emb = query_reprs[idx * eval_size:(idx + 1) * eval_size, :]

            if use_cuda:
                code_emb = code_emb.cuda()
                query_emb = query_emb.cuda()

            if args['criterion'] == 'search_cosine':
                src_emb_norm = torch.norm(code_emb, dim=-1,
                                          keepdim=True) + 1e-10
                tgt_emb_norm = torch.norm(query_emb, dim=-1,
                                          keepdim=True) + 1e-10
                logits = (query_emb / tgt_emb_norm) @ (code_emb /
                                                       src_emb_norm).t()
            elif args['criterion'] == 'search_softmax':
                logits = query_emb @ code_emb.t()
            else:
                raise NotImplementedError

            correct_scores = logits.diag()
            compared_scores = logits >= correct_scores.unsqueeze(dim=-1)
            mrr = 1 / compared_scores.sum(dim=-1).float()
            MRR.extend(mrr.tolist())

        if len(dataset) % eval_size:
            code_emb = code_reprs[-eval_size:, :]
            query_emb = query_reprs[-eval_size:, :]

            if use_cuda:
                code_emb = code_emb.cuda()
                query_emb = query_emb.cuda()

            if args['criterion'] == 'search_cosine':
                src_emb_norm = torch.norm(code_emb, dim=-1,
                                          keepdim=True) + 1e-10
                tgt_emb_norm = torch.norm(query_emb, dim=-1,
                                          keepdim=True) + 1e-10
                logits = (query_emb / tgt_emb_norm) @ (code_emb /
                                                       src_emb_norm).t()
            elif args['criterion'] == 'search_softmax':
                logits = query_emb @ code_emb.t()
            else:
                raise NotImplementedError

            correct_scores = logits.diag()
            compared_scores = logits >= correct_scores.unsqueeze(dim=-1)
            last_ids = len(code_reprs) % eval_size
            mrr = 1 / compared_scores.sum(dim=-1).float()[-last_ids:]
            MRR.extend(mrr.tolist())

        print('{}, mrr: {:.4f}'.format(lang, np.mean(MRR)))
Example 19
def cli_main():
    SEED = 204
    BATCH_SIZE = 64
    MAX_SOURCE_POSITIONS = 1024
    EPOCH = 50

    from ncc.utils.set_seed import set_seed
    set_seed(SEED)

    use_cuda = torch.cuda.is_available()
    if use_cuda:
        device = os.environ.get('CUDA_VISIBLE_DEVICES', '0').split(',')[0]  # get first visible device as default
        torch.cuda.set_device(f'cuda:{device}')
    criterion = DeepTuneLoss(task=None, sentence_avg=-1)
    if use_cuda:
        criterion = criterion.cuda()

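    # collect per-benchmark results across platforms and CV folds for the summary tables printed at the end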
    data = []
    for i, platform in enumerate(LANGUAGES):
        DATA_DIR = os.path.join(DATASET_DIR, f'mapping/{platform}/data-mmap')

        def get_attr(attr):
            oracle_file = os.path.join(DATA_DIR, f'train.{attr}')
            with open(oracle_file, 'rb') as reader:
                out = pickle.load(reader)
            return np.asarray(out)

        platform_name = mapping_metrics.platform2str(platform)
        benchmarks = get_attr('benchmark')
        runtime_cpus = get_attr('runtime_cpu')
        runtime_gpus = get_attr('runtime_gpu')

        #################### load dataset ####################
        src_dataset = load_mmap_dataset(os.path.join(DATA_DIR, f'train.src_tokens'))
        src_dataset = TruncateDataset(src_dataset, truncation_length=MAX_SOURCE_POSITIONS, truncate_prefix=0)
        tgt_dataset = load_mmap_dataset(os.path.join(DATA_DIR, f'train.oracle'))

        src_dict = Dictionary.load(os.path.join(DATA_DIR, 'src_tokens.dict.jsonl'))
        src_aux = OrderedDict()
        src_aux['transfer'] = get_attr('transfer')
        src_aux['wgsize'] = get_attr('wgsize')

        tgt_dict = Dictionary.load(os.path.join(DATA_DIR, 'oracle.dict.jsonl'))

        dataset = LanguagePairDataset(
            src=src_dataset, src_sizes=src_dataset.sizes, src_dict=src_dict, src_aux=src_aux,
            tgt=tgt_dataset, tgt_sizes=tgt_dataset.sizes, tgt_dict=tgt_dict, tgt_aux=None,
            left_pad_source=True, max_source_positions=MAX_SOURCE_POSITIONS,
        )
        #################### load dataset ####################

        # build toy dataset for 10-fold cross validation
        tgt_data = [tgt_dataset[idx].item() for idx in range(len(tgt_dataset))]
        src_data = [None] * len(tgt_data)

        # 10-fold cross-validation
        kf = StratifiedKFold(n_splits=10, shuffle=True, random_state=SEED)
        for j, (train_ids, test_ids) in enumerate(kf.split(src_data, tgt_data)):
            # deeptune model
            model = DeepTuneEncoder(dictionary=src_dict, embed_dim=64,
                                    rnn_cell='lstm', rnn_hidden_dim=64, rnn_dropout=0., rnn_num_layers=2,
                                    aux_dim=2, inner_dim=32, out_dim=2)
            if use_cuda:
                model = model.cuda()
            optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
            for epoch_i in range(EPOCH):
                if dataset.shuffle:
                    random.shuffle(train_ids)
                train_batch_sampler = data_utils.batch_by_size(
                    train_ids,
                    num_tokens_fn=lambda *args: -1,
                    max_sentences=BATCH_SIZE,
                )
                train_dataloader = DataLoader(dataset=dataset,
                                              batch_sampler=train_batch_sampler,
                                              collate_fn=collate, )
                with tqdm(total=len(train_dataloader)) as t:
                    for sample_i, sample in enumerate(train_dataloader, start=1):
                        t.set_description(f'Epoch {epoch_i + 1}/{EPOCH} Batch {sample_i}/{len(train_dataloader)}')
                        if use_cuda:
                            sample = move_to_cuda(sample)
                        loss, sample_size, logging_output = criterion(model, sample)
                        loss.div_(sample_size)
                        t.set_postfix(loss=loss.item())
                        t.update()

                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()

            # test accuracy
            test_batch_sampler = data_utils.batch_by_size(
                test_ids,
                num_tokens_fn=lambda *args: -1,
                max_sentences=BATCH_SIZE,
            )
            test_dataloader = DataLoader(dataset=dataset,
                                         batch_sampler=test_batch_sampler,
                                         collate_fn=collate, )
            predictions, ground_truth = [], []
            for sample in test_dataloader:
                if use_cuda:
                    sample = move_to_cuda(sample)
                hybrid_out, _ = model(**sample['net_input'])
                predictions.append(hybrid_out.max(dim=-1)[1])
                ground_truth.append(sample['target'].view(-1))
            predictions = torch.cat(predictions)
            ground_truth = torch.cat(ground_truth)

            accuracy = (predictions == ground_truth).tolist()
            # runtimes of baseline mapping (CPU on AMD, GPU on NVIDIA)
            gt_runtimes = (runtime_cpus if platform == "amd" else runtime_gpus)[test_ids]
            pred_runtimes = [
                (runtime_cpus if pred == 0 else runtime_gpus)[idx]
                for idx, pred in zip(test_ids, predictions)
            ]
            speedup = gt_runtimes / pred_runtimes

            # record results
            for benchmark_, o_, p_, accuracy_, p_speedup_ in \
                zip(benchmarks[test_ids], ground_truth, predictions, accuracy, speedup):
                data.append({
                    "Model": model.__class__.__name__,
                    "Platform": platform_name,
                    'Benchmark': mapping_metrics.escape_benchmark_name(benchmark_),
                    'Benchmark Suite': mapping_metrics.escape_suite_name(benchmark_),
                    "Oracle Mapping": o_,
                    "Predicted Mapping": p_,
                    "Accuracy": accuracy_,
                    "Speedup": p_speedup_,
                })
            del model, optimizer
    performance = pd.DataFrame(
        data, index=range(1, len(data) + 1), columns=[
            "Model",
            "Platform",
            "Benchmark",
            "Benchmark Suite",
            "Oracle Mapping",
            "Predicted Mapping",
            "Accuracy",
            "Speedup"
        ])
    benchmark_out = performance.groupby(['Platform', 'Benchmark Suite'])[['Platform', 'Accuracy', 'Speedup']].mean()
    benchmark_out['Accuracy'] = round(benchmark_out['Accuracy'] * 100, 2)
    benchmark_out['Speedup'] = round(benchmark_out['Speedup'], 2)
    print(benchmark_out)
    out = performance.groupby(['Platform'])[['Platform', 'Accuracy', 'Speedup']].mean()
    out['Accuracy'] = round(out['Accuracy'] * 100, 2)
    out['Speedup'] = round(out['Speedup'], 2)
    print(out)
Example 20
def _main(args, output_file):
    if args['dataset']['max_tokens'] is None and args['dataset'][
            'max_sentences'] is None:
        args['dataset']['max_tokens'] = 12000

    use_cuda = torch.cuda.is_available() and not args['common']['cpu']
    if use_cuda:
        device = os.environ.get('CUDA_VISIBLE_DEVICES', '0').split(',')[0]  # get first visible device as default
        torch.cuda.set_device(f'cuda:{device}')

    # Load dataset splits
    task = tasks.setup_task(args)
    task.load_dataset(args['dataset']['gen_subset'], shuffle=False)

    # Load ensemble
    LOGGER.info('loading model(s) from {}'.format(args['eval']['path']))
    models, _model_args = checkpoint_utils.load_model_ensemble(
        utils.split_paths(args['eval']['path']),
        arg_overrides=eval(args['eval']['model_overrides']),
        task=task,
    )

    # Optimize ensemble for generation
    for model in models:
        if _model_args['common']['fp16']:
            model.half()
        if use_cuda:
            model.cuda()

    # Load dataset (possibly sharded)
    itr = task.get_batch_iterator(
        dataset=task.dataset(args['dataset']['gen_subset']),
        max_tokens=args['dataset']['max_tokens'],
        max_sentences=args['eval']['max_sentences_eval'],
        max_positions=utils.resolve_max_positions(
            task.max_positions(),
            *[model.max_positions() for model in models]),
        ignore_invalid_inputs=_model_args['dataset']
        ['skip_invalid_size_inputs_valid_test'],
        required_batch_size_multiple=_model_args['dataset']
        ['required_batch_size_multiple'],
        num_shards=_model_args['dataset']['num_shards'],
        shard_id=_model_args['dataset']['shard_id'],
        num_workers=_model_args['dataset']['num_workers'],
    ).next_epoch_itr(shuffle=False)
    progress = progress_bar.progress_bar(
        itr,
        log_format=_model_args['common']['log_format'],
        log_interval=_model_args['common']['log_interval'],
        default_log_format=('tqdm'
                            if not _model_args['common']['no_progress_bar']
                            else 'none'),
    )

    sequence_completor = task.build_completor([model], args)

    accuracy = {'all': 0.}
    mrr = {'all': 0.}
    sample_num = {'all': 0.}
    if task.dataset('test').attrs is not None:
        for attr in task.dataset('test').attrs:
            accuracy[attr] = 0.
            mrr[attr] = 0.
            sample_num[attr] = 0

    def _eval(lprobs, target, idx, num):
        with torch.no_grad():
            lprobs = lprobs[idx]
            target = target[idx]
            accuracy = (torch.argmax(lprobs, dim=-1) == target).sum().float().item()
            # Ref: "Code Prediction by Feeding Trees to Transformers"
            # For ease of computation, only rank_i <= 10 is considered for each
            # location i (any rank_i > 10 gets a score of 0).
            ranks = (lprobs >= lprobs[:, target].diag().unsqueeze(dim=-1)).sum(-1)
            mrr = 1. / ranks
            mrr[ranks > 10] = 0.
            mrr = mrr.sum().float().item()
        return accuracy, mrr, num

    for sample in progress:
        torch.cuda.empty_cache()
        sample = utils.move_to_cuda(sample) if use_cuda else sample
        if 'net_input' not in sample:
            continue

        with torch.no_grad():
            net_output = sequence_completor.generate([model],
                                                     sample,
                                                     prefix_tokens=None)
            # lprobs = model.get_normalized_probs(net_output, log_probs=True)
            lprobs = torch.softmax(net_output[0], dim=-1)
            lprobs = lprobs.view(-1, lprobs.size(-1))
            target = model.get_targets(sample, net_output).view(-1)

            # all
            # ignore pad and unk
            idx = sample['net_input']['src_tokens'].view(
                -1) != task.target_dictionary.pad()
            idx[sample['target'].view(-1) == task.target_dictionary.unk()] = 0
            # ignore overlapping tokens
            max_len = sample['target'].size(-1)
            for i, ext_i in enumerate(sample['extends']):
                idx[i * max_len:i * max_len + ext_i] = 0
            batch_acc, batch_mrr, batch_num = _eval(lprobs,
                                                    target,
                                                    idx,
                                                    num=idx.sum().item())
            accuracy['all'] += batch_acc
            mrr['all'] += batch_mrr
            sample_num['all'] += batch_num

            # other attrs
            if sample['attr_masks'] is not None:
                for attr, attr_idx in sample['attr_masks'].items():
                    # pick out attr_idx who are not unk/pad
                    attr_idx = attr_idx[idx[attr_idx].tolist()]
                    if len(attr_idx) > 0:
                        batch_acc, batch_mrr, batch_num = _eval(
                            lprobs, target, attr_idx, num=attr_idx.size)
                        accuracy[attr] += batch_acc
                        mrr[attr] += batch_mrr
                        sample_num[attr] += batch_num
    for attr in accuracy.keys():
        avg_acc = round(accuracy[attr] /
                        sample_num[attr], 6) if sample_num[attr] > 0. else None
        avg_mrr = round(mrr[attr] /
                        sample_num[attr], 6) if sample_num[attr] > 0. else None
        print('[{}] tokens, accuracy: {}, MRR: {}'.format(
            attr, avg_acc, avg_mrr))
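The _eval helper above scores next-token predictions with accuracy and a rank-capped MRR (ranks above 10 contribute 0, following the cited paper). A small, self-contained sketch of that ranking logic on dummy tensors; the vocabulary size and probabilities below are invented for illustration:
import torch

torch.manual_seed(0)
lprobs = torch.rand(5, 100)            # 5 positions, vocabulary of 100 (dummy values)
target = torch.randint(0, 100, (5,))   # dummy gold tokens

# rank of the gold token at each position (1 = best)
gold_scores = lprobs[:, target].diag().unsqueeze(dim=-1)
ranks = (lprobs >= gold_scores).sum(-1)

mrr = 1. / ranks.float()
mrr[ranks > 10] = 0.                   # cap: ranks beyond 10 score 0
accuracy = (torch.argmax(lprobs, dim=-1) == target).float().mean().item()
print(accuracy, mrr.mean().item())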
Esempio n. 21
0
def main(args, **unused_kwargs):
    assert args['eval']['path'] is not None, '--path required for evaluation!'

    if torch.cuda.is_available() and not args['common']['cpu']:
        torch.cuda.set_device(args['distributed_training']['device_id'])

    LOGGER.info(args)
    # during evaluation, set fraction_using_func_name = 0, i.e. do not sample from func_name
    args['task']['fraction_using_func_name'] = 0.
    use_cuda = torch.cuda.is_available() and not args['common']['cpu']
    task = tasks.setup_task(args)

    # Load ensemble
    LOGGER.info('loading model(s) from {}'.format(args['eval']['path']))
    models, _model_args = checkpoint_utils.load_model_ensemble(
        utils.split_paths(args['eval']['path']),
        arg_overrides=eval(args['eval']['model_overrides']),
        task=task,
    )

    task = tasks.setup_task(args)

    # Load dataset splits
    task.load_dataset(args['dataset']['gen_subset'])
    dataset = task.dataset(args['dataset']['gen_subset'])

    # Optimize ensemble for generation and set the source and dest dicts on the model (required by scorer)
    for model in models:
        model.make_generation_fast_()
        if args['common']['fp16']:
            model.half()
        if use_cuda:
            model.cuda()

    assert len(models) > 0

    LOGGER.info('num. model params: {}'.format(
        sum(p.numel() for p in models[0].parameters())))

    itr = task.get_batch_iterator(
        dataset=dataset,
        max_tokens=args['dataset']['max_tokens'] or 36000,
        max_sentences=args['eval']['max_sentences'],
        max_positions=utils.resolve_max_positions(
            *[model.max_positions() for model in models]),
        ignore_invalid_inputs=True,
        num_shards=args['dataset']['num_shards'],
        shard_id=args['dataset']['shard_id'],
        num_workers=args['dataset']['num_workers'],
    ).next_epoch_itr(shuffle=False)
    progress = progress_bar.progress_bar(
        itr,
        log_format=args['common']['log_format'],
        log_interval=args['common']['log_interval'],
        default_log_format=('tqdm' if not args['common']['no_progress_bar']
                            else 'none'),
    )

    code_reprs, query_reprs = [], []
    for sample in progress:
        if 'net_input' not in sample:
            continue
        sample = utils.move_to_cuda(sample) if use_cuda else sample
        batch_code_reprs, batch_query_reprs = models[0](**sample['net_input'])

        code_reprs.extend(batch_code_reprs.tolist())
        query_reprs.extend(batch_query_reprs.tolist())
    code_reprs = np.asarray(code_reprs, dtype=np.float32)
    query_reprs = np.asarray(query_reprs, dtype=np.float32)

    assert code_reprs.shape == query_reprs.shape, (code_reprs.shape,
                                                   query_reprs.shape)
    eval_size = len(
        code_reprs
    ) if args['eval']['eval_size'] == -1 else args['eval']['eval_size']

    k, MRR, topk_idx, topk_prob = 3, [], [], []
    for idx in range(len(dataset) // eval_size):
        code_emb = torch.from_numpy(code_reprs[idx:idx + eval_size, :]).cuda()
        query_emb = torch.from_numpy(query_reprs[idx:idx +
                                                 eval_size, :]).cuda()
        logits = query_emb @ code_emb.t()

        # src_emb_norm = torch.norm(code_emb, dim=-1, keepdim=True) + 1e-10
        # tgt_emb_norm = torch.norm(query_emb, dim=-1, keepdim=True) + 1e-10
        # logits = (query_emb / tgt_emb_norm) @ (code_emb / src_emb_norm).t()

        correct_scores = logits.diag()
        compared_scores = logits >= correct_scores.unsqueeze(dim=-1)
        mrr = 1 / compared_scores.sum(dim=-1).float()
        MRR.extend(mrr.tolist())
        batch_topk_prob, batch_topk_idx = logits.softmax(dim=-1).topk(k)
        batch_topk_idx = batch_topk_idx + idx * eval_size
        topk_idx.extend(batch_topk_idx.tolist())
        topk_prob.extend(batch_topk_prob.tolist())

    if len(dataset) % eval_size:
        code_emb = torch.from_numpy(code_reprs[-eval_size:, :]).cuda()
        query_emb = torch.from_numpy(query_reprs[-eval_size:, :]).cuda()
        logits = query_emb @ code_emb.t()

        # src_emb_norm = torch.norm(code_emb, dim=-1, keepdim=True) + 1e-10
        # tgt_emb_norm = torch.norm(query_emb, dim=-1, keepdim=True) + 1e-10
        # logits = (query_emb / tgt_emb_norm) @ (code_emb / src_emb_norm).t()

        correct_scores = logits.diag()
        compared_scores = logits >= correct_scores.unsqueeze(dim=-1)
        last_ids = len(code_reprs) % eval_size
        mrr = 1 / compared_scores.sum(dim=-1).float()[-last_ids:]
        MRR.extend(mrr.tolist())
        batch_topk_prob, batch_topk_idx = logits[-last_ids:].softmax(
            dim=-1).topk(k)
        batch_topk_idx = batch_topk_idx + len(code_reprs) - eval_size
        topk_idx.extend(batch_topk_idx.tolist())
        topk_prob.extend(batch_topk_prob.tolist())

    print('mrr: {:.4f}'.format(np.mean(MRR)))

    for idx, mrr in enumerate(MRR):
        if mrr == 1.0 and topk_prob[idx][0] > 0.8:
            print(
                np.asarray(topk_idx[idx]) + 1,
                [round(prob, 4) for prob in topk_prob[idx]])
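This example evaluates code search by scoring every query against every code snippet in an evaluation block and reading the rank of the matching pair off the diagonal. A compact sketch of that in-batch MRR computation on random embeddings; the block size and dimensions are arbitrary, not those of the real model:
import torch

torch.manual_seed(0)
n, dim = 8, 16                                 # hypothetical block size and embedding size
code_emb = torch.randn(n, dim)
query_emb = torch.randn(n, dim)

logits = query_emb @ code_emb.t()              # (n, n) similarity matrix
correct_scores = logits.diag()                 # score of each query's true code snippet
ranks = (logits >= correct_scores.unsqueeze(dim=-1)).sum(dim=-1)
mrr = (1. / ranks.float()).mean().item()
print('mrr: {:.4f}'.format(mrr))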
Esempio n. 22
0
def hybrid_retrieval_task(args, task, model, use_cuda, input, **kwargs):
    task.args['dataset']['langs'] = kwargs['lang']
    topk = kwargs['topk']
    # load code_tokens dataset
    task.load_dataset(split=args['dataset']['gen_subset'])
    code_dataset = task.dataset(args['dataset']['gen_subset'])
    # construct similarities
    similarities = torch.FloatTensor(len(code_dataset)).fill_(0.0)

    def cosine_fn(code_emb, query_emb):
        src_emb_norm = torch.norm(code_emb, dim=-1, keepdim=True) + 1e-10
        tgt_emb_norm = torch.norm(query_emb, dim=-1, keepdim=True) + 1e-10
        similarity = (query_emb / tgt_emb_norm) @ (code_emb / src_emb_norm).t()
        return similarity

    def softmax_fn(code_emb, query_emb):
        similarity = query_emb @ code_emb.t()
        return similarity

    if args['criterion'] == 'retrieval_cosine':
        similarity_metrics = cosine_fn
    elif args['criterion'] == 'retrieval_softmax':
        similarity_metrics = softmax_fn
    else:
        raise NotImplementedError(args['criterion'])
    # query embedding
    query_tokens = task.encode_query_input(input).unsqueeze(dim=0)
    if use_cuda:
        query_tokens = utils.move_to_cuda(query_tokens)
    query_tokens = model.tgt_encoders(query_tokens)
    # code embedding
    code_encoder = model.src_encoders[task.args['dataset']['langs'][0]]
    for idx, code_tokens in enumerate(code_dataset.src):
        code_tokens = code_tokens.unsqueeze(dim=0)
        if use_cuda:
            code_tokens = utils.move_to_cuda(code_tokens)
        code_tokens = code_encoder(code_tokens)
        similarities[idx] = similarity_metrics(code_tokens,
                                               query_tokens).item()
    topk_probs, topk_ids = similarities.topk(k=topk)
    topk_ids_probs = {
        idx.item(): round(prob.item() * 100, 4)
        for prob, idx in zip(topk_probs, topk_ids)
    }
    topk_ids = set(topk_ids.tolist())

    if 'code_file' in args['eval']:
        code_raw_file = args['eval']['code_file']
    else:
        data_dir = args['task']['data']
        default_dir = data_dir[:data_dir.rfind('retrieval')]
        code_raw_file = os.path.join(default_dir, "attributes",
                                     task.args['dataset']['langs'][0],
                                     "test.code")

    out = []
    with open(code_raw_file, 'r') as reader:
        for idx, line in enumerate(reader):
            if idx in topk_ids:
                out.append([line, topk_ids_probs[idx]])
                if len(out) == len(topk_ids):
                    break
    out = sorted(out, key=lambda code_prob: code_prob[-1], reverse=True)
    return out
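hybrid_retrieval_task ranks every code snippet in the dataset against a single query embedding and keeps the top-k hits. A minimal sketch of that scoring loop with the cosine-similarity variant, run over random embeddings; the corpus size, dimensions and topk value are illustrative only:
import torch

def cosine_fn(code_emb, query_emb):
    src_norm = torch.norm(code_emb, dim=-1, keepdim=True) + 1e-10
    tgt_norm = torch.norm(query_emb, dim=-1, keepdim=True) + 1e-10
    return (query_emb / tgt_norm) @ (code_emb / src_norm).t()

torch.manual_seed(0)
num_snippets, dim, topk = 100, 32, 3           # hypothetical corpus size and dims
query_emb = torch.randn(1, dim)

similarities = torch.FloatTensor(num_snippets).fill_(0.0)
for idx in range(num_snippets):
    code_emb = torch.randn(1, dim)             # stand-in for an encoded code snippet
    similarities[idx] = cosine_fn(code_emb, query_emb).item()

topk_probs, topk_ids = similarities.topk(k=topk)
print({i.item(): round(p.item() * 100, 4) for p, i in zip(topk_probs, topk_ids)})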