Example 1
def create_agent_from_opt_file(opt: Opt):
    """
    Load agent options and module from file if opt file exists.

    Checks to see if file exists opt['model_file'] + ".opt"; if so, load up the
    options from the file and use that to create an agent, loading the model
    type from that file and overriding any options specified in that file when
    instantiating the agent.

    If that file does not exist, return None.
    """
    model_file = opt['model_file']
    optfile = model_file + '.opt'
    if os.path.isfile(optfile):
        new_opt = load_opt_file(optfile)
        # TODO we need a better way to say these options are never copied...
        if 'datapath' in new_opt:
            # never use the datapath from an opt dump
            del new_opt['datapath']
        if 'batchindex' in new_opt:
            # This saved variable can cause trouble if we switch to BS=1 at test time
            del new_opt['batchindex']
        # only override opts specified in 'override' dict
        if opt.get('override'):
            for k, v in opt['override'].items():
                if str(v) != str(new_opt.get(k, None)):
                    print("[ warning: overriding opt['{}'] to {} ("
                          "previously: {} )]".format(k, v,
                                                     new_opt.get(k, None)))
                new_opt[k] = v

        model_class = load_agent_module(new_opt['model'])

        # check for model version
        if hasattr(model_class, 'model_version'):
            curr_version = new_opt.get('model_version', 0)
            if curr_version != model_class.model_version():
                model = new_opt['model']
                m = ('It looks like you are trying to load an older version of'
                     ' the selected model. Change your model argument to use '
                     'the old version from parlai/agents/legacy_agents: for '
                     'example: `-m legacy:{m}:{v}` or '
                     '`--model parlai.agents.legacy_agents.{m}.{m}_v{v}:{c}`')
                if '.' not in model:
                    # give specific error message if it's easy
                    raise RuntimeError(
                        m.format(m=model,
                                 v=curr_version,
                                 c=model_class.__name__))
                else:
                    # otherwise generic one
                    raise RuntimeError(
                        m.format(m='modelname', v=curr_version,
                                 c='ModelAgent'))

        if hasattr(model_class, 'upgrade_opt'):
            new_opt = model_class.upgrade_opt(new_opt)

        # add model arguments to new_opt if they aren't in new_opt already
        for k, v in opt.items():
            if k not in new_opt:
                new_opt[k] = v
        new_opt['model_file'] = model_file
        if not new_opt.get('dict_file'):
            old_dict_file = None
            new_opt['dict_file'] = model_file + '.dict'
        elif new_opt.get('dict_file') and not os.path.isfile(
                new_opt['dict_file']):
            old_dict_file = new_opt['dict_file']
            new_opt['dict_file'] = model_file + '.dict'
        if not os.path.isfile(new_opt['dict_file']):
            warn_once(
                'WARNING: Neither the specified dict file ({}) nor the '
                '`model_file`.dict file ({}) exists, check to make sure either '
                'is correct. This may manifest as a shape mismatch later '
                'on.'.format(old_dict_file, new_opt['dict_file']))

        # if we want to load weights from --init-model, compare opts with
        # loaded ones
        compare_init_model_opts(opt, new_opt)
        return model_class(new_opt)
    else:
        return None
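
A minimal usage sketch (hypothetical path; assumes Opt can be built from a plain dict):

opt = Opt({'model_file': '/tmp/my_model', 'override': {'batchsize': 1}})
agent = create_agent_from_opt_file(opt)
if agent is None:
    # no /tmp/my_model.opt on disk, so nothing could be loaded
    print('No .opt file found next to the model file.')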
Example 2
    def _setup_data(self, opt):
        counts = {
            'partner': {
                gend_utils.UNKNOWN: 0,
                gend_utils.FEM: 0,
                gend_utils.MASC: 0
            },
            'self': {
                gend_utils.UNKNOWN: 0,
                gend_utils.FEM: 0,
                gend_utils.MASC: 0
            },
        }

        dt = opt['datatype'].split(':')[0]
        if dt == 'test':
            warn_once('No test set; switching to valid')
            dt = 'valid'

        # build data
        print('[ Building data ... ]')
        new_eps = []
        orig_teacher = OrigConvai2Teacher(opt)
        total_exs = orig_teacher.num_examples()
        num_exs = 0
        while num_exs < total_exs:
            current_episode = []
            episode_done = False

            while not episode_done:
                # TODO: eventually all teachers should return Messages, so
                # we should assert this
                action = Message(orig_teacher.act())
                current_episode.append(action)
                episode_done = action.get('episode_done', False)
                num_exs += 1

            # now that we have the entire episode, process it
            first_ex = current_episode[0]
            first_ex_text = []
            partner_persona = []
            your_persona = []
            for line in first_ex['text'].split('\n'):
                # NOTE: we flip "your" and "partner" here since we are taking the 'text'
                # field instead of the 'label'
                if 'partner\'s persona: ' in line:
                    your_persona.append(line.split('partner\'s persona: ')[1])
                elif 'your persona: ' in line:
                    partner_persona.append(line.split('your persona: ')[1])
                else:
                    first_ex_text.append(line)

            your, your_prob, partner, partner_prob = self.get_genders(
                your_persona, partner_persona)

            for i, ex in enumerate(current_episode):
                counts['self'][your] += 1
                counts['partner'][partner] += 1
                if i == 0:
                    text = '\n'.join(first_ex_text)
                else:
                    text = ex['text']
                new_ex = {
                    'text': text,
                    'episode_done': True,
                    'your_persona': '\n'.join(your_persona),
                    'partner_persona': '\n'.join(partner_persona),
                    'id': 'ConvAI2 Gender',
                }
                if not self.use_probably:
                    new_ex['partner_prob'] = partner_prob
                    new_ex['your_prob'] = your_prob

                if your is not None and self.labels_to_use != 'partner':
                    # Get the your task
                    labels = [f'SELF:{your}']
                    your_ex = deepcopy(new_ex)
                    your_ex['labels'] = labels
                    your_ex['class_type'] = 'self'
                    new_eps.append(your_ex)

                if partner is not None and self.labels_to_use != 'self':
                    # Get the partner task
                    labels = [f'PARTNER:{partner}']
                    partner_ex = deepcopy(new_ex)
                    partner_ex['labels'] = labels
                    partner_ex['class_type'] = 'partner'
                    new_eps.append(partner_ex)

        if self.labels_to_use == 'all' and self.add_unknown_classes:
            # load about data
            all_about_data = gend_utils.get_inferred_about_data(
                self.opt['task'], self.opt)
            sample_rate = self.opt['unknown_temp']
            if sample_rate < 1.0:
                to_samp = int(sample_rate * len(all_about_data))
                sampled = random.sample(all_about_data, to_samp)
                new_eps += sampled
            else:
                new_eps += all_about_data

        if self.is_train:
            random.shuffle(new_eps)

        self.data = new_eps
        print(f'Missing cnt: {self.missing_cnt} / {len(self.data) * 2}')
        for x in ['self', 'partner']:
            print(f'Totals for {x}:')
            subtot = sum(counts[x].values())
            for k, v in counts[x].items():
                print(f'\t{k}: {v} ({v / subtot})')
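
A quick illustration of the persona-line parsing above, on a made-up line:

line = "partner's persona: i love hiking ."
assert line.split("partner's persona: ")[1] == 'i love hiking .'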
Example 3
    def forward(self, input, positions=None, segments=None):
        """
        Forward pass.

        :param LongTensor[batch,seqlen] input:
            The input IDs
        :param LongTensor[batch,seqlen] positions:
            Position indices for each token; if None, they are computed from
            the padding mask.
        :param LongTensor[batch,seqlen] segments:
            If provided, additionally adds ``segments`` as extra embedding features.
        """
        mask = input != self.padding_idx
        if positions is None:
            positions = (mask.cumsum(dim=1, dtype=torch.int64) -
                         1).clamp_(min=0)
        tensor = self.embeddings(input)
        if self.embeddings_scale:
            tensor = tensor * np.sqrt(self.dim)

        if positions.max().item() > self.n_positions:
            warn_once(
                'You are inputting a sequence of {x} length, but only have '
                '--n-positions {y}. Set --truncate or increase --n-positions'.
                format(x=positions.max().item(), y=self.n_positions))
        position_embs = self.position_embeddings(positions).expand_as(tensor)
        tensor = tensor + position_embs

        if self.n_segments >= 1:
            if segments is None:
                segments = torch.zeros_like(input)
            tensor = tensor + self.segment_embeddings(segments)

        if self.variant == 'xlm':
            tensor = _normalize(tensor, self.norm_embeddings)

        # --dropout on the embeddings
        tensor = self.dropout(tensor)

        tensor *= mask.unsqueeze(-1).type_as(tensor)

        if getattr(self.layers, 'is_model_parallel', False):
            # factored out for readability. It is equivalent to the other
            # condition
            tensor = self._apply_model_parallel(tensor, mask)
        else:
            for i in range(self.n_layers):
                tensor = self.layers[i](tensor, mask)

        if self.variant == 'prelayernorm':
            tensor = _normalize(tensor, self.norm_embeddings)
        tensor *= self.output_scaling
        if self.reduction_type == 'first':
            return tensor[:, 0, :]
        elif self.reduction_type == 'max':
            return tensor.max(dim=1)[0]
        elif self.reduction_type == 'mean':
            divisor = mask.float().sum(dim=1).unsqueeze(-1).clamp(
                min=1).type_as(tensor)
            output = tensor.sum(dim=1) / divisor
            return output
        elif self.reduction_type is None or 'none' in self.reduction_type:
            return tensor, mask
        else:
            raise ValueError("Can't handle --reduction-type {}".format(
                self.reduction_type))
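
A small standalone check of how positions are derived from the padding mask above (made-up token IDs, padding index assumed to be 0):

import torch

tokens = torch.tensor([[5, 6, 7, 0, 0], [3, 4, 0, 0, 0]])
mask = tokens != 0
positions = (mask.cumsum(dim=1, dtype=torch.int64) - 1).clamp_(min=0)
# positions -> [[0, 1, 2, 2, 2], [0, 1, 1, 1, 1]]; padded slots repeat the last real index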
Example 4
    def __init__(self, opt: Opt, shared=None):
        init_model, self.is_finetune = self._get_init_model(opt, shared)
        super().__init__(opt, shared)

        # set up classes
        if opt.get('classes') is None and opt.get('classes_from_file') is None:
            raise RuntimeError(
                'Must specify --classes or --classes-from-file argument.')
        if not shared:
            if opt['classes_from_file'] is not None:
                with open(opt['classes_from_file']) as f:
                    self.class_list = f.read().splitlines()
            else:
                self.class_list = opt['classes']
            self.class_dict = {val: i for i, val in enumerate(self.class_list)}
            if opt.get('class_weights', None) is not None:
                self.class_weights = opt['class_weights']
            else:
                self.class_weights = [1.0 for c in self.class_list]
            self.reset_metrics()
        else:
            self.class_list = shared['class_list']
            self.class_dict = shared['class_dict']
            self.class_weights = shared['class_weights']

        # get reference class; if opt['get_all_metrics'] is False, this is
        # used to compute metrics
        # in binary classification, opt['threshold'] applies to ref class
        if opt['ref_class'] is None or opt['ref_class'] not in self.class_dict:
            self.ref_class = self.class_list[0]
        else:
            self.ref_class = opt['ref_class']
            ref_class_id = self.class_list.index(self.ref_class)
            if ref_class_id != 0:
                # move to the front of the class list
                self.class_list.insert(0, self.class_list.pop(ref_class_id))
        if not opt['get_all_metrics']:
            warn_once('Using %s as the class for computing P, R, and F1' %
                      self.ref_class)

        # set up threshold, only used in binary classification
        if len(self.class_list) == 2 and opt.get('threshold', 0.5) != 0.5:
            self.threshold = opt['threshold']
        else:
            self.threshold = None

        # set up model and optimizers

        if shared:
            self.model = shared['model']
        else:
            self.model = self.build_model()
            self.criterion = self.build_criterion()
            if self.model is None or self.criterion is None:
                raise AttributeError(
                    'build_model() and build_criterion() need to return the model or criterion'
                )
            if self.use_cuda:
                self.model.cuda()
                self.criterion.cuda()
            if init_model:
                print('Loading existing model parameters from ' + init_model)
                self.load(init_model)
            if self.use_cuda:
                if self.opt['data_parallel']:
                    if is_distributed():
                        raise ValueError(
                            'Cannot combine --data-parallel and distributed mode'
                        )
                    self.model = torch.nn.DataParallel(self.model)
        if shared:
            # We don't use get here because hasattr is used on optimizer later.
            if 'optimizer' in shared:
                self.optimizer = shared['optimizer']
        else:
            optim_params = [
                p for p in self.model.parameters() if p.requires_grad
            ]
            self.init_optim(optim_params)
            self.build_lr_scheduler()
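
A toy run of the reference-class reordering above, with hypothetical class names:

class_list = ['ok', 'notok']
ref_class = 'notok'
ref_class_id = class_list.index(ref_class)
if ref_class_id != 0:
    class_list.insert(0, class_list.pop(ref_class_id))
assert class_list == ['notok', 'ok']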
Example 5
    def __init__(
        self,
        opt: Opt,
        embedding: Optional[nn.Embedding] = None,
        n_positions: Optional[int] = None,
    ):
        super().__init__()

        def _default(val, default):
            return val if val is not None else default

        self.embedding_size = opt['embedding_size']
        self.ffn_size = opt['ffn_size']
        self.n_layers = (opt['n_decoder_layers']
                         if opt.get('n_decoder_layers', -1) > 0 else
                         opt['n_layers'])
        self.n_heads = opt['n_heads']
        self.dim = self.embedding_size
        self.activation = opt.get('activation', 'relu')
        self.variant = opt.get('variant', 'aiayn')

        self.embeddings_scale = opt.get('embeddings_scale', True)
        dropout_frac = opt.get('dropout', 0.0)
        self.dropout = nn.Dropout(p=dropout_frac)  # --dropout

        self.n_positions = _default(n_positions,
                                    get_n_positions_from_options(opt))
        self.out_dim = self.embedding_size
        assert (self.embedding_size % self.n_heads == 0
                ), 'Transformer embedding size must be a multiple of n_heads'

        self.embeddings = embedding

        if (self.variant == 'xlm' or self.variant == 'prelayernorm'
                or self.variant == 'bart'):
            self.norm_embeddings = torch.nn.LayerNorm(self.dim,
                                                      eps=LAYER_NORM_EPS)
            if self.variant == 'xlm':
                warn_once(
                    'DEPRECATED: XLM should only be used for backwards compatibility, '
                    'as it involves a less-stable layernorm operation.')
        elif self.variant == 'aiayn':
            pass
        else:
            raise ValueError("Can't handle --variant {}".format(self.variant))

        # create the positional embeddings
        self.position_embeddings = nn.Embedding(self.n_positions,
                                                self.embedding_size)
        if not opt.get('learn_positional_embeddings', False):
            create_position_codes(
                self.n_positions,
                self.embedding_size,
                out=self.position_embeddings.weight,
            )
        else:
            nn.init.normal_(self.position_embeddings.weight, 0,
                            self.embedding_size**-0.5)

        # build the model
        self.layers = nn.ModuleList()
        for _ in range(self.n_layers):
            self.layers.append(
                TransformerDecoderLayer(
                    self.n_heads,
                    self.embedding_size,
                    self.ffn_size,
                    attention_dropout=opt.get('attention_dropout', 0.0),
                    relu_dropout=opt.get('relu_dropout', 0.0),
                    dropout=dropout_frac,
                    activation=self.activation,
                    variant=self.variant,
                ))
Example 6
    def load_from_chunk(self, chunk_idx: int):
        """
        [Abstract] Given the chunk index, load examples from that chunk.

        Return a list of tuples. The function `_create_message` will take these tuples
        to form the Message object that is returned by the teacher.
        """
        output = []
        chunk_path = self.chunk_idx_to_file[chunk_idx]

        extra_data = []
        with open(chunk_path) as wf:
            for article_json in wf:
                article = json.loads(article_json)
                title = article['title']
                text = article['text']

                title = title.split(' (')[0]
                is_person = check_if_person(title)
                if not is_person:
                    continue

                gender = get_gender(text)

                label = f'ABOUT:{gender}'
                for par in text.split('\n'):
                    if par:
                        output.append((par, title, label, gender, 'about'))
                        self.counts[gender] += 1

                        if self.add_unknown_classes:
                            extra_data.append((
                                par,
                                title,
                                f'SELF:{gend_utils.UNKNOWN}',
                                gender,
                                'self',
                            ))
                            extra_data.append((
                                par,
                                title,
                                f'PARTNER:{gend_utils.NEUTRAL}',
                                gender,
                                'partner',
                            ))

        if len(extra_data) > 0:
            # possibly sample unknown classes
            sample_rate = self.opt['unknown_temp']
            if sample_rate < 1.0:
                to_samp = int(sample_rate * len(extra_data))
                sampled = random.sample(extra_data, to_samp)
                output += sampled
            else:
                output += extra_data

        if DEBUG:
            print('\n\nGender count update:')
            for k, v in self.counts.items():
                print(f'{k}: {v}')

        if (self.is_train
                and self.opt['balance']) or (self.is_valid
                                             and self.opt['balance_valid']):
            exclude_lst = [
                f'ABOUT:{gend_utils.NONBINARY}',
                f'SELF:{gend_utils.UNKNOWN}',
                f'PARTNER:{gend_utils.NEUTRAL}',
            ]  # not enough of each of these examples to balance
            output = gend_utils.balance_data(output,
                                             key=2,
                                             exclude_labels=exclude_lst)

        if len(output) == 0:
            warn_once(f'CHUNK {chunk_idx} is empty')

        return output
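
A sketch of the unknown-class down-sampling above, with a hypothetical sample rate:

import random

extra_data = [(f'text {i}', 'Title', 'SELF:unknown', 'unknown', 'self') for i in range(10)]
sample_rate = 0.3
sampled = random.sample(extra_data, int(sample_rate * len(extra_data)))
assert len(sampled) == 3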
Example 7
    def lr_scheduler_factory(cls, opt, optimizer, states, hard_reset=False):
        """
        Create the learning rate scheduler and return it.

        The scheduler is updated upon a call to receive_metrics. A warmup
        scheduler may also be created, if appropriate.

        :param opt opt:
            Arguments received by torch_agent
        :param optimizer optimizer:
            Optimizer being used for training. May be wrapped in
            fp16_optimizer_wrapper depending on whether fp16 is used.
        :param state_dict states:
            Possible state_dict provided by model checkpoint, for restoring
            LR state.
        :param bool hard_reset:
            If true, the LR scheduler should ignore the state dictionary.
        :return: ParlAILRScheduler object
        """

        patience = opt.get('lr_scheduler_patience', 3)
        decay = opt.get('lr_scheduler_decay', 0.5)
        warmup_updates = opt.get('warmup_updates', -1)
        warmup_rate = opt.get('warmup_rate', 1e-4)
        max_lr_steps = opt.get('max_train_steps', -1)
        if opt.get('max_lr_steps', -1) > 0:
            raise ValueError(
                '--max-lr-steps is **DEPRECATED**; please set --max-train-steps directly'
            )
        invsqrt_lr_decay_gamma = opt.get('invsqrt_lr_decay_gamma', -1)

        if opt.get('lr_scheduler') == 'none':
            return None
        elif decay == 1.0:
            warn_once(
                "Your LR decay is set to 1.0. Assuming you meant you wanted "
                "to disable learning rate scheduling. Adjust --lr-scheduler-decay "
                "if this is not correct.")
            return None
        elif opt.get('lr_scheduler') == 'reduceonplateau':
            scheduler = ReduceOnPlateauLRScheduler(optimizer, hard_reset,
                                                   patience, decay,
                                                   warmup_updates, warmup_rate)
        elif opt.get('lr_scheduler') == 'fixed':
            scheduler = FixedLRScheduler(optimizer, hard_reset, patience,
                                         decay, warmup_updates, warmup_rate)
        elif opt.get('lr_scheduler') == 'invsqrt':
            scheduler = InvSqrtLRScheduler(
                optimizer,
                hard_reset,
                patience,
                decay,
                warmup_updates,
                warmup_rate,
                invsqrt_lr_decay_gamma,
                max_lr_steps,
            )
        elif opt.get('lr_scheduler') == 'cosine':
            scheduler = CosineLRScheduler(
                optimizer,
                hard_reset,
                patience,
                decay,
                warmup_updates,
                warmup_rate,
                max_lr_steps,
            )
        elif opt.get('lr_scheduler') == 'linear':
            scheduler = LinearLRScheduler(
                optimizer,
                hard_reset,
                patience,
                decay,
                warmup_updates,
                warmup_rate,
                max_lr_steps,
            )
        else:
            raise ValueError(
                "Don't know what to do with --lr-scheduler '{}'".format(
                    opt.get('lr_scheduler')))

        # time to load LR state from the checkpoint, if possible.
        if (
                # there is already an old LR scheduler saved on disk
                states
                # and there was a scheduler in the dump
                and 'lr_scheduler_type' in states
                # and the old LR scheduler is different
                and states.get('lr_scheduler_type') != opt['lr_scheduler']
                # and we're not already using a fresh scheduler
                and not hard_reset):
            # the LR scheduler changed, start things fresh
            warn_once(
                f"LR scheduler ({opt['lr_scheduler']}) is different from saved "
                f"({states.get('lr_scheduler_type')}). Starting fresh!")
            hard_reset = True

        if not hard_reset:
            # do the actual loading (if possible)
            scheduler.load_state(states)

        # setup warmup scheduler after loading saved scheduler
        scheduler._init_warmup_scheduler(optimizer, states)

        return scheduler
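
A hypothetical call to the factory, assuming it is a classmethod on the ParlAILRScheduler class named in the docstring; the opt keys below mirror the ones read above:

import torch

model = torch.nn.Linear(4, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
opt = {
    'lr_scheduler': 'invsqrt',
    'lr_scheduler_patience': 3,
    'lr_scheduler_decay': 0.5,
    'warmup_updates': 100,
    'warmup_rate': 1e-4,
    'invsqrt_lr_decay_gamma': -1,
    'max_train_steps': -1,
}
scheduler = ParlAILRScheduler.lr_scheduler_factory(opt, optimizer, states={}, hard_reset=True)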
Example 8
    def forward(self, input, encoder_state, embedded_input=None, incr_state=None):
        """
        Forward pass with the ability to pass in token-embedded inputs.
        """

        encoder_output, encoder_mask = encoder_state

        if input is not None:
            seq_len = input.size(1)
            positions = input.new(seq_len).long()
        else:
            seq_len = embedded_input.size(1)
            positions = embedded_input.new(seq_len).long()
        positions = torch.arange(seq_len, out=positions).unsqueeze(0)

        if incr_state is not None:
            # We're doing incremental decoding, so select only the most recent position
            if input is not None:
                input = input[:, -1:]
            if embedded_input is not None:
                embedded_input = embedded_input[:, -1:, :]
            if positions is not None:
                positions = positions[:, -1:]
        else:
            incr_state = {}

        if embedded_input is not None:
            tensor = embedded_input  # No need to copy because we only reassign below
        else:
            tensor = self.embeddings(input)
        if self.embeddings_scale:
            tensor = tensor * np.sqrt(self.dim)
        if self.variant == 'xlm':
            tensor = _normalize(tensor, self.norm_embeddings)
        if positions.max().item() > self.n_positions:
            warn_once(
                'You are inputting a sequence of {x} length, but only have '
                '--n-positions {y}. Set --truncate or increase --n-positions'.format(
                    x=positions.max().item(), y=self.n_positions
                )
            )
        tensor = tensor + self.position_embeddings(positions).expand_as(tensor)
        tensor = self.dropout(tensor)  # --dropout

        new_incr_state = {}
        if getattr(self.layers, 'is_model_parallel', False):
            tensor, new_incr_state = self._apply_model_parallel(
                tensor, encoder_output, encoder_mask, incr_state
            )
        else:
            for idx, layer in enumerate(self.layers):
                tensor, new_incr_state[idx] = layer(
                    x=tensor,
                    encoder_output=encoder_output,
                    encoder_mask=encoder_mask,
                    incr_state=incr_state.get(idx),
                )

        if self.variant == 'prelayernorm':
            tensor = _normalize(tensor, self.norm_embeddings)

        return tensor, new_incr_state
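
A toy check of the incremental-decoding slicing above; on each incremental step only the newest token and position are kept:

import torch

tokens = torch.tensor([[11, 12, 13, 14]])
positions = torch.arange(tokens.size(1)).unsqueeze(0)   # [[0, 1, 2, 3]]
assert tokens[:, -1:].tolist() == [[14]]
assert positions[:, -1:].tolist() == [[3]]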
Example 9
def warn(txt, act, opt):
    if opt.get('display_examples'):
        print(txt + ":\n" + str(act))
    else:
        warn_once(txt)
Example 10
    def train(self):
        """
        Perform a training run.

        :return: tuple of reports (validation_report, test_report)
        """
        if is_distributed():
            warn_once(
                "Distributed training outputs average-per-worker metrics during "
                "training, and may be slightly distorted. Validation/test are "
                "unadulterated.")
        opt = self.opt
        world = self.world
        with world:
            while True:
                # do one example / batch of examples
                try:
                    world.parley()
                except StopTrainException:
                    if is_distributed():
                        raise RuntimeError(
                            "StopTrainException not supported for "
                            "distributed mode")
                    break

                self.parleys += 1

                # get the total training examples done, compute epochs
                self._total_epochs = (
                    self._preempted_epochs +
                    num_workers() * self.world.get_total_epochs())
                exs_per_epoch = self.world.num_examples()
                self._total_exs = int(
                    np.round(self._total_epochs * exs_per_epoch))

                # and use the primary worker's timings for everything
                train_time, log_time, validate_time = sync_object((
                    self.train_time.time(),
                    self.log_time.time(),
                    self.validate_time.time(),
                ))

                # check counters and timers
                if self._total_epochs >= self.max_num_epochs:
                    self.log()
                    print(
                        '[ num_epochs completed:{} time elapsed:{}s ]'.format(
                            self.max_num_epochs, train_time))
                    break
                if train_time > self.max_train_time:
                    print('[ max_train_time elapsed:{}s ]'.format(train_time))
                    break
                if log_time > self.log_every_n_secs:
                    self.log()
                if (validate_time > self.val_every_n_secs
                        or self._total_epochs - self.last_valid_epoch >=
                        self.val_every_n_epochs):
                    try:
                        stop_training = self.validate()
                    except StopTrainException:
                        if is_distributed():
                            raise RuntimeError(
                                "StopTrainException not "
                                "supported for distributed mode")
                        break
                    self.last_valid_epoch = self._total_epochs
                    if stop_training:
                        break
                if (self.save_time.time() > self.save_every_n_secs
                        and opt.get('model_file') and is_primary_worker()):
                    print("[ saving model checkpoint: {}.checkpoint".format(
                        opt['model_file']))
                    self.save_model('.checkpoint')
                    self.save_time.reset()

        if not self.saved and is_primary_worker():
            # save agent
            self.save_model()
        elif opt.get('model_file'):
            # reload best validation model
            self.agent = create_agent(opt)

        valid_worlds = _maybe_load_eval_worlds(self.agent, opt, 'valid')
        max_exs = opt['validation_max_exs'] if opt.get(
            'short_final_eval') else -1
        v_report = run_eval(valid_worlds,
                            opt,
                            'valid',
                            max_exs,
                            write_log=True)
        test_worlds = _maybe_load_eval_worlds(self.agent, opt, 'test')
        t_report = run_eval(test_worlds, opt, 'test', max_exs, write_log=True)
        if valid_worlds:
            for valid_world in valid_worlds:
                valid_world.shutdown()
        if test_worlds:
            for test_world in test_worlds:
                test_world.shutdown()

        print_announcements(opt)

        return v_report, t_report
Example 11
    def __init__(self, opt, shared=None):
        opt['batch_sort'] = False
        super().__init__(opt, shared)
        self.use_batch_act = self.bsz > 1
        self.num_workers = opt['numworkers']
        self.batch_sort = (opt.get('pytorch_teacher_batch_sort')
                           and 'train' in self.datatype)
        self.batch_cache_type = opt.get('batch_sort_cache_type')
        self.batch_sort_field = opt.get('batch_sort_field')
        # One can specify a collate function to use for preparing a batch
        self.opt = opt.copy()
        self.is_shared = shared is not None
        dataset_classes = self._get_dataset_class(opt)
        self.ordered = 'ordered' in self.datatype or (
            'stream' in self.datatype and not opt.get('shuffle'))
        if self.ordered:
            # force index for ordered, so that we see every example
            warn_once('\nNote: You are using PytorchDataTeacher with ordered '
                      'examples. Please specify `--shuffle` if you would like '
                      'to have examples loaded in randomized order.\n')
            self.batch_cache_type = 'index'

        if not shared:
            BatchSortCache.create()
            if len(dataset_classes) > 1:
                datasets = []
                for class_name, collate_fn, task_name in dataset_classes:
                    dataset_opt = opt.copy()
                    dataset_opt['pytorch_teacher_task'] = task_name
                    dataset_opt['task'] = task_name
                    datasets.append(class_name(dataset_opt))
                    self.collate_fn = collate_fn
                self.id = ','.join([d[2] for d in dataset_classes])
                self.dataset = ParlAIConcatDataset(datasets)
            else:
                class_name, self.collate_fn, task_name = dataset_classes[0]
                self.id = task_name
                self.dataset = class_name(opt)
            if self.ordered or not self.training:
                data_sampler = sampler.SequentialSampler(self.dataset)
            else:
                data_sampler = sampler.RandomSampler(self.dataset)

            self.pytorch_dataloader = DataLoader(
                self.dataset,
                batch_size=self.bsz,
                sampler=data_sampler,
                num_workers=self.num_workers,
                collate_fn=self.collate_fn,
                pin_memory=False,
                drop_last=False,
            )

            self.lastYs = [None] * self.bsz
            if self.batch_sort:
                self.loader_process = LoaderProcess(opt)
                self.loader_process.start()
            self.data = enumerate(self.pytorch_dataloader)
        else:
            self.dataset = shared['dataset']
            self.pytorch_dataloader = shared['pytorch_dataloader']
            self.lastYs = shared['lastYs']
            self.data = shared['data']
            self.id = shared['id']

        self.num_batches = math.ceil(self.dataset.num_episodes() / self.bsz)
        self.reset()
Example 12
import math
from typing import Dict, Tuple, Optional

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

from parlai.core.torch_generator_agent import TorchGeneratorModel
from parlai.utils.misc import warn_once
from parlai.utils.torch import neginf

try:
    from apex.normalization.fused_layer_norm import FusedLayerNorm as LayerNorm
except ImportError:
    warn_once("Installing APEX can give a significant speed boost.")
    from torch.nn import LayerNorm

LAYER_NORM_EPS = 1e-5  # Epsilon for layer norm.


def _normalize(tensor, norm_layer):
    """
    Broadcast layer norm.
    """
    size = tensor.size()
    return norm_layer(tensor.view(-1, size[-1])).view(size)
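
# A small check of the broadcast behavior of _normalize, using whichever
# LayerNorm was imported above (the shapes below are made up):
_ln = LayerNorm(8, eps=LAYER_NORM_EPS)
_x = torch.randn(2, 5, 8)                      # [batch, seqlen, dim]
assert _normalize(_x, _ln).shape == _x.shape   # flattened to [-1, dim], normalized, restored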


def _create_embeddings(dictionary, embedding_size, padding_idx):
    """
    Create and initialize word embeddings.
    """
    # NOTE: the original snippet was truncated after the docstring opened; the
    # body below is an assumed sketch (normal-initialized nn.Embedding with a
    # zeroed padding row), not necessarily the exact upstream implementation.
    e = nn.Embedding(len(dictionary), embedding_size, padding_idx)
    nn.init.normal_(e.weight, 0, embedding_size ** -0.5)
    nn.init.constant_(e.weight[padding_idx], 0)
    return e
Example 13
def self_chat(opt, print_parser=None):
    if print_parser is not None:
        if print_parser is True and isinstance(opt, ParlaiParser):
            print_parser = opt
        elif print_parser is False:
            print_parser = None
    if isinstance(opt, ParlaiParser):
        print(
            '[ Deprecated Warning: self_chat should be passed opt not Parser ]'
        )
        opt = opt.parse_args()

    random.seed(opt['seed'])
    # Create models
    agent1 = create_agent(opt, requireModelExists=True)
    agent2 = agent1.clone()
    if hasattr(agent2, 'id'):
        agent2.id = agent2.id + "2"

    # Check for `selfchat` in the task name
    if 'selfchat' not in opt['task']:
        warn_once(
            'You are using self chat with task {}. '.format(opt['task']) +
            'If your task has an existing self chat world, then run with '
            '-t {}:selfchat'.format(opt['task']))

    world = create_task(opt, [agent1, agent2])

    if print_parser:
        # Show arguments after loading model
        print_parser.opt = agent1.opt
        print_parser.print_args()

    # set up logging
    log_every_n_secs = opt.get('log_every_n_secs', -1)
    if log_every_n_secs <= 0:
        log_every_n_secs = float('inf')
    log_time = TimeLogger()
    logger = WorldLogger(opt)

    # Run some self chats.
    max_cnt = int(opt['num_examples'] * opt.get('selfchat_max_turns') /
                  opt.get('batchsize'))
    cnt = 0
    for _ in tqdm.trange(max_cnt):
        cnt += opt.get('batchsize', 1)
        world.parley()
        logger.log(world)

        if opt.get('display_examples'):
            print(world.display())
        if log_time.time() > log_every_n_secs:
            text = log_time.log(cnt, max_cnt)
            print(text)

    if opt.get('display_examples'):
        print('-- end of episode --')

    logger.reset_world()  # flush last episode
    indent = opt['indent'] if opt['indent'] >= 0 else None
    logger.write(opt['outfile'], opt['format'], indent=indent)
    return logger.get_logs()
Example 14
    def validate(self):
        """
        Perform a validation run, checking whether we should stop training.

        :return: boolean indicating whether training should stop
        :rtype: bool
        """
        opt = self.opt

        if self.valid_worlds is None:
            # we need to load the world now
            self.valid_worlds = load_eval_worlds(self.agent, opt, 'valid')

        # run evaluation on valid set
        # TODO(MW): replace sync_object with self._sync_metrics. You'll need some
        # logic to handle 'validation_max_exs' properly
        valid_report = run_eval(self.valid_worlds, opt, 'valid',
                                opt['validation_max_exs'])
        v = valid_report.copy()
        v['train_time'] = self.train_time.time()
        self.valid_reports.append(v)
        # logging
        if opt['tensorboard_log'] and is_primary_worker():
            valid_report['total_exs'] = self._total_exs
            self.tb_logger.log_metrics('valid', self.parleys, valid_report)
            # flush on a validation
            self.tb_logger.flush()
        # saving
        if (opt.get('model_file') and opt.get('save_after_valid')
                and is_primary_worker()):
            print("[ saving model checkpoint: " + opt['model_file'] +
                  ".checkpoint ]")
            self.save_model('.checkpoint')

        # send valid metrics to agent if the agent wants them
        if hasattr(self.agent, 'receive_metrics'):
            self.agent.receive_metrics(valid_report)

        # --------------- change by hengyicai -------------------------
        teacher_agent = self.return_teacher_agent()
        if teacher_agent:
            teacher_agent.receive_metrics(valid_report)
        # --------------- change by hengyicai -------------------------

        # check which metric to look at
        new_valid = valid_report[opt['validation_metric']]

        if isinstance(new_valid, Metric):
            new_valid = new_valid.value()

        # check if this is the best validation so far
        if (self.best_valid is None or self.valid_optim * new_valid >
                self.valid_optim * self.best_valid):
            print('[ new best {}: {}{} ]'.format(
                opt['validation_metric'],
                new_valid,
                ' (previous best was {})'.format(self.best_valid)
                if self.best_valid is not None else '',
            ))
            self.best_valid = new_valid
            self.impatience = 0
            if opt.get('model_file') and is_primary_worker():
                print("[ saving best valid model: " + opt['model_file'] + " ]")
                self.save_model()
                self.saved = True
            if (opt['validation_metric'] == 'accuracy'
                    and self.best_valid >= opt['validation_cutoff']):
                print('[ task solved! stopping. ]')
                return True
        else:
            self.impatience += 1
            print('[ did not beat best {}: {} impatience: {} ]'.format(
                opt['validation_metric'], round(self.best_valid, 4),
                self.impatience))
        # --------------- change by hengyicai -------------------------
        if self.opt.get('cutoff_metric_name', 'none') != 'none':
            cutoff_metric_name = self.opt['cutoff_metric_name']
            cutoff_metric_val = self.opt['cutoff_metric_val']
            if cutoff_metric_name in valid_report and cutoff_metric_val > 0:
                if valid_report[cutoff_metric_name] >= cutoff_metric_val:
                    print('[ {} >= {}, stopping. ]'.format(
                        cutoff_metric_name, cutoff_metric_val))
                    return True
            elif cutoff_metric_name not in valid_report:
                warn_once('[ {} is not in the validation report! '
                          'Cannot do metric cutoff stopping! ]'.format(
                              cutoff_metric_name))
            else:
                warn_once('[ you asked to do metric cutoff stopping, '
                          'but the cutoff_metric_val <= 0! ]')

        # --------------- change by hengyicai -------------------------
        self.validate_time.reset()

        # check if we are out of patience
        if (opt['validation_patience'] > 0
                and self.impatience >= opt['validation_patience']):
            print('[ ran out of patience! stopping training. ]')
            return True
        return False
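
A hypothetical illustration of the best-validation comparison above; valid_optim is assumed to be +1 when the metric should be maximized and -1 when it should be minimized:

def is_new_best(new_valid, best_valid, valid_optim):
    return best_valid is None or valid_optim * new_valid > valid_optim * best_valid

assert is_new_best(0.80, 0.75, +1)      # accuracy went up
assert is_new_best(12.3, 15.0, -1)      # perplexity went down
assert not is_new_best(0.70, 0.75, +1)  # no improvement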
Example 15
def create_agent_from_opt_file(opt: Opt):
    """
    Load agent options and module from file if opt file exists.

    Checks to see if file exists opt['model_file'] + ".opt"; if so, load up the
    options from the file and use that to create an agent, loading the model
    type from that file and overriding any options specified in that file when
    instantiating the agent.

    If that file does not exist, return None.
    """
    model_file = opt['model_file']
    optfile = model_file + '.opt'

    if not PathManager.exists(optfile):
        return None

    opt_from_file = Opt.load(optfile)

    # delete args that we do not want to copy over when loading the model
    for arg in NOCOPY_ARGS:
        if arg in opt_from_file:
            del opt_from_file[arg]

    # only override opts specified in 'override' dict
    if opt.get('override'):
        for k, v in opt['override'].items():
            if k in opt_from_file and str(v) != str(opt_from_file.get(k)):
                logging.warn(
                    f'Overriding opt["{k}"] to {v} (previously: {opt_from_file.get(k)})'
                )
            opt_from_file[k] = v

    model_class = load_agent_module(opt_from_file['model'])

    if hasattr(model_class, 'upgrade_opt'):
        opt_from_file = model_class.upgrade_opt(opt_from_file)

    # add model arguments to opt_from_file if they aren't in opt_from_file already
    for k, v in opt.items():
        if k not in opt_from_file:
            opt_from_file[k] = v

    opt_from_file['model_file'] = model_file  # update model file path

    # update dict file path
    if not opt_from_file.get('dict_file'):
        old_dict_file = None
        opt_from_file['dict_file'] = model_file + '.dict'
    elif opt_from_file.get('dict_file') and not PathManager.exists(
        opt_from_file['dict_file']
    ):
        old_dict_file = opt_from_file['dict_file']
        opt_from_file['dict_file'] = model_file + '.dict'
    if not PathManager.exists(opt_from_file['dict_file']):
        warn_once(
            'WARNING: Neither the specified dict file ({}) nor the '
            '`model_file`.dict file ({}) exists, check to make sure either '
            'is correct. This may manifest as a shape mismatch later '
            'on.'.format(old_dict_file, opt_from_file['dict_file'])
        )

    # if we want to load weights from --init-model, compare opts with
    # loaded ones
    compare_init_model_opts(opt, opt_from_file)
    return model_class(opt_from_file)
Example 16
    def _build_candidates(self, batch, source, mode):
        """
        Build a candidate set for this batch.

        :param batch:
            a Batch object (defined in torch_agent.py)
        :param source:
            the source from which candidates should be built, one of
            ['batch', 'batch-all-cands', 'inline', 'fixed', 'vocab']
        :param mode:
            'train' or 'eval'

        :return: tuple (cands, cand_vecs, label_inds)

            label_inds: A [bsz] LongTensor of the indices of the labels for each
                example from its respective candidate set
            cands: A [num_cands] list of (text) candidates
                OR a [batchsize] list of such lists if source=='inline'
            cand_vecs: A padded [num_cands, seqlen] LongTensor of vectorized candidates
                OR a [batchsize, num_cands, seqlen] LongTensor if source=='inline'

        Possible sources of candidates:

            * batch: the set of all labels in this batch
                Use all labels in the batch as the candidate set (with all but the
                example's label being treated as negatives).
                Note: with this setting, the candidate set is identical for all
                examples in a batch. This option may be undesirable if it is possible
                for duplicate labels to occur in a batch, since the second instance of
                the correct label will be treated as a negative.
            * batch-all-cands: the set of all candidates in this batch
                Use all candidates in the batch as candidate set.
                Note 1: This can result in a very large number of candidates.
                Note 2: In this case we will deduplicate candidates.
                Note 3: just like with 'batch' the candidate set is identical
                for all examples in a batch.
            * inline: batch_size lists, one list per example
                If each example comes with a list of possible candidates, use those.
                Note: With this setting, each example will have its own candidate set.
            * fixed: one global candidate list, provided in a file from the user
                If self.fixed_candidates is not None, use a set of fixed candidates for
                all examples.
                Note: this setting is not recommended for training unless the
                universe of possible candidates is very small.
            * vocab: one global candidate list, extracted from the vocabulary with the
                exception of self.NULL_IDX.
        """
        label_vecs = batch.label_vec  # [bsz] list of lists of LongTensors
        label_inds = None
        batchsize = (batch.text_vec.size(0)
                     if batch.text_vec is not None else batch.image.size(0))

        if label_vecs is not None:
            assert label_vecs.dim() == 2

        if source == 'batch':
            warn_once(
                '[ Executing {} mode with batch labels as set of candidates. ]'
                ''.format(mode))
            if batchsize == 1:
                warn_once(
                    "[ Warning: using candidate source 'batch' and observed a "
                    "batch of size 1. This may be due to uneven batch sizes at "
                    "the end of an epoch. ]")
            if label_vecs is None:
                raise ValueError(
                    "If using candidate source 'batch', then batch.label_vec cannot be "
                    "None.")

            cands = batch.labels
            cand_vecs = label_vecs
            label_inds = label_vecs.new_tensor(range(batchsize))

        elif source == 'batch-all-cands':
            warn_once(
                '[ Executing {} mode with all candidates provided in the batch ]'
                ''.format(mode))
            if batch.candidate_vecs is None:
                raise ValueError(
                    "If using candidate source 'batch-all-cands', then batch."
                    "candidate_vecs cannot be None. If your task does not have "
                    "inline candidates, consider using one of "
                    "--{m}={{'batch','fixed','vocab'}}."
                    "".format(m='candidates' if mode ==
                              'train' else 'eval-candidates'))
            # initialize the list of cands with the labels
            cands = []
            all_cands_vecs = []
            # dictionary used for deduplication
            cands_to_id = {}
            for i, cands_for_sample in enumerate(batch.candidates):
                for j, cand in enumerate(cands_for_sample):
                    if cand not in cands_to_id:
                        cands.append(cand)
                        cands_to_id[cand] = len(cands_to_id)
                        all_cands_vecs.append(batch.candidate_vecs[i][j])
            cand_vecs, _ = padded_tensor(
                all_cands_vecs,
                self.NULL_IDX,
                use_cuda=self.use_cuda,
                fp16friendly=self.fp16,
            )
            label_inds = label_vecs.new_tensor(
                [cands_to_id[label] for label in batch.labels])

        elif source == 'inline':
            warn_once(
                '[ Executing {} mode with provided inline set of candidates ]'
                ''.format(mode))
            if batch.candidate_vecs is None:
                raise ValueError(
                    "If using candidate source 'inline', then batch.candidate_vecs "
                    "cannot be None. If your task does not have inline candidates, "
                    "consider using one of --{m}={{'batch','fixed','vocab'}}."
                    "".format(m='candidates' if mode ==
                              'train' else 'eval-candidates'))

            cands = batch.candidates
            cand_vecs = padded_3d(
                batch.candidate_vecs,
                self.NULL_IDX,
                use_cuda=self.use_cuda,
                fp16friendly=self.fp16,
            )
            if label_vecs is not None:
                label_inds = label_vecs.new_empty((batchsize))
                bad_batch = False
                for i, label_vec in enumerate(label_vecs):
                    label_vec_pad = label_vec.new_zeros(
                        cand_vecs[i].size(1)).fill_(self.NULL_IDX)
                    if cand_vecs[i].size(1) < len(label_vec):
                        label_vec = label_vec[0:cand_vecs[i].size(1)]
                    label_vec_pad[0:label_vec.size(0)] = label_vec
                    label_inds[i] = self._find_match(cand_vecs[i],
                                                     label_vec_pad)
                    if label_inds[i] == -1:
                        bad_batch = True
                if bad_batch:
                    if self.ignore_bad_candidates and not self.is_training:
                        label_inds = None
                    else:
                        raise RuntimeError(
                            'At least one of your examples has a set of label candidates '
                            'that does not contain the label. To ignore this error '
                            'set `--ignore-bad-candidates True`.')

        elif source == 'fixed':
            if self.fixed_candidates is None:
                raise ValueError(
                    "If using candidate source 'fixed', then you must provide the path "
                    "to a file of candidates with the flag --fixed-candidates-path or "
                    "the name of a task with --fixed-candidates-task.")
            warn_once(
                "[ Executing {} mode with a common set of fixed candidates "
                "(n = {}). ]".format(mode, len(self.fixed_candidates)))

            cands = self.fixed_candidates
            cand_vecs = self.fixed_candidate_vecs

            if label_vecs is not None:
                label_inds = label_vecs.new_empty((batchsize))
                bad_batch = False
                for batch_idx, label_vec in enumerate(label_vecs):
                    max_c_len = cand_vecs.size(1)
                    label_vec_pad = label_vec.new_zeros(max_c_len).fill_(
                        self.NULL_IDX)
                    if max_c_len < len(label_vec):
                        label_vec = label_vec[0:max_c_len]
                    label_vec_pad[0:label_vec.size(0)] = label_vec
                    label_inds[batch_idx] = self._find_match(
                        cand_vecs, label_vec_pad)
                    if label_inds[batch_idx] == -1:
                        bad_batch = True
                if bad_batch:
                    if self.ignore_bad_candidates and not self.is_training:
                        label_inds = None
                    else:
                        raise RuntimeError(
                            'At least one of your examples has a set of label candidates '
                            'that does not contain the label. To ignore this error '
                            'set `--ignore-bad-candidates True`.')

        elif source == 'vocab':
            warn_once(
                '[ Executing {} mode with tokens from vocabulary as candidates. ]'
                ''.format(mode))
            cands = self.vocab_candidates
            cand_vecs = self.vocab_candidate_vecs
            # NOTE: label_inds is None here, as we will not find the label in
            # the set of vocab candidates
        else:
            raise Exception("Unrecognized source: %s" % source)

        return (cands, cand_vecs, label_inds)
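
A toy run of the batch-all-cands deduplication above, on made-up candidate strings:

cands, cands_to_id = [], {}
for cand in ['hello there', 'hi', 'hello there']:
    if cand not in cands_to_id:
        cands.append(cand)
        cands_to_id[cand] = len(cands_to_id)
assert cands == ['hello there', 'hi']
assert cands_to_id == {'hello there': 0, 'hi': 1}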
Example 17
    def eval_step(self, batch):
        """
        Evaluate a single batch of examples.
        """
        if batch.text_vec is None and batch.image is None:
            return
        if batch.text_vec is not None:
            bsz = batch.text_vec.size(0)
        else:
            bsz = len(batch.image)
        self.model.eval()
        cand_scores = None
        token_losses = None

        if batch.label_vec is not None:
            # calculate loss on targets with teacher forcing
            loss, model_output = self.compute_loss(batch, return_output=True)
            if self.output_token_losses:
                token_losses = self._construct_token_losses(
                    batch.label_vec, model_output)

        preds = None
        if self.skip_generation:
            warn_once(
                "--skip-generation does not produce accurate metrics beyond ppl",
                RuntimeWarning,
            )
        else:
            maxlen = self.label_truncate or 256
            beam_preds_scores, _ = self._generate(batch, self.beam_size,
                                                  maxlen)
            preds, scores = zip(*beam_preds_scores)

        cand_choices = None
        # TODO: abstract out the scoring here
        if self.rank_candidates:
            # compute roughly ppl to rank candidates
            cand_choices = []
            encoder_states = self.model.encoder(*self._encoder_input(batch))
            for i in range(bsz):
                num_cands = len(batch.candidate_vecs[i])
                enc = self.model.reorder_encoder_states(
                    encoder_states, [i] * num_cands)
                cands, _ = self._pad_tensor(batch.candidate_vecs[i])
                scores, _ = self.model.decode_forced(enc, cands)
                cand_losses = F.cross_entropy(
                    scores.view(num_cands * cands.size(1), -1),
                    cands.view(-1),
                    reduction='none',
                ).view(num_cands, cands.size(1))
                # now cand_losses is cands x seqlen size, but we still need to
                # check padding and such
                mask = (cands != self.NULL_IDX).float()
                cand_scores = (cand_losses *
                               mask).sum(dim=1) / (mask.sum(dim=1) + 1e-9)
                _, ordering = cand_scores.sort()
                cand_choices.append([batch.candidates[i][o] for o in ordering])

        text = [self._v2t(p) for p in preds] if preds is not None else None
        if text and self.compute_tokenized_bleu:
            # compute additional bleu scores
            self._compute_fairseq_bleu(batch, preds)
            self._compute_nltk_bleu(batch, text)
        return Output(text, cand_choices, token_losses=token_losses)
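
A toy version of the masked per-candidate scoring above, with made-up losses; lower scores (roughly average per-token loss) rank better:

import torch

cand_losses = torch.tensor([[2.0, 1.0, 0.0], [4.0, 0.0, 0.0]])  # cands x seqlen
mask = torch.tensor([[1.0, 1.0, 0.0], [1.0, 0.0, 0.0]])         # 1 marks non-padding tokens
cand_scores = (cand_losses * mask).sum(dim=1) / (mask.sum(dim=1) + 1e-9)
# cand_scores -> tensor([1.5000, 4.0000]); sorting ascending puts the better candidate first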
Example 18
    def __init__(
        self,
        n_heads,
        n_layers,
        embedding_size,
        ffn_size,
        vocabulary_size,
        embedding=None,
        dropout=0.0,
        attention_dropout=0.0,
        relu_dropout=0.0,
        embeddings_scale=True,
        learn_positional_embeddings=False,
        padding_idx=None,
        n_positions=1024,
        n_segments=0,
        variant='aiayn',
        activation='relu',
    ):
        super().__init__()
        self.embedding_size = embedding_size
        self.ffn_size = ffn_size
        self.n_layers = n_layers
        self.n_heads = n_heads
        self.dim = embedding_size
        self.activation = activation
        self.variant = variant

        self.embeddings_scale = embeddings_scale
        self.dropout = nn.Dropout(p=dropout)  # --dropout

        self.n_positions = n_positions
        self.out_dim = embedding_size
        assert (
            embedding_size % n_heads == 0
        ), 'Transformer embedding size must be a multiple of n_heads'

        self.embeddings = embedding

        if (
            self.variant == 'xlm'
            or self.variant == 'prelayernorm'
            or self.variant == 'bart'
        ):
            self.norm_embeddings = LayerNorm(self.dim, eps=LAYER_NORM_EPS)
            if self.variant == 'xlm':
                warn_once(
                    'DEPRECATED: XLM should only be used for backwards compatibility, '
                    'as it involves a less-stable layernorm operation.'
                )
        elif self.variant == 'aiayn':
            pass
        else:
            raise ValueError("Can't handle --variant {}".format(self.variant))

        # create the positional embeddings
        self.position_embeddings = nn.Embedding(n_positions, embedding_size)
        if not learn_positional_embeddings:
            create_position_codes(
                n_positions, embedding_size, out=self.position_embeddings.weight
            )
        else:
            nn.init.normal_(self.position_embeddings.weight, 0, embedding_size ** -0.5)

        # build the model
        self.layers = nn.ModuleList()
        for _ in range(self.n_layers):
            self.layers.append(
                TransformerDecoderLayer(
                    n_heads,
                    embedding_size,
                    ffn_size,
                    attention_dropout=attention_dropout,
                    relu_dropout=relu_dropout,
                    dropout=dropout,
                    activation=activation,
                    variant=variant,
                )
            )