Code example #1
    def __init__(self,
                 labels: Union[List[str], List[int]],
                 sequence_field: SequenceField,
                 label_namespace: str = 'labels',
                 strip_sentence_symbols: bool = False) -> None:
        self.labels = labels
        self.sequence_field = sequence_field
        self._label_namespace = label_namespace
        self._indexed_labels = None
        self._maybe_warn_for_namespace(label_namespace)
        if (len(labels) != sequence_field.sequence_length()
                and not strip_sentence_symbols):
            raise ConfigurationError(
                "Label length and sequence length "
                "don't match: %d and %d" %
                (len(labels), sequence_field.sequence_length()))

        if all([isinstance(x, int) for x in labels]):
            self._indexed_labels = labels

        elif not all([isinstance(x, str) for x in labels]):
            raise ConfigurationError(
                "SequenceLabelFields must be passed either all "
                "strings or all ints. Found labels {} with "
                "types: {}.".format(labels, [type(x) for x in labels]))
Code example #2
File: train.py Project: qolina/arglinking
def create_serialization_dir(params: Params) -> None:
    """
    This function creates the serialization directory if it doesn't exist.  If it already exists
    and is non-empty, then it verifies that we're recovering from a training with an identical configuration.
    Parameters
    ----------
    params: ``Params``
        A parameter object specifying an AllenNLP Experiment.
    serialization_dir: ``str``
        The directory in which to save results and logs.
    recover: ``bool``
        If ``True``, we will try to recover from an existing serialization directory, and crash if
        the directory doesn't exist, or doesn't match the configuration we're given.
    """
    serialization_dir = params['environment']['serialization_dir']
    recover = params['environment']['recover']
    if os.path.exists(serialization_dir) and os.listdir(serialization_dir):
        if not recover:
            raise ConfigurationError(
                f"Serialization directory ({serialization_dir}) already exists and is "
                f"not empty. Specify --recover to recover training from existing output."
            )

        logger.info(f"Recovering from prior training at {serialization_dir}.")

        recovered_config_file = os.path.join(serialization_dir, CONFIG_NAME)
        if not os.path.exists(recovered_config_file):
            raise ConfigurationError(
                "The serialization directory already exists but doesn't "
                "contain a config.json. You probably gave the wrong directory."
            )
        else:
            loaded_params = Params.from_file(recovered_config_file)
            if not params['environment'].get('ignore_params_check', False):
                if params != loaded_params:
                    raise ConfigurationError(
                        "Training configuration does not match the configuration we're "
                        "recovering from.")

            # In the recover mode, we don't need to reload the pre-trained embeddings.
            remove_pretrained_embedding_params(params)
    else:
        if recover:
            raise ConfigurationError(
                f"--recover specified but serialization_dir ({serialization_dir}) "
                "does not exist.  There is nothing to recover from.")
        os.makedirs(serialization_dir, exist_ok=True)
        params.to_file(os.path.join(serialization_dir, CONFIG_NAME))
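
As a rough usage sketch of the function above (the nested keys mirror what the code reads; the directory path is made up and the Params constructor is assumed to accept a plain dict):

params = Params({
    'environment': {
        'serialization_dir': '/tmp/my_experiment',  # where results and logs are written
        'recover': False,                           # set to True to resume a prior run
    },
    # ... model, trainer, and data configuration ...
})
create_serialization_dir(params)  # creates the directory and saves the config as CONFIG_NAME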
Code example #3
    def read(self, file_path: str) -> Iterable[Instance]:
        """
        Returns an ``Iterable`` containing all the instances
        in the specified dataset.

        If ``self.lazy`` is False, this calls ``self._read()``,
        ensures that the result is a list, then returns the resulting list.

        If ``self.lazy`` is True, this returns an object whose
        ``__iter__`` method calls ``self._read()`` each iteration.
        In this case your implementation of ``_read()`` must also be lazy
        (that is, not load all instances into memory at once), otherwise
        you will get a ``ConfigurationError``.

        In either case, the returned ``Iterable`` can be iterated
        over multiple times. It's unlikely you want to override this function,
        but if you do your result should likewise be repeatedly iterable.
        """
        lazy = getattr(self, 'lazy', None)
        if lazy is None:
            logger.warning(
                "DatasetReader.lazy is not set, "
                "did you forget to call the superclass constructor?")

        if lazy:
            return _LazyInstances(lambda: iter(self._read(file_path)))
        else:
            instances = self._read(file_path)
            if not isinstance(instances, list):
                instances = [instance for instance in Tqdm.tqdm(instances)]
            if not instances:
                raise ConfigurationError(
                    "No instances were read from the given filepath {}. "
                    "Is the path correct?".format(file_path))
            return instances
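
The lazy contract described in the docstring is easiest to satisfy by writing _read() as a generator. A minimal sketch with a hypothetical subclass and file format, assuming text_to_instance is implemented elsewhere to build an Instance from a raw line:

class MyLineReader(DatasetReader):
    def __init__(self, lazy: bool = False) -> None:
        super().__init__(lazy)

    def _read(self, file_path: str) -> Iterable[Instance]:
        # Yielding one Instance at a time keeps _read() lazy, so the
        # lazy=True branch of read() never loads the whole dataset at once.
        with open(file_path) as data_file:
            for line in data_file:
                text = line.strip()
                if text:
                    yield self.text_to_instance(text)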
Code example #4
File: stacked_bilstm.py Project: qolina/arglinking
    def forward(
            self,  # pylint: disable=arguments-differ
            inputs: PackedSequence,
            initial_state: Optional[Tuple[torch.Tensor, torch.Tensor]] = None):
        """
        Parameters
        ----------
        inputs : ``PackedSequence``, required.
            A batch first ``PackedSequence`` to run the stacked LSTM over.
        initial_state : Tuple[torch.Tensor, torch.Tensor], optional, (default = None)
            A tuple (state, memory) representing the initial hidden state and memory
            of the LSTM. Each tensor has shape (1, batch_size, output_dimension * 2).
        Returns
        -------
        output_sequence : PackedSequence
            The encoded sequence of shape (batch_size, sequence_length, hidden_size * 2)
        final_states: torch.Tensor
            The per-layer final (state, memory) states of the LSTM, each with shape
            (num_layers, batch_size, hidden_size * 2).
        """
        if not initial_state:
            hidden_states = [None] * len(self.lstm_layers)
        elif initial_state[0].size()[0] != len(self.lstm_layers):
            raise ConfigurationError(
                "Initial states were passed to forward() but the number of "
                "initial states does not match the number of layers.")
        else:
            hidden_states = list(zip(initial_state[0].split(1, 0),
                                     initial_state[1].split(1, 0)))

        output_sequence = inputs
        final_states = []
        for i, state in enumerate(hidden_states):
            forward_layer = getattr(self, 'forward_layer_{}'.format(i))
            backward_layer = getattr(self, 'backward_layer_{}'.format(i))
            # The state is duplicated to mirror the Pytorch API for LSTMs.
            forward_output, final_forward_state = forward_layer(
                output_sequence, state)
            backward_output, final_backward_state = backward_layer(
                output_sequence, state)

            forward_output, lengths = pad_packed_sequence(forward_output,
                                                          batch_first=True)
            backward_output, _ = pad_packed_sequence(backward_output,
                                                     batch_first=True)

            output_sequence = torch.cat([forward_output, backward_output], -1)
            output_sequence = pack_padded_sequence(output_sequence,
                                                   lengths,
                                                   batch_first=True)
            final_states.append(
                [torch.cat(both_direction_states, -1)
                 for both_direction_states in zip(final_forward_state,
                                                  final_backward_state)])

        final_state_tuple = [
            torch.cat(state_list, 0) for state_list in zip(*final_states)
        ]
        return output_sequence, final_state_tuple
Code example #5
File: environment.py Project: qolina/arglinking
def check_for_gpu(params) -> None:
    device_id = params['cuda_device']
    if device_id is not None and device_id >= cuda.device_count():
        raise ConfigurationError(
            "Experiment specified a GPU but none is available;"
            " if you want to run on CPU use the override"
            " 'trainer.cuda_device=-1' in the json config file.")
Code example #6
    def __init__(self, index: int, sequence_field: SequenceField) -> None:
        self.sequence_index = index
        self.sequence_field = sequence_field

        if not isinstance(index, int):
            raise ConfigurationError("IndexFields must be passed integer indices. "
                                     "Found index: {} with type: {}.".format(index, type(index)))
Code example #7
 def add_subclass_to_registry(subclass: Type[T]):
     # Add to registry, raise an error if key has already been used.
     if name in registry:
         message = "Cannot register %s as %s; name already in use for %s" % (
             name, cls.__name__, registry[name].__name__)
         raise ConfigurationError(message)
     registry[name] = subclass
     return subclass
Code example #8
 def count_vocab_items(self, token: Token, counter: Dict[str, Dict[str,
                                                                   int]]):
     if token.text is None:
         raise ConfigurationError(
             'TokenCharactersIndexer needs a tokenizer that retains text')
     for character in self._character_tokenizer.tokenize(token.text):
         # If `text_id` is set on the character token (e.g., if we're using byte encoding), we
         # will not be using the vocab for this character.
         if getattr(character, 'text_id', None) is None:
             counter[self._namespace][character.text] += 1
Code example #9
    def __init__(self,
                 input_dim: int,
                 num_layers: int,
                 hidden_dims: Union[int, Sequence[int]],
                 activations: str,
                 dropout: Union[float, Sequence[float]] = 0.0) -> None:

        super(FeedForward, self).__init__()
        # A better way would be to use registrable/from_params.
        if activations == "none":
            activations = lambda x: x
        elif activations == "relu":
            activations = torch.nn.functional.relu
        elif activations == "tanh":
            activations = torch.nn.functional.tanh
        else:
            raise ConfigurationError("{} is not a defined activation. Add it here".format(activations))

        if not isinstance(hidden_dims, list):
            hidden_dims = [hidden_dims] * num_layers  # type: ignore
        if not isinstance(activations, list):
            activations = [activations] * num_layers  # type: ignore
        if not isinstance(dropout, list):
            dropout = [dropout] * num_layers  # type: ignore
        if len(hidden_dims) != num_layers:
            raise ConfigurationError("len(hidden_dims) (%d) != num_layers (%d)" %
                                     (len(hidden_dims), num_layers))
        if len(activations) != num_layers:
            raise ConfigurationError("len(activations) (%d) != num_layers (%d)" %
                                     (len(activations), num_layers))
        if len(dropout) != num_layers:
            raise ConfigurationError("len(dropout) (%d) != num_layers (%d)" %
                                     (len(dropout), num_layers))
        self._activations = activations
        input_dims = [input_dim] + hidden_dims[:-1]
        linear_layers = []
        for layer_input_dim, layer_output_dim in zip(input_dims, hidden_dims):
            linear_layers.append(torch.nn.Linear(layer_input_dim, layer_output_dim))
        self._linear_layers = torch.nn.ModuleList(linear_layers)
        dropout_layers = [torch.nn.Dropout(p=value) for value in dropout]
        self._dropout = torch.nn.ModuleList(dropout_layers)
        self._output_dim = hidden_dims[-1]
        self.input_dim = input_dim
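
A small construction sketch for the module above, assuming the rest of the FeedForward class defines forward() in the usual way; the dimensions are arbitrary:

feedforward = FeedForward(input_dim=10,
                          num_layers=2,
                          hidden_dims=[20, 5],
                          activations="relu",
                          dropout=[0.2, 0.0])
# Per the constructor, linear layers are built as 10 -> 20 and 20 -> 5,
# each paired with ReLU and its own dropout probability.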
Code example #10
File: dataset.py Project: qolina/arglinking
 def _check_types(self) -> None:
     """
     Check that all the instances have the same types.
     """
     all_instance_fields_and_types: List[Dict[str, str]] = [{k: v.__class__.__name__
                                                             for k, v in x.fields.items()}
                                                            for x in self.instances]
     # Check all the field names and Field types are the same for every instance.
     if not all([all_instance_fields_and_types[0] == x for x in all_instance_fields_and_types]):
         raise ConfigurationError("You cannot construct a Batch with non-homogeneous Instances.")
Code example #11
    def __init__(self, tokens: List[Token],
                 token_indexers: Dict[str, TokenIndexer]) -> None:
        self.tokens = tokens
        self._token_indexers = token_indexers
        self._indexed_tokens: Optional[Dict[str, TokenList]] = None
        self._indexer_name_to_indexed_token: Optional[Dict[str,
                                                           List[str]]] = None

        if not all([isinstance(x, (Token, SpacyToken)) for x in tokens]):
            raise ConfigurationError("TextFields must be passed Tokens. "
                                     "Found: {} with types {}.".format(
                                         tokens, [type(x) for x in tokens]))
Code example #12
    def list_available(cls) -> List[str]:
        """List default first if it exists"""
        keys = list(Registrable._registry[cls].keys())
        default = cls.default_implementation

        if default is None:
            return keys
        elif default not in keys:
            message = "Default implementation %s is not registered" % default
            raise ConfigurationError(message)
        else:
            return [default] + [k for k in keys if k != default]
Code example #13
File: label_field.py Project: qolina/arglinking
    def __init__(self,
                 label: Union[str, int],
                 label_namespace: str = 'labels',
                 skip_indexing: bool = False) -> None:
        self.label = label
        self._label_namespace = label_namespace
        self._label_id = None
        self._maybe_warn_for_namespace(label_namespace)

        if skip_indexing:
            if not isinstance(label, int):
                raise ConfigurationError(
                    "In order to skip indexing, your labels must be integers. "
                    "Found label = {}".format(label))
            else:
                self._label_id = label
        else:
            if not isinstance(label, str):
                raise ConfigurationError(
                    "LabelFields must be passed a string label if skip_indexing=False. "
                    "Found label: {} with type: {}.".format(
                        label, type(label)))
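
A brief usage sketch of the two paths above (the label values and namespace are arbitrary):

string_label = LabelField("positive", label_namespace="sentiment_labels")
int_label = LabelField(3, skip_indexing=True)  # stored directly as _label_id
# LabelField(3) without skip_indexing=True would raise ConfigurationError,
# because labels that still need indexing must be strings.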
Code example #14
    def forward(
        self,
        tensors: List[torch.Tensor],  # pylint: disable=arguments-differ
        mask: torch.Tensor = None
    ) -> torch.Tensor:
        """
        Compute a weighted average of the ``tensors``.  The input tensors can be any shape
        with at least two dimensions, but must all be the same shape.

        When ``do_layer_norm=True``, the ``mask`` is a required input.  If the ``tensors`` are
        dimensioned ``(dim_0, ..., dim_{n-1}, dim_n)``, then the ``mask`` is dimensioned
        ``(dim_0, ..., dim_{n-1})``, as in the typical case with ``tensors`` of shape
        ``(batch_size, timesteps, dim)`` and ``mask`` of shape ``(batch_size, timesteps)``.

        When ``do_layer_norm=False`` the ``mask`` is ignored.
        """
        if len(tensors) != self.mixture_size:
            raise ConfigurationError(
                "{} tensors were passed, but the module was initialized to "
                "mix {} tensors.".format(len(tensors), self.mixture_size))

        def _do_layer_norm(tensor, broadcast_mask, num_elements_not_masked):
            tensor_masked = tensor * broadcast_mask
            mean = torch.sum(tensor_masked) / num_elements_not_masked
            variance = torch.sum(((tensor_masked - mean) * broadcast_mask)**
                                 2) / num_elements_not_masked
            return (tensor - mean) / torch.sqrt(variance + 1E-12)

        normed_weights = torch.nn.functional.softmax(
            torch.cat([parameter for parameter in self.scalar_parameters]), dim=0)
        normed_weights = torch.split(normed_weights, split_size_or_sections=1)

        if not self.do_layer_norm:
            pieces = []
            for weight, tensor in zip(normed_weights, tensors):
                pieces.append(weight * tensor)
            return self.gamma * sum(pieces)

        else:
            mask_float = mask.float()
            broadcast_mask = mask_float.unsqueeze(-1)
            input_dim = tensors[0].size(-1)
            num_elements_not_masked = torch.sum(mask_float) * input_dim

            pieces = []
            for weight, tensor in zip(normed_weights, tensors):
                pieces.append(weight * _do_layer_norm(tensor, broadcast_mask,
                                                      num_elements_not_masked))
            return self.gamma * sum(pieces)
Code example #15
File: from_params.py Project: qolina/arglinking
def takes_arg(obj, arg: str) -> bool:
    """
    Checks whether the provided obj takes a certain arg.
    If it's a class, we're really checking whether its constructor does.
    If it's a function or method, we're checking the object itself.
    Otherwise, we raise an error.
    """
    if inspect.isclass(obj):
        signature = inspect.signature(obj.__init__)
    elif inspect.ismethod(obj) or inspect.isfunction(obj):
        signature = inspect.signature(obj)
    else:
        raise ConfigurationError(f"object {obj} is not callable")
    return arg in signature.parameters
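
A quick illustration with ordinary callables (the names below are made up for this sketch):

def scale(value, factor=2):
    return value * factor

class Scaler:
    def __init__(self, factor: float) -> None:
        self.factor = factor

takes_arg(scale, 'factor')   # True: the function's signature has 'factor'
takes_arg(Scaler, 'factor')  # True: inspected via Scaler.__init__
takes_arg(Scaler, 'gain')    # False: the constructor has no such argument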
Code example #16
def block_orthogonal(tensor: torch.Tensor,
                     split_sizes: List[int],
                     gain: float = 1.0) -> None:
    """
    An initializer which allows initializing model parameters in "blocks". This is helpful
    in the case of recurrent models which use multiple gates applied to linear projections,
    which can be computed efficiently if they are concatenated together. However, they are
    separate parameters which should be initialized independently.

    Parameters
    ----------
    tensor : ``torch.Tensor``, required.
        A tensor to initialize.
    split_sizes : List[int], required.
        A list of length ``tensor.ndim()`` specifying the size of the
        blocks along that particular dimension. E.g. ``[10, 20]`` would
        result in the tensor being split into chunks of size 10 along the
        first dimension and 20 along the second.
    gain : float, optional (default = 1.0)
        The gain (scaling) applied to the orthogonal initialization.
    """
    data = tensor.data
    sizes = list(tensor.size())
    if any([a % b != 0 for a, b in zip(sizes, split_sizes)]):
        raise ConfigurationError(
            "tensor dimensions must be divisible by their respective "
            "split_sizes. Found size: {} and split_sizes: {}".format(
                sizes, split_sizes))
    indexes = [
        list(range(0, max_size, split))
        for max_size, split in zip(sizes, split_sizes)
    ]
    # Iterate over all possible blocks within the tensor.
    for block_start_indices in itertools.product(*indexes):
        # A list of tuples containing the index to start at for this block
        # and the appropriate step size (i.e split_size[i] for dimension i).
        index_and_step_tuples = zip(block_start_indices, split_sizes)
        # This is a tuple of slices corresponding to:
        # tensor[index: index + step_size, ...]. This is
        # required because we could have an arbitrary number
        # of dimensions. The actual slices we need are the
        # start_index: start_index + step for each dimension in the tensor.
        block_slice = tuple([
            slice(start_index, start_index + step)
            for start_index, step in index_and_step_tuples
        ])
        data[block_slice] = torch.nn.init.orthogonal_(
            tensor[block_slice].contiguous(), gain=gain)
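
Following the docstring's ``[10, 20]`` example, a hedged usage sketch: a (20, 40) weight is carved into 10 x 20 blocks, each orthogonally initialized on its own.

import torch

weight = torch.empty(20, 40)
block_orthogonal(weight, split_sizes=[10, 20])
# The four blocks weight[0:10, 0:20], weight[0:10, 20:40],
# weight[10:20, 0:20] and weight[10:20, 20:40] are now
# independent orthogonal matrices.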
Code example #17
    def __init__(self, module: torch.nn.Module, stateful: bool = False) -> None:
        super(PytorchSeq2SeqWrapper, self).__init__(stateful)
        self._module = module
        try:
            if not self._module.batch_first:
                raise ConfigurationError("Our encoder semantics assumes batch is always first!")
        except AttributeError:
            pass

        try:
            self._is_bidirectional = self._module.bidirectional
        except AttributeError:
            self._is_bidirectional = False
        if self._is_bidirectional:
            self._num_directions = 2
        else:
            self._num_directions = 1
Code example #18
    def __call__(self, tensor: torch.Tensor, parameter_name: str,
                 **kwargs) -> None:  # type: ignore
        # Select the new parameter name if it's being overridden
        if parameter_name in self.parameter_name_overrides:
            parameter_name = self.parameter_name_overrides[parameter_name]

        # If the size of the source and destination tensors are not the
        # same, then we need to raise an error
        source_weights = self.weights[parameter_name]
        if tensor.data.size() != source_weights.size():
            raise ConfigurationError(
                "Incompatible sizes found for parameter %s. "
                "Found %s and %s" %
                (parameter_name, tensor.data.size(), source_weights.size()))

        # Copy the parameters from the source to the destination
        tensor.data[:] = source_weights[:]
Code example #19
def _read_embeddings_from_hdf5(embeddings_filename: str,
                               embedding_dim: int,
                               vocab: Vocabulary,
                               namespace: str = "tokens",
                               amr: bool = False) -> torch.FloatTensor:
    """
    Reads from a hdf5 formatted file. The embedding matrix is assumed to
    be keyed by 'embedding' and of size ``(num_tokens, embedding_dim)``.
    """
    with h5py.File(embeddings_filename, 'r') as fin:
        embeddings = fin['embedding'][...]

    if list(embeddings.shape) != [vocab.get_vocab_size(namespace), embedding_dim]:
        raise ConfigurationError(
                "Read shape {0} embeddings from the file, but expected {1}".format(
                        list(embeddings.shape), [vocab.get_vocab_size(namespace), embedding_dim]))

    return torch.FloatTensor(embeddings)
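
A minimal sketch of producing a file this reader accepts, assuming only the layout described in the docstring (a dataset keyed by 'embedding' of size (num_tokens, embedding_dim)); the array contents are dummy values, and the vocabulary size for the namespace must match num_tokens or the shape check above raises:

import h5py
import numpy

num_tokens, embedding_dim = 100, 50
matrix = numpy.random.rand(num_tokens, embedding_dim).astype('float32')
with h5py.File('embeddings.hdf5', 'w') as fout:
    fout.create_dataset('embedding', data=matrix)  # keyed by 'embedding', as the reader expects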
Code example #20
File: model.py Project: qolina/arglinking
    def _get_prediction_device(self):
        """
        This method checks the device of the model parameters to determine the cuda_device
        this model should be run on for predictions.  If there are no parameters, it returns -1.
        Returns
        -------
        The cuda device this model should run on for predictions.
        """
        devices = {get_device_of(param) for param in self.parameters()}

        if len(devices) > 1:
            devices_string = ", ".join(str(x) for x in devices)
            raise ConfigurationError(
                f"Parameters have mismatching cuda_devices: {devices_string}")
        elif len(devices) == 1 and all(i >= 0 for i in devices):
            device = torch.device('cuda:{}'.format(devices.pop()))
        else:
            device = torch.device('cpu')
        return device
Code example #21
 def tokens_to_indices(self, tokens: List[Token], vocabulary: Vocabulary,
                       index_name: str) -> Dict[str, List[List[int]]]:
     indices: List[List[int]] = []
     for token in tokens:
         token_indices: List[int] = []
         if token.text is None:
             raise ConfigurationError(
                 'TokenCharactersIndexer needs a tokenizer that retains text'
             )
         for character in self._character_tokenizer.tokenize(token.text):
             if getattr(character, 'text_id', None) is not None:
                 # `text_id` being set on the token means that we aren't using the vocab, we just
                 # use this id instead.
                 index = character.text_id
             else:
                 index = vocabulary.get_token_index(character.text,
                                                    self._namespace)
             token_indices.append(index)
         indices.append(token_indices)
     return {index_name: indices}
Code example #22
File: dataset.py Project: qolina/arglinking
    def print_statistics(self) -> None:
        # Make sure it has been indexed first
        sequence_field_lengths: Dict[str, List] = defaultdict(list)
        for instance in self.instances:
            if not instance.indexed:
                raise ConfigurationError("Instances must be indexed with vocabulary "
                                         "before asking to print dataset statistics.")
            for field, field_padding_lengths in instance.get_padding_lengths().items():
                for key, value in field_padding_lengths.items():
                    sequence_field_lengths[f"{field}.{key}"].append(value)

        print("\n\n----Dataset Statistics----\n")
        for name, lengths in sequence_field_lengths.items():
            print(f"Statistics for {name}:")
            print(f"\tLengths: Mean: {numpy.mean(lengths)}, Standard Dev: {numpy.std(lengths)}, "
                  f"Max: {numpy.max(lengths)}, Min: {numpy.min(lengths)}")

        print("\n10 Random instances: ")
        for i in list(numpy.random.randint(len(self.instances), size=10)):
            print(f"Instance {i}:")
            print(f"\t{self.instances[i]}")
Code example #23
    def __init__(self,
                 sorting_keys: List[Tuple[str, str]],
                 padding_noise: float = 0.1,
                 biggest_batch_first: bool = False,
                 batch_size: int = 32,
                 instances_per_epoch: int = None,
                 max_instances_in_memory: int = None,
                 cache_instances: bool = False,
                 track_epoch: bool = False,
                 maximum_samples_per_batch: Tuple[str, int] = None) -> None:
        if not sorting_keys:
            raise ConfigurationError("BucketIterator requires sorting_keys to be specified")

        super().__init__(cache_instances=cache_instances,
                         track_epoch=track_epoch,
                         batch_size=batch_size,
                         instances_per_epoch=instances_per_epoch,
                         max_instances_in_memory=max_instances_in_memory,
                         maximum_samples_per_batch=maximum_samples_per_batch)
        self._sorting_keys = sorting_keys
        self._padding_noise = padding_noise
        self._biggest_batch_first = biggest_batch_first
Code example #24
    def __init__(self,
                 num_embeddings: int,
                 embedding_dim: int,
                 projection_dim: int = None,
                 weight: torch.FloatTensor = None,
                 padding_index: int = None,
                 trainable: bool = True,
                 max_norm: float = None,
                 norm_type: float = 2.,
                 scale_grad_by_freq: bool = False,
                 sparse: bool = False) -> None:
        super(Embedding, self).__init__()
        self.num_embeddings = num_embeddings
        self.padding_index = padding_index
        self.max_norm = max_norm
        self.norm_type = norm_type
        self.scale_grad_by_freq = scale_grad_by_freq
        self.sparse = sparse
        self.trainable = trainable
        self.embedding_dim = embedding_dim

        self.output_dim = projection_dim or embedding_dim

        if weight is None:
            weight = torch.FloatTensor(num_embeddings, embedding_dim)
            self.weight = torch.nn.Parameter(weight, requires_grad=trainable)
            torch.nn.init.xavier_uniform_(self.weight)
        else:
            if weight.size() != (num_embeddings, embedding_dim):
                raise ConfigurationError("A weight matrix was passed with contradictory embedding shapes.")
            self.weight = torch.nn.Parameter(weight, requires_grad=trainable)

        if self.padding_index is not None:
            self.weight.data[self.padding_index].fill_(0)

        if projection_dim:
            self._projection = torch.nn.Linear(embedding_dim, projection_dim)
        else:
            self._projection = None
Code example #25
    def __init__(self,
                 mixture_size: int,
                 do_layer_norm: bool = False,
                 initial_scalar_parameters: List[float] = None,
                 trainable: bool = True) -> None:
        super(ScalarMix, self).__init__()
        self.mixture_size = mixture_size
        self.do_layer_norm = do_layer_norm

        if initial_scalar_parameters is None:
            initial_scalar_parameters = [0.0] * mixture_size
        elif len(initial_scalar_parameters) != mixture_size:
            raise ConfigurationError(
                "Length of initial_scalar_parameters {} differs "
                "from mixture_size {}".format(initial_scalar_parameters,
                                              mixture_size))

        self.scalar_parameters = ParameterList([
            Parameter(torch.FloatTensor([initial_scalar_parameters[i]]),
                      requires_grad=trainable) for i in range(mixture_size)
        ])
        self.gamma = Parameter(torch.FloatTensor([1.0]),
                               requires_grad=trainable)
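
A hedged end-to-end sketch combining this constructor with the forward() shown in code example #14 (the shapes are arbitrary):

import torch

mix = ScalarMix(mixture_size=3, do_layer_norm=False)
layer_outputs = [torch.randn(2, 7, 16) for _ in range(3)]  # e.g. three layer representations
combined = mix(layer_outputs)  # softmax-weighted sum of the tensors, scaled by gamma
assert combined.shape == (2, 7, 16)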
Code example #26
    def __init__(self,
                 input_dim: int,
                 combination: str = "x,y",
                 num_width_embeddings: int = None,
                 span_width_embedding_dim: int = None,
                 bucket_widths: bool = False,
                 use_exclusive_start_indices: bool = False) -> None:
        super().__init__()
        self._input_dim = input_dim
        self._combination = combination
        self._num_width_embeddings = num_width_embeddings
        self._bucket_widths = bucket_widths

        self._use_exclusive_start_indices = use_exclusive_start_indices
        if use_exclusive_start_indices:
            self._start_sentinel = Parameter(torch.randn([1, 1, int(input_dim)]))

        if num_width_embeddings is not None and span_width_embedding_dim is not None:
            self._span_width_embedding = Embedding(num_width_embeddings, span_width_embedding_dim)
        elif not all([num_width_embeddings is None, span_width_embedding_dim is None]):
            raise ConfigurationError("To use a span width embedding representation, you must"
                                     "specify both num_width_buckets and span_width_embedding_dim.")
        else:
            self._span_width_embedding = None
Code example #27
    def train(self):
        """Trains the supplied model with the supplied parameters.
        """
        try:
            epoch_counter, dev_metric_per_epoch = self._restore_checkpoint()
        except RuntimeError:
            traceback.print_exc()
            raise ConfigurationError(
                "Could not recover training from the checkpoint.  Did you mean to output to "
                "a different serialization directory or delete the existing serialization "
                "directory?")

        self._enable_gradient_clipping()

        logger.info('Start training...')

        # Init.
        training_start_time = time.time()
        epochs_trained_this_time = 0
        metrics = {}
        training_metrics = {}
        dev_metrics = {}
        is_best_so_far = True
        best_epoch_dev_metrics = {}

        for epoch in range(epoch_counter, self._num_epochs):
            epoch_start_time = time.time()
            training_metrics = self._train_epoch(epoch)
            # Validate on the dev set.
            if self._dev_dataset is not None:
                with torch.no_grad():
                    # Check if we want to do full evaluation (expensive)
                    if (self._cpu_eval_freq is not None
                            and (epoch % self._cpu_eval_freq
                                 == self._cpu_eval_freq - 1)):
                        # Perform full evaluation but do not use it for early stopping
                        _ = self._validate_dev(epoch,
                                               cpu_eval=True,
                                               outputdir=os.path.join(
                                                   self._serialization_dir,
                                                   "dev_predictions",
                                                   f"epoch_{epoch}"))
                    dev_metrics = self._validate_dev(
                        epoch,
                        outputdir=os.path.join(self._serialization_dir,
                                               "dev_predictions",
                                               f"epoch_{epoch}"))

                    # Check dev metric for early stopping
                    this_epoch_dev_metric = dev_metrics[self._dev_metric]

                    # Check dev metric to see if it's the best so far
                    is_best_so_far = self._is_best_so_far(
                        this_epoch_dev_metric, dev_metric_per_epoch)
                    if is_best_so_far:
                        best_epoch_dev_metrics = dev_metrics.copy()
                    dev_metric_per_epoch.append(this_epoch_dev_metric)
                    if self._should_stop_early(dev_metric_per_epoch):
                        logger.info("Ran out of patience.  Stopping training.")
                        break

            # Save status.
            self._save_checkpoint(epoch,
                                  dev_metric_per_epoch,
                                  is_best=is_best_so_far)
            self._metrics_to_tensorboard(epoch,
                                         training_metrics,
                                         dev_metrics=dev_metrics)
            self._metrics_to_console(training_metrics, dev_metrics=dev_metrics)
            self._tensorboard.add_dev_scalar('learning_rate',
                                             self._optimizer.lr, epoch)

            if is_best_so_far:
                # We may not have had validation data, so we need to hide this behind an if.
                metrics['best_epoch'] = epoch
                metrics.update({
                    f"best_dev_{k}": v
                    for k, v in best_epoch_dev_metrics.items()
                })

            # Estimate ETA.
            epoch_elapsed_time = time.time() - epoch_start_time
            logger.info(
                "Epoch duration: %s",
                time.strftime("%H:%M:%S", time.gmtime(epoch_elapsed_time)))

            if epoch < self._num_epochs - 1:
                training_elapsed_time = time.time() - training_start_time
                estimated_time_remaining = training_elapsed_time * \
                    ((self._num_epochs - epoch_counter) / float(epoch - epoch_counter + 1) - 1)
                formatted_time = str(
                    datetime.timedelta(seconds=int(estimated_time_remaining)))
                logger.info("Estimated training time remaining: %s",
                            formatted_time)

            epochs_trained_this_time += 1

        # Finish training, and summarize the status.
        training_elapsed_time = time.time() - training_start_time
        metrics.update(
            dict(training_duration=time.strftime(
                "%H:%M:%S", time.gmtime(training_elapsed_time)),
                 training_start_epoch=epoch_counter,
                 training_epochs=epochs_trained_this_time))
        for key, value in training_metrics.items():
            metrics["training_" + key] = value
        for key, value in dev_metrics.items():
            metrics["dev_" + key] = value
        return metrics
Code example #28
 def by_name(cls: Type[T], name: str) -> Type[T]:
     logger.info(f"instantiating registered subclass {name} of {cls}")
     if name not in Registrable._registry[cls]:
         raise ConfigurationError("%s is not a registered name for %s" %
                                  (name, cls.__name__))
     return Registrable._registry[cls].get(name)
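
A hedged usage sketch of the registry pattern, assuming the usual Registrable.register decorator that wraps add_subclass_to_registry from code example #7; the encoder names here are made up:

@Seq2SeqEncoder.register("my_encoder")
class MyEncoder(Seq2SeqEncoder):
    ...

encoder_class = Seq2SeqEncoder.by_name("my_encoder")  # returns MyEncoder
# Seq2SeqEncoder.by_name("not_registered") would raise ConfigurationError.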
Code example #29
    def get_padding_lengths(self) -> Dict[str, int]:
        """
        The ``TextField`` has a list of ``Tokens``, and each ``Token`` gets converted into arrays by
        (potentially) several ``TokenIndexers``.  This method gets the max length (over tokens)
        associated with each of these arrays.
        """
        # Our basic outline: we will iterate over `TokenIndexers`, and aggregate lengths over tokens
        # for each indexer separately.  Then we will combine the results for each indexer into a single
        # dictionary, resolving any (unlikely) key conflicts by taking a max.
        lengths = []
        if self._indexed_tokens is None:
            raise ConfigurationError(
                "You must call .index(vocabulary) on a "
                "field before determining padding lengths.")

        # Each indexer can return a different sequence length, and for indexers that return
        # multiple arrays each can have a different length.  We'll keep track of them here.
        for indexer_name, indexer in self._token_indexers.items():
            indexer_lengths = {}

            for indexed_tokens_key in self._indexer_name_to_indexed_token[
                    indexer_name]:
                # This is a list of dicts, one for each token in the field.
                token_lengths = [
                    indexer.get_padding_lengths(token)
                    for token in self._indexed_tokens[indexed_tokens_key]
                ]
            if not token_lengths:
                # This is a padding edge case and occurs when we want to pad a ListField of
                # TextFields. In order to pad the list field, we need to be able to have an
                # _empty_ TextField, but if this is the case, token_lengths will be an empty
                # list, so we add the default empty padding dictionary to the list instead.
                token_lengths = [{}]
            # Iterate over the keys and find the maximum token length.
            # It's fine to iterate over the keys of the first token since all tokens have the same keys.
            for key in token_lengths[0]:
                indexer_lengths[key] = max(x[key] if key in x else 0
                                           for x in token_lengths)
            lengths.append(indexer_lengths)

        indexer_sequence_lengths = {
            key: len(val)
            for key, val in self._indexed_tokens.items()
        }
        # Get the padding lengths for sequence lengths.
        if len(set(indexer_sequence_lengths.values())) == 1:
            # This is the default case where all indexers return the same length.
            # Keep the existing 'num_tokens' key for backward compatibility with existing config files.
            padding_lengths = {
                'num_tokens': list(indexer_sequence_lengths.values())[0]
            }
        else:
            # The indexers return different lengths.
            padding_lengths = indexer_sequence_lengths

        # Get all keys which have been used for padding for each indexer and take the max if there are duplicates.
        padding_keys = {key for d in lengths for key in d.keys()}
        for padding_key in padding_keys:
            padding_lengths[padding_key] = max(
                x[padding_key] if padding_key in x else 0 for x in lengths)
        return padding_lengths
Code example #30
 def __iter__(self) -> Iterator[Instance]:
     instances = self.instance_generator()
     if isinstance(instances, list):
         raise ConfigurationError(
             "For a lazy dataset reader, _read() must return a generator")
     return instances