Example 1
    def __init__(self,
                 num_layers,
                 model_dim: int = Ref("exp_global.default_layer_dim"),
                 emb_dropout: float = Ref("exp_global.dropout", 0.1),
                 residual_dropout: float = Ref("exp_global.dropout", 0.1),
                 positional_encoding: PositionalEncoding = bare(SinusoidalPositionalEncoding),
                 attention=bare(MultiHeadAttention, num_heads=8),
                 feed_forward=bare(FeedForward, hidden_dim=2048),
                 batch_first=False,
                 masked_layers=False,
                 gated_residuals=False,
                 checkpointing_every: Optional[int] = None):
        super().__init__()
        self.model_dim: int = model_dim
        self.batch_first = batch_first
        self.masked_layers = masked_layers
        self.gated_residuals = gated_residuals

        self.preprocess = PrePostProcessing(model_dim, 'd', emb_dropout)
        self.postprocess = PrePostProcessing(model_dim, 'n', masking=masked_layers)

        # The first layer uses the prototype attention / feed-forward modules
        # directly; every later layer gets its own clone, so parameters are
        # not shared across layers.
        self.layers = nn.ModuleList([
            TransformerEncoderLayer(
                model_dim=model_dim,
                dropout=residual_dropout,
                feed_forward=feed_forward if i == 0 else feed_forward.clone(),
                attention=attention if i == 0 else attention.clone(),
                masked_layers=masked_layers,
                gated_residuals=gated_residuals
            )
            for i in range(num_layers)
        ])

        self.positional_encoding = positional_encoding
        self.checkpointing_every = checkpointing_every
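The feed_forward if i == 0 else feed_forward.clone() pattern hands the first layer the prototype modules built by bare(...) and gives every later layer its own clone. A minimal plain-PyTorch sketch of the same per-layer-copy idea, using copy.deepcopy in place of the toolkit's clone() (ToyEncoder is an illustrative name, not part of the original code):

import copy
import torch.nn as nn

class ToyEncoder(nn.Module):
    def __init__(self, prototype_layer: nn.Module, num_layers: int):
        super().__init__()
        # first layer keeps the prototype, later layers get deep copies,
        # so every layer owns independent parameters
        self.layers = nn.ModuleList(
            [prototype_layer if i == 0 else copy.deepcopy(prototype_layer)
             for i in range(num_layers)]
        )

encoder = ToyEncoder(nn.Linear(512, 512), num_layers=6)
assert encoder.layers[0].weight is not encoder.layers[1].weight
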
Example 2
    def __init__(self,
                 in_features,
                 out_features,
                 bias=True,
                 weight_norm=False,
                 initializer=bare(XavierUniform),
                 bias_initializer=bare(ConstantInitializer, val=0)):
        # The initializers must be stored before calling nn.Linear.__init__,
        # because that call runs reset_parameters(), which uses them.
        self.initializer = initializer
        self.bias_initializer = bias_initializer
        super().__init__(in_features, out_features, bias)
        self.weight_norm = weight_norm
        if weight_norm:
            nn.utils.weight_norm(self, name='weight')
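The initializers are assigned before super().__init__() because nn.Linear.__init__ calls reset_parameters(), which is presumably overridden to use them. A self-contained plain-PyTorch sketch of that ordering trick under the same assumption (XavierLinear, use_weight_norm and weight_init are illustrative names, not the toolkit's API):

import torch.nn as nn

class XavierLinear(nn.Linear):
    def __init__(self, in_features, out_features, bias=True, use_weight_norm=False,
                 weight_init=nn.init.xavier_uniform_):
        # set the init function first: nn.Linear.__init__ calls
        # reset_parameters(), which reads self.weight_init below
        self.weight_init = weight_init
        super().__init__(in_features, out_features, bias)
        if use_weight_norm:
            nn.utils.weight_norm(self, name='weight')

    def reset_parameters(self):
        self.weight_init(self.weight)
        if self.bias is not None:
            nn.init.zeros_(self.bias)

layer = XavierLinear(512, 2048, use_weight_norm=True)
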
Example 3
    def __init__(self, max_length, model_dim=Ref("exp_global.default_layer_dim"), batch_first=True,
                 initializer: Initializer = bare(XavierUniform)):
        super().__init__(model_dim, batch_first)
        self.max_length = max_length
        # learned positional embedding table: one row per position
        self.pos_emb = Parameter(torch.zeros(max_length, model_dim))
        self.initializer = initializer
        self.reset_parameters()
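A learned positional table like pos_emb is normally sliced to the current sequence length and added to the token embeddings; the sketch below shows that assumed usage in plain PyTorch (the class name and forward signature are illustrative):

import torch
import torch.nn as nn

class LearnedPositionalEmbedding(nn.Module):
    def __init__(self, max_length: int, model_dim: int):
        super().__init__()
        self.pos_emb = nn.Parameter(torch.zeros(max_length, model_dim))
        nn.init.xavier_uniform_(self.pos_emb)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # x: (batch, seq_len, model_dim); add one learned vector per position
        return x + self.pos_emb[: x.size(1)].unsqueeze(0)

pos = LearnedPositionalEmbedding(max_length=512, model_dim=64)
out = pos(torch.randn(2, 10, 64))    # -> (2, 10, 64)
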
Example 4
    def __init__(self,
                 vocab: Vocab = None,
                 dim: int = Ref("exp_global.default_layer_dim"),
                 initializer: Initializer = bare(XavierUniform),
                 bias=True,
                 bias_initializer=bare(ConstantInitializer, val=0),
                 multiple: int = Ref("exp_global.multiple", 1),
                 embedding: DenseWordEmbedding = None):
        super().__init__()
        self._shared = embedding is not None
        self._vocab = vocab
        self._embedding = embedding
        self._dim = dim
        self.multiple = multiple

        if self._vocab is not None:
            assert self._embedding is None and self._dim is not None
            # round the vocabulary size up to the requested multiple and
            # allocate the (padded) projection matrix with that size
            size = len(self._vocab)
            if size % multiple != 0:
                size += multiple - (size % multiple)
            self.weights = Parameter(torch.empty(size, self._dim))
            self.vocab_size = len(self._vocab)
        else:
            assert self._embedding is not None
            self.weights = self._embedding.weights
            if hasattr(self._embedding, "multiple"):
                self.multiple = self._embedding.multiple
                self.vocab_size = len(self._embedding.vocab)
            else:
                self.vocab_size = None

        if bias:
            self.bias = Parameter(torch.empty(self.weights.size(0)))
        else:
            self.register_parameter('bias', None)

        self.initializer = initializer
        self.bias_initializer = bias_initializer

        self.reset_parameters()

        self._register_load_state_dict_pre_hook(self._load_params)
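When an embedding is passed in, the layer reuses that embedding's weight matrix as the output projection (weight tying); otherwise it allocates its own matrix, padded up to the requested multiple. A small plain-PyTorch sketch of both the padding arithmetic and the tying, with illustrative names:

import torch
import torch.nn as nn
import torch.nn.functional as F

def pad_to_multiple(size: int, multiple: int) -> int:
    # e.g. pad_to_multiple(31999, 8) == 32000
    if size % multiple != 0:
        size += multiple - (size % multiple)
    return size

vocab_size = pad_to_multiple(31999, multiple=8)     # -> 32000
embedding = nn.Embedding(vocab_size, 512)

# tied output projection: the softmax weights are the embedding matrix itself
hidden = torch.randn(2, 7, 512)
logits = F.linear(hidden, embedding.weight)         # -> (2, 7, 32000)
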
Example 5
    def __init__(self,
                 vocab: Vocab,
                 dim: int = Ref("exp_global.default_layer_dim"),
                 initializer=bare(NormalInitializer),
                 dropout: float = Ref("exp_global.dropout", 0.0),
                 multiple: int = Ref("exp_global.multiple", 1)):
        super().__init__()
        self.vocab = vocab
        self.dim = dim
        self.multiple = multiple
        self.initializer = initializer
        self.dropout = dropout

        # round the embedding table size up to the requested multiple
        self.size = len(self.vocab)
        if self.size % self.multiple != 0:
            self.size += self.multiple - (self.size % self.multiple)
        self.embeddings = Parameter(torch.empty(self.size, self.dim))

        self.reset_parameters()

        self._register_load_state_dict_pre_hook(self._load_params)
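A worked example of the size rounding followed by an embedding lookup with dropout, in plain PyTorch; the std of the normal init here is a common convention, not necessarily what NormalInitializer uses:

import torch
import torch.nn as nn
import torch.nn.functional as F

vocab = ["<pad>", "<s>", "</s>", "hello", "world"]   # 5 entries
multiple = 8
size = len(vocab)
if size % multiple != 0:
    size += multiple - (size % multiple)             # 5 -> 8

table = nn.Parameter(torch.empty(size, 512))
nn.init.normal_(table, mean=0.0, std=512 ** -0.5)

ids = torch.tensor([[3, 4, 2]])                      # "hello world </s>"
vectors = F.dropout(F.embedding(ids, table), p=0.1, training=True)   # (1, 3, 512)
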
Example 6
    def __init__(self,
                 dataset: Dataset,
                 metrics: Union[Evaluator, Sequence[Evaluator]],
                 search_strategy: SearchStrategy = bare(BeamSearch, beam_size=5),
                 model: AutoregressiveModel = Ref("model"),
                 name="dev",
                 report_dir=Ref("exp_global.report_dir"),
                 result_path: Optional[str] = None,
                 print_output=False,
                 report_every=100):
        super().__init__(name, report_dir)
        self.dataset = dataset
        # accept either a single evaluator or a sequence of evaluators
        if not isinstance(metrics, Sequence):
            metrics = [metrics]
        self.metrics = metrics
        self.model = model
        self.search_strategy = search_strategy
        self.result_path = result_path
        self.print_output = print_output
        self.result_file = None
        self.report_every = report_every
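The metrics argument is normalized so callers may pass either a single Evaluator or a sequence of them. A tiny runnable sketch of just that normalization (EvaluatorStub and as_metric_list are stand-ins, not the toolkit's API):

from collections.abc import Sequence

class EvaluatorStub:
    def evaluate(self, hypotheses, references):
        return 0.0

def as_metric_list(metrics):
    # mirror the constructor: wrap a single evaluator, keep sequences as-is
    if not isinstance(metrics, Sequence):
        metrics = [metrics]
    return metrics

assert len(as_metric_list(EvaluatorStub())) == 1
assert len(as_metric_list([EvaluatorStub(), EvaluatorStub()])) == 2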