def __init__(self,
             num_layers,
             model_dim: int = Ref("exp_global.default_layer_dim"),
             emb_dropout: float = Ref("exp_global.dropout", 0.1),
             residual_dropout: float = Ref("exp_global.dropout", 0.1),
             positional_encoding: PositionalEncoding = bare(SinusoidalPositionalEncoding),
             attention=bare(MultiHeadAttention, num_heads=8),
             feed_forward=bare(FeedForward, hidden_dim=2048),
             batch_first=False,
             masked_layers=False,
             gated_residuals=False,
             checkpointing_every: Optional[int] = None):
    super().__init__()
    self.model_dim: int = model_dim
    self.batch_first = batch_first
    self.masked_layers = masked_layers
    self.gated_residuals = gated_residuals
    # Dropout on the embedded inputs, layer norm on the encoder output.
    self.preprocess = PrePostProcessing(model_dim, 'd', emb_dropout)
    self.postprocess = PrePostProcessing(model_dim, 'n', masking=masked_layers)
    # The first layer takes the template attention/feed-forward modules as-is;
    # later layers receive clones so parameters are not shared across layers.
    self.layers = nn.ModuleList([
        TransformerEncoderLayer(model_dim=model_dim,
                                dropout=residual_dropout,
                                feed_forward=feed_forward if i == 0 else feed_forward.clone(),
                                attention=attention if i == 0 else attention.clone(),
                                masked_layers=masked_layers,
                                gated_residuals=gated_residuals)
        for i in range(num_layers)])
    self.positional_encoding = positional_encoding
    self.checkpointing_every = checkpointing_every
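# Hypothetical usage sketch: direct construction of this encoder, with the class
# name `TransformerEncoder` assumed and explicit values standing in for the
# Ref(...) defaults, which the config/serialization machinery normally resolves.
encoder = TransformerEncoder(num_layers=6, model_dim=512,
                             emb_dropout=0.1, residual_dropout=0.1)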
def __init__(self,
             in_features,
             out_features,
             bias=True,
             weight_norm=False,
             initializer=bare(XavierUniform),
             bias_initializer=bare(ConstantInitializer, val=0)):
    # Assign the initializers before super().__init__(), which calls
    # nn.Linear's reset_parameters() and needs them to be available.
    self.initializer = initializer
    self.bias_initializer = bias_initializer
    super().__init__(in_features, out_features, bias)
    self.weight_norm = weight_norm
    if weight_norm:
        nn.utils.weight_norm(self, name='weight')
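# Hypothetical usage sketch; `XavierLinear` is an assumed name for this Linear
# subclass. With weight_norm=True, nn.utils.weight_norm reparameterizes `weight`
# into magnitude (`weight_g`) and direction (`weight_v`) parameters.
proj = XavierLinear(512, 2048, weight_norm=True)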
def __init__(self,
             max_length,
             model_dim=Ref("exp_global.default_layer_dim"),
             batch_first=True,
             initializer: Initializer = bare(XavierUniform)):
    super().__init__(model_dim, batch_first)
    self.max_length = max_length
    self.pos_emb = Parameter(torch.zeros(max_length, model_dim))
    self.initializer = initializer
    self.reset_parameters()
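# Hypothetical usage sketch; `LearnedPositionalEncoding` is an assumed class
# name and the values are illustrative (model_dim normally comes from the
# experiment config via Ref):
pos_enc = LearnedPositionalEncoding(max_length=1024, model_dim=512)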
def __init__(self,
             vocab: Vocab = None,
             dim: int = Ref("exp_global.default_layer_dim"),
             initializer: Initializer = bare(XavierUniform),
             bias=True,
             bias_initializer=bare(ConstantInitializer, val=0),
             multiple: int = Ref("exp_global.multiple", 1),
             embedding: DenseWordEmbedding = None):
    super().__init__()
    self._shared = embedding is not None
    self._vocab = vocab
    self._embedding = embedding
    self._dim = dim
    self.multiple = multiple
    if self._vocab is not None:
        assert self._embedding is None and self._dim is not None
        # Round the weight matrix up to a multiple of `multiple` (e.g. for
        # hardware-friendly sizes); the true vocabulary size is kept separately.
        size = len(self._vocab)
        if size % multiple != 0:
            size += multiple - (size % multiple)
        self.weights = Parameter(torch.empty(size, self._dim))
        self.vocab_size = len(self._vocab)
    else:
        # Tied projection: reuse the embedding's weights instead of allocating new ones.
        assert self._embedding is not None
        self.weights = self._embedding.weights
        if hasattr(self._embedding, "multiple"):
            self.multiple = self._embedding.multiple
            self.vocab_size = len(self._embedding.vocab)
        else:
            self.vocab_size = None
    if bias:
        self.bias = Parameter(torch.empty(self.weights.size(0)))
    else:
        self.register_parameter('bias', None)
    self.initializer = initializer
    self.bias_initializer = bias_initializer
    self.reset_parameters()
    self._register_load_state_dict_pre_hook(self._load_params)
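# Hypothetical usage sketch of the two construction modes; `SoftmaxOutputLayer`
# is an assumed class name. Pass a vocab to allocate fresh weights, or pass an
# existing DenseWordEmbedding to tie the projection to its weights:
out_untied = SoftmaxOutputLayer(vocab=vocab, dim=512, multiple=8)
out_tied = SoftmaxOutputLayer(embedding=word_emb)  # shares word_emb.weights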
def __init__(self,
             vocab: Vocab,
             dim: int = Ref("exp_global.default_layer_dim"),
             initializer=bare(NormalInitializer),
             dropout: float = Ref("exp_global.dropout", 0.0),
             multiple: int = Ref("exp_global.multiple", 1)):
    super().__init__()
    self.vocab = vocab
    self.dim = dim
    self.multiple = multiple
    self.initializer = initializer
    self.dropout = dropout
    self.size = len(self.vocab)
    if self.size % self.multiple != 0:
        self.size += self.multiple - (self.size % self.multiple)
    self.embeddings = Parameter(torch.empty(self.size, self.dim))
    self.reset_parameters()
    self._register_load_state_dict_pre_hook(self._load_params)
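# Hypothetical usage sketch; values are illustrative. With multiple=8, a vocab
# of 31998 entries is padded to a 32000-row table (the next multiple of 8):
emb = DenseWordEmbedding(vocab=vocab, dim=512, multiple=8)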
def __init__(self,
             dataset: Dataset,
             metrics: Union[Evaluator, Sequence[Evaluator]],
             search_strategy: SearchStrategy = bare(BeamSearch, beam_size=5),
             model: AutoregressiveModel = Ref("model"),
             name="dev",
             report_dir=Ref("exp_global.report_dir"),
             result_path: Optional[str] = None,
             print_output=False,
             report_every=100):
    super().__init__(name, report_dir)
    self.dataset = dataset
    if not isinstance(metrics, Sequence):
        metrics = [metrics]
    self.metrics = metrics
    self.model = model
    self.search_strategy = search_strategy
    self.result_path = result_path
    self.print_output = print_output
    self.result_file = None
    self.report_every = report_every
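# Hypothetical usage sketch; the class name `EvaluationTask` and the BLEU
# evaluator are assumptions, and the Ref defaults (model, report_dir) are
# normally resolved from the experiment config rather than passed by hand:
dev_eval = EvaluationTask(dataset=dev_data, metrics=BLEU(),
                          search_strategy=BeamSearch(beam_size=4),
                          model=my_model, report_dir="reports")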