Example #1
    def __init__(self, config):
        super().__init__()
        self.ln1 = nn.LayerNorm(config.n_embd)
        self.ln2 = nn.LayerNorm(config.n_embd)
        if config.additive:
            self.attn = attention.AdditiveSelfAttention(config)
        else:
            self.attn = attention.CausalSelfAttention(config)
        self.mlp = nn.Sequential(
            nn.Linear(config.n_embd, 4 * config.n_embd),
            nn.GELU(),
            nn.Linear(4 * config.n_embd, config.n_embd),
            nn.Dropout(config.resid_pdrop),
        )
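For context, this constructor presumably sits inside a transformer block module; a minimal sketch of what the surrounding class might look like follows, assuming a standard pre-LN forward pass with residual connections. The class name Block, the imports, and the forward wiring are assumptions for illustration, since the examples show only __init__.

import torch.nn as nn

import attention  # project-local module providing the self-attention classes


class Block(nn.Module):
    """Hypothetical enclosing class for the constructors shown in these examples."""

    def __init__(self, config):
        super().__init__()
        self.ln1 = nn.LayerNorm(config.n_embd)
        self.ln2 = nn.LayerNorm(config.n_embd)
        self.attn = attention.CausalSelfAttention(config)  # or any of the variants above
        self.mlp = nn.Sequential(
            nn.Linear(config.n_embd, 4 * config.n_embd),
            nn.GELU(),
            nn.Linear(4 * config.n_embd, config.n_embd),
            nn.Dropout(config.resid_pdrop),
        )

    def forward(self, x):
        # Assumed pre-LN residual wiring; only __init__ appears in the examples.
        x = x + self.attn(self.ln1(x))
        x = x + self.mlp(self.ln2(x))
        return x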
Example #2
    def __init__(self, config, attn_type):
        super().__init__()
        self.ln1 = nn.LayerNorm(config.n_embd)
        self.ln2 = nn.LayerNorm(config.n_embd)
        if config.additive:
            self.attn = attention.AdditiveSelfAttention(config)
        else:
            if attn_type == "vanilla":
                self.attn = attention.CausalSelfAttention(config)
            else:
                self.attn = attention.SynthesizerAttention(config)

            print("Attention Block is initialized as {} type".format(attn_type))
        self.mlp = nn.Sequential(
            nn.Linear(config.n_embd, 4 * config.n_embd),
            nn.GELU(),
            nn.Linear(4 * config.n_embd, config.n_embd),
            nn.Dropout(config.resid_pdrop),
        )
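A hedged instantiation sketch for this variant follows; the enclosing class name Block and the configuration fields are assumptions used only for illustration (the real config class is defined elsewhere in the project).

from types import SimpleNamespace

# Hypothetical configuration object; field names are assumptions for illustration.
config = SimpleNamespace(n_embd=768, resid_pdrop=0.1, additive=False)

# Selects SynthesizerAttention because config.additive is False and the type
# string is not "vanilla"; the constructor then prints
# "Attention Block is initialized as synthesizer type".
block = Block(config, "synthesizer")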
Example #3
    def __init__(self, config: GPTConfig):
        super().__init__()
        self.ln1 = nn.LayerNorm(config.n_embd)
        self.ln2 = nn.LayerNorm(config.n_embd)

        if config.attention_mode == attention.AttentionMode.additive:
            self.attn = attention.AdditiveSelfAttention(config)
        elif config.attention_mode == attention.AttentionMode.vanilla:
            self.attn = attention.CausalSelfAttention(config)
        elif config.attention_mode == attention.AttentionMode.synthesizer:
            self.attn = attention.SynthesizerAttention(config)
        elif config.attention_mode == attention.AttentionMode.dense_and_causual:
            self.attn = attention.DenseAndCausalAttention(config)
        else:
            raise RuntimeError(
                f"Unsupported attention mode {config.attention_mode}")
        self.mlp = nn.Sequential(
            nn.Linear(config.n_embd, 4 * config.n_embd),
            nn.GELU(),
            nn.Linear(4 * config.n_embd, config.n_embd),
            nn.Dropout(config.resid_pdrop),
        )
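This variant dispatches on an AttentionMode enum carried by GPTConfig; a rough sketch of what those definitions might look like is given below. The enum values, config field names, and defaults are assumptions, and the actual definitions live in the project's attention and config modules.

import enum
from dataclasses import dataclass


class AttentionMode(enum.Enum):
    # Hypothetical enum; member names mirror the branches in Example #3,
    # including the "dense_and_causual" spelling used there.
    additive = "additive"
    vanilla = "vanilla"
    synthesizer = "synthesizer"
    dense_and_causual = "dense_and_causual"


@dataclass
class GPTConfig:
    # Hypothetical fields; only those referenced by the examples are listed.
    n_embd: int = 768
    resid_pdrop: float = 0.1
    attention_mode: AttentionMode = AttentionMode.vanilla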