Example #1
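A half-precision variant of the GPT-2 transformer block. The constructor mirrors the standard GPT-2 Block (presumably the parent class reached through super), but swaps the attention module for AttentionFP16; the two layer norms and the 4 * nx MLP are unchanged.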
def __init__(self, n_ctx, config, scale=False):
    super(BlockFP16, self).__init__(n_ctx, config, scale)
    nx = config.n_embd
    # Same sub-modules as the base GPT-2 block; only the attention is FP16-specific.
    self.ln_1 = LayerNorm(nx, eps=config.layer_norm_epsilon)
    self.attn = AttentionFP16(nx, n_ctx, config, scale)
    self.ln_2 = LayerNorm(nx, eps=config.layer_norm_epsilon)
    self.mlp = MLP(4 * nx, config)
Example #2
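The full FP16 GPT-2 model: token and position embeddings, a stack of config.n_layer deep-copied FP16 blocks, and a final layer norm, with the inherited weight initialization applied at the end.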
def __init__(self, config):
    super(GPT2ModelFP16, self).__init__(config)
    # Token and position embeddings.
    self.wte = nn.Embedding(config.vocab_size, config.n_embd)
    self.wpe = nn.Embedding(config.n_positions, config.n_embd)
    # One FP16 block, deep-copied config.n_layer times to build the stack.
    block = BlockFP16(config.n_ctx, config, scale=True)
    self.h = nn.ModuleList(
        [copy.deepcopy(block) for _ in range(config.n_layer)])
    self.ln_f = LayerNorm(config.n_embd, eps=config.layer_norm_epsilon)
    self.apply(self.init_weights)
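A minimal usage sketch, assuming these classes sit alongside a GPT-2 implementation in the pytorch_pretrained_bert style; GPT2Config and the (hidden_states, presents) forward signature are assumptions, since neither appears in the snippets above:

import torch

config = GPT2Config()  # assumed: default GPT-2 hyperparameters (n_embd, n_layer, ...)
model = GPT2ModelFP16(config).cuda().half()  # cast all weights to FP16 on the GPU
input_ids = torch.randint(0, config.vocab_size, (1, 16)).cuda()
hidden_states, presents = model(input_ids)  # assumed: same outputs as the base GPT2Model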