Example #1
        def __init__(self, config):
            super().__init__(config)

            # when gradient checkpointing is requested, wrap every
            # transformer layer so its activations are recomputed on backward
            if args.grad_ckpt:
                self.transformer.layer = ModuleList([
                    CkptXLNetLayer(layer) for layer in self.transformer.layer
                ])
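
The CkptXLNetLayer wrapper used above is not shown in the example; a minimal sketch of what such a gradient-checkpointing wrapper could look like (the class name and interface are assumptions, not part of the original):

from torch import nn
from torch.utils.checkpoint import checkpoint

class CkptXLNetLayer(nn.Module):
    def __init__(self, layer):
        super().__init__()
        self.layer = layer

    def forward(self, *inputs):
        # checkpoint() trades compute for memory: self.layer's forward
        # pass is re-run during backward instead of storing activations
        return checkpoint(self.layer, *inputs)
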
Example #2
    def __init__(self, input_size, hidden_size, pad_idx,
                 vocab_size):
        super().__init__()

        self.embedding = Embedding(
            num_embeddings=vocab_size,
            embedding_dim=input_size,
            padding_idx=pad_idx)

        self.dropout = Dropout(p=0.1)

        self.merge = Linear(
            in_features=hidden_size * 2,
            out_features=hidden_size,
            bias=False)

        # build the RNN as a ModuleList so locked dropout
        # can be applied between the individual layers
        # NOTE: weight drop is currently not used because
        # it is incompatible with apex
        self.rnn = ModuleList([
            GRU(input_size=input_size,
                hidden_size=hidden_size,
                bidirectional=True,
                batch_first=True)] + [
            GRU(input_size=hidden_size,
                hidden_size=hidden_size,
                batch_first=True)
            for _ in range(2)
        ])
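
The comment in the example above refers to locked (variational) dropout; a minimal sketch of such a module, assuming it is applied to batch-first GRU outputs between layers (this class is not part of the example):

from torch import nn

class LockedDropout(nn.Module):
    def __init__(self, p=0.1):
        super().__init__()
        self.p = p

    def forward(self, x):
        # x: (batch, seq_len, features); one mask is sampled per sequence
        # and reused at every time step, unlike ordinary dropout
        if not self.training or self.p == 0:
            return x
        mask = x.new_empty(x.size(0), 1, x.size(2)).bernoulli_(1 - self.p)
        return x * mask / (1 - self.p)
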
Example #3
    def __init__(self, config):
        super().__init__(config)

        # TODO: temporary solution, only the first 4 layers
        # of XLNet are kept for faster testing; remove later
        self.transformer.layer = ModuleList(
            [layer for layer in self.transformer.layer[:4]])

        self.lm_loss = Linear(config.d_model, config.n_token)

        self.apply(self.init_weights)
        self.tie_weights()
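
tie_weights() above comes from the pretrained-model base class and shares the LM head's projection with the word embedding; a minimal, self-contained illustration of that kind of tying (sizes and names here are illustrative, not taken from the example):

from torch import nn

embedding = nn.Embedding(num_embeddings=32000, embedding_dim=1024)
lm_head = nn.Linear(in_features=1024, out_features=32000, bias=False)

# reuse the embedding matrix as the output projection: both modules now
# hold the very same Parameter, so gradients accumulate into one tensor
lm_head.weight = embedding.weight
assert lm_head.weight.data_ptr() == embedding.weight.data_ptr()
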
Example #4
 def __init__(self, input_dim, hidden_dim=None, num_classes=0, dropout=0.5, config=None):
     super(SBMGNN, self).__init__()
     self.num_classes = num_classes
     self.dropout = dropout
     self.config = config
     self.hidden = [int(x) for x in hidden_dim]
     self.num_layers = len(self.hidden)
     self.h = GraphConvolution(in_features=input_dim,
                               out_features=self.hidden[0],
                               act=nn.LeakyReLU(negative_slope=0.2),
                               dropout=-1.)
     h_mid = []
     for i in range(self.num_layers-2):
         h_mid.append(GraphConvolution(in_features=self.hidden[i],
                                       out_features=self.hidden[i+1],
                                       act=nn.LeakyReLU(negative_slope=0.2),
                                       dropout=self.dropout))
     self.h_mid = ModuleList(h_mid)
     # three parallel GraphConvolution heads over the last hidden layer
     self.h1 = GraphConvolution(in_features=self.hidden[-2],
                                out_features=self.hidden[-1],
                                act=lambda x: x,
                                dropout=self.dropout)
     self.h2 = GraphConvolution(in_features=self.hidden[-2],
                                out_features=self.hidden[-1],
                                act=lambda x: x,
                                dropout=self.dropout)
     self.h3 = GraphConvolution(in_features=self.hidden[-2],
                                out_features=self.hidden[-1],
                                act=lambda x: x,
                                dropout=self.dropout)
     self.deep_decoder = nn.Sequential(nn.Linear(self.hidden[self.num_layers-1], config.model.g_hidden),
                                       nn.LeakyReLU(negative_slope=0.2),
                                       nn.Linear(config.model.g_hidden, config.model.g_hidden//2))
     # placeholders for tensors produced during the forward pass
     self.mean = None
     self.logv = None
     self.pi_logit = None
     self.a = None
     self.beta_a = None
     self.b = None
     self.beta_b = None
     self.v = None
     self.x_hat = None
     self.logit_post = None
     self.log_prior = None
     self.z_discrete = None
     self.z_real = None
     self.y_sample = None
     self.reconstructions = None
     self.get_alpha_beta(config=self.config)
Example #5
    def __init__(self, input_size, hidden_size, vocab_size):
        super().__init__()

        self.embedding = Embedding(
            num_embeddings=vocab_size,
            embedding_dim=input_size)

        self.dropout = Dropout(p=0.1)

        self.rnn = ModuleList([
            GRU(input_size=input_size,
                hidden_size=hidden_size,
                batch_first=True)] + [
            GRU(input_size=hidden_size,
                hidden_size=hidden_size,
                batch_first=True)
            for _ in range(2)
        ])

        self.attn = Attention(hidden_size=hidden_size)

        self.out_bias = Parameter(torch.zeros((vocab_size, )))
        # tie the output projection to the input embedding matrix
        self.out_weight = self.embedding.weight
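
The tied out_weight / out_bias above are presumably applied in the decoder's forward pass, which is not shown; a hedged sketch of that projection, assuming hidden_size equals input_size so the shapes line up:

import torch.nn.functional as F

def project_to_vocab(hidden, out_weight, out_bias):
    # hidden:     (batch, seq_len, hidden_size)
    # out_weight: (vocab_size, input_size), shared with the embedding
    # out_bias:   (vocab_size,)
    return F.linear(hidden, out_weight, out_bias)
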
Example #6
def _get_clones(module: nn.Module, N: int) -> ModuleList:
    return ModuleList([copy.deepcopy(module) for _ in range(N)])
Example #7
def _get_clones(module, n):
    return ModuleList([copy.deepcopy(module) for _ in range(n)])
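
A short usage sketch relying on the _get_clones defined above; the layer type and sizes are illustrative assumptions, not taken from the original code:

from torch import nn

# clone one encoder layer six times so each copy gets its own parameters;
# putting the same instance into a plain list would share the weights
layer = nn.TransformerEncoderLayer(d_model=512, nhead=8)
stack = _get_clones(layer, 6)  # ModuleList of 6 independent layers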