Example #1
    def __init__(self,
                 d_model: int = 512,
                 nhead: int = 8,
                 num_encoder_layers: int = 6,
                 num_decoder_layers: int = 6,
                 dim_feedforward: int = 2048,
                 dropout: float = 0.1,
                 activation: str = 'relu',
                 custom_encoder: Optional[Any] = None,
                 custom_decoder: Optional[Any] = None) -> None:
        super(Transformer, self).__init__()

        if custom_encoder is not None:
            self.encoder = custom_encoder
        else:
            encoder_layer = TransformerEncoderLayer(d_model, nhead,
                                                    dim_feedforward, dropout,
                                                    activation)
            encoder_norm = LayerNorm(d_model)
            self.encoder = TransformerEncoder(encoder_layer,
                                              num_encoder_layers, encoder_norm)

        if custom_decoder is not None:
            self.decoder = custom_decoder
        else:
            decoder_layer = TransformerDecoderLayer(d_model, nhead,
                                                    dim_feedforward, dropout,
                                                    activation)
            decoder_norm = LayerNorm(d_model)
            self.decoder = TransformerDecoder(decoder_layer,
                                              num_decoder_layers, decoder_norm)

        self._reset_parameters()

        self.d_model = d_model
        self.nhead = nhead
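
This constructor mirrors torch.nn.Transformer, so the stock module can stand in for a quick smoke test. A minimal sketch, assuming equivalent behavior for the default arguments:

    import torch
    import torch.nn as nn

    # Minimal sketch using the stock torch.nn.Transformer, which the
    # constructor above mirrors (assumption: equivalent default behavior).
    model = nn.Transformer(d_model=512, nhead=8,
                           num_encoder_layers=6, num_decoder_layers=6)
    src = torch.rand(10, 32, 512)   # (src_len, batch, d_model)
    tgt = torch.rand(20, 32, 512)   # (tgt_len, batch, d_model)
    out = model(src, tgt)           # -> (20, 32, 512)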
Example #2
    def __init__(
        self,
        d_model: int,
        nhead: int,
        d_hid: int,
        dropout=0.1,
        no_residual=False,
    ):
        super(Extractor, self).__init__()

        self.self_attn = MultiheadAttention(d_model, nhead, dropout=dropout)
        self.cross_attn = MultiheadAttention(d_model, nhead, dropout=dropout)

        self.conv1 = Conv1d(d_model, d_hid, 9, padding=4)
        self.conv2 = Conv1d(d_hid, d_model, 1, padding=0)

        self.norm1 = LayerNorm(d_model)
        self.norm2 = LayerNorm(d_model)
        self.norm3 = LayerNorm(d_model)
        self.dropout1 = Dropout(dropout)
        self.dropout2 = Dropout(dropout)
        self.dropout3 = Dropout(dropout)

        self.no_residual = no_residual
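
The feed-forward here is convolutional rather than linear, so activations are channels-last for attention but must be channels-first for the convolutions. A minimal sketch of just that piece, assuming d_model=512 and d_hid=2048 for illustration and that the forward pass transposes (seq, batch, d_model) activations before the Conv1d stack:

    import torch
    from torch.nn import Conv1d

    # Same kernel sizes and paddings as above; kernel 9 with padding 4
    # preserves the sequence length.
    conv1 = Conv1d(512, 2048, 9, padding=4)
    conv2 = Conv1d(2048, 512, 1, padding=0)
    x = torch.rand(8, 512, 100)              # (batch, d_model, seq_len)
    y = conv2(torch.relu(conv1(x)))          # -> (8, 512, 100)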
Example #3
    def __init__(self,
                 d_model=512,
                 nhead=8,
                 num_encoder_layers=6,
                 num_decoder_layers=6,
                 dim_feedforward=2048,
                 dropout=0.1,
                 activation="relu",
                 custom_encoder=None,
                 custom_decoder=None):
        super(Transformer, self).__init__()

        if custom_encoder is not None:
            self.encoder = custom_encoder
        else:
            encoder_layer = TransformerEncoderLayer(d_model, nhead,
                                                    dim_feedforward, dropout,
                                                    activation)
            encoder_norm = LayerNorm(d_model)
            self.encoder = TransformerEncoder(encoder_layer,
                                              num_encoder_layers, encoder_norm)

        if custom_decoder is not None:
            self.decoder = custom_decoder
        else:
            decoder_layer = TransformerDecoderLayer(d_model, nhead,
                                                    dim_feedforward, dropout,
                                                    activation)
            decoder_norm = LayerNorm(d_model)
            self.decoder = TransformerDecoder(decoder_layer,
                                              num_decoder_layers, decoder_norm)

        self._reset_parameters()

        self.d_model = d_model
        self.nhead = nhead
Example #4
    def __init__(self, config):
        Module.__init__(self)
        self.mlp1_inc = config['n_inc'] + config['e_outc']
        self.mlp1_hs1 = config['node_model_mlp1_hidden_sizes'][0]
        self.mlp1_hs2 = config['node_model_mlp1_hidden_sizes'][1]
        self.mlp2_hs1 = config['node_model_mlp2_hidden_sizes'][0]

        self.dim_out = config['n_outc']
        self.g_inc = config['g_inc']
        self.node_mlp_1 = Seq(Linear(self.mlp1_inc, self.mlp1_hs1),
                              LayerNorm(self.mlp1_hs1), ReLU(),
                              Linear(self.mlp1_hs1, self.mlp1_hs2))

        self.mlp2_inc_uncond = config['n_inc'] + self.mlp1_hs2 + config['u_inc']
        self.mlp2_inc_cond = self.mlp2_inc_uncond + self.mlp1_hs2
Example #5
    def __init__(self,
                 d_model,
                 nhead,
                 dim_feedforward=2048,
                 dropout=0.1,
                 attention_dropout=0.1,
                 drop_path_rate=0.1):
        super(TransformerEncoderLayer, self).__init__()
        self.pre_norm = LayerNorm(d_model)
        self.self_attn = Attention(dim=d_model,
                                   num_heads=nhead,
                                   attention_dropout=attention_dropout,
                                   projection_dropout=dropout)

        self.linear1 = Linear(d_model, dim_feedforward)
        self.dropout1 = Dropout(dropout)
        self.norm1 = LayerNorm(d_model)
        self.linear2 = Linear(dim_feedforward, d_model)
        self.dropout2 = Dropout(dropout)

        self.drop_path = DropPath(
            drop_path_rate) if drop_path_rate > 0 else Identity()

        self.activation = F.gelu
Example #6
    def __init__(self, config: FSMTConfig):
        super().__init__()
        self.embed_dim = config.d_model

        self.self_attn = Attention(
            embed_dim=self.embed_dim,
            num_heads=config.decoder_attention_heads,
            dropout=config.attention_dropout,
        )
        self.dropout = config.dropout
        self.activation_fn = ACT2FN[config.activation_function]
        self.activation_dropout = config.activation_dropout

        self.self_attn_layer_norm = LayerNorm(self.embed_dim)
        self.encoder_attn = Attention(
            self.embed_dim,
            config.decoder_attention_heads,
            dropout=config.attention_dropout,
            encoder_decoder_attention=True,
        )
        self.encoder_attn_layer_norm = LayerNorm(self.embed_dim)
        self.fc1 = nn.Linear(self.embed_dim, config.decoder_ffn_dim)
        self.fc2 = nn.Linear(config.decoder_ffn_dim, self.embed_dim)
        self.final_layer_norm = LayerNorm(self.embed_dim)
Example #7
    def __init__(self, input_size, hidden_size, bias=True, forget_bias=0):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.ih = Linear(input_size, 4 * hidden_size, bias=bias)
        self.hh = Linear(hidden_size, 4 * hidden_size, bias=bias)

        if bias:
            self.ih.bias.data.fill_(0)
            self.hh.bias.data.fill_(0)
            # forget bias init
            self.ih.bias.data[hidden_size:hidden_size * 2].fill_(forget_bias)
            self.hh.bias.data[hidden_size:hidden_size * 2].fill_(forget_bias)

        self.ln_cell = LayerNorm(hidden_size)
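
The bias slice [hidden_size : 2*hidden_size] targets the forget gate under the i, f, g, o gate ordering used by torch.nn.LSTMCell; a small sketch of that initialisation, assuming the same ordering here:

    import torch
    from torch.nn import Linear

    hidden_size, forget_bias = 16, 1.0
    ih = Linear(8, 4 * hidden_size)
    ih.bias.data.fill_(0)
    ih.bias.data[hidden_size:hidden_size * 2].fill_(forget_bias)
    # chunking the bias into the four gates shows only f was biased
    i, f, g, o = ih.bias.data.chunk(4)
    print(i.sum().item(), f.sum().item())  # 0.0 16.0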
Example #8
    def __init__(self,
                 d_model,
                 nhead,
                 dim_feedforward=2048,
                 dropout=0.1,
                 activation="relu"):
        super(TransformerDecoderLayer, self).__init__()
        self.self_attn = MultiheadAttention(d_model, nhead, dropout=dropout)
        self.multihead_attn = MultiheadAttention(d_model,
                                                 nhead,
                                                 dropout=dropout)
        # Implementation of Feedforward model
        self.linear1 = Linear(d_model, dim_feedforward)
        self.dropout = Dropout(dropout)
        self.linear2 = Linear(dim_feedforward, d_model)

        self.norm1 = LayerNorm(d_model)
        self.norm2 = LayerNorm(d_model)
        self.norm3 = LayerNorm(d_model)
        self.dropout1 = Dropout(dropout)
        self.dropout2 = Dropout(dropout)
        self.dropout3 = Dropout(dropout)

        self.activation = _get_activation_fn(activation)
Example #9
 def __init__(self, dim_in, dim_out_local, dim_out_global, dim_local, dim_global,
              dim_hidden=20, dim_pre_aggr=20, n_iter=20, n_out_layers=5,
              global_flow=False, class_weight=torch.tensor([1.0, 1.0, 1.0, 1.0])):
     super(N2JNet, self).__init__()
     self.dim_in = dim_in
     self.dim_out_local = dim_out_local
     self.dim_out_global = dim_out_global
     self.dim_hidden = dim_hidden
     self.dim_local = dim_local
     self.dim_global = dim_global
     self.dim_pre_aggr = dim_pre_aggr
     self.n_iter = n_iter
     self.n_out_layers = n_out_layers
     self.global_flow = global_flow
     self.class_weight = class_weight
     # MLP for initially encoding local
     self.mlp_node_init = Seq(Lin(self.dim_in, self.dim_hidden),
                              ReLU(),
                              Lin(self.dim_hidden, self.dim_hidden),
                              ReLU(),
                              Lin(self.dim_hidden, self.dim_local),
                              LayerNorm(self.dim_local))
     # MLPs for encoding local and global
     meta_layers = ModuleList()
     for i in range(self.n_iter):
         node_model = NodeModel(self.dim_local, self.dim_global, self.dim_hidden)
         global_model = GlobalModel(self.dim_local, self.dim_global,
                                    self.dim_hidden, self.dim_pre_aggr)
         meta = CustomMetaLayer(node_model=node_model, global_model=global_model)
         meta_layers.append(meta)
     self.meta_layers = meta_layers
     # Networks for local and global output
     self.net_out_local = Seq(Lin(self.dim_local, self.dim_hidden),
                              ReLU(),
                              Lin(self.dim_hidden, self.dim_hidden),
                              ReLU(),
                              Lin(self.dim_hidden, self.dim_out_local*2))
     if self.global_flow:
         self.net_out_global = Flow(*[[
                                    MAF(self.dim_global, self.dim_out_global, hidden=dim_hidden),
                                    Perm(self.dim_global)][i%2] for i in \
                                    range(self.n_out_layers*2 + 1)])
     else:
         self.net_out_global = Seq(Lin(self.dim_global, self.dim_hidden),
                                   ReLU(),
                                   Lin(self.dim_hidden, self.dim_hidden),
                                   ReLU(),
                                   Lin(self.dim_hidden, self.dim_out_global*2))
Example #10
 def __init__(self,
              num_skills,
              state_size,
              num_heads=2,
              dropout=0.2,
              infer=False):
     super(student_model, self).__init__()
     self.infer = infer
     self.num_skills = num_skills
     self.state_size = state_size
     # we use the (num_skills * 2 + 1) as key padding_index
     '''
     Embedding - the second argument is the maximum length of the tensor, the
     first argument is the number of tensors.
         If num_embeddings is the size of the dictionary, then maybe it should
         be of size num_skills?

         (10, 3): 10 - the number of distinct elements, 3 - the number of
         dimensions the elements are embedded in.

         Since 10 is the number of elements, the largest element can be 9, so
         num can be len(ex_id_converter) if indices start at 0.
         state_size is 200 by default; one could play with that.
     '''
     # changed from num_skills * 2 + 1 to num_skills * 2: this embedding holds
     # the questions and the appended answers, so its length is 2n
     # is it correct to put question indices and correct/incorrect answers
     # into the same embedding?
     self.embedding = nn.Embedding(
         num_embeddings=num_skills * 2, embedding_dim=state_size)
     # padding_idx=num_skills*2
     # self.position_embedding = PositionalEncoding(state_size)
     # opt.max_len is the largest number of exercises the model can handle;
     # the -1 was removed
     self.position_embedding = nn.Embedding(
         num_embeddings=opt.max_len, embedding_dim=state_size)
     # we use the (num_skills + 1) as query padding_index
     # removed the +1
     self.problem_embedding = nn.Embedding(
         num_embeddings=num_skills, embedding_dim=state_size)
     # padding_idx=num_skills)
     self.multi_attn = MultiHeadedAttention(h=num_heads,
                                            d_model=state_size,
                                            dropout=dropout,
                                            infer=self.infer)
     self.feedforward1 = nn.Linear(in_features=state_size,
                                   out_features=state_size)
     self.feedforward2 = nn.Linear(in_features=state_size,
                                   out_features=state_size)
     self.pred_layer = nn.Linear(in_features=state_size,
                                 out_features=num_skills)
     self.dropout = nn.Dropout(dropout)
     self.layernorm = LayerNorm(state_size)
Example #11
 def mlp(f_in, f_out):
     """
     This function returns a Multi-Layer Perceptron with ReLU non-linearities
     with num_layers layers and h hidden nodes in each layer, with f_in input
     features and f_out output features.
     """
     layers = []
     f1 = f_in
     for f2 in hidden_layer_sizes:
         layers.append(Linear(f1, f2))
         layers.append(ReLU())
         f1 = f2
     layers.append(Linear(f1, f_out))
     # layers.append(ReLU())
     layers.append(LayerNorm(f_out))
     return Sequential(*layers)
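
hidden_layer_sizes is captured from the enclosing scope; below is a standalone sketch with it made an explicit argument (a hypothetical signature, for illustration only):

    import torch
    from torch.nn import Linear, ReLU, LayerNorm, Sequential

    def mlp(f_in, f_out, hidden_layer_sizes=(64, 64)):
        # same construction as above, but self-contained
        layers, f1 = [], f_in
        for f2 in hidden_layer_sizes:
            layers += [Linear(f1, f2), ReLU()]
            f1 = f2
        layers += [Linear(f1, f_out), LayerNorm(f_out)]
        return Sequential(*layers)

    net = mlp(10, 3)
    print(net(torch.rand(5, 10)).shape)  # torch.Size([5, 3])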
Example #12
    def __init__(self, cfg):
        super().__init__()
        # Original BERT Embedding
        # self.tok_embed = nn.Embedding(cfg.vocab_size, cfg.hidden) # token embedding

        # factorized embedding
        self.tok_embed1 = nn.Embedding(cfg.vocab_size, cfg.embedding_size)
        self.tok_embed2 = nn.Linear(cfg.embedding_size, cfg.hidden_size)

        self.pos_embed = nn.Embedding(cfg.max_position_embeddings, cfg.hidden_size) # position embedding
        # self.seg_embed = nn.Embedding(cfg.n_segments, cfg.hidden) # segment(token type) embedding

        self.norm = LayerNorm(cfg.hidden_size)
        # self.drop = nn.Dropout(cfg.classifier_dropout_prob)
        
        self.pos = None
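
The two-step token embedding is the ALBERT-style factorization: a small lookup followed by a projection up to the hidden size. A minimal sketch of that lookup, assuming the forward pass simply chains the two modules (sizes below are illustrative):

    import torch
    import torch.nn as nn

    vocab_size, embedding_size, hidden_size = 30000, 128, 768
    tok_embed1 = nn.Embedding(vocab_size, embedding_size)   # factorized lookup
    tok_embed2 = nn.Linear(embedding_size, hidden_size)     # project up

    input_ids = torch.randint(0, vocab_size, (2, 16))
    hidden = tok_embed2(tok_embed1(input_ids))              # -> (2, 16, hidden_size)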
Example #13
    def __init__(self, input_size, key_size, heads):

        super().__init__()

        self.input_size = input_size
        self.key_size = key_size
        self.heads = heads

        # bias is set to False to keep the zero-padded positions from becoming non-zero
        self.q_w = Linear(input_size, key_size * heads, bias=False)
        self.k_w = Linear(input_size, key_size * heads, bias=False)
        self.v_w = Linear(input_size, input_size * heads, bias=False)
        # combine the information from the multiple heads
        self.linear = Linear(input_size * heads, input_size)

        self.layer_norm = LayerNorm(input_size, eps=1e-6)
Example #14
    def __init__(self, hidden_size, s_size, r_size, t_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.s_size = s_size
        self.r_size = r_size
        self.t_size = t_size
        self.b_size = 1

        write_size = s_size + r_size + t_size + self.b_size
        self.W_write = nn.Linear(hidden_size, 1 * write_size)

        # read
        self.W_read = nn.Linear(hidden_size, s_size + r_size * 3)
        self.ln_read = LayerNorm(t_size, elementwise_affine=False)

        self.reset_parameters()
Example #15
    def define_weights_and_layers(self):
        gnn_layers = []

        use_rels = self.n_relations
        if self.inverse_edges and self.separate_relation_types_for_inverse:
            use_rels *= 2

        for layer in range(self.n_layers):
            gnn_layers.append(
                RGCNLayer(self.output_dim, self.output_dim, use_rels))

        gnn_layers = torch.nn.ModuleList(gnn_layers)
        self.gnn_layers = gnn_layers
        self.W_input = torch.nn.Sequential(
            Linear(self.input_dim, self.output_dim),
            LayerNorm(self.output_dim), ReLU())
Example #16
 def __init__(self, c_in, num_nodes):
     super(SATT_3, self).__init__()
     self.conv1 = Conv2d(c_in * 12,
                         c_in,
                         kernel_size=(1, 1),
                         padding=(0, 0),
                         stride=(1, 1),
                         bias=False)
     self.conv2 = Conv2d(c_in * 12,
                         c_in,
                         kernel_size=(1, 1),
                         padding=(0, 0),
                         stride=(1, 1),
                         bias=False)
     self.bn = LayerNorm([num_nodes, num_nodes, 4])
     self.c_in = c_in
Example #17
def test_layer_norm():
    bert = BertModel.from_pretrained("bert-base-cased").cuda().half()
    tokenizer = BertTokenizer.from_pretrained("bert-base-cased")
    test_text = (
        "Hello. How are you? I am fine thank you and you? yes Good. "
        "hi hi hi hi hi hi hi hi hi hi hi hi hi"  # 32
    )

    tokens = tokenizer(
        [test_text] * 4,
        return_tensors="pt",
    )

    # [bsz, seq_len, d_model]
    embedding_output = (bert.embeddings(
        input_ids=tokens["input_ids"].cuda(),
        position_ids=None,
        token_type_ids=tokens["token_type_ids"].cuda(),
        inputs_embeds=None,
        past_key_values_length=0,
    ).cuda().half())

    fused_layernorm_layer = (MixedFusedLayerNorm(
        normalized_shape=embedding_output.size(-1)).cuda().half())

    torch_layernorm_layer = (LayerNorm(
        normalized_shape=embedding_output.size(-1)).cuda().half())

    fused_output = fused_layernorm_layer(embedding_output)
    torch_output = torch_layernorm_layer(embedding_output)
    test_result = (fused_output - torch_output).abs()

    while test_result.dim() != 1:
        test_result = test_result.mean(dim=-1)

    diff = test_result.mean(dim=-1)

    if diff <= 1e-3:
        print(f"\n[Success] test_layer_norm"
              f"\n > mean_difference={diff}"
              f"\n > fused_values={fused_output[-1][-1][:5].tolist()}"
              f"\n > torch_values={torch_output[-1][-1][:5].tolist()}")
    else:
        print(f"\n[Fail] test_layer_norm"
              f"\n > mean_difference={diff}, "
              f"\n > fused_values={fused_output[-1][-1][:5].tolist()}, "
              f"\n > torch_values={torch_output[-1][-1][:5].tolist()}")
Example #18
    def __init__(
        self,
        cfg: WavLMConfig,
    ) -> None:
        super().__init__()
        logger.info(f"WavLM Config: {cfg.__dict__}")

        self.cfg = cfg
        feature_enc_layers = eval(cfg.conv_feature_layers)
        self.embed = feature_enc_layers[-1][0]

        self.feature_extractor = ConvFeatureExtractionModel(
            conv_layers=feature_enc_layers,
            dropout=0.0,
            mode=cfg.extractor_mode,
            conv_bias=cfg.conv_bias,
        )

        self.post_extract_proj = (nn.Linear(self.embed, cfg.encoder_embed_dim)
                                  if self.embed != cfg.encoder_embed_dim else
                                  None)

        self.mask_prob = cfg.mask_prob
        self.mask_selection = cfg.mask_selection
        self.mask_other = cfg.mask_other
        self.mask_length = cfg.mask_length
        self.no_mask_overlap = cfg.no_mask_overlap
        self.mask_min_space = cfg.mask_min_space

        self.mask_channel_prob = cfg.mask_channel_prob
        self.mask_channel_selection = cfg.mask_channel_selection
        self.mask_channel_other = cfg.mask_channel_other
        self.mask_channel_length = cfg.mask_channel_length
        self.no_mask_channel_overlap = cfg.no_mask_channel_overlap
        self.mask_channel_min_space = cfg.mask_channel_min_space

        self.dropout_input = nn.Dropout(cfg.dropout_input)
        self.dropout_features = nn.Dropout(cfg.dropout_features)

        self.feature_grad_mult = cfg.feature_grad_mult

        self.mask_emb = nn.Parameter(
            torch.FloatTensor(cfg.encoder_embed_dim).uniform_())

        self.encoder = TransformerEncoder(cfg)
        self.layer_norm = LayerNorm(self.embed)
Example #19
    def __init__(self, in_channels, hidden_channels, out_channels, num_layers, dropout=0.5):
        super(MLP, self).__init__()
        self.node_encoder = Linear(in_channels, hidden_channels)
        self.layers = torch.nn.ModuleList()

        for i in range(1, num_layers+1):
            conv = Linear(hidden_channels, hidden_channels)
            norm = LayerNorm(hidden_channels, elementwise_affine=True)
            act = ReLU(inplace=True)

            layer = AdaGNNLayer(conv, norm, act, dropout=dropout, lin=True)
            self.layers.append(layer)
        
        self.lin = Linear(hidden_channels, out_channels)
        self.currenlayer = 1
        self.layers[0].unfix()
        self.num_layers = num_layers
Example #20
 def __init__(self, config):
     super(BertCRFForAttr, self).__init__(config)
     self.bert = BertModel(config)
     self.dropout = nn.Dropout(config.hidden_dropout_prob)
     self.t_lstm = nn.LSTM(input_size=config.hidden_size,
                           hidden_size=config.hidden_size // 2,
                           batch_first=True,
                           bidirectional=True)
     self.a_lstm = nn.LSTM(input_size=config.hidden_size,
                           hidden_size=config.hidden_size // 2,
                           batch_first=True,
                           bidirectional=True)
     self.attention = CosAttention()
     self.ln = LayerNorm(config.hidden_size * 2)
     self.classifier = nn.Linear(config.hidden_size * 2, config.num_labels)
     self.crf = CRF(num_tags=config.num_labels, batch_first=True)
     self.init_weights()
Example #21
    def __init__(self,
                 n_in_channels,
                 n_out_channels,
                 n_blocks,
                 n_init_features,
                 growth_rate,
                 drop_rate,
                 kernel_sizes,
                 glu_act,
                 bt_f=None):
        super(DenseDeep1D, self).__init__()

        self.n_blocks = n_blocks
        self.features = torch.nn.Sequential(
            OrderedDict([('conv0',
                          Conv1d(n_in_channels,
                                 n_init_features,
                                 kernel_size=kernel_sizes['conv0'],
                                 padding_mode='zeros',
                                 padding=int(
                                     (kernel_sizes['conv0'] - 1) / 2))),
                         ('norm0', LayerNorm([n_init_features, 107])),
                         ('relu0', ReLU(inplace=True))]))

        for k_block in range(n_blocks):
            if bt_f is None:
                self.features.add_module(
                    'block_{}'.format(k_block),
                    _DenseConvBlock(n_init_features + k_block * growth_rate,
                                    growth_rate=growth_rate,
                                    drop_rate=drop_rate,
                                    kernel_size=kernel_sizes['blocks'],
                                    glu_act=glu_act))
            else:
                self.features.add_module(
                    'block_{}'.format(k_block),
                    _DenserConvBlock_bottleneck(
                        n_init_features + k_block * growth_rate,
                        growth_rate=growth_rate,
                        drop_rate=drop_rate,
                        kernel_size=kernel_sizes['blocks'],
                        bottle_factor=bt_f))

        self.act_final = ReLU(inplace=True)
        self.regression = Linear(n_init_features + n_blocks * growth_rate,
                                 n_out_channels)
Example #22
    def __init__(self,
                 vocab_size,
                 num_classes,
                 embedding_dim,
                 nhead=1,
                 num_encoder_layers=2):
        super().__init__()
        d_model = embedding_dim
        dim_feedforward = 2 * d_model

        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        encoder_layer = TransformerEncoderLayer(d_model, nhead,
                                                dim_feedforward)
        encoder_norm = LayerNorm(d_model)
        self.encoder = TransformerEncoder(encoder_layer, num_encoder_layers,
                                          encoder_norm)
        self.linear = nn.Linear(d_model, num_classes)
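
The forward pass is not shown; below is a plausible wiring using the stock PyTorch modules, assuming mean-pooling over the sequence before the final linear layer (the pooling choice is an assumption, not taken from the source):

    import torch
    import torch.nn as nn

    vocab_size, num_classes, d_model = 1000, 4, 64  # illustrative sizes
    embedding = nn.Embedding(vocab_size, d_model)
    encoder_layer = nn.TransformerEncoderLayer(d_model, nhead=1,
                                               dim_feedforward=2 * d_model)
    encoder = nn.TransformerEncoder(encoder_layer, num_layers=2,
                                    norm=nn.LayerNorm(d_model))
    linear = nn.Linear(d_model, num_classes)

    tokens = torch.randint(0, vocab_size, (16, 8))           # (seq_len, batch)
    logits = linear(encoder(embedding(tokens)).mean(dim=0))  # (batch, num_classes)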
Example #23
 def __init__(self):
     super(Model, self).__init__()
     self.hidden_size = model_config.hidden_size
     self.embedding = nn.Embedding(config.num_vocab, config.embed_dim)
     self.bilstm = nn.LSTM(input_size=config.embed_dim,
                           hidden_size=self.hidden_size,
                           batch_first=True,
                           num_layers=2,
                           dropout=model_config.dropout,
                           bidirectional=True)
     # self.dropout = SpatialDropout(drop_p)
     self.dropout = nn.Dropout(model_config.dropout)
     self.layer_norm = LayerNorm(self.hidden_size * 2)
     self.classifier = nn.Linear(self.hidden_size * 2, config.num_labels)
     self.crf = CRF(tagset_size=config.num_labels,
                    tag_dictionary=config.label2id,
                    is_bert=True)
Example #24
 def __init__(self,
              in_channels,
              hidden_channels,
              out_channels,
              num_layers,
              gnn_type='GEN'):
     super(WeakGNN, self).__init__()
     self.node_encoder = Linear(in_channels, hidden_channels)
     self.edge_encoder = Linear(in_channels, hidden_channels)
     self.layers = torch.nn.ModuleList()
     self.gnn_type = gnn_type
     for i in range(1, num_layers + 1):
         if gnn_type == 'GEN':
             conv = GENConv(hidden_channels,
                            hidden_channels,
                            aggr='softmax',
                            t=1.0,
                            learn_t=True,
                            num_layers=1,
                            norm='layer')
         elif gnn_type == 'MLP':
             conv = torch.nn.Linear(hidden_channels, hidden_channels)
         elif gnn_type == 'GCN':
             conv = GCNConv(hidden_channels, hidden_channels)
         elif gnn_type == 'SAGE':
             conv = SAGEConv(hidden_channels, hidden_channels)
         elif gnn_type == 'GAT':
             conv = GATConv(hidden_channels, hidden_channels)
         norm = LayerNorm(hidden_channels, elementwise_affine=True)
         act = ReLU(inplace=True)
         if gnn_type == 'MLP':
             layer = AdaGNNLayer(conv,
                                 norm,
                                 act,
                                 dropout=0.1,
                                 ckpt_grad=False,
                                 lin=True)
         else:
             layer = AdaGNNLayer(conv,
                                 norm,
                                 act,
                                 dropout=0.1,
                                 ckpt_grad=False)
         self.layers.append(layer)
     self.lin = Linear(hidden_channels, out_channels)
Example #25
    def __init__(self,
                 d_hid,
                 d_ff,
                 relu_dropout=0.1,
                 residual_dropout=0.1,
                 leaky_relu_slope=0.1):
        super(PositionwiseFeedForward, self).__init__()
        self.w_1 = nn.Linear(d_hid, d_ff)
        self.w_2 = nn.Linear(d_ff, d_hid)

        self.layer_norm = LayerNorm(d_hid, eps=1e-6)
        # The t2t code on github uses relu dropout, even though the transformer
        # paper describes residual dropout only. We implement relu dropout
        # because we always have the option to set it to zero.
        self.relu_dropout = FeatureDropout2(relu_dropout)
        self.residual_dropout = FeatureDropout2(residual_dropout)
        self.relu = nn.ReLU() if leaky_relu_slope == 0.0 else nn.LeakyReLU(
            leaky_relu_slope)
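
A sketch of a typical forward pass for this block, assuming pre-norm residual wiring and substituting plain nn.Dropout for FeatureDropout2 (both are assumptions, not the original implementation):

    import torch
    import torch.nn as nn

    d_hid, d_ff = 256, 1024
    w_1, w_2 = nn.Linear(d_hid, d_ff), nn.Linear(d_ff, d_hid)
    layer_norm = nn.LayerNorm(d_hid, eps=1e-6)
    relu_dropout, residual_dropout = nn.Dropout(0.1), nn.Dropout(0.1)

    x = torch.rand(8, 20, d_hid)
    # x + residual_dropout(W2(relu_dropout(relu(W1(LayerNorm(x))))))
    y = x + residual_dropout(w_2(relu_dropout(torch.relu(w_1(layer_norm(x))))))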
Example #26
    def __init__(self,
                 cell_class,
                 input_size: int,
                 hidden_size: int,
                 input_keep_prob: float = 1.0,
                 recurrent_keep_prob: float = 1.0,
                 layer_norm=False):
        super(UniDirLSTMLayer, self).__init__()
        self.forward_layer = DynamicRNN(cell_class(input_size, hidden_size),
                                        input_keep_prob,
                                        recurrent_keep_prob,
                                        go_forward=True)
        self.use_layer_norm = layer_norm

        if layer_norm:
            self.layer_norm = LayerNorm(hidden_size)
        else:
            self.layer_norm = Identity()
Example #27
    def __init__(self, hidden_channels, num_layers):
        super(DeeperGCN, self).__init__()

        self.node_encoder = Linear(data.x.size(-1), hidden_channels)
        self.edge_encoder = Linear(data.edge_attr.size(-1), hidden_channels)

        self.layers = torch.nn.ModuleList()
        for i in range(1, num_layers + 1):
            conv = GENConv(hidden_channels, hidden_channels, aggr='stat',
                           t=1.0, learn_t=True, num_layers=2, norm='layer', msg_norm=True)
            norm = LayerNorm(hidden_channels, elementwise_affine=True)
            act = ReLU(inplace=True)

            layer = DeepGCNLayer(conv, norm, act, block='res+', dropout=0.1,
                                 ckpt_grad=i % 3)
            self.layers.append(layer)

        self.lin = Linear(hidden_channels, data.y.size(-1))
Example #28
    def __init__(self,
                 in_features,
                 hidden_dim,
                 num_heads=1,
                 dropout=0.2,
                 edge_encoding=EDGE_ENCODING_TYPE.RELATIVE_POSITION):
        super().__init__()

        self.attention = MultiHeadAttention(in_features=in_features,
                                            hidden_dim=hidden_dim,
                                            num_heads=num_heads,
                                            dropout=dropout,
                                            edge_encoding=edge_encoding)
        self.feed_forward = PositionwiseFeedForward(in_features=in_features,
                                                    hidden_dim=hidden_dim,
                                                    dropout=dropout)
        self.dropout = Dropout()
        self.layer_norm = LayerNorm(in_features)
Example #29
    def __init__(self, config: ModelConfig, data_config: DataConfig, encoder_embeddings):
        super().__init__()

        self.embeddings = Embedding(num_embeddings=data_config.output_translation_vocabulary_sizes[0][0],
                                    embedding_dim=config.encoder_output_size,
                                    padding_idx=pad_token_index)

        self.positional_encoding = PositionalEncoding(config.encoder_output_size)

        if config.decoder_translation_scale_embeddings:
            self.embeddings_scale = math.sqrt(float(config.encoder_output_size))
        else:
            self.embeddings_scale = None

        self.dropout = Dropout(config.decoder_translation_transformer_dropout)

        if config.decoder_translation_share_encoder_embeddings:
            assert(self.embeddings.weight.shape == encoder_embeddings.get_lut_embeddings().weight.shape)
            self.embeddings.weight = encoder_embeddings.get_lut_embeddings().weight

        self.transformer_layers = ModuleList([TransformerDecoderLayer(d_model=config.encoder_output_size,
                                                                      heads=config.decoder_translation_transformer_heads,
                                                                      d_ff=config.decoder_translation_transformer_hidden_size,
                                                                      dropout=config.decoder_translation_transformer_dropout,
                                                                      attention_dropout=config.decoder_translation_transformer_dropout)
                                              for _ in range(config.decoder_translation_transformer_layers)])

        self.layer_norm = LayerNorm(config.encoder_output_size, eps=1e-6)

        self.linear: Linear = Linear(in_features=config.encoder_output_size, out_features=data_config.output_translation_vocabulary_sizes[0][0])

        if config.decoder_translation_share_embeddings:
            self.linear.weight = self.embeddings.weight

        self.linear_features = None
        if data_config.output_translation_features > 1:
            self.linear_features = ModuleList([Linear(in_features=config.encoder_output_size,
                                                      out_features=data_config.output_translation_vocabulary_sizes[0][i])
                                               for i in range(1, data_config.output_translation_features)])

        self.max_seq_out_len = 150
        self.beam_size = 1
        self.state = {}
Example #30
 def __init__(self, args):
     super(BiLSTMForNer, self).__init__()
     self.embedding_size = args.embedding_size
     self.model_type = args.model_type
     self.embedding = nn.Embedding(args.vocab_size, args.embedding_size)
     self.bilstm = nn.LSTM(input_size=args.embedding_size,
                           hidden_size=args.hidden_size,
                           num_layers=2,
                           batch_first=True,
                           dropout=args.drop_p,
                           bidirectional=True)
     self.dropout = SpatialDropout(args.drop_p)
     self.layer_norm = LayerNorm(args.hidden_size * 2)
     self.classifier = nn.Linear(args.hidden_size * 2, args.num_labels)
     self.use_crf = args.use_crf
     self.loss_type = args.loss_type
     self.num_labels = args.num_labels
     if args.use_crf:
         self.crf = CRF(num_tags=args.num_labels, batch_first=True)