Example #1
    def __init__(self, in_feat, out_feat, num_rels, regularizer="basis", num_bases=None, act_func="relu", dropout=0.0):
        super(RGINLayer, self).__init__()
        self.rgc_layer = RelGraphConv(
            in_feat=in_feat, out_feat=out_feat, num_rels=num_rels,
            regularizer=regularizer, num_bases=num_bases,
            activation=None, self_loop=True, dropout=0.0)
        self.mlp = nn.Sequential(
            nn.Linear(out_feat, out_feat),
            # nn.BatchNorm1d(out_feat),
            map_activation_str_to_layer(act_func),
            nn.Linear(out_feat, out_feat),
            map_activation_str_to_layer(act_func))
        self.drop = nn.Dropout(dropout)

        # init
        if hasattr(self.rgc_layer, "weight") and self.rgc_layer.weight is not None:
            nn.init.normal_(self.rgc_layer.weight, 0.0, 1/(out_feat)**0.5)
        if hasattr(self.rgc_layer, "w_comp") and self.rgc_layer.w_comp is not None:
            nn.init.normal_(self.rgc_layer.w_comp, 0.0, 1/(out_feat)**0.5)
        if hasattr(self.rgc_layer, "loop_weight") and self.rgc_layer.loop_weight is not None:
            nn.init.normal_(self.rgc_layer.loop_weight, 0.0, 1/(out_feat)**0.5)
        if hasattr(self.rgc_layer, "h_bias") and self.rgc_layer.h_bias is not None:
            nn.init.zeros_(self.rgc_layer.h_bias)
        for m in self.mlp.modules():
            if isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0.0, 1/(out_feat)**0.5)
                if hasattr(m, "bias") and m.bias is not None:
                    nn.init.zeros_(m.bias)
            elif isinstance(m, nn.BatchNorm1d):
                nn.init.ones_(m.weight)
                nn.init.zeros_(m.bias)
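
All of these examples call a shared helper, map_activation_str_to_layer, whose definition is not shown. A minimal sketch of what such a helper might look like (the exact set of supported names is an assumption):

import torch.nn as nn

# Hypothetical helper (assumption): maps an activation name to a fresh
# nn.Module instance, so each nn.Sequential gets its own layer object.
def map_activation_str_to_layer(act_func):
    acts = {
        "relu": nn.ReLU,
        "leaky_relu": nn.LeakyReLU,
        "tanh": nn.Tanh,
        "sigmoid": nn.Sigmoid,
    }
    if act_func == "softmax":
        return nn.Softmax(dim=-1)
    if act_func not in acts:
        raise NotImplementedError("unsupported activation: %s" % act_func)
    return acts[act_func]()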
Example #2
    def __init__(self,
                 d_model,
                 d_inner,
                 dropout,
                 act_func="relu",
                 pre_lnorm=False):
        super(PositionwiseFF, self).__init__()

        self.d_model = d_model
        self.d_inner = d_inner
        self.dropout = dropout

        self.CoreNet = nn.Sequential(nn.Linear(d_model, d_inner),
                                     map_activation_str_to_layer(act_func),
                                     nn.Dropout(dropout),
                                     nn.Linear(d_inner, d_model),
                                     nn.Dropout(dropout))

        self.layer_norm = nn.LayerNorm(d_model)
        self.pre_lnorm = pre_lnorm

        # init
        for m in self.CoreNet.modules():
            if isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0.0, 1 / (d_model**0.5))
                nn.init.zeros_(m.bias)
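
The pre_lnorm flag follows the usual pre- vs. post-LayerNorm convention for Transformer feed-forward blocks. A minimal forward sketch consistent with this constructor (the residual/LayerNorm placement is an assumption, not shown in the snippet):

    # Sketch of a plausible forward pass (assumption): pre_lnorm normalizes
    # the input before CoreNet; otherwise LayerNorm follows the residual add.
    def forward(self, inp):
        if self.pre_lnorm:
            output = inp + self.CoreNet(self.layer_norm(inp))
        else:
            output = self.layer_norm(inp + self.CoreNet(inp))
        return output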
Example #3
    def create_net(self, name, input_dim, **kw):
        num_layers = kw.get("num_layers", 1)
        hidden_dim = kw.get("hidden_dim", 64)
        num_rels = kw.get("num_rels", 1)
        num_bases = kw.get("num_bases", 8)
        regularizer = kw.get("regularizer", "basis")
        act_func = kw.get("act_func", "relu")
        dropout = kw.get("dropout", 0.0)

        rgcns = nn.ModuleList()
        for i in range(num_layers):
            rgcns.add_module(
                "%s_rgc%d" % (name, i),
                RelGraphConv(in_feat=hidden_dim if i > 0 else input_dim,
                             out_feat=hidden_dim,
                             num_rels=num_rels,
                             regularizer=regularizer,
                             num_bases=num_bases,
                             activation=map_activation_str_to_layer(act_func),
                             self_loop=True,
                             dropout=dropout))

        for m in rgcns.modules():
            if isinstance(m, RelGraphConv):
                if hasattr(m, "weight") and m.weight is not None:
                    nn.init.normal_(m.weight, 0.0, 1 / (hidden_dim)**0.5)
                if hasattr(m, "w_comp") and m.w_comp is not None:
                    nn.init.normal_(m.w_comp, 0.0, 1 / (hidden_dim)**0.5)
                if hasattr(m, "loop_weight") and m.loop_weight is not None:
                    nn.init.normal_(m.loop_weight, 0.0, 1 / (hidden_dim)**0.5)
                if hasattr(m, "h_bias") and m.h_bias is not None:
                    nn.init.zeros_(m.h_bias)

        return rgcns, hidden_dim
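
A hypothetical call site for this builder, driving the returned nn.ModuleList with a toy DGL graph (the owning object name, the graph, and all dimensions are made up for illustration):

import dgl
import torch

# Hypothetical usage sketch; assumes `model` exposes create_net as above.
rgcns, out_dim = model.create_net("graph", input_dim=32, num_layers=2,
                                  hidden_dim=64, num_rels=4, dropout=0.1)
g = dgl.graph(([0, 1, 2], [1, 2, 0]))                  # toy 3-node graph
etypes = torch.zeros(g.num_edges(), dtype=torch.long)  # all edges share one relation
x = torch.randn(g.num_nodes(), 32)
for layer in rgcns:
    x = layer(g, x, etypes)                            # DGL RelGraphConv forward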
Example #4
    def __init__(self, pattern_dim, graph_dim, hidden_dim, act_func="relu",
        num_heads=4, recurrent_steps=1, dropout=0.0, dropatt=0.0):
        super(BaseAttnPredictNet, self).__init__()
        self.pattern_dim = pattern_dim
        self.graph_dim = graph_dim
        self.hidden_dim = hidden_dim
        self.recurrent_steps = recurrent_steps

        self.act = map_activation_str_to_layer(act_func)
        self.drop = nn.Dropout(dropout)
        self.p_layer = nn.Linear(pattern_dim, hidden_dim)
        self.g_layer = nn.Linear(graph_dim, hidden_dim)

        self.p_attn = GatedMultiHeadAttn(
            query_dim=graph_dim, key_dim=pattern_dim, value_dim=pattern_dim,
            hidden_dim=hidden_dim, num_heads=num_heads,
            pre_lnorm=True,
            dropatt=dropatt, act_func="softmax")
        self.g_attn = GatedMultiHeadAttn(
            query_dim=graph_dim, key_dim=graph_dim, value_dim=graph_dim,
            hidden_dim=hidden_dim, num_heads=num_heads,
            pre_lnorm=True,
            dropatt=dropatt, act_func="softmax")

        self.pred_layer1 = nn.Linear(self.hidden_dim*4+4, self.hidden_dim)
        self.pred_layer2 = nn.Linear(self.hidden_dim+4, 1)

        # init
        for layer in [self.p_layer, self.g_layer, self.pred_layer1]:
            nn.init.normal_(layer.weight, 0.0, 1/(self.hidden_dim**0.5))
            nn.init.zeros_(layer.bias)
        for layer in [self.pred_layer2]:
            nn.init.zeros_(layer.weight)
            nn.init.zeros_(layer.bias)
Example #5
    def create_net(self, name, input_dim, **kw):
        conv_kernel_sizes = kw.get("conv_kernel_sizes", (1, 2, 3))
        conv_paddings = kw.get("conv_paddings", (-1, -1, -1))
        conv_channels = kw.get("conv_channels", (64, 64, 64))
        conv_strides = kw.get("conv_strides", (1, 1, 1))
        pool_kernel_sizes = kw.get("pool_kernel_sizes", (2, 3, 4))
        pool_strides = kw.get("pool_strides", (1, 1, 1))
        pool_paddings = kw.get("pool_paddings", (-1, -1, -1))
        act_func = kw.get("act_func", "relu")
        dropout = kw.get("dropout", 0.0)

        cnns = nn.ModuleList()
        for i, conv_kernel_size in enumerate(conv_kernel_sizes):
            conv_stride = conv_strides[i]
            conv_padding = conv_paddings[i]
            if conv_padding == -1:
                conv_padding = conv_kernel_size // 2

            pool_kernel_size = pool_kernel_sizes[i]
            pool_padding = pool_paddings[i]
            pool_stride = pool_strides[i]
            if pool_padding == -1:
                pool_padding = pool_kernel_size // 2

            cnn = nn.Sequential(
                OrderedDict([
                    ("conv",
                     nn.Conv1d(conv_channels[i - 1] if i > 0 else input_dim,
                               conv_channels[i],
                               kernel_size=conv_kernel_size,
                               stride=conv_stride,
                               padding=conv_padding)),
                    ("act", map_activation_str_to_layer(act_func)),
                    ("pool",
                     nn.MaxPool1d(kernel_size=pool_kernel_size,
                                  stride=pool_stride,
                                  padding=pool_padding)),
                    # ("norm", nn.BatchNorm1d(conv_channels[i])),
                    ("drop", nn.Dropout(dropout))
                ]))
            cnns.add_module("%s_cnn%d" % (name, i), cnn)
            num_features = conv_channels[i]

        # init
        for m in cnns.modules():
            if isinstance(m, nn.Conv1d):
                nn.init.kaiming_normal_(m.weight,
                                        mode='fan_out',
                                        nonlinearity=act_func)
                nn.init.zeros_(m.bias)
            elif isinstance(m, nn.BatchNorm1d):
                nn.init.ones_(m.weight)
                nn.init.zeros_(m.bias)
        return cnns, num_features
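
A hypothetical call that exercises the -1 ("same"-style) padding convention; the owner object and all shapes are made up for illustration:

import torch

# Hypothetical usage; paddings of -1 are resolved to kernel_size // 2.
cnns, num_features = model.create_net(
    "graph", input_dim=16,
    conv_kernel_sizes=(3, 5), conv_paddings=(-1, -1),
    conv_channels=(32, 64), conv_strides=(1, 1),
    pool_kernel_sizes=(2, 2), pool_strides=(2, 2), pool_paddings=(0, 0),
    act_func="relu", dropout=0.1)
x = torch.randn(8, 16, 100)      # (batch, channels=input_dim, length)
for cnn in cnns:                 # the blocks chain: in_channels follows conv_channels
    x = cnn(x)
print(x.shape)                   # torch.Size([8, 64, 25]) with these settings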
Example #6
    def __init__(self,
                 input_dim,
                 filters,
                 output_dim,
                 num_highway=1,
                 activation="relu",
                 projection_location="after_highway",
                 layer_norm=False):
        super().__init__()

        assert projection_location in ["after_cnn", "after_highway"]

        self.input_dim = input_dim
        self.output_dim = output_dim
        self.projection_location = projection_location

        self.activation = map_activation_str_to_layer(activation)
        # Create the convolutions
        self.convs = nn.ModuleList()
        for i, (width, num) in enumerate(filters):
            conv = nn.Conv1d(in_channels=input_dim,
                             out_channels=num,
                             kernel_size=width,
                             bias=True)
            self.convs.append(conv)

        # Create the highway layers
        num_filters = sum(num for _, num in filters)
        if projection_location == 'after_cnn':
            highway_dim = output_dim
        else:
            # highway_dim is the number of cnn filters
            highway_dim = num_filters
        self.highways = Highway(highway_dim,
                                num_highway,
                                activation=activation)

        # Projection layer: always num_filters -> output_dim
        self.proj = nn.Linear(num_filters, output_dim)

        # And add a layer norm
        if layer_norm:
            self.layer_norm = nn.LayerNorm(output_dim)
        else:
            self.layer_norm = None

        # init
        scale = 1 / num_filters**0.5
        for layer in self.convs:
            nn.init.kaiming_normal_(layer.weight)
            nn.init.constant_(layer.bias, 0.0)
        nn.init.normal_(self.proj.weight, 0.0, scale)
        nn.init.constant_(self.proj.bias, 0.0)
Example #7
    def __init__(self, input_dim, num_layers=1, activation="relu"):
        super(Highway, self).__init__()
        self.input_dim = input_dim
        self.layers = nn.ModuleList(
            [nn.Linear(input_dim, input_dim * 2) for _ in range(num_layers)])
        self.activation = map_activation_str_to_layer(activation)

        # init
        scale = 1 / input_dim**0.5
        for layer in self.layers:
            nn.init.normal_(layer.weight, 0.0, scale)
            nn.init.constant_(layer.bias[:input_dim], 0.0)
            nn.init.constant_(layer.bias[input_dim:], 1.0)
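
The bias split above (zeros for the first half, ones for the second) suggests each Linear emits both a candidate and a gate. A forward pass consistent with that initialization might look like this (the exact gating convention is an assumption):

    # Sketch of a plausible forward (assumption): the ones-initialized gate
    # bias keeps the carry path dominant early in training.
    def forward(self, x):
        for layer in self.layers:
            proj, gate = layer(x).chunk(2, dim=-1)
            gate = torch.sigmoid(gate)
            x = gate * x + (1.0 - gate) * self.activation(proj)
        return x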
Example #8
    def __init__(self, pattern_dim, graph_dim, hidden_dim, act_func="relu", dropout=0.0):
        super(BasePoolPredictNet, self).__init__()
        self.pattern_dim = pattern_dim
        self.graph_dim = graph_dim
        self.hidden_dim = hidden_dim

        self.act = map_activation_str_to_layer(act_func)
        self.drop = nn.Dropout(dropout)
        self.p_layer = nn.Linear(pattern_dim, hidden_dim)
        self.g_layer = nn.Linear(graph_dim, hidden_dim)

        self.pred_layer1 = nn.Linear(self.hidden_dim*4+4, self.hidden_dim)
        self.pred_layer2 = nn.Linear(self.hidden_dim+4, 1)

        # init
        for layer in [self.p_layer, self.g_layer, self.pred_layer1]:
            nn.init.normal_(layer.weight, 0.0, 1/(self.hidden_dim**0.5))
            nn.init.zeros_(layer.bias)
        for layer in [self.pred_layer2]:
            nn.init.zeros_(layer.weight)
            nn.init.zeros_(layer.bias)
Example #9
    def __init__(self, query_dim, key_dim, value_dim, hidden_dim, num_heads,
            dropatt=0.0, act_func="softmax", add_zero_attn=False,
            pre_lnorm=False, post_lnorm=False):
        super(GatedMultiHeadAttn, self).__init__()
        assert hidden_dim%num_heads == 0

        self.query_dim = query_dim
        self.key_dim = key_dim
        self.value_dim = value_dim
        self.hidden_dim = hidden_dim
        self.num_heads = num_heads
        self.dropatt = nn.Dropout(dropatt)

        head_dim = hidden_dim // num_heads

        self.q_net = nn.Linear(query_dim, hidden_dim, bias=False)
        self.k_net = nn.Linear(key_dim, hidden_dim, bias=False)
        self.v_net = nn.Linear(value_dim, hidden_dim, bias=False)
        self.o_net = nn.Linear(hidden_dim, query_dim, bias=False)
        self.g_net = nn.Linear(2*query_dim, query_dim, bias=True)

        self.act = map_activation_str_to_layer(act_func)
        self.add_zero_attn = add_zero_attn
        self.pre_lnorm = pre_lnorm
        self.post_lnorm = post_lnorm

        if pre_lnorm:
            self.q_layer_norm = nn.LayerNorm(query_dim)
            self.k_layer_norm = nn.LayerNorm(key_dim)
            self.v_layer_norm = nn.LayerNorm(value_dim)
        if post_lnorm:
            self.o_layer_norm = nn.LayerNorm(query_dim)
        
        # init
        scale = 1 / (head_dim ** 0.5)
        for m in [self.q_net, self.k_net, self.v_net, self.o_net]:
            nn.init.normal_(m.weight, 0.0, scale)
        # initialize the gate bias to ones so that, for unseen inputs, the gate starts close to 1 (i.e. open)
        nn.init.normal_(self.g_net.weight, 0.0, scale)
        nn.init.ones_(self.g_net.bias)
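
A hypothetical instantiation matching the constructor above; the dimensions are arbitrary:

# Hypothetical usage sketch; all dimensions are made up.
attn = GatedMultiHeadAttn(query_dim=128, key_dim=64, value_dim=64,
                          hidden_dim=128, num_heads=4,
                          dropatt=0.1, pre_lnorm=True)
# With g_net's bias initialized to ones, the sigmoid gate starts near
# sigmoid(1) ~ 0.73, i.e. biased toward letting its gated branch through.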
Example #10
    def __init__(self, query_dim, key_dim, value_dim, hidden_dim, num_heads,
            dropatt=0.0, act_func="softmax", add_zero_attn=False,
            pre_lnorm=False, post_lnorm=False):
        super(MultiHeadAttn, self).__init__()
        assert hidden_dim%num_heads == 0
        assert act_func in ["softmax", "sigmoid"]

        self.query_dim = query_dim
        self.key_dim = key_dim
        self.value_dim = value_dim
        self.hidden_dim = hidden_dim
        self.num_heads = num_heads
        self.dropatt = nn.Dropout(dropatt)

        head_dim = hidden_dim // num_heads

        self.q_net = nn.Linear(query_dim, hidden_dim, bias=False)
        self.k_net = nn.Linear(key_dim, hidden_dim, bias=False)
        self.v_net = nn.Linear(value_dim, hidden_dim, bias=False)
        self.o_net = nn.Linear(hidden_dim, query_dim, bias=False)

        self.act = map_activation_str_to_layer(act_func)
        self.add_zero_attn = add_zero_attn
        self.pre_lnorm = pre_lnorm
        self.post_lnorm = post_lnorm

        if pre_lnorm:
            self.q_layer_norm = nn.LayerNorm(query_dim)
            self.k_layer_norm = nn.LayerNorm(key_dim)
            self.v_layer_norm = nn.LayerNorm(value_dim)
        if post_lnorm:
            self.o_layer_norm = nn.LayerNorm(query_dim)
        
        # init
        scale = 1 / (head_dim ** 0.5)
        for m in [self.q_net, self.k_net, self.v_net, self.o_net]:
            nn.init.normal_(m.weight, 0.0, scale)
Example #11
    def __init__(self, pattern_dim, graph_dim, hidden_dim, act_func="relu",
        recurrent_steps=1, num_heads=4, mem_len=4, mem_init="mean",
        dropout=0.0, dropatt=0.0):
        super(DIAMNet, self).__init__()
        self.pattern_dim = pattern_dim
        self.graph_dim = graph_dim
        self.hidden_dim = hidden_dim
        self.mem_len = mem_len
        self.mem_init = mem_init
        self.recurrent_steps = recurrent_steps

        self.act = map_activation_str_to_layer(act_func)
        self.drop = nn.Dropout(dropout)
        self.p_layer = nn.Linear(pattern_dim, hidden_dim)
        self.g_layer = nn.Linear(graph_dim, hidden_dim)
        if mem_init.endswith("attn"):
            self.m_layer = MultiHeadAttn(
                query_dim=hidden_dim, key_dim=graph_dim, value_dim=graph_dim,
                hidden_dim=hidden_dim, num_heads=num_heads,
                dropatt=dropatt, act_func="softmax")
        elif mem_init.endswith("lstm"):
            self.m_layer = nn.LSTM(graph_dim, hidden_dim, batch_first=True)
        else:
            self.m_layer = self.g_layer
        self.p_attn = GatedMultiHeadAttn(
            query_dim=hidden_dim, key_dim=pattern_dim, value_dim=pattern_dim,
            hidden_dim=hidden_dim, num_heads=num_heads,
            pre_lnorm=True,
            dropatt=dropatt, act_func="softmax")
        self.g_attn = GatedMultiHeadAttn(
            query_dim=hidden_dim, key_dim=graph_dim, value_dim=graph_dim,
            hidden_dim=hidden_dim, num_heads=num_heads,
            pre_lnorm=True,
            dropatt=dropatt, act_func="softmax")
        self.m_attn = GatedMultiHeadAttn(
            query_dim=hidden_dim, key_dim=hidden_dim, value_dim=hidden_dim,
            hidden_dim=hidden_dim, num_heads=num_heads,
            pre_lnorm=True,
            dropatt=dropatt, act_func="softmax")

        self.pred_layer1 = nn.Linear(self.mem_len*self.hidden_dim+4, self.hidden_dim)
        self.pred_layer2 = nn.Linear(self.hidden_dim+4, 1)

        # init
        scale = 1/(self.hidden_dim**0.5)
        for layer in [self.p_layer, self.g_layer, self.pred_layer1]:
            nn.init.normal_(layer.weight, 0.0, scale)
            nn.init.zeros_(layer.bias)
        for layer in [self.pred_layer2]:
            nn.init.zeros_(layer.weight)
            nn.init.zeros_(layer.bias)

        if isinstance(self.m_layer, nn.LSTM):
            for layer_weights in self.m_layer._all_weights:
                for w in layer_weights:
                    if "weight" in w:
                        weight = getattr(self.m_layer, w)
                        nn.init.orthogonal_(weight)
                    elif "bias" in w:
                        bias = getattr(self.m_layer, w)
                        if bias is not None:
                            nn.init.zeros_(bias)
        elif isinstance(self.m_layer, nn.Linear):
            nn.init.normal_(self.m_layer.weight, 0.0, scale)
            nn.init.zeros_(self.m_layer.bias)