def __init__(self, in_feat, out_feat, num_rels, regularizer="basis", num_bases=None, act_func="relu", dropout=0.0):
    super(RGINLayer, self).__init__()
    self.rgc_layer = RelGraphConv(
        in_feat=in_feat, out_feat=out_feat, num_rels=num_rels,
        regularizer=regularizer, num_bases=num_bases,
        activation=None, self_loop=True, dropout=0.0)
    self.mlp = nn.Sequential(
        nn.Linear(out_feat, out_feat),
        # nn.BatchNorm1d(out_feat),
        map_activation_str_to_layer(act_func),
        nn.Linear(out_feat, out_feat),
        map_activation_str_to_layer(act_func))
    self.drop = nn.Dropout(dropout)

    # init
    if hasattr(self.rgc_layer, "weight") and self.rgc_layer.weight is not None:
        nn.init.normal_(self.rgc_layer.weight, 0.0, 1/(out_feat)**0.5)
    if hasattr(self.rgc_layer, "w_comp") and self.rgc_layer.w_comp is not None:
        nn.init.normal_(self.rgc_layer.w_comp, 0.0, 1/(out_feat)**0.5)
    if hasattr(self.rgc_layer, "loop_weight") and self.rgc_layer.loop_weight is not None:
        nn.init.normal_(self.rgc_layer.loop_weight, 0.0, 1/(out_feat)**0.5)
    if hasattr(self.rgc_layer, "h_bias") and self.rgc_layer.h_bias is not None:
        nn.init.zeros_(self.rgc_layer.h_bias)
    for m in self.mlp.modules():
        if isinstance(m, nn.Linear):
            nn.init.normal_(m.weight, 0.0, 1/(out_feat)**0.5)
            if hasattr(m, "bias") and m.bias is not None:
                nn.init.zeros_(m.bias)
        elif isinstance(m, nn.BatchNorm1d):
            nn.init.ones_(m.weight)
            nn.init.zeros_(m.bias)
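# NOTE (illustrative, not from the original source): a construction example for
# RGINLayer with assumed sizes. Dropout is applied by self.drop after message
# passing, which is why the inner RelGraphConv is built with dropout=0.0.
#
#   rgin = RGINLayer(in_feat=128, out_feat=128, num_rels=8,
#                    regularizer="basis", num_bases=8,
#                    act_func="relu", dropout=0.2)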
def __init__(self, d_model, d_inner, dropout, act_func="relu", pre_lnorm=False):
    super(PositionwiseFF, self).__init__()
    self.d_model = d_model
    self.d_inner = d_inner
    self.dropout = dropout

    self.CoreNet = nn.Sequential(
        nn.Linear(d_model, d_inner),
        map_activation_str_to_layer(act_func),
        nn.Dropout(dropout),
        nn.Linear(d_inner, d_model),
        nn.Dropout(dropout))
    self.layer_norm = nn.LayerNorm(d_model)
    self.pre_lnorm = pre_lnorm

    # init
    for m in self.CoreNet.modules():
        if isinstance(m, nn.Linear):
            nn.init.normal_(m.weight, 0.0, 1 / (d_model**0.5))
            nn.init.zeros_(m.bias)
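# NOTE (illustrative, not from the original source): the fields above follow the
# Transformer-XL PositionwiseFF layout; a typical forward for this
# parameterization is sketched below. This is an assumption for illustration,
# since the actual forward is not part of this excerpt.
def _positionwise_ff_forward_sketch(ff, inp):
    if ff.pre_lnorm:
        # normalize first, then add the residual
        return ff.CoreNet(ff.layer_norm(inp)) + inp
    # residual first, then normalize the sum
    return ff.layer_norm(inp + ff.CoreNet(inp))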
def create_net(self, name, input_dim, **kw):
    num_layers = kw.get("num_layers", 1)
    hidden_dim = kw.get("hidden_dim", 64)
    num_rels = kw.get("num_rels", 1)
    num_bases = kw.get("num_bases", 8)
    regularizer = kw.get("regularizer", "basis")
    act_func = kw.get("act_func", "relu")
    dropout = kw.get("dropout", 0.0)

    rgcns = nn.ModuleList()
    for i in range(num_layers):
        rgcns.add_module(
            "%s_rgc%d" % (name, i),
            RelGraphConv(
                in_feat=hidden_dim if i > 0 else input_dim, out_feat=hidden_dim,
                num_rels=num_rels, regularizer=regularizer, num_bases=num_bases,
                activation=map_activation_str_to_layer(act_func),
                self_loop=True, dropout=dropout))

    for m in rgcns.modules():
        if isinstance(m, RelGraphConv):
            if hasattr(m, "weight") and m.weight is not None:
                nn.init.normal_(m.weight, 0.0, 1 / (hidden_dim)**0.5)
            if hasattr(m, "w_comp") and m.w_comp is not None:
                nn.init.normal_(m.w_comp, 0.0, 1 / (hidden_dim)**0.5)
            if hasattr(m, "loop_weight") and m.loop_weight is not None:
                nn.init.normal_(m.loop_weight, 0.0, 1 / (hidden_dim)**0.5)
            if hasattr(m, "h_bias") and m.h_bias is not None:
                nn.init.zeros_(m.h_bias)

    return rgcns, hidden_dim
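# NOTE (illustrative, not from the original source): an example call with the
# keyword arguments read above; "self" stands for whichever encoder owns this
# method and the sizes are assumed values.
#
#   rgcns, out_dim = self.create_net(
#       "graph", input_dim=128,
#       num_layers=3, hidden_dim=64, num_rels=8, num_bases=8,
#       regularizer="basis", act_func="relu", dropout=0.2)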
def __init__(self, pattern_dim, graph_dim, hidden_dim, act_func="relu",
             num_heads=4, recurrent_steps=1, dropout=0.0, dropatt=0.0):
    super(BaseAttnPredictNet, self).__init__()
    self.pattern_dim = pattern_dim
    self.graph_dim = graph_dim
    self.hidden_dim = hidden_dim
    self.recurrent_steps = recurrent_steps

    self.act = map_activation_str_to_layer(act_func)
    self.drop = nn.Dropout(dropout)
    self.p_layer = nn.Linear(pattern_dim, hidden_dim)
    self.g_layer = nn.Linear(graph_dim, hidden_dim)
    self.p_attn = GatedMultiHeadAttn(
        query_dim=graph_dim, key_dim=pattern_dim, value_dim=pattern_dim,
        hidden_dim=hidden_dim, num_heads=num_heads, pre_lnorm=True,
        dropatt=dropatt, act_func="softmax")
    self.g_attn = GatedMultiHeadAttn(
        query_dim=graph_dim, key_dim=graph_dim, value_dim=graph_dim,
        hidden_dim=hidden_dim, num_heads=num_heads, pre_lnorm=True,
        dropatt=dropatt, act_func="softmax")
    self.pred_layer1 = nn.Linear(self.hidden_dim*4+4, self.hidden_dim)
    self.pred_layer2 = nn.Linear(self.hidden_dim+4, 1)

    # init
    for layer in [self.p_layer, self.g_layer, self.pred_layer1]:
        nn.init.normal_(layer.weight, 0.0, 1/(self.hidden_dim**0.5))
        nn.init.zeros_(layer.bias)
    for layer in [self.pred_layer2]:
        nn.init.zeros_(layer.weight)
        nn.init.zeros_(layer.bias)
def create_net(self, name, input_dim, **kw):
    conv_kernel_sizes = kw.get("conv_kernel_sizes", (1, 2, 3))
    conv_paddings = kw.get("conv_paddings", (-1, -1, -1))
    conv_channels = kw.get("conv_channels", (64, 64, 64))
    conv_strides = kw.get("conv_strides", (1, 1, 1))
    pool_kernel_sizes = kw.get("pool_kernel_sizes", (2, 3, 4))
    pool_strides = kw.get("pool_strides", (1, 1, 1))
    pool_paddings = kw.get("pool_paddings", (-1, -1, -1))
    act_func = kw.get("act_func", "relu")
    dropout = kw.get("dropout", 0.0)

    cnns = nn.ModuleList()
    for i, conv_kernel_size in enumerate(conv_kernel_sizes):
        conv_stride = conv_strides[i]
        conv_padding = conv_paddings[i]
        if conv_padding == -1:
            conv_padding = conv_kernel_size // 2
        pool_kernel_size = pool_kernel_sizes[i]
        pool_padding = pool_paddings[i]
        pool_stride = pool_strides[i]
        if pool_padding == -1:
            pool_padding = pool_kernel_size // 2
        cnn = nn.Sequential(
            OrderedDict([
                ("conv", nn.Conv1d(conv_channels[i - 1] if i > 0 else input_dim, conv_channels[i],
                                   kernel_size=conv_kernel_size, stride=conv_stride, padding=conv_padding)),
                ("act", map_activation_str_to_layer(act_func)),
                ("pool", nn.MaxPool1d(kernel_size=pool_kernel_size, stride=pool_stride, padding=pool_padding)),
                # ("norm", nn.BatchNorm1d(conv_channels[i])),
                ("drop", nn.Dropout(dropout))
            ]))
        cnns.add_module("%s_cnn%d" % (name, i), cnn)
        num_features = conv_channels[i]

    # init
    for m in cnns.modules():
        if isinstance(m, nn.Conv1d):
            nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity=act_func)
            nn.init.zeros_(m.bias)
        elif isinstance(m, nn.BatchNorm1d):
            nn.init.ones_(m.weight)
            nn.init.zeros_(m.bias)

    return cnns, num_features
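# NOTE (illustrative, not from the original source): length bookkeeping for one
# conv+pool stage built above, using the standard Conv1d/MaxPool1d output-size
# formulas (dilation 1, floor mode). Purely a sketch for reasoning about shapes.
def _stage_out_len(L, conv_kernel_size, conv_stride, conv_padding,
                   pool_kernel_size, pool_stride, pool_padding):
    L = (L + 2 * conv_padding - conv_kernel_size) // conv_stride + 1   # after Conv1d
    L = (L + 2 * pool_padding - pool_kernel_size) // pool_stride + 1   # after MaxPool1d
    return L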
def __init__(self, input_dim, filters, output_dim, num_highway=1, activation="relu",
             projection_location="after_highway", layer_norm=False):
    super().__init__()

    assert projection_location in ["after_cnn", "after_highway"]

    self.input_dim = input_dim
    self.output_dim = output_dim
    self.projection_location = projection_location
    self.activation = map_activation_str_to_layer(activation)

    # Create the convolutions
    self.convs = nn.ModuleList()
    for i, (width, num) in enumerate(filters):
        conv = nn.Conv1d(in_channels=input_dim, out_channels=num, kernel_size=width, bias=True)
        self.convs.append(conv)

    # Create the highway layers
    num_filters = sum(num for _, num in filters)
    if projection_location == 'after_cnn':
        highway_dim = output_dim
    else:
        # highway_dim is the number of cnn filters
        highway_dim = num_filters
    self.highways = Highway(highway_dim, num_highway, activation=activation)

    # Projection layer: always num_filters -> output_dim
    self.proj = nn.Linear(num_filters, output_dim)

    # And add a layer norm
    if layer_norm:
        self.layer_norm = nn.LayerNorm(output_dim)
    else:
        self.layer_norm = None

    # init
    scale = 1 / num_filters**0.5
    for layer in self.convs:
        nn.init.kaiming_normal_(layer.weight)
        nn.init.constant_(layer.bias, 0.0)
    nn.init.normal_(self.proj.weight, 0.0, scale)
    nn.init.constant_(self.proj.bias, 0.0)
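# NOTE (illustrative, not from the original source): "filters" is a sequence of
# (kernel_width, num_filters) pairs, e.g. filters = [(1, 32), (2, 32), (3, 64)]
# gives num_filters = 128, so self.proj is nn.Linear(128, output_dim) and, with
# projection_location="after_highway", the Highway block also runs at width 128.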
def __init__(self, input_dim, num_layers=1, activation="relu"):
    super(Highway, self).__init__()
    self.input_dim = input_dim
    self.layers = nn.ModuleList(
        [nn.Linear(input_dim, input_dim * 2) for _ in range(num_layers)])
    self.activation = map_activation_str_to_layer(activation)

    # init
    scale = 1 / input_dim**0.5
    for layer in self.layers:
        nn.init.normal_(layer.weight, 0.0, scale)
        nn.init.constant_(layer.bias[:input_dim], 0.0)
        nn.init.constant_(layer.bias[input_dim:], 1.0)
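# NOTE (illustrative, not from the original source): each highway layer emits
# 2*input_dim values, split into a transform half and a gate half; the gate
# bias is initialized to 1 so the gate starts near sigmoid(1) ~ 0.73 and the
# input is mostly carried through early in training. The actual forward is not
# shown in this excerpt; the sketch below is one common (AllenNLP-style)
# realization, assuming torch is imported as elsewhere in this module.
def _highway_step_sketch(highway, x):
    for layer in highway.layers:
        nonlinear, gate = layer(x).chunk(2, dim=-1)
        gate = torch.sigmoid(gate)
        x = gate * x + (1.0 - gate) * highway.activation(nonlinear)
    return x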
def __init__(self, pattern_dim, graph_dim, hidden_dim, act_func="relu", dropout=0.0):
    super(BasePoolPredictNet, self).__init__()
    self.pattern_dim = pattern_dim
    self.graph_dim = graph_dim
    self.hidden_dim = hidden_dim

    self.act = map_activation_str_to_layer(act_func)
    self.drop = nn.Dropout(dropout)
    self.p_layer = nn.Linear(pattern_dim, hidden_dim)
    self.g_layer = nn.Linear(graph_dim, hidden_dim)
    self.pred_layer1 = nn.Linear(self.hidden_dim*4+4, self.hidden_dim)
    self.pred_layer2 = nn.Linear(self.hidden_dim+4, 1)

    # init
    for layer in [self.p_layer, self.g_layer, self.pred_layer1]:
        nn.init.normal_(layer.weight, 0.0, 1/(self.hidden_dim**0.5))
        nn.init.zeros_(layer.bias)
    for layer in [self.pred_layer2]:
        nn.init.zeros_(layer.weight)
        nn.init.zeros_(layer.bias)
def __init__(self, query_dim, key_dim, value_dim, hidden_dim, num_heads,
             dropatt=0.0, act_func="softmax", add_zero_attn=False,
             pre_lnorm=False, post_lnorm=False):
    super(GatedMultiHeadAttn, self).__init__()

    assert hidden_dim % num_heads == 0

    self.query_dim = query_dim
    self.key_dim = key_dim
    self.value_dim = value_dim
    self.hidden_dim = hidden_dim
    self.num_heads = num_heads
    self.dropatt = nn.Dropout(dropatt)

    head_dim = hidden_dim // num_heads

    self.q_net = nn.Linear(query_dim, hidden_dim, bias=False)
    self.k_net = nn.Linear(key_dim, hidden_dim, bias=False)
    self.v_net = nn.Linear(value_dim, hidden_dim, bias=False)
    self.o_net = nn.Linear(hidden_dim, query_dim, bias=False)
    self.g_net = nn.Linear(2*query_dim, query_dim, bias=True)

    self.act = map_activation_str_to_layer(act_func)
    self.add_zero_attn = add_zero_attn
    self.pre_lnorm = pre_lnorm
    self.post_lnorm = post_lnorm

    if pre_lnorm:
        self.q_layer_norm = nn.LayerNorm(query_dim)
        self.k_layer_norm = nn.LayerNorm(key_dim)
        self.v_layer_norm = nn.LayerNorm(value_dim)
    if post_lnorm:
        self.o_layer_norm = nn.LayerNorm(query_dim)

    # init
    scale = 1 / (head_dim ** 0.5)
    for m in [self.q_net, self.k_net, self.v_net, self.o_net]:
        nn.init.normal_(m.weight, 0.0, scale)
    # the gate bias starts at 1 so that, for new inputs, the gate stays close to 1
    nn.init.normal_(self.g_net.weight, 0.0, scale)
    nn.init.ones_(self.g_net.bias)
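# NOTE (illustrative, not from the original source): the role implied by g_net
# and its all-ones bias init is a learned interpolation gate between the query
# and the attention output. The actual forward is not shown in this excerpt;
# the sketch below is a guess at that step, written only in terms of modules
# defined above and assuming torch is imported as elsewhere in this module.
def _gated_output_sketch(attn, query, attn_out):
    # g_net: Linear(2*query_dim, query_dim); both inputs have query_dim features
    gate = torch.sigmoid(attn.g_net(torch.cat([query, attn_out], dim=-1)))
    return gate * attn_out + (1.0 - gate) * query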
def __init__(self, query_dim, key_dim, value_dim, hidden_dim, num_heads,
             dropatt=0.0, act_func="softmax", add_zero_attn=False,
             pre_lnorm=False, post_lnorm=False):
    super(MultiHeadAttn, self).__init__()

    assert hidden_dim % num_heads == 0
    assert act_func in ["softmax", "sigmoid"]

    self.query_dim = query_dim
    self.key_dim = key_dim
    self.value_dim = value_dim
    self.hidden_dim = hidden_dim
    self.num_heads = num_heads
    self.dropatt = nn.Dropout(dropatt)

    head_dim = hidden_dim // num_heads

    self.q_net = nn.Linear(query_dim, hidden_dim, bias=False)
    self.k_net = nn.Linear(key_dim, hidden_dim, bias=False)
    self.v_net = nn.Linear(value_dim, hidden_dim, bias=False)
    self.o_net = nn.Linear(hidden_dim, query_dim, bias=False)

    self.act = map_activation_str_to_layer(act_func)
    self.add_zero_attn = add_zero_attn
    self.pre_lnorm = pre_lnorm
    self.post_lnorm = post_lnorm

    if pre_lnorm:
        self.q_layer_norm = nn.LayerNorm(query_dim)
        self.k_layer_norm = nn.LayerNorm(key_dim)
        self.v_layer_norm = nn.LayerNorm(value_dim)
    if post_lnorm:
        self.o_layer_norm = nn.LayerNorm(query_dim)

    # init
    scale = 1 / (head_dim ** 0.5)
    for m in [self.q_net, self.k_net, self.v_net, self.o_net]:
        nn.init.normal_(m.weight, 0.0, scale)
def __init__(self, pattern_dim, graph_dim, hidden_dim, act_func="relu",
             recurrent_steps=1, num_heads=4, mem_len=4, mem_init="mean",
             dropout=0.0, dropatt=0.0):
    super(DIAMNet, self).__init__()
    self.pattern_dim = pattern_dim
    self.graph_dim = graph_dim
    self.hidden_dim = hidden_dim
    self.mem_len = mem_len
    self.mem_init = mem_init
    self.recurrent_steps = recurrent_steps

    self.act = map_activation_str_to_layer(act_func)
    self.drop = nn.Dropout(dropout)
    self.p_layer = nn.Linear(pattern_dim, hidden_dim)
    self.g_layer = nn.Linear(graph_dim, hidden_dim)
    if mem_init.endswith("attn"):
        self.m_layer = MultiHeadAttn(
            query_dim=hidden_dim, key_dim=graph_dim, value_dim=graph_dim,
            hidden_dim=hidden_dim, num_heads=num_heads,
            dropatt=dropatt, act_func="softmax")
    elif mem_init.endswith("lstm"):
        self.m_layer = nn.LSTM(graph_dim, hidden_dim, batch_first=True)
    else:
        self.m_layer = self.g_layer
    self.p_attn = GatedMultiHeadAttn(
        query_dim=hidden_dim, key_dim=pattern_dim, value_dim=pattern_dim,
        hidden_dim=hidden_dim, num_heads=num_heads, pre_lnorm=True,
        dropatt=dropatt, act_func="softmax")
    self.g_attn = GatedMultiHeadAttn(
        query_dim=hidden_dim, key_dim=graph_dim, value_dim=graph_dim,
        hidden_dim=hidden_dim, num_heads=num_heads, pre_lnorm=True,
        dropatt=dropatt, act_func="softmax")
    self.m_attn = GatedMultiHeadAttn(
        query_dim=hidden_dim, key_dim=hidden_dim, value_dim=hidden_dim,
        hidden_dim=hidden_dim, num_heads=num_heads, pre_lnorm=True,
        dropatt=dropatt, act_func="softmax")
    self.pred_layer1 = nn.Linear(self.mem_len*self.hidden_dim+4, self.hidden_dim)
    self.pred_layer2 = nn.Linear(self.hidden_dim+4, 1)

    # init
    scale = 1/(self.hidden_dim**0.5)
    for layer in [self.p_layer, self.g_layer, self.pred_layer1]:
        nn.init.normal_(layer.weight, 0.0, scale)
        nn.init.zeros_(layer.bias)
    for layer in [self.pred_layer2]:
        nn.init.zeros_(layer.weight)
        nn.init.zeros_(layer.bias)
    if isinstance(self.m_layer, nn.LSTM):
        for layer_weights in self.m_layer._all_weights:
            for w in layer_weights:
                if "weight" in w:
                    weight = getattr(self.m_layer, w)
                    nn.init.orthogonal_(weight)
                elif "bias" in w:
                    bias = getattr(self.m_layer, w)
                    if bias is not None:
                        nn.init.zeros_(bias)
    elif isinstance(self.m_layer, nn.Linear):
        nn.init.normal_(self.m_layer.weight, 0.0, scale)
        nn.init.zeros_(self.m_layer.bias)
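# NOTE (illustrative, not from the original source): construction examples for
# the three memory-initialization branches handled above; the exact mem_init
# strings other than the "attn"/"lstm" suffixes are assumptions, and the sizes
# are placeholders. pred_layer1 expects mem_len*hidden_dim+4 input features.
#
#   DIAMNet(128, 128, 128, mem_len=4, mem_init="mean")       # falls back to g_layer
#   DIAMNet(128, 128, 128, mem_len=4, mem_init="mean_attn")  # MultiHeadAttn memory init
#   DIAMNet(128, 128, 128, mem_len=4, mem_init="lstm")       # nn.LSTM memory init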