def __init__(self, args):
    super(LWSNet, self).__init__()
    self.maxdisplist = args.maxdisplist
    self.layers_3d = args.layers_3d
    self.channels_3d = args.channels_3d
    self.growth_rate = args.growth_rate
    self.feature_extraction = feature_extraction()
    self.volume_postprocess = []
    for i in range(3):
        net3d = post_3dconvs(self.layers_3d,
                             self.channels_3d * self.growth_rate[i])
        self.volume_postprocess.append(net3d)
    # 3D CNN used in stage 1 to stage 3
    self.volume_postprocess = nn.LayerList(self.volume_postprocess)
    # input: left image, output: left features
    self.refinement1_left = refinement1(in_channels=3, out_channels=32)
    # input: stage-3 disparity, output: disparity features
    self.refinement1_disp = refinement1(in_channels=1, out_channels=32)
    self.refinement2 = refinement2(in_channels=64, out_channels=32)
def __init__(self,
             input_size,
             num_class,
             num_layers=1,
             feat_drop=0.6,
             attn_drop=0.6,
             num_heads=8,
             hidden_size=8,
             **kwargs):
    super(GAT, self).__init__()
    self.num_class = num_class
    self.num_layers = num_layers
    self.feat_drop = feat_drop
    self.attn_drop = attn_drop
    self.num_heads = num_heads
    self.hidden_size = hidden_size
    self.gats = nn.LayerList()
    for i in range(self.num_layers):
        if i == 0:
            self.gats.append(
                pgl.nn.GATConv(input_size,
                               self.hidden_size,
                               self.feat_drop,
                               self.attn_drop,
                               self.num_heads,
                               activation='elu'))
        elif i == (self.num_layers - 1):
            self.gats.append(
                pgl.nn.GATConv(self.num_heads * self.hidden_size,
                               self.num_class,
                               self.feat_drop,
                               self.attn_drop,
                               1,
                               concat=False,
                               activation=None))
        else:
            self.gats.append(
                pgl.nn.GATConv(self.num_heads * self.hidden_size,
                               self.hidden_size,
                               self.feat_drop,
                               self.attn_drop,
                               self.num_heads,
                               activation='elu'))
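# --- Usage note (added; the dataset sizes mentioned are assumed, e.g. a
# Cora-like graph with 1433 input features and 7 classes, and are not taken
# from the source). With GAT(input_size=1433, num_class=7, num_layers=2):
# layer 0 maps 1433 -> hidden_size=8 per head and concatenates its
# num_heads=8 outputs (64 features); the final layer maps 64 -> 7 class
# logits with a single non-concatenating head and no activation. ---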
def __init__(self, bond_dim, hidden_dim, num_angle, dropout, merge='cat',
             activation=None):
    super(Bond2BondLayer, self).__init__()
    self.num_angle = num_angle
    self.hidden_dim = hidden_dim
    self.merge = merge
    self.conv_layer = nn.LayerList()
    for _ in range(num_angle):
        conv = DomainAttentionLayer(bond_dim, hidden_dim, dropout,
                                    activation=None)
        self.conv_layer.append(conv)
    self.activation = activation
def __init__(self, latent_dim=16, style_dim=64, num_domains=2):
    super().__init__()
    layers = []
    layers += [nn.Linear(latent_dim, 512)]
    layers += [nn.ReLU()]
    for _ in range(3):
        layers += [nn.Linear(512, 512)]
        layers += [nn.ReLU()]
    self.shared = nn.Sequential(*layers)

    self.unshared = nn.LayerList()
    for _ in range(num_domains):
        self.unshared.append(
            nn.Sequential(
                nn.Linear(512, 512), nn.ReLU(),
                nn.Linear(512, 512), nn.ReLU(),
                nn.Linear(512, 512), nn.ReLU(),
                nn.Linear(512, style_dim)))
def __init__(self, num_layers, mode='ir', opts=None):
    super(GradualStyleEncoder, self).__init__()
    assert num_layers in [50, 100, 152], 'num_layers should be 50, 100, or 152'
    assert mode in ['ir', 'ir_se'], 'mode should be ir or ir_se'
    blocks = get_blocks(num_layers)
    if mode == 'ir':
        unit_module = bottleneck_IR
    elif mode == 'ir_se':
        unit_module = bottleneck_IR_SE
    self.input_layer = Sequential(
        Conv2D(opts.input_nc, 64, (3, 3), 1, 1, bias_attr=False),
        BatchNorm2D(64),
        PReLU(64))
    modules = []
    for block in blocks:
        for bottleneck in block:
            modules.append(
                unit_module(bottleneck.in_channel, bottleneck.depth,
                            bottleneck.stride))
    self.body = Sequential(*modules)

    self.styles = nn.LayerList()
    self.style_count = 18
    self.coarse_ind = 3
    self.middle_ind = 7
    for i in range(self.style_count):
        if i < self.coarse_ind:
            style = GradualStyleBlock(512, 512, 16)
        elif i < self.middle_ind:
            style = GradualStyleBlock(512, 512, 32)
        else:
            style = GradualStyleBlock(512, 512, 64)
        self.styles.append(style)
    self.latlayer1 = nn.Conv2D(256, 512, kernel_size=1, stride=1, padding=0)
    self.latlayer2 = nn.Conv2D(128, 512, kernel_size=1, stride=1, padding=0)
def __init__(self, inplanes, dilation_series, padding_series, num_classes):
    super(ClassifierModule, self).__init__()
    self.conv2d_list = nn.LayerList()
    for dilation, padding in zip(dilation_series, padding_series):
        weight_attr = paddle.ParamAttr(
            initializer=nn.initializer.Normal(std=0.01), learning_rate=10.0)
        bias_attr = paddle.ParamAttr(
            initializer=nn.initializer.Constant(value=0.0), learning_rate=10.0)
        self.conv2d_list.append(
            nn.Conv2D(inplanes,
                      num_classes,
                      kernel_size=3,
                      stride=1,
                      padding=padding,
                      dilation=dilation,
                      weight_attr=weight_attr,
                      bias_attr=bias_attr))
def __init__(self,
             aspp_ratios,
             in_channels,
             out_channels,
             align_corners,
             use_sep_conv=False,
             image_pooling=False):
    super().__init__()
    self.align_corners = align_corners
    self.aspp_blocks = nn.LayerList()

    for ratio in aspp_ratios:
        if use_sep_conv and ratio > 1:
            conv_func = layers.SeparableConvBNReLU
        else:
            conv_func = layers.ConvBNReLU
        block = conv_func(in_channels=in_channels,
                          out_channels=out_channels,
                          kernel_size=1 if ratio == 1 else 3,
                          dilation=ratio,
                          padding=0 if ratio == 1 else ratio)
        self.aspp_blocks.append(block)

    out_size = len(self.aspp_blocks)
    if image_pooling:
        self.global_avg_pool = nn.Sequential(
            nn.AdaptiveAvgPool2D(output_size=(1, 1)),
            layers.ConvBNReLU(in_channels, out_channels, kernel_size=1,
                              bias_attr=False))
        out_size += 1
    self.image_pooling = image_pooling

    self.conv_bn_relu = layers.ConvBNReLU(in_channels=out_channels * out_size,
                                          out_channels=out_channels,
                                          kernel_size=1)
    self.dropout = nn.Dropout(p=0.1)  # drop rate
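# --- Illustrative standalone sketch (added; channel sizes and ratios below
# are assumed, not from the source). It reproduces the ASPP branch pattern
# above with plain Conv2D layers in place of layers.ConvBNReLU: one LayerList
# entry per dilation rate, a 1x1 conv for ratio 1 and a 3x3 conv with
# padding == dilation otherwise, so every branch keeps the spatial size and
# the outputs can be concatenated before the fusion conv. ---
import paddle
import paddle.nn as nn

aspp_ratios = (1, 6, 12, 18)
branches = nn.LayerList([
    nn.Conv2D(256, 64,
              kernel_size=1 if ratio == 1 else 3,
              dilation=ratio,
              padding=0 if ratio == 1 else ratio)
    for ratio in aspp_ratios
])
x = paddle.randn([1, 256, 33, 33])
outs = [branch(x) for branch in branches]  # each: [1, 64, 33, 33]
y = paddle.concat(outs, axis=1)            # [1, 64 * len(aspp_ratios), 33, 33]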
def __init__(self, in_features, layer_num=2, low_rank=32, num_experts=4):
    super(CrossNetMix, self).__init__()
    self.layer_num = layer_num
    self.num_experts = num_experts

    # U: (in_features, low_rank)
    self.U_list = paddle.nn.ParameterList([
        paddle.create_parameter(
            shape=[num_experts, in_features, low_rank],
            dtype='float32',
            default_initializer=paddle.nn.initializer.XavierNormal())
        for i in range(self.layer_num)
    ])
    # V: (in_features, low_rank)
    self.V_list = paddle.nn.ParameterList([
        paddle.create_parameter(
            shape=[num_experts, in_features, low_rank],
            dtype='float32',
            default_initializer=paddle.nn.initializer.XavierNormal())
        for i in range(self.layer_num)
    ])
    # C: (low_rank, low_rank)
    self.C_list = paddle.nn.ParameterList([
        paddle.create_parameter(
            shape=[num_experts, low_rank, low_rank],
            dtype='float32',
            default_initializer=paddle.nn.initializer.XavierNormal())
        for i in range(self.layer_num)
    ])
    self.gating = nn.LayerList(
        [nn.Linear(in_features, 1) for i in range(self.num_experts)])

    self.bias = paddle.nn.ParameterList([
        paddle.create_parameter(
            shape=[in_features, 1],
            dtype='float32',
            default_initializer=paddle.nn.initializer.Constant(value=0.0))
        for i in range(self.layer_num)
    ])
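# --- Shape note (added; in_features=64 is an assumed example value, the
# other numbers are the defaults above). With in_features=64, layer_num=2,
# low_rank=32 and num_experts=4: U_list and V_list each hold 2 parameters of
# shape [4, 64, 32], C_list holds 2 parameters of shape [4, 32, 32], bias
# holds 2 parameters of shape [64, 1], and gating is a LayerList of 4
# Linear(64, 1) layers that score each expert per sample. ---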
def __init__(self, img_size=256, style_dim=64, num_domains=2,
             max_conv_dim=512):
    super().__init__()
    dim_in = 2**14 // img_size
    blocks = []
    blocks += [nn.Conv2D(3, dim_in, 3, 1, 1)]

    repeat_num = int(np.log2(img_size)) - 2
    for _ in range(repeat_num):
        dim_out = min(dim_in * 2, max_conv_dim)
        blocks += [ResBlk(dim_in, dim_out, downsample=True)]
        dim_in = dim_out

    blocks += [nn.LeakyReLU(0.2)]
    blocks += [nn.Conv2D(dim_out, dim_out, 4, 1, 0)]
    blocks += [nn.LeakyReLU(0.2)]
    self.shared = nn.Sequential(*blocks)

    self.unshared = nn.LayerList()
    for _ in range(num_domains):
        self.unshared.append(nn.Linear(dim_out, style_dim))
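# --- Shape note (added; derived from the defaults above, assuming a 256x256
# RGB input). With img_size=256: dim_in = 2**14 // 256 = 64 and
# repeat_num = 6, so the shared trunk grows 64 -> 128 -> 256 -> 512 (capped
# at max_conv_dim) while the spatial size shrinks 256 -> 4; the 4x4 conv then
# reduces the map to 1x1, and each of the num_domains unshared heads is a
# Linear(dim_out=512, style_dim). ---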
def __init__(self,
             in_channels,
             out_channels,
             key_channels,
             value_channels,
             dropout_prob,
             repeat_sizes=([1]),
             psp_size=(1, 3, 6, 8)):
    super().__init__()
    self.psp_size = psp_size
    self.stages = nn.LayerList([
        SelfAttentionBlock_APNB(in_channels, out_channels, key_channels,
                                value_channels, size)
        for size in repeat_sizes
    ])
    self.conv_bn = layers.ConvBNReLU(in_channels=in_channels * 2,
                                     out_channels=out_channels,
                                     kernel_size=1)
    self.dropout = nn.Dropout(p=dropout_prob)
def __init__(self,
             n_src_vocab=200,
             d_word_vec=20,
             n_layers=3,
             n_head=2,
             d_k=10,
             d_v=10,
             d_model=20,
             d_inner=10,
             pad_idx=0,
             dropout=0.1,
             n_position=200,
             emb_weight=None):
    """
    Args:
        n_src_vocab (int): size of the input vocabulary.
        pad_idx (int): index of the padding token in the input.
        d_word_vec (int): dimension of the word embeddings; equal to d_model.
        d_inner (int): number of hidden units of the PositionwiseForward layer.
        n_layers (int): number of encoder (and decoder) layers.
        n_head (int): number of attention heads.
        d_k (int): dimension of the key vectors.
        d_v (int): dimension of the value vectors.
        emb_weight: pretrained word2vec weights for the input embedding.
    """
    super().__init__()
    self.src_word_emb = nn.Embedding(n_src_vocab,
                                     d_word_vec,
                                     sparse=True,
                                     padding_idx=pad_idx)
    if emb_weight is not None:
        self.src_word_emb.weight.set_value(emb_weight)
        self.src_word_emb.stop_gradient = True
    self.position_enc = PositionalEncoding(d_word_vec, n_position=n_position)
    self.dropout = nn.Dropout(dropout)
    self.layer_stack = nn.LayerList([
        EncoderLayer(d_model, d_inner, n_head, d_k, d_v, dropout=dropout)
        for _ in range(n_layers)
    ])
    self.layer_norm = nn.LayerNorm(d_model, epsilon=1e-6)
def __init__(self,
             vocab_size,
             hidden_size=768,
             num_hidden_layers=12,
             num_attention_heads=12,
             intermediate_size=3072,
             hidden_act="gelu",
             hidden_dropout_prob=0.1,
             attention_probs_dropout_prob=0.1,
             max_position_embeddings=512,
             type_vocab_size=16,
             initializer_range=0.02,
             pad_token_id=0,
             fit_size=768):
    super(TinyBertModel, self).__init__()
    self.pad_token_id = pad_token_id
    self.initializer_range = initializer_range
    self.embeddings = BertEmbeddings(vocab_size, hidden_size,
                                     hidden_dropout_prob,
                                     max_position_embeddings, type_vocab_size)
    encoder_layer = nn.TransformerEncoderLayer(
        hidden_size,
        num_attention_heads,
        intermediate_size,
        dropout=hidden_dropout_prob,
        activation=hidden_act,
        attn_dropout=attention_probs_dropout_prob,
        act_dropout=0)
    self.encoder = nn.TransformerEncoder(encoder_layer, num_hidden_layers)
    self.pooler = BertPooler(hidden_size)
    # fit_dense(s) is the transformation of hidden states from the student to
    # the teacher. `fit_denses` is used in the v2 model; `fit_dense` is used
    # in the other pretraining models.
    self.fit_denses = nn.LayerList([
        nn.Linear(hidden_size, fit_size)
        for i in range(num_hidden_layers + 1)
    ])
    self.fit_dense = nn.Linear(hidden_size, fit_size)
    self.apply(self.init_weights)
def __init__(self,
             in_channels,
             out_channels,
             kernel_size,
             candidate_config={},
             stride=1,
             padding=0,
             dilation=1,
             norm_layer=nn.InstanceNorm2D,
             bias_attr=None,
             scale_factor=1):
    super(SuperSeparableConv2D, self).__init__()
    self.conv = nn.LayerList([
        nn.Conv2D(in_channels=in_channels,
                  out_channels=in_channels * scale_factor,
                  kernel_size=kernel_size,
                  stride=stride,
                  padding=padding,
                  groups=in_channels,
                  bias_attr=bias_attr)
    ])
    self.conv.extend([norm_layer(in_channels * scale_factor)])
    self.conv.extend([
        nn.Conv2D(in_channels=in_channels * scale_factor,
                  out_channels=out_channels,
                  kernel_size=1,
                  stride=1,
                  bias_attr=bias_attr)
    ])
    self.candidate_config = candidate_config
    self.expand_ratio = candidate_config[
        'expand_ratio'] if 'expand_ratio' in candidate_config else None
    self.base_output_dim = self.conv[0]._out_channels
    if self.expand_ratio is not None:
        self.base_output_dim = int(self.conv[0]._out_channels /
                                   max(self.expand_ratio))
def __init__(self, d_mels: int, d_hidden: int, kernel_size: int,
             num_layers: int, dropout: float):
    super().__init__()
    self.dropout = dropout
    self.num_layers = num_layers

    padding = int((kernel_size - 1) / 2)
    self.conv_batchnorms = nn.LayerList()
    k = math.sqrt(1.0 / (d_mels * kernel_size))
    self.conv_batchnorms.append(
        Conv1dBatchNorm(d_mels,
                        d_hidden,
                        kernel_size=kernel_size,
                        padding=padding,
                        bias_attr=paddle.ParamAttr(
                            initializer=nn.initializer.Uniform(low=-k, high=k)),
                        data_format='NLC'))

    k = math.sqrt(1.0 / (d_hidden * kernel_size))
    self.conv_batchnorms.extend([
        Conv1dBatchNorm(d_hidden,
                        d_hidden,
                        kernel_size=kernel_size,
                        padding=padding,
                        bias_attr=paddle.ParamAttr(
                            initializer=nn.initializer.Uniform(low=-k, high=k)),
                        data_format='NLC') for i in range(1, num_layers - 1)
    ])

    self.conv_batchnorms.append(
        Conv1dBatchNorm(d_hidden,
                        d_mels,
                        kernel_size=kernel_size,
                        padding=padding,
                        bias_attr=paddle.ParamAttr(
                            initializer=nn.initializer.Uniform(low=-k, high=k)),
                        data_format='NLC'))
def __init__(
    self,
    num_hidden_layers: int,
    hidden_size: int,
    num_attention_heads: int,
    intermediate_size: int,
    attention_probs_dropout_prob: float,
    hidden_dropout_prob: float,
    hidden_act: str = "relu",
) -> None:
    super().__init__()
    self.layers = nn.LayerList([
        EncoderLayer(
            hidden_size,
            num_attention_heads,
            intermediate_size,
            attention_probs_dropout_prob,
            hidden_dropout_prob,
            hidden_act,
        ) for _ in range(num_hidden_layers)
    ])
def __init__(self, hidden_dim, edge_dim, num_angle, dropout):
    super().__init__()
    self.hidden_dim = hidden_dim
    self.e_in_dim = edge_dim
    self.out_dim = hidden_dim
    self.num_angle = num_angle
    self.drop = dropout
    self.edg_fc = nn.Linear(hidden_dim, hidden_dim, bias_attr=False)
    self.dst_fc = nn.Linear(hidden_dim, hidden_dim, bias_attr=False)
    self.src_fcs = nn.LayerList()
    for i in range(self.num_angle):
        self.src_fcs.append(nn.Linear(edge_dim, hidden_dim, bias_attr=False))
    self.weight_src = nn.Linear(hidden_dim, 1, bias_attr=False)
    self.weight_dst = nn.Linear(hidden_dim, 1, bias_attr=False)
    self.weight_edg = nn.Linear(hidden_dim, 1, bias_attr=False)
    self.drop = nn.Dropout(p=self.drop)
    self.leaky_relu = nn.LeakyReLU(negative_slope=0.2)
def __init__(self, base_channels, growth_rate, grmul, n_layers,
             keepBase=False):
    super().__init__()
    self.keepBase = keepBase
    self.links = []
    layers_ = []
    self.out_channels = 0
    for i in range(n_layers):
        outch, inch, link = get_link(i + 1, base_channels, growth_rate, grmul)
        self.links.append(link)
        layers_.append(
            layers.ConvBNReLU(inch, outch, kernel_size=3, bias_attr=False))
        if (i % 2 == 0) or (i == n_layers - 1):
            self.out_channels += outch
    self.layers = nn.LayerList(layers_)
def __init__(self, n_blocks, in_channels, ch_list, gr, grmul, n_layers):
    super().__init__()
    self.skip_connection_channels = []
    self.shortcut_layers = []
    self.blks = nn.LayerList()
    ch = in_channels
    for i in range(n_blocks):
        blk = HarDBlock(ch, gr[i], grmul, n_layers[i])
        ch = blk.get_out_ch()
        self.skip_connection_channels.append(ch)
        self.blks.append(blk)
        if i < n_blocks - 1:
            self.shortcut_layers.append(len(self.blks) - 1)

        self.blks.append(
            layers.ConvBNReLU(ch, ch_list[i], kernel_size=1, bias_attr=False))
        ch = ch_list[i]

        if i < n_blocks - 1:
            self.blks.append(nn.AvgPool2D(kernel_size=2, stride=2))
    self.out_channels = ch
def __init__(self,
             student,
             student_args=dict(),
             in_channels=[],
             out_channels=[],
             mid_channel=[],
             feat_keepkeys=[],
             **kargs):
    super().__init__()
    self.shapes = [1, 7, 14, 28, 56]
    self.student = eval(student)(**student_args)
    self.feat_keepkeys = feat_keepkeys

    abfs = nn.LayerList()
    for idx, in_channel in enumerate(in_channels):
        abfs.append(
            ABF(in_channel, mid_channel, out_channels[idx],
                idx < len(in_channels) - 1))
    self.abfs = abfs[::-1]
def __init__(self,
             in_channels,
             out_channels,
             mid_channel,
             shapes=[1, 7, 14, 28, 56],
             hcl_mode="avg",
             name="loss_review_kd"):
    super().__init__()
    self.shapes = shapes
    self.name = name

    abfs = nn.LayerList()
    for idx, in_channel in enumerate(in_channels):
        abfs.append(
            ABF(in_channel, mid_channel, out_channels[idx],
                idx < len(in_channels) - 1))
    self.abfs = abfs[::-1]
    self.hcl = HCL(mode=hcl_mode)
def __init__(self, input_size, num_channels, kernel_size=2, dropout=0.2):
    super(TCNEncoder, self).__init__()
    self._input_size = input_size
    self._output_dim = num_channels[-1]

    layers = nn.LayerList()
    num_levels = len(num_channels)
    for i in range(num_levels):
        dilation_size = 2**i
        in_channels = input_size if i == 0 else num_channels[i - 1]
        out_channels = num_channels[i]
        layers.append(
            TemporalBlock(in_channels,
                          out_channels,
                          kernel_size,
                          stride=1,
                          dilation=dilation_size,
                          padding=(kernel_size - 1) * dilation_size,
                          dropout=dropout))

    self.network = nn.Sequential(*layers)
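# --- Illustrative standalone sketch (added; the sizes are assumed, not from
# the source). It isolates the dilation/padding arithmetic above with plain
# Conv1D layers standing in for TemporalBlock (which is not shown here):
# level i uses dilation 2**i and padding (kernel_size - 1) * dilation, so the
# receptive field grows exponentially with depth. ---
import paddle.nn as nn

input_size, num_channels, kernel_size = 128, [64, 64, 64], 2
convs = nn.LayerList()
for i, out_channels in enumerate(num_channels):
    dilation = 2**i
    in_channels = input_size if i == 0 else num_channels[i - 1]
    convs.append(
        nn.Conv1D(in_channels,
                  out_channels,
                  kernel_size,
                  dilation=dilation,
                  padding=(kernel_size - 1) * dilation))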
def __init__(self,
             hidden_size,
             activation=None,
             lambda_l=0.5,
             alpha=0.2,
             k_hop=10,
             dropout=0.6):
    super(GCNII, self).__init__()
    self.hidden_size = hidden_size
    self.activation = activation
    self.lambda_l = lambda_l
    self.alpha = alpha
    self.k_hop = k_hop
    self.dropout = dropout
    self.drop_fn = nn.Dropout(dropout)
    self.mlps = nn.LayerList()
    for _ in range(k_hop):
        self.mlps.append(nn.Linear(hidden_size, hidden_size))
    if isinstance(activation, str):
        activation = getattr(F, activation)
    self.activation = activation
def __init__(self,
             dim,
             depth,
             num_heads,
             window_size=7,
             mlp_ratio=4.,
             qkv_bias=True,
             qk_scale=None,
             drop=0.,
             attn_drop=0.,
             drop_path=0.,
             norm_layer=nn.LayerNorm,
             downsample=None):
    super().__init__()
    self.window_size = window_size
    self.shift_size = window_size // 2
    self.depth = depth

    # build blocks
    self.blocks = nn.LayerList([
        SwinTransformerBlock(
            dim=dim,
            num_heads=num_heads,
            window_size=window_size,
            shift_size=0 if (i % 2 == 0) else window_size // 2,
            mlp_ratio=mlp_ratio,
            qkv_bias=qkv_bias,
            qk_scale=qk_scale,
            drop=drop,
            attn_drop=attn_drop,
            drop_path=drop_path[i]
            if isinstance(drop_path, np.ndarray) else drop_path,
            norm_layer=norm_layer) for i in range(depth)
    ])

    # patch merging layer
    if downsample is not None:
        self.downsample = downsample(dim=dim, norm_layer=norm_layer)
    else:
        self.downsample = None
def __init__(self,
             in_channels,
             out_channels,
             kernel_sizes=(5, 9, 13),
             activation="silu"):
    super().__init__()
    hidden_channels = in_channels // 2
    self.conv1 = BaseConv(in_channels,
                          hidden_channels,
                          1,
                          stride=1,
                          act=activation)
    self.m = nn.LayerList([
        nn.MaxPool2D(kernel_size=ks, stride=1, padding=ks // 2)
        for ks in kernel_sizes
    ])
    conv2_channels = hidden_channels * (len(kernel_sizes) + 1)
    self.conv2 = BaseConv(conv2_channels,
                          out_channels,
                          1,
                          stride=1,
                          act=activation)
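# --- Illustrative standalone sketch (added; the tensor sizes are assumed,
# not from the source). It shows the parallel-pooling pattern above: several
# MaxPool2D branches held in a LayerList, their outputs concatenated with the
# input along channels, which is why conv2 expects
# hidden_channels * (len(kernel_sizes) + 1) input channels. ---
import paddle
import paddle.nn as nn

kernel_sizes = (5, 9, 13)
pools = nn.LayerList([
    nn.MaxPool2D(kernel_size=ks, stride=1, padding=ks // 2)
    for ks in kernel_sizes
])
x = paddle.randn([1, 256, 32, 32])
feats = [x] + [pool(x) for pool in pools]
y = paddle.concat(feats, axis=1)  # [1, 256 * (len(kernel_sizes) + 1), 32, 32]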
def __init__(self,
             hidden_size=768,
             num_hidden_layers=12,
             num_attention_heads=12,
             intermediate_size=3072,
             hidden_act="gelu",
             hidden_dropout_prob=0.1,
             attention_probs_dropout_prob=0.1,
             max_relative_position=64,
             layer_norm_eps=1e-12):
    super(NeZhaEncoder, self).__init__()
    layer = NeZhaLayer(
        hidden_size=hidden_size,
        num_attention_heads=num_attention_heads,
        intermediate_size=intermediate_size,
        hidden_act=hidden_act,
        hidden_dropout_prob=hidden_dropout_prob,
        attention_probs_dropout_prob=attention_probs_dropout_prob,
        max_relative_position=max_relative_position,
        layer_norm_eps=layer_norm_eps)
    self.layer = nn.LayerList(
        [copy.deepcopy(layer) for _ in range(num_hidden_layers)])
def __init__(self, inplanes, out_channels, dilation_series, padding_series,
             num_classes):
    super(edge_branch, self).__init__()
    self.conv_x1 = nn.Conv2D(inplanes[0], 512, kernel_size=3)
    self.conv_x4 = nn.Conv2D(inplanes[1], 512, kernel_size=3)
    self.conv0 = resnet_vd.ConvBNLayer(in_channels=512 * 2,
                                       out_channels=out_channels,
                                       kernel_size=3,
                                       act='relu')
    self.conv1 = resnet_vd.ConvBNLayer(in_channels=out_channels,
                                       out_channels=out_channels,
                                       kernel_size=3,
                                       act=None)
    self.add = layers.Add()
    self.relu = layers.Activation(act="relu")

    self.conv2d_list = nn.LayerList()
    for dilation, padding in zip(dilation_series, padding_series):
        weight_attr = paddle.ParamAttr(
            initializer=nn.initializer.Normal(std=0.01), learning_rate=10.0)
        bias_attr = paddle.ParamAttr(
            initializer=nn.initializer.Constant(value=0.0), learning_rate=10.0)
        self.conv2d_list.append(
            nn.Conv2D(out_channels,
                      num_classes,
                      kernel_size=3,
                      stride=1,
                      padding=padding,
                      dilation=dilation,
                      weight_attr=weight_attr,
                      bias_attr=bias_attr))
    self.classifier = nn.Conv2D(out_channels,
                                num_classes,
                                kernel_size=3,
                                stride=1)
def _make_transition_layer(self, num_channels_pre_layer,
                           num_channels_cur_layer):
    num_branches_cur = len(num_channels_cur_layer)
    num_branches_pre = len(num_channels_pre_layer)

    transition_layers = []
    for i in range(num_branches_cur):
        if i < num_branches_pre:
            if num_channels_cur_layer[i] != num_channels_pre_layer[i]:
                transition_layers.append(
                    nn.Sequential(
                        nn.Conv2D(num_channels_pre_layer[i],
                                  num_channels_cur_layer[i],
                                  kernel_size=3,
                                  stride=1,
                                  padding=1,
                                  bias_attr=False),
                        self.norm_layer(num_channels_cur_layer[i]),
                        nn.ReLU()))
            else:
                transition_layers.append(None)
        else:
            conv3x3s = []
            for j in range(i + 1 - num_branches_pre):
                inchannels = num_channels_pre_layer[-1]
                outchannels = num_channels_cur_layer[i] \
                    if j == i - num_branches_pre else inchannels
                conv3x3s.append(
                    nn.Sequential(
                        nn.Conv2D(inchannels,
                                  outchannels,
                                  kernel_size=3,
                                  stride=2,
                                  padding=1,
                                  bias_attr=False),
                        self.norm_layer(outchannels),
                        nn.ReLU()))
            transition_layers.append(nn.Sequential(*conv3x3s))

    return nn.LayerList(transition_layers)
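# --- Worked example (added; the channel lists are assumed HRNet-style
# values, not from the source). With num_channels_pre_layer=[18, 36] and
# num_channels_cur_layer=[18, 36, 72], the returned LayerList is
# [None, None, Sequential(Conv2D(36, 72, 3, stride=2, padding=1), norm,
# ReLU)]: branches whose widths already match pass through unchanged, and the
# one new branch is built from the last previous branch with a single
# stride-2 3x3 conv. ---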
def __init__(self, num_classes, decoder_channels, head_channels, class_key):
    super(SinglePanopticDeepLabHead, self).__init__()
    self.num_head = len(num_classes)
    if self.num_head != len(class_key):
        raise ValueError(
            "len(num_classes) != len(class_key), they are {} and {}".format(
                num_classes, class_key))
    classifier = []
    for i in range(self.num_head):
        classifier.append(
            nn.Sequential(
                SeparableConvBNReLU(decoder_channels,
                                    head_channels,
                                    5,
                                    padding=2,
                                    bias_attr=False),
                nn.Conv2D(head_channels, num_classes[i], 1)))
    self.classifier = nn.LayerList(classifier)
    self.class_key = class_key
def __init__(self, Ch, h, window):
    """Initialization.

    Ch: Channels per head.
    h: Number of heads.
    window: Window size(s) in convolutional relative positional encoding.
            It can have two forms:
            1. An integer of window size, which assigns all attention heads
               the same window size in ConvRelPosEnc.
            2. A dict mapping window size to #attention head splits
               (e.g. {window size 1: #attention head split 1,
                      window size 2: #attention head split 2}).
               It applies a different window size to each attention head
               split.
    """
    super().__init__()

    if isinstance(window, int):
        # Set the same window size for all attention heads.
        window = {window: h}
        self.window = window
    elif isinstance(window, dict):
        self.window = window
    else:
        raise ValueError('`window` must be an int or a dict')

    self.conv_list = nn.LayerList()
    self.head_splits = []
    for cur_window, cur_head_split in window.items():
        # Use dilation=1 by default.
        dilation = 1
        padding_size = (cur_window + (cur_window - 1) * (dilation - 1)) // 2
        cur_conv = nn.Conv2D(
            cur_head_split * Ch,
            cur_head_split * Ch,
            kernel_size=(cur_window, cur_window),
            padding=(padding_size, padding_size),
            dilation=(dilation, dilation),
            groups=cur_head_split * Ch,
        )
        self.conv_list.append(cur_conv)
        self.head_splits.append(cur_head_split)
    self.channel_splits = [x * Ch for x in self.head_splits]
def __init__(self,
             edge_dim,
             node_dim,
             hidden_dim,
             num_heads,
             num_angle,
             dropout,
             merge="mean",
             activation=F.relu):
    super(Edge2NodeLayer, self).__init__()
    self.merge = merge
    self.num_heads = num_heads
    self.hidden_dim = hidden_dim
    self.activation = activation
    self.att_layers = nn.LayerList([
        Edge2NodeAttentionLayer(hidden_dim, edge_dim, num_angle, dropout)
        for _ in range(num_heads)
    ])