def __init__(self, n_inputs, n_outputs, kernel_size, stride, dilation, padding, dropout=0.2):
    super(TemporalBlock, self).__init__()
    self.conv1 = weight_norm(nn.Conv1d(n_inputs, n_outputs, kernel_size,
                                       stride=stride, padding=padding, dilation=dilation))
    self.chomp1 = Chomp1d(padding)
    self.relu1 = nn.ReLU()
    self.dropout1 = nn.Dropout(dropout)

    self.conv2 = weight_norm(nn.Conv1d(n_outputs, n_outputs, kernel_size,
                                       stride=stride, padding=padding, dilation=dilation))
    self.chomp2 = Chomp1d(padding)
    self.relu2 = nn.ReLU()
    self.dropout2 = nn.Dropout(dropout)

    self.net = nn.Sequential(self.conv1, self.chomp1, self.relu1, self.dropout1,
                             self.conv2, self.chomp2, self.relu2, self.dropout2)
    self.downsample = nn.Conv1d(n_inputs, n_outputs, 1) if n_inputs != n_outputs else None
    self.relu = nn.ReLU()
    self.init_weights()
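# A minimal, self-contained sketch (not code from this repo) of the causal-convolution
# pattern the block above relies on: the Conv1d is padded by (kernel_size - 1) * dilation,
# then Chomp1d trims the trailing padding so each output step only sees current and past
# inputs. The Chomp1d below is a hypothetical stand-in for the helper this block expects.
import torch
import torch.nn as nn
from torch.nn.utils import weight_norm

class Chomp1d(nn.Module):
    def __init__(self, chomp_size):
        super().__init__()
        self.chomp_size = chomp_size

    def forward(self, x):
        # Drop the last `chomp_size` time steps added by symmetric padding.
        return x[:, :, :-self.chomp_size].contiguous()

kernel_size, dilation = 3, 2
padding = (kernel_size - 1) * dilation
conv = weight_norm(nn.Conv1d(4, 8, kernel_size, padding=padding, dilation=dilation))
x = torch.randn(1, 4, 50)
y = Chomp1d(padding)(conv(x))
print(y.shape)  # torch.Size([1, 8, 50]) -- sequence length is preserved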
def __init__(self, channels, kernel_size=3, dilation=(1, 3, 5)):
    super(ResBlock1, self).__init__()
    self.convs1 = nn.ModuleList([
        weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=dilation[0],
                           padding=get_padding(kernel_size, dilation[0]))),
        weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=dilation[1],
                           padding=get_padding(kernel_size, dilation[1]))),
        weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=dilation[2],
                           padding=get_padding(kernel_size, dilation[2]))),
    ])
    self.convs1.apply(init_weights)

    self.convs2 = nn.ModuleList([
        weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=1,
                           padding=get_padding(kernel_size, 1))),
        weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=1,
                           padding=get_padding(kernel_size, 1))),
        weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=1,
                           padding=get_padding(kernel_size, 1))),
    ])
    self.convs2.apply(init_weights)
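# A hedged sketch of the `get_padding` helper assumed above (the usual HiFi-GAN-style
# definition, not necessarily this repo's): it picks the padding that keeps the sequence
# length unchanged for an odd kernel with the given dilation.
def get_padding(kernel_size, dilation=1):
    return (kernel_size * dilation - dilation) // 2

# e.g. kernel_size=3, dilation=5 -> padding=5, so L_out = L_in + 2*5 - 5*(3-1) = L_in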
def conv_layer(
    ni: int,
    nf: int,
    ks: int = 3,
    stride: int = 1,
    padding: int = None,
    bias: bool = None,
    is_1d: bool = False,
    norm_type: Optional[NormType] = NormType.Batch,
    use_activ: bool = True,
    leaky: float = None,
    transpose: bool = False,
    init: Callable = nn.init.kaiming_normal_,
    self_attention: bool = False,
):
    "Create a sequence of convolutional (`ni` to `nf`), ReLU (if `use_activ`) and batchnorm (if `bn`) layers."
    if padding is None:
        padding = (ks - 1) // 2 if not transpose else 0
    bn = norm_type in (NormType.Batch, NormType.BatchZero)
    if bias is None:
        bias = not bn
    conv_func = nn.ConvTranspose2d if transpose else nn.Conv1d if is_1d else nn.Conv2d
    conv = init_default(
        conv_func(ni, nf, kernel_size=ks, bias=bias, stride=stride, padding=padding),
        init,
    )
    if norm_type == NormType.Weight:
        conv = weight_norm(conv)
    elif norm_type == NormType.Spectral:
        conv = spectral_norm(conv)
    layers = [conv]
    if use_activ:
        layers.append(relu(True, leaky=leaky))
    if bn:
        layers.append((nn.BatchNorm1d if is_1d else nn.BatchNorm2d)(nf))
    if self_attention:
        layers.append(SelfAttention(nf))
    return nn.Sequential(*layers)
def __init__(self, ni, nf, ks=3, stride=1, padding=None, bias=None, ndim=2,
             norm_type=NormType.Batch, bn_1st=True, act_cls=defaults.activation,
             transpose=False, init=nn.init.kaiming_normal_, xtra=None, **kwargs):
    if padding is None:
        padding = (ks - 1) // 2 if not transpose else 0
    bn = norm_type in (NormType.Batch, NormType.BatchZero)
    inn = norm_type in (NormType.Instance, NormType.InstanceZero)
    if bias is None:
        bias = not (bn or inn)
    conv_func = _conv_func(ndim, transpose=transpose)
    conv = init_default(conv_func(ni, nf, kernel_size=ks, bias=bias, stride=stride,
                                  padding=padding, **kwargs), init)
    if norm_type == NormType.Weight:
        conv = weight_norm(conv)
    elif norm_type == NormType.Spectral:
        conv = spectral_norm(conv)
    layers = [conv]
    act_bn = []
    if act_cls is not None:
        act_bn.append(act_cls())
    if bn:
        act_bn.append(BatchNorm(nf, norm_type=norm_type, ndim=ndim))
    if inn:
        act_bn.append(InstanceNorm(nf, norm_type=norm_type, ndim=ndim))
    if bn_1st:
        act_bn.reverse()
    layers += act_bn
    if xtra:
        layers.append(xtra)
    super().__init__(*layers)
def __init__(
    self,
    word_emb,
    word_emb_dim,
    context_dim,
    hid_dim,
    vocab_size,
    inst_dict,
):
    super().__init__()
    self.word_emb = word_emb
    self.vocab_size = vocab_size
    self.inst_dict = inst_dict
    self.rnn = nn.LSTM(word_emb_dim + context_dim, hid_dim, batch_first=True)
    self.decoder = weight_norm(nn.Linear(hid_dim, vocab_size), dim=None)
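# A small illustration (assuming the standard torch.nn.utils.weight_norm semantics) of what
# the `dim` argument used above controls: dim=0 learns one gain per output unit, while
# dim=None learns a single scalar gain for the whole weight tensor.
import torch.nn as nn
from torch.nn.utils import weight_norm

per_row = weight_norm(nn.Linear(16, 4), dim=0)
print(per_row.weight_g.shape)  # torch.Size([4, 1]) -- one norm per output row

single = weight_norm(nn.Linear(16, 4), dim=None)
print(single.weight_g.shape)   # torch.Size([]) -- one scalar norm for the whole matrix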
def __init__(self, num_input_feats, input_feat_name, lab_phn_num, N_filters, kernel_sizes, dropout):
    super(GCNN_phn_Net, self).__init__()
    self.input_feat_name = input_feat_name
    self.num_input_feats = num_input_feats
    gcnn_out_num = 500
    self.gcnn = GCNN(num_input_feats=self.num_input_feats,
                     num_output_feats=gcnn_out_num,
                     N_filters=N_filters,
                     kernel_sizes=kernel_sizes,
                     dropout=dropout[:-1])
    self.linear_phn = weight_norm(nn.Linear(gcnn_out_num, lab_phn_num), dim=0)
    self.context_left = sum(self.context())
    self.context_right = 0
def __init__(self, vocabulary, blank=0, name="masr"): super().__init__() self.blank = blank self.vocabulary = vocabulary self.name = name output_units = len(vocabulary) modules = [] modules.append(ConvBlock(nn.Conv1d(161, 500, 48, 2, 97), 0.2)) for i in range(7): modules.append(ConvBlock(nn.Conv1d(250, 500, 7, 1), 0.3)) modules.append(ConvBlock(nn.Conv1d(250, 2000, 32, 1), 0.5)) modules.append(ConvBlock(nn.Conv1d(1000, 2000, 1, 1), 0.5)) modules.append(weight_norm(nn.Conv1d(1000, output_units, 1, 1))) self.cnn = nn.Sequential(*modules)
def __init__(self, x_size, y_size, opt, prefix="decoder", dropout=None):
    super(Classifier, self).__init__()
    self.opt = opt
    if dropout is None:
        self.dropout = DropoutWrapper(opt.get("{}_dropout_p".format(prefix), 0))
    else:
        self.dropout = dropout
    self.merge_opt = opt.get("{}_merge_opt".format(prefix), 0)
    self.weight_norm_on = opt.get("{}_weight_norm_on".format(prefix), False)
    if self.merge_opt == 1:
        self.proj = nn.Linear(x_size * 4, y_size)
    else:
        self.proj = nn.Linear(x_size * 2, y_size)
    if self.weight_norm_on:
        self.proj = weight_norm(self.proj)
def __init__(self, main_task, input_dim, skip_connection=False, clamp_bias=False,
             init_value=1., weight_normalization=True):
    # Pass the actual input dimension to the base class (the original forwarded
    # `main_task` for `input_dim`, which appears to be a typo).
    super().__init__(main_task=main_task, input_dim=input_dim, clamp_bias=clamp_bias)
    self.init_value = init_value
    self.skip_connection = skip_connection
    self.linear = nn.Linear(input_dim, 1, bias=False)
    self._init_weights()
    self.weight_normalization = weight_normalization
    if self.weight_normalization:
        self.linear = weight_norm(self.linear)
def __init__(self, z_dim=128, nf=64):
    super(G_cifar10, self).__init__()
    self.nf = nf
    self.z_dim = z_dim
    self.latent = nn.Sequential(
        nn.Linear(self.z_dim, 8 * nf * 4 * 4),
        nn.BatchNorm1d(8 * nf * 4 * 4),
        nn.ReLU(),
    )
    self.network = nn.Sequential(
        nn.ConvTranspose2d(8 * nf, 4 * nf, 4, 2, 1, bias=False),
        nn.BatchNorm2d(4 * nf),
        nn.ReLU(inplace=True),
        nn.ConvTranspose2d(4 * nf, 2 * nf, 4, 2, 1, bias=False),
        nn.BatchNorm2d(2 * nf),
        nn.ReLU(inplace=True),
        weight_norm(nn.ConvTranspose2d(2 * nf, 3, 4, 2, 1, bias=False)),
        nn.Tanh(),
    )
    self._initialize_weights()
def __init__(self, query_dim, key_dim, value_dim, prefix='attention', opt={}, dropout=None):
    super().__init__()
    self.prefix = prefix
    self.num_heads = opt.get('{}_head'.format(self.prefix), 1)
    self.dropout = DropoutWrapper(opt.get('{}_dropout'.format(self.prefix), 0)) if dropout is None else dropout
    self.qkv_dim = [query_dim, key_dim, value_dim]
    assert query_dim == key_dim, "query dim must equal key dim"
    self.hidden_size = opt.get('{}_hidden_size'.format(self.prefix), 64)
    self.proj_on = opt.get('{}_proj_on'.format(prefix), False)
    self.share = opt.get('{}_share'.format(self.prefix), False)
    self.layer_norm_on = opt.get('{}_norm_on'.format(self.prefix), False)
    self.scale_on = opt.get('{}_scale_on'.format(self.prefix), False)
    if self.proj_on:
        self.proj_modules = nn.ModuleList(
            [nn.Linear(dim, self.hidden_size) for dim in self.qkv_dim[0:2]])
        if self.layer_norm_on:
            # weight_norm mutates each Linear in place, so iterating is sufficient.
            for proj in self.proj_modules:
                weight_norm(proj)
        if self.share and self.qkv_dim[0] == self.qkv_dim[1]:
            self.proj_modules[1] = self.proj_modules[0]
        self.f = activation(opt.get('{}_activation'.format(self.prefix), 'relu'))
        self.qkv_head_dim = [self.hidden_size // self.num_heads] * 3
        self.qkv_head_dim[2] = value_dim // self.num_heads
        assert self.qkv_head_dim[0] * self.num_heads == self.hidden_size, \
            "hidden size must be divisible by num_heads"
        assert self.qkv_head_dim[2] * self.num_heads == value_dim, \
            "value size must be divisible by num_heads"
    else:
        self.qkv_head_dim = [emb // self.num_heads for emb in self.qkv_dim]
        assert self.qkv_head_dim[0] * self.num_heads == self.qkv_dim[0], \
            "query size must be divisible by num_heads"
        assert self.qkv_head_dim[1] * self.num_heads == self.qkv_dim[1], \
            "key size must be divisible by num_heads"
        assert self.qkv_head_dim[2] * self.num_heads == self.qkv_dim[2], \
            "value size must be divisible by num_heads"
    if self.scale_on:
        self.scaling = self.qkv_head_dim[0] ** -0.5
    self.drop_diagonal = opt.get('{}_drop_diagonal'.format(self.prefix), False)
    self.output_size = self.qkv_dim[2]
def __init__(self, nc_input, nc_output, use_dropout=False, pixel_shuffle=False, round=False):
    super(dec_block_v9, self).__init__()
    self.model = []
    self.model += [nn.ReLU(True)]
    if pixel_shuffle:
        self.model += [
            weight_norm(nn.Conv2d(nc_input, 4 * nc_output, kernel_size=3, stride=1,
                                  padding=1, bias=True),
                        name='weight', dim=None)
        ]
        self.model += [nn.PixelShuffle(2)]
    elif round:
        self.model += [
            nn.ConvTranspose2d(nc_input, nc_output, kernel_size=4, stride=2, padding=1, bias=True)
        ]
    else:
        self.model += [
            nn.ConvTranspose2d(nc_input, nc_output, kernel_size=5, stride=2, padding=1, bias=True)
        ]
    if use_dropout:
        self.model += [nn.Dropout(0.5)]
    self.model = nn.Sequential(*self.model)
def __init__(self, in_channels, out_channels, kernel, nonlinearity=nn.PReLU):
    super().__init__()
    self.nonlinearity = nonlinearity() if nonlinearity else None
    conv = nn.Conv2d(
        in_channels=in_channels,
        out_channels=out_channels,
        kernel_size=kernel,
        padding=[kernel[0] // 2, kernel[1] // 2],  # Same padding in temporal direction
        bias=True,
    )
    self.conv = weight_norm(conv)
    # Apply Xavier initialization to the convolutional weights.
    # Note: weight_norm recomputes .weight from weight_g/weight_v on every forward pass,
    # so re-initializing .weight after wrapping may not persist; initializing the Conv2d
    # before wrapping is usually the safer ordering.
    nn.init.xavier_uniform_(self.conv.weight)
def __init__(self, layer_dim, X, USE_CUDA, device):
    super().__init__()
    self.X_mean = Variable(torch.from_numpy(X.mean(0, keepdims=True)).float(), requires_grad=False)
    self.X_std = Variable(torch.from_numpy(X.std(0, keepdims=True)).float(), requires_grad=False)
    if USE_CUDA:
        self.X_mean = self.X_mean.to(device)
        self.X_std = self.X_std.to(device)
    self.num_layers = len(layer_dim)
    temp = []
    for l in range(1, self.num_layers):
        temp.append(weight_norm(nn.Linear(layer_dim[l - 1], layer_dim[l]), dim=0))
        nn.init.normal_(temp[l - 1].weight)
    self.layers = nn.ModuleList(temp)
def __init__(self, vocabulary, blank=0, name="masr"): """ vocabulary : str : string of all labels such that vocaulary[0] == ctc_blank """ super().__init__(vocabulary=vocabulary, name=name, blank=blank) self.blank = blank self.vocabulary = vocabulary self.name = name output_units = len(vocabulary) modules = [] modules.append(ConvBlock(nn.Conv1d(161, 500, 48, 2, 97), 0.2)) for i in range(7): modules.append(ConvBlock(nn.Conv1d(250, 500, 7, 1), 0.3)) modules.append(ConvBlock(nn.Conv1d(250, 2000, 32, 1), 0.5)) modules.append(ConvBlock(nn.Conv1d(1000, 2000, 1, 1), 0.5)) modules.append(weight_norm(nn.Conv1d(1000, output_units, 1, 1))) self.cnn = nn.Sequential(*modules)
def __init__(self):
    super(Ban, self).__init__()
    self.mid_dim = 1024
    self.v_dim = 2048
    self.s_dim = 300
    self.glimpses = 2
    self.biattention = weight_norm(
        BiAttention(v_dim=self.v_dim, s_dim=self.s_dim, mid_dim=self.mid_dim,
                    glimpses=self.glimpses, drop=0.0),
        name="h_weight", dim=None)
    self.applyattention = ApplyAttention(v_dim=self.v_dim, s_dim=self.s_dim,
                                         mid_dim=self.mid_dim, glimpses=self.glimpses,
                                         drop=0.0)
def __init__(self, batch_size, n_features):
    super().__init__()
    upsampling_rate = [8, 2]
    mult = 16
    model = [
        # nn.ReflectionPad1d(3),
        weight_norm(nn.Conv1d(128, 128, 5))
    ]
    for i in range(2):
        u_r = upsampling_rate[i]
        for ii in range(2):
            model += [
                nn.LeakyReLU(0.2),
                weight_norm(
                    nn.ConvTranspose1d(128, 128, u_r, u_r,
                                       u_r // 2 + u_r % 2, u_r % 2)),
            ]
        for residual_layer in range(3):
            model += [
                nn.Sequential(
                    nn.LeakyReLU(0.2),
                    weight_norm(nn.Conv1d(128, 128, kernel_size=3,
                                          dilation=3 ** residual_layer)),
                    nn.LeakyReLU(0.2),
                    weight_norm(nn.Conv1d(128, 128, kernel_size=1)),
                )
            ]
        mult //= 2
    model += [
        nn.LeakyReLU(0.2),
        # nn.ReflectionPad1d(5),
        weight_norm(nn.Conv1d(128, 128, stride=251, kernel_size=1)),
        nn.LeakyReLU(0.2),
        weight_norm(nn.Conv1d(128, 128, kernel_size=1, padding=0)),
        nn.ReflectionPad1d((1, 1)),
        nn.Tanh(),
    ]
    self.model = nn.Sequential(*model)
def _make_layer(self, block, planes, blocks, stride=1):
    downsample = None
    if stride != 1 or self.inplanes != planes * block.expansion:
        downsample = nn.Sequential(
            weight_norm(nn.Conv2d(self.inplanes, planes * block.expansion,
                                  kernel_size=1, stride=stride, bias=False),
                        name="weight"),
            nn.BatchNorm2d(planes * block.expansion),
        )
    layers = []
    layers.append(block(self.inplanes, planes, stride, downsample))
    self.inplanes = planes * block.expansion
    for i in range(1, blocks):
        layers.append(block(self.inplanes, planes))
    return nn.Sequential(*layers)
def __init__(self, in_channels, out_channels, kernel_size=(2, 2), stride=(1, 1), left_pad_output=False):
    super(down_rightward_conv, self).__init__()
    # For the horizontal stream, (left, top) have to be padded.
    self.left_pad_output = left_pad_output
    self.pad = nn.ZeroPad2d((kernel_size[1] - 1, 0, kernel_size[0] - 1, 0))
    self.conv = weight_norm(
        nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride))
    if self.left_pad_output:
        # Right shift removes the last column of the output and pads the first column,
        # so the last column cannot be used to predict itself.
        self.right_shift = right_shift
def __init__(self, k_shots, num_classes, win_len, model=cnn):
    super(Net, self).__init__()
    self.fuse = Fusion()
    self.model = model
    if cfg.data == 'WIFI':
        self.fc = nn.Sequential(*utils.block('BN', 60 * 2, cfg.hid_dim))
        self.lstm_time = nn.LSTM(60, cfg.hid_dim // 2)
        self.lstm_freq = nn.LSTM(60, cfg.hid_dim // 2)
    elif cfg.data == 'UWB':
        self.fc = nn.Sequential(*utils.block('BN', 138 * 2, cfg.hid_dim))
        self.lstm_time = nn.LSTM(138, cfg.hid_dim // 2)
        self.lstm_freq = nn.LSTM(138, cfg.hid_dim // 2)
    elif cfg.data == 'FMCW':
        self.fc = nn.Sequential(*utils.block('BN', 253 * 2, cfg.hid_dim))
        self.lstm_time = nn.LSTM(253, cfg.hid_dim // 2)
        self.lstm_freq = nn.LSTM(253, cfg.hid_dim // 2)
    self.classifier = nn.Linear(output_dim, cfg.num_class)
    self.attention = weight_norm(
        BiAttention(
            time_features=cfg.hid_dim // 2,
            freq_features=cfg.hid_dim // 2,
            mid_features=cfg.hid_dim,
            glimpses=1,
            drop=0.5,
        ),
        name='h_weight', dim=None)
    self.apply_attention = ApplyAttention(
        time_features=cfg.hid_dim // 2,
        freq_features=cfg.hid_dim // 2,
        mid_features=cfg.hid_dim // 2,
        glimpses=1,
        num_obj=512,
        drop=0.2,
    )
    self.cnn1 = torch.nn.Conv2d(2, 3, kernel_size=3, stride=1, padding=1)
    self.fc1 = FCNet(cfg.hid_dim // 2, cfg.hid_dim // 2, 'relu', 0.4)
    self.fc2 = FCNet(cfg.hid_dim // 2, cfg.hid_dim // 2, 'relu', 0.4)
    self.fc3 = FCNet(cfg.hid_dim, cfg.hid_dim, drop=0.4)
def __init__(self, in_channels, out_channels, kernel_size, stride, padding, dilation, groups=1, bias=True):
    super(CausalConv1d, self).__init__()
    self.conv = weight_norm(
        nn.Conv1d(in_channels, out_channels, kernel_size, stride=stride,
                  padding=padding, dilation=dilation, groups=groups, bias=bias))
    self.chomp = Chomp1d(padding)
def create_layer(in_channels, out_channels, kernel_size, wn=True, bn=True,
                 activation=nn.ReLU, convolution=nn.Conv2d):
    assert kernel_size % 2 == 1
    layer = []
    conv = convolution(in_channels, out_channels, kernel_size, padding=kernel_size // 2)
    if wn:
        conv = weight_norm(conv)
    layer.append(conv)
    if activation is not None:
        layer.append(activation())
    if bn:
        layer.append(nn.BatchNorm2d(out_channels))
    return nn.Sequential(*layer)
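# A quick usage sketch for create_layer (assuming torch and the weight_norm import used by
# this module): it builds a weight-normalized 3x3 conv followed by ReLU and BatchNorm and
# runs a dummy batch through it.
import torch

layer = create_layer(in_channels=3, out_channels=16, kernel_size=3)
out = layer(torch.randn(2, 3, 32, 32))
print(out.shape)  # torch.Size([2, 16, 32, 32]) -- odd kernel + kernel_size // 2 padding keeps H and W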
def _get_layer(
    self,
    input_channels,
    output_channel,
    init_upper=None,
    init_lower=None,
    bias=True,
    kernel_size=3,
    stride=1,
    padding=1,
):
    layer = nn.Conv2d(input_channels, output_channel, kernel_size=kernel_size,
                      stride=stride, padding=padding, bias=bias)
    self._init_layer(layer, init_upper=init_upper, init_lower=init_lower)
    if self.weight_normalization:
        return weight_norm(layer)
    return layer
def __init__(self, num_inputs, num_outputs, num_channels, seq_len, kernel_size=2, dropout=0.2, attention=True):
    super(TemporalConvNet, self).__init__()
    layers = []
    num_levels = len(num_channels)
    for layer in range(num_levels):
        dilation = 2 ** layer
        in_channels = num_inputs if layer == 0 else num_channels[layer - 1]
        out_channels = num_channels[layer]
        layers += [TemporalBlock(in_channels, out_channels, kernel_size, stride=1,
                                 dilation=dilation, padding=(kernel_size - 1) * dilation,
                                 dropout=dropout)]
    self.network = nn.Sequential(*layers)
    if attention:
        self.attention = AttentionBlock(num_channels[-1], num_channels[-1], num_channels[-1])
    else:
        self.attention = None
    self.fc = nn.Sequential(
        weight_norm(nn.Linear(seq_len * num_channels[-1] * 2, num_outputs)),
        nn.ReLU(),
    )
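# A small sanity check (an assumption based on the standard TCN receptive-field formula,
# not code from this repo): with the dilation doubling per level above and two causal convs
# per TemporalBlock, the receptive field grows as 1 + 2 * (kernel_size - 1) * (2**levels - 1).
def tcn_receptive_field(kernel_size, num_levels, convs_per_block=2):
    return 1 + convs_per_block * (kernel_size - 1) * (2 ** num_levels - 1)

print(tcn_receptive_field(kernel_size=2, num_levels=8))  # 511 time steps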
def __init__(self, in_channels, out_channels, kernel_size=(2, 3), stride=(1, 1), top_pad_output=False):
    super(downward_conv, self).__init__()
    # For the vertical stream, (left, right, top) have to be padded.
    self.top_pad_output = top_pad_output
    self.pad = nn.ZeroPad2d(((kernel_size[1] - 1) // 2, (kernel_size[1] - 1) // 2,
                             kernel_size[0] - 1, 0))
    self.conv = weight_norm(
        nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride))
    if self.top_pad_output:
        # Down shift removes the last row of the output and pads the first row,
        # so the last row cannot be used to predict itself.
        self.down_shift = down_shift
def __init__(self, n_inputs, n_outputs, kernel_size, stride, dilation, padding,
             dropout=0.2, apply_weight_norm=False):
    super(TemporalBlock2DHW, self).__init__()
    self.conv1 = nn.Conv2d(n_inputs, n_outputs, kernel_size,
                           stride=stride, padding=padding, dilation=dilation)
    self.chomp1 = Chomp2d(padding)
    self.relu1 = nn.ReLU()
    self.dropout1 = nn.Dropout(dropout)
    '''
    self.conv2 = nn.Conv2d(n_outputs, n_outputs, kernel_size,
                           stride=stride, padding=padding, dilation=dilation)
    self.chomp2 = Chomp2d(padding)
    self.relu2 = nn.ReLU()
    self.dropout2 = nn.Dropout(dropout)
    '''
    if apply_weight_norm:
        self.conv1 = weight_norm(self.conv1)
        # self.conv2 = weight_norm(self.conv2)
    self.relu = nn.ReLU()
    self.init_weights()
    self.kernel_size = kernel_size
def __init__(self, config: FFConfig, verbose=False):
    super(GLM, self).__init__()
    self.config = config
    spat_dim = 15
    self.spatiotemporal = nn.ModuleList([
        nn.Sequential(
            nn.Flatten(),
            weight_norm(nn.Linear(
                in_features=config.time_lags * 2 * spat_dim ** 2,
                out_features=1,
                bias=True,
            )),
            nn.Softplus(),
        )
        for _ in range(len(config.useful_cells[config.expt]))
    ])
    self.criterion = nn.PoissonNLLLoss(log_input=False, reduction="sum")
    self.apply(get_init_fn(self.config.init_range))
    if verbose:
        print_num_params(self)
def __init__(self, v_dim, q_dim, h_dim, h_out, dropout=[.2, .5], k=3):
    super(BCNet, self).__init__()
    self.c = 32
    self.k = k
    self.v_dim = v_dim
    self.q_dim = q_dim
    self.h_dim = h_dim
    self.h_out = h_out
    self.v_net = FCNet(v_dim, h_dim * self.k, activate='relu', drop=dropout[0])
    self.q_net = FCNet(q_dim, h_dim * self.k, activate='relu', drop=dropout[0])
    self.dropout = nn.Dropout(dropout[1])  # attention
    if 1 < k:
        self.p_net = nn.AvgPool1d(self.k, stride=self.k)
    if h_out is None:
        pass
    elif h_out <= self.c:
        self.h_mat = nn.Parameter(torch.Tensor(1, h_out, 1, h_dim * self.k).normal_())
        self.h_bias = nn.Parameter(torch.Tensor(1, h_out, 1, 1).normal_())
    else:
        self.h_net = weight_norm(nn.Linear(h_dim * self.k, h_out), dim=None)
def __init__(self, block, layers, num_classes=1000, isL2=False, double_output=False):
    self.inplanes = 64
    self.isL2 = isL2
    self.double_output = double_output
    super(ResNet, self).__init__()
    self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
    self.bn1 = nn.BatchNorm2d(64)
    self.relu = nn.ReLU(inplace=True)
    self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
    self.layer1 = self._make_layer(block, 64, layers[0])
    self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
    self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
    self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
    self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
    self.fc1 = nn.Linear(512 * block.expansion, num_classes)

    for m in self.modules():
        if isinstance(m, nn.Conv2d):
            nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
        elif isinstance(m, nn.BatchNorm2d):
            nn.init.constant_(m.weight, 1)
            nn.init.constant_(m.bias, 0)

    if self.double_output:
        self.fc2 = weight_norm(nn.Linear(512 * block.expansion, num_classes))
def __init__(
    self,
    upsample_in_channel: int,
    upsample_out_channel: int,
    upsample_kernel_size: int,
    upsample_rates: int,
    resblock_kernel_sizes: List[int],
    resblock_dilation_sizes: List[List[int]],
):
    super(UpSampler, self).__init__()
    self.up = weight_norm(
        ConvTranspose1d(
            upsample_in_channel,
            upsample_out_channel,
            upsample_kernel_size,
            upsample_rates,
            padding=(upsample_kernel_size - upsample_rates) // 2,
        )
    )
    self.up.apply(init_weights)
    self.res_0 = ResBlock(
        channels=upsample_out_channel,
        kernel_size=resblock_kernel_sizes[0],
        dilation=resblock_dilation_sizes[0],
    )
    self.res_1 = ResBlock(
        channels=upsample_out_channel,
        kernel_size=resblock_kernel_sizes[1],
        dilation=resblock_dilation_sizes[1],
    )
    self.res_2 = ResBlock(
        channels=upsample_out_channel,
        kernel_size=resblock_kernel_sizes[2],
        dilation=resblock_dilation_sizes[2],
    )
    self.num_kernels = len(resblock_kernel_sizes)
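# A quick check (an assumption based on the standard ConvTranspose1d length formula, not
# repo code) that padding = (kernel_size - rate) // 2 makes the transposed conv above
# upsample by exactly `rate` when kernel_size - rate is even:
# L_out = (L_in - 1) * rate - 2 * ((kernel_size - rate) // 2) + kernel_size = L_in * rate
import torch
import torch.nn as nn

rate, kernel_size = 8, 16
up = nn.ConvTranspose1d(4, 4, kernel_size, rate, padding=(kernel_size - rate) // 2)
print(up(torch.randn(1, 4, 100)).shape)  # torch.Size([1, 4, 800])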
def __init__(self, img_num_obj=151, img_num_rel=51, txt_num_obj=4460, txt_num_rel=646):
    super(SGEncode, self).__init__()
    self.embed_dim = 512
    self.hidden_dim = 512
    self.final_dim = 1024
    self.num_layer = 2
    self.margin = 1.0
    self.img_num_obj = img_num_obj
    self.img_num_rel = img_num_rel
    self.txt_num_obj = txt_num_obj
    self.txt_num_rel = txt_num_rel

    self.img_obj_embed = nn.Embedding(self.img_num_obj, self.embed_dim)
    self.img_rel_head_embed = nn.Embedding(self.img_num_obj, self.embed_dim)
    self.img_rel_tail_embed = nn.Embedding(self.img_num_obj, self.embed_dim)
    self.img_rel_pred_embed = nn.Embedding(self.img_num_rel, self.embed_dim)

    self.txt_obj_embed = nn.Embedding(self.txt_num_obj, self.embed_dim)
    self.txt_rel_head_embed = nn.Embedding(self.txt_num_obj, self.embed_dim)
    self.txt_rel_tail_embed = nn.Embedding(self.txt_num_obj, self.embed_dim)
    self.txt_rel_pred_embed = nn.Embedding(self.txt_num_rel, self.embed_dim)

    self.attention = weight_norm(
        BiAttention(
            v_features=self.embed_dim * 3,
            q_features=self.embed_dim,
            mid_features=self.hidden_dim,
            glimpses=self.num_layer,
            drop=0.2,
        ),
        name='h_weight', dim=None)
    self.apply_attention = ApplyAttention(
        v_features=self.embed_dim * 3,
        q_features=self.embed_dim,
        mid_features=self.hidden_dim,
        glimpses=self.num_layer,
        drop=0.2,
    )
    self.final_fc = nn.Sequential(
        nn.Linear(self.hidden_dim, self.hidden_dim),
        nn.ReLU(inplace=True),
        nn.Linear(self.hidden_dim, self.final_dim),
        nn.ReLU(inplace=True),
    )