def __init__(self, args):
    super(ThreeLayersPretrain, self).__init__()
    self.embedding = RobertaModel.from_pretrained(args.pretrain_model_name)
    self.fact_convs = nn.ModuleList([
        nn.Conv1d(args.embedding_dim, args.filters_num, args.kernel_size_1),
        nn.Conv1d(args.filters_num, args.filters_num, args.kernel_size_2)
    ])
    self.article_convs = nn.ModuleList([
        nn.Conv1d(args.embedding_dim, args.filters_num, args.kernel_size_1),
        nn.Conv1d(args.filters_num, args.filters_num, args.kernel_size_2),
    ])
    self.conv_paddings = nn.ModuleList([
        nn.ConstantPad1d((args.kernel_size_1 // 2 - 1, args.kernel_size_1 // 2), 0.),
        nn.ConstantPad1d((args.kernel_size_2 // 2 - 1, args.kernel_size_2 // 2), 0.)
    ])
    self.ffs = nn.ModuleList(
        [nn.Linear(args.embedding_dim, args.linear_output)] +
        [nn.Linear(args.filters_num, args.linear_output) for _ in range(2)] +
        [nn.Linear(args.article_len * 3, args.linear_output)])
    self.predict = nn.Linear(args.linear_output, 2)
def __init__(self, N, M, file_path=Config.project_root + "filters/"):
    super().__init__()
    self.N = N  # number of subbands
    self.M = M  # number of filter taps
    if (N, M) not in [(8, 64), (4, 64), (2, 64)]:
        print("Warning:", N, "subbands with", M, "filter taps is not supported")
    self.name = str(N) + "_" + str(M) + ".mat"
    self.ana_conv_filter = nn.Conv1d(1, out_channels=N, kernel_size=M, stride=N, bias=False)
    data = load_mat2numpy(file_path + "f_" + self.name)
    data = data['f'].astype(np.float32) / N
    data = np.flipud(data.T).T
    data = np.reshape(data, (N, 1, M)).copy()
    dict_new = self.ana_conv_filter.state_dict().copy()
    dict_new['weight'] = torch.from_numpy(data)
    self.ana_pad = nn.ConstantPad1d((M - N, 0), 0)
    self.ana_conv_filter.load_state_dict(dict_new)
    self.syn_pad = nn.ConstantPad1d((0, M // N - 1), 0)
    self.syn_conv_filter = nn.Conv1d(N, out_channels=N, kernel_size=M // N, stride=1, bias=False)
    gk = load_mat2numpy(file_path + "h_" + self.name)
    gk = gk['h'].astype(np.float32)
    gk = np.transpose(np.reshape(gk, (N, M // N, N)), (1, 0, 2)) * N
    gk = np.transpose(gk[::-1, :, :], (2, 1, 0)).copy()
    dict_new = self.syn_conv_filter.state_dict().copy()
    dict_new['weight'] = torch.from_numpy(gk)
    self.syn_conv_filter.load_state_dict(dict_new)
    # the analysis/synthesis filterbank weights are fixed, not trained
    for param in self.parameters():
        param.requires_grad = False
def create_batch(train_data, batch_ids, is_cuda):
    max_len = max([len(train_data[bi][0]) for bi in batch_ids])
    batch_input_ids = []
    batch_label_ids = []
    batch_segment_ids = []
    for bi in batch_ids:
        input_ids, label_ids, segment_ids = train_data[bi]
        pad_len = max_len - len(input_ids)
        padding_op = nn.ConstantPad1d((0, pad_len), const.pad_token_id)
        batch_input_ids.append(padding_op(input_ids).unsqueeze(0))
        padding_op = nn.ConstantPad1d((0, pad_len), const.label_pad_id)
        batch_label_ids.append(padding_op(label_ids).unsqueeze(0))
        padding_op = nn.ConstantPad1d((0, pad_len), const.pad_token_segment_id)
        batch_segment_ids.append(padding_op(segment_ids).unsqueeze(0))
    batch_input_ids = torch.cat(batch_input_ids)
    batch_label_ids = torch.cat(batch_label_ids)
    batch_segment_ids = torch.cat(batch_segment_ids)
    att_mask = batch_input_ids.ne(const.pad_token_id)
    if is_cuda:
        batch_input_ids = batch_input_ids.cuda()
        batch_label_ids = batch_label_ids.cuda()
        batch_segment_ids = batch_segment_ids.cuda()
        att_mask = att_mask.cuda()
    inputs = {
        'input_ids': batch_input_ids,
        'attention_mask': att_mask,
        'token_type_ids': batch_segment_ids,
        'labels': batch_label_ids
    }
    return inputs
def __init__(self, sequence_length):
    # Refer to "ZHANG C, ZHONG M, WANG Z, et al. Sequence-to-point learning with
    # neural networks for non-intrusive load monitoring[C]. The 32nd AAAI
    # Conference on Artificial Intelligence"
    super(attention_cnn_Pytorch, self).__init__()
    self.seq_length = sequence_length
    self.conv = nn.Sequential(
        nn.ConstantPad1d((4, 5), 0),
        nn.Conv1d(1, 30, 10, stride=1),
        nn.ReLU(True),
        nn.ConstantPad1d((3, 4), 0),
        nn.Conv1d(30, 30, 8, stride=1),
        nn.ReLU(True),
        nn.ConstantPad1d((2, 3), 0),
        nn.Conv1d(30, 40, 6, stride=1),
        nn.ReLU(True),
        nn.ConstantPad1d((2, 2), 0),
        nn.Conv1d(40, 50, 5, stride=1),
        nn.ReLU(True),
        nn.ConstantPad1d((2, 2), 0),
        nn.Conv1d(50, 50, 5, stride=1),
        nn.ReLU(True)
    )
    self.ca = ChannelAttention(in_planes=50, ratio=4)
    self.sa = SpatialAttention(kernel_size=7)
    self.dense = nn.Sequential(
        nn.Linear(50 * sequence_length, 1024),
        nn.ReLU(),
        nn.Linear(1024, 1)
    )
def __init__(self, SEQ_LENGTH):
    super(_JKsDenoisingAutoEncoderOriginal, self).__init__()
    self.seq_length = SEQ_LENGTH
    self.nc = 1    # number of channels
    self.nef = 8   # number of filters
    nrepr = 128    # dimension of internal representation
    self.fs = 4    # filter size
    self.pad1 = nn.ConstantPad1d((1, 2), 0)
    self.conv1 = nn.Conv1d(self.nc, self.nef, self.fs, stride=1, padding=0)
    self.n_dense_units = self.nef * self.seq_length
    self.dense1 = nn.Sequential(
        nn.Linear(self.n_dense_units, self.n_dense_units),
        nn.ReLU(True),
        nn.Linear(self.n_dense_units, nrepr),
        nn.ReLU(True),
        nn.Linear(nrepr, self.n_dense_units),
        nn.ReLU(True))
    self.pad2 = nn.ConstantPad1d((1, 2), 0)
    self.conv2 = nn.Conv1d(self.nef, self.nc, self.fs, stride=1, padding=0)
    for m in self.modules():
        if isinstance(m, nn.Conv1d):
            nn.init.xavier_uniform_(m.weight)
            m.bias.data.zero_()
        if isinstance(m, nn.Linear):
            nn.init.xavier_uniform_(m.weight)
            m.bias.data.zero_()
def __init__(self, SEQ_LENGTH, TARGET_SEQ_LENGTH):
    super(_SGNShinSubNetPaddingSame, self).__init__()
    self.seq_length = SEQ_LENGTH
    self.target_seq_length = TARGET_SEQ_LENGTH
    self.nc = 1  # number of channels
    self.conv1 = nn.Sequential(
        nn.ConstantPad1d((4, 5), 0),
        nn.Conv1d(self.nc, 30, 10, stride=1, bias=True),
        nn.ReLU(True),
        nn.ConstantPad1d((3, 4), 0),
        nn.Conv1d(30, 30, 8, stride=1, bias=True),
        nn.ReLU(True),
        nn.ConstantPad1d((2, 3), 0),
        nn.Conv1d(30, 40, 6, stride=1, bias=True),
        nn.ReLU(True),
        nn.Conv1d(40, 50, 5, stride=1, padding=2, bias=True),
        nn.ReLU(True),
        nn.Conv1d(50, 50, 5, stride=1, padding=2, bias=True),
        nn.ReLU(True),
        nn.Conv1d(50, 50, 5, stride=1, padding=2, bias=True),
        nn.ReLU(True),
    )
    self.n_dense_units = self.seq_length * 50
    self.dense1 = nn.Linear(self.n_dense_units, 1024)
    self.act1 = nn.ReLU(True)
    self.dense2 = nn.Linear(1024, self.target_seq_length)
def __init__(self, dilation, in_channel, causal_flag=False):
    super(GLU, self).__init__()
    self.in_conv = nn.Sequential(
        nn.Conv1d(in_channel, 64, kernel_size=1),
        nn.BatchNorm1d(64))
    if causal_flag is True:
        self.pad = nn.ConstantPad1d((int(dilation * 6), 0), value=0.)
    else:
        self.pad = nn.ConstantPad1d((int(dilation * 3), int(dilation * 3)), value=0.)
    self.left_conv = nn.Sequential(
        nn.ELU(),
        self.pad,
        nn.Conv1d(64, 64, kernel_size=7, dilation=dilation),
        nn.BatchNorm1d(64))
    self.right_conv = nn.Sequential(
        nn.ELU(),
        self.pad,
        nn.Conv1d(64, 64, kernel_size=7, dilation=dilation),
        nn.BatchNorm1d(num_features=64),
        nn.Sigmoid())
    self.out_conv = nn.Sequential(
        nn.Conv1d(64, 256, kernel_size=1),
        nn.BatchNorm1d(256))
    self.out_elu = nn.ELU()
def __init__(self, input_shape, n_class, RF_size):
    super(FCN, self).__init__()
    hidden_layer_1 = 128
    kernel_size = int(RF_size / 2)
    self.padding1 = nn.ConstantPad1d((int((kernel_size - 1) / 2), int(kernel_size / 2)), 0)
    self.conv1 = torch.nn.Conv1d(in_channels=1,
                                 out_channels=hidden_layer_1,
                                 kernel_size=kernel_size)
    self.bn1 = nn.BatchNorm1d(num_features=hidden_layer_1)
    self.relu1 = nn.ReLU()

    kernel_size = int(RF_size * 5 / 16)
    hidden_layer_2 = hidden_layer_1 * 2
    self.padding2 = nn.ConstantPad1d((int((kernel_size - 1) / 2), int(kernel_size / 2)), 0)
    self.conv2 = torch.nn.Conv1d(in_channels=hidden_layer_1,
                                 out_channels=hidden_layer_2,
                                 kernel_size=kernel_size)
    self.bn2 = nn.BatchNorm1d(num_features=hidden_layer_2)
    self.relu2 = nn.ReLU()

    hidden_layer_3 = hidden_layer_1
    kernel_size = int(RF_size * 3 / 16)
    self.padding3 = nn.ConstantPad1d((int((kernel_size - 1) / 2), int(kernel_size / 2)), 0)
    self.conv3 = torch.nn.Conv1d(in_channels=hidden_layer_2,
                                 out_channels=hidden_layer_3,
                                 kernel_size=kernel_size)
    self.bn3 = nn.BatchNorm1d(num_features=hidden_layer_3)
    self.relu3 = nn.ReLU()

    self.averagepool = nn.AvgPool1d(kernel_size=input_shape)
    self.hidden = nn.Linear(hidden_layer_3, n_class)
def __init__(self, n_inputs: int, n_filters: int, kernel_size: int, bottleneck: int = None):
    super(_InceptionBlock, self).__init__()
    self.n_filters = n_filters
    self.bottleneck = None \
        if bottleneck is None \
        else nn.Conv1d(n_inputs, bottleneck, kernel_size=1)

    kernel_sizes = [kernel_size // (2 ** i) for i in range(3)]
    n_inputs = n_inputs if bottleneck is None else bottleneck

    # create 3 conv layers with different kernel sizes which are applied in parallel
    self.pad1 = nn.ConstantPad1d(padding=self._padding(kernel_sizes[0]), value=0)
    self.conv1 = nn.Conv1d(n_inputs, n_filters, kernel_sizes[0])

    self.pad2 = nn.ConstantPad1d(padding=self._padding(kernel_sizes[1]), value=0)
    self.conv2 = nn.Conv1d(n_inputs, n_filters, kernel_sizes[1])

    self.pad3 = nn.ConstantPad1d(padding=self._padding(kernel_sizes[2]), value=0)
    self.conv3 = nn.Conv1d(n_inputs, n_filters, kernel_sizes[2])

    # create 1 maxpool and conv layer which are also applied in parallel
    self.maxpool = nn.MaxPool1d(kernel_size=3, stride=1, padding=1)
    self.convpool = nn.Conv1d(n_inputs, n_filters, 1)

    self.bn = nn.BatchNorm1d(4 * n_filters)
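
# The `_padding` helper referenced above is not part of this snippet. A minimal
# hypothetical sketch, assuming it returns the (left, right) amounts for
# nn.ConstantPad1d that keep the sequence length unchanged for a stride-1 conv
# (even kernel sizes get one extra pad on the right):
def _padding(self, kernel_size: int):
    return (kernel_size - 1) // 2, kernel_size // 2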
def __init__(self, condition_dim=512, in_channels=1, out_channels=1, kernel_size=3,
             dilation=1, norm='LN', causal=False, residual=True):
    super(ConditionDConv1d, self).__init__()
    self.residual = residual
    self.in_channels = in_channels
    self.lin1 = nn.Sequential(
        nn.Conv1d(condition_dim, self.in_channels, kernel_size=1),
        nn.Tanh(),
    )
    self.lin2 = nn.Conv1d(condition_dim, self.in_channels, kernel_size=1)
    self.conv = nn.Conv1d(in_channels, out_channels, kernel_size, dilation=dilation)
    if causal:
        self.pad = nn.ConstantPad1d([(kernel_size - 1) * dilation, 0], 0)
    else:
        self.pad = nn.ConstantPad1d([(kernel_size - 1) // 2 * dilation,
                                     (kernel_size - 1) // 2 * dilation], 0)
    if norm == 'LN':
        self.norm = LayerNorm1d(out_channels)
    elif norm == 'BN':
        self.norm = nn.BatchNorm1d(out_channels)
    self.nl = nn.PReLU()
def __init__(self, in_channels=1, out_channels=1, kernel_size=3, dilation=1,
             norm='LN', causal=False, residual=True):
    super(DConv1d, self).__init__()
    if causal:
        self.pad = nn.ConstantPad1d([(kernel_size - 1) * dilation, 0], 0)
    else:
        self.pad = nn.ConstantPad1d([(kernel_size - 1) // 2 * dilation,
                                     (kernel_size - 1) // 2 * dilation], 0)
    self.conv = nn.Conv1d(in_channels, out_channels, kernel_size, dilation=dilation)
    self.residual = residual
    if norm == 'LN':
        self.norm = LayerNorm1d(out_channels)
    elif norm == 'BN':
        self.norm = nn.BatchNorm1d(out_channels)
    self.nl = nn.PReLU()
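
# A minimal standalone sketch (not from the source) checking the padding choice
# above: both the causal pad and the centered pad keep the output length equal
# to the input length for a stride-1 dilated Conv1d with an odd kernel size.
import torch
import torch.nn as nn

kernel_size, dilation, T = 3, 4, 100
causal_pad = nn.ConstantPad1d([(kernel_size - 1) * dilation, 0], 0)
centered_pad = nn.ConstantPad1d([(kernel_size - 1) // 2 * dilation,
                                 (kernel_size - 1) // 2 * dilation], 0)
conv = nn.Conv1d(1, 1, kernel_size, dilation=dilation)
x = torch.randn(1, 1, T)
assert conv(causal_pad(x)).shape[-1] == T
assert conv(centered_pad(x)).shape[-1] == T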
def __init__(self, mains_length, appliance_length):
    super(sgn_branch_network, self).__init__()
    self.mains_length = mains_length
    self.appliance_length = appliance_length
    self.conv = nn.Sequential(
        nn.ConstantPad1d((4, 5), 0),
        nn.Conv1d(1, 30, 10, stride=1),
        nn.ReLU(True),
        nn.ConstantPad1d((3, 4), 0),
        nn.Conv1d(30, 30, 8, stride=1),
        nn.ReLU(True),
        nn.ConstantPad1d((2, 3), 0),
        nn.Conv1d(30, 40, 6, stride=1),
        nn.ReLU(True),
        nn.ConstantPad1d((2, 2), 0),
        nn.Conv1d(40, 50, 5, stride=1),
        nn.ReLU(True),
        nn.ConstantPad1d((2, 2), 0),
        nn.Conv1d(50, 50, 5, stride=1),
        nn.ReLU(True)
    )
    self.dense = nn.Sequential(
        nn.Linear(50 * self.mains_length, 1024),
        nn.ReLU(True),
        nn.Linear(1024, self.appliance_length)
    )
def __init__(self, args):
    super(ThreeLayers, self).__init__()
    self.embedding = nn.Embedding.from_pretrained(
        load_pkl(args.data_dir + args.embedding_matrix_name), freeze=True)
    self.fact_convs = nn.ModuleList([
        nn.Conv1d(args.embedding_dim, args.filters_num, args.kernel_size_1),
        nn.Conv1d(args.filters_num, args.filters_num, args.kernel_size_2)
    ])
    self.article_convs = nn.ModuleList([
        nn.Conv1d(args.embedding_dim, args.filters_num, args.kernel_size_1),
        nn.Conv1d(args.filters_num, args.filters_num, args.kernel_size_2),
    ])
    self.conv_paddings = nn.ModuleList([
        nn.ConstantPad1d((args.kernel_size_1 // 2 - 1, args.kernel_size_1 // 2), 0.),
        nn.ConstantPad1d((args.kernel_size_2 // 2 - 1, args.kernel_size_2 // 2), 0.)
    ])
    self.ffs = nn.ModuleList(
        [nn.Linear(args.embedding_dim, args.linear_output)] +
        [nn.Linear(args.filters_num, args.linear_output) for _ in range(2)] +
        [nn.Linear(args.article_len * 3, args.linear_output)])
    self.predict = nn.Linear(args.linear_output, 2)
def __init__(self, emb_dim):
    super().__init__()
    self.conv_unigram = nn.Sequential(
        nn.ConstantPad1d((0, 0), 0),
        nn.Conv1d(emb_dim, emb_dim, 1, 1),
        nn.Tanh())
    self.conv_bigram = nn.Sequential(
        nn.ConstantPad1d((1, 0), 0),
        nn.Conv1d(emb_dim, emb_dim, 2, 1),
        nn.Tanh())
    self.conv_trigram = nn.Sequential(
        nn.ConstantPad1d((1, 1), 0),
        nn.Conv1d(emb_dim, emb_dim, 3, 1),
        nn.Tanh())
    # Max-Pool (kernel = 1x3) - subsample from n-gram representations of tokens
    self.max_pool = nn.MaxPool2d(kernel_size=(1, 3))
def block(self, input, output):
    padding = [1, 1]
    p1 = nn.ConstantPad1d(padding, 0)
    c1 = nn.Conv1d(input, output, 3, stride=1, padding=0)
    p2 = nn.ConstantPad1d(padding, 0)
    b1 = nn.BatchNorm1d(output)
    c2 = nn.Conv1d(output, output, 3, stride=1, padding=0)
    b2 = nn.BatchNorm1d(output)
    return [c1, c2, b1, b2, p1, p2]
def __init__(self, encoder: EncoderBase, second_encoder: nn.Module, second_dim: int,
             decoder: RNNDecoderBase, generator):
    super().__init__(encoder, second_encoder, second_dim, decoder, generator)
    directions = 2 if self.decoder.bidirectional_encoder else 1
    enc_output_size = self.encoder.rnn.hidden_size * directions
    self.enc_pad_layer = nn.ConstantPad1d((0, self.second_dim), 0)
    self.second_pad_layer = nn.ConstantPad1d((enc_output_size, 0), 0)
    self.merge_layer = nn.Linear(enc_output_size + self.second_dim,
                                 self.decoder.hidden_size,
                                 bias=True)
def __init__(self):
    super().__init__()
    self.enc_convs = nn.Sequential(
        nn.ConstantPad1d((2, 1), 0.),
        nn.Conv2d(1, 16, 4, stride=2, bias=False),
        nn.ReLU(inplace=True),
        nn.ConstantPad1d((2, 1), 0.),
        nn.Conv2d(16, 32, 4, stride=2, bias=False),
        nn.ReLU(inplace=True),
        nn.ConstantPad1d(1, 0.),
        nn.Conv2d(32, 64, 4, stride=2, bias=False),
        nn.ReLU(inplace=True),
        _Flatten())
    self.mu_linear = nn.Sequential(nn.Linear(64 * 3, 32), nn.Linear(32, 2))
    self.logvar_linear = nn.Linear(64 * 3, 2)
def __init__(
    self,
    input_dim,
    hidden_dim,
    output_dim,
    left_frames=1,
    left_dilation=1,
    right_frames=1,
    right_dilation=1,
):
    '''
    input_dim: as its name suggests
    hidden_dim: the dimension (number of channels) of the memory
    left means history, right means future
    '''
    super(DFSMN, self).__init__()
    self.left_frames = left_frames
    self.right_frames = right_frames
    self.in_conv = nn.Conv1d(input_dim, hidden_dim, kernel_size=1)
    #self.norm = nn.InstanceNorm1d(hidden_dim)
    #nn.init.normal_(self.in_conv.weight.data,std=0.05)
    if left_frames > 0:
        self.left_conv = nn.Sequential(
            #nn.ConstantPad1d([left_dilation*left_frames,-left_dilation],0),
            nn.ConstantPad1d([left_dilation * left_frames, 0], 0),
            nn.Conv1d(hidden_dim, hidden_dim, kernel_size=left_frames + 1,
                      dilation=left_dilation, bias=False, groups=hidden_dim))
        # nn.init.normal_(self.left_conv[1].weight.data,std=0.05)
    if right_frames > 0:
        self.right_conv = nn.Sequential(
            nn.ConstantPad1d([-right_dilation, right_frames * right_dilation], 0),
            nn.Conv1d(hidden_dim, hidden_dim, kernel_size=right_frames,
                      dilation=right_dilation, bias=False, groups=hidden_dim))
        # nn.init.normal_(self.right_conv[1].weight.data,std=0.05)
    self.out_conv = nn.Conv1d(hidden_dim, output_dim, kernel_size=1)
    #nn.init.normal_(self.out_conv.weight.data,std=0.05)
    self.weight = nn.Parameter(torch.Tensor([0]), requires_grad=True)
def build(self):
    """
    Build model structure.

    ArcII has the desirable property of letting two sentences meet before
    their own high-level representations mature.
    """
    self.embedding = self._make_default_embedding_layer()

    # Phrase level representations
    self.conv1d_left = nn.Sequential(
        nn.ConstantPad1d((0, self._params['kernel_1d_size'] - 1), 0),
        nn.Conv1d(in_channels=self._params['embedding_output_dim'],
                  out_channels=self._params['kernel_1d_count'],
                  kernel_size=self._params['kernel_1d_size']))
    self.conv1d_right = nn.Sequential(
        nn.ConstantPad1d((0, self._params['kernel_1d_size'] - 1), 0),
        nn.Conv1d(in_channels=self._params['embedding_output_dim'],
                  out_channels=self._params['kernel_1d_count'],
                  kernel_size=self._params['kernel_1d_size']))

    # Interaction
    self.matching = Matching(matching_type='plus')

    # Build conv
    activation = parse_activation(self._params['activation'])
    in_channel_2d = [
        self._params['kernel_1d_count'],
        *self._params['kernel_2d_count'][:-1]
    ]
    conv2d = [
        self._make_conv_pool_block(ic, oc, ks, activation, ps)
        for ic, oc, ks, ps in zip(in_channel_2d,
                                  self._params['kernel_2d_count'],
                                  self._params['kernel_2d_size'],
                                  self._params['pool_2d_size'])
    ]
    self.conv2d = nn.Sequential(*conv2d)

    self.dropout = nn.Dropout(p=self._params['dropout_rate'])

    left_length = self._params['left_length']
    right_length = self._params['right_length']
    for ps in self._params['pool_2d_size']:
        left_length = left_length // ps[0]
    for ps in self._params['pool_2d_size']:
        right_length = right_length // ps[1]

    # Build output
    self.out = self._make_output_layer(
        left_length * right_length * self._params['kernel_2d_count'][-1])
def __init__(self, length, in_channels, out_channels, residual_channels,
             block_channels, num_blocks, feedforward_channels):
    """
    Arguments:
        length: int. Length of input sequences.
        in_channels: int. Number of input channels in input.
        out_channels: int. Number of output channels.
        residual_channels: int. Number of channels to transform to at the start.
        block_channels: int. Number of channels in dilated convolutions.
        num_blocks: int. Number of dilated convolutions, with skip connections, to take.
        feedforward_channels: int. Size of hidden layer in final feedforward network.

    Note that the WaveNet paper doesn't use normalization; the TCN paper uses only
    a funny kind of normalization for ReLUs but also states that the choice of
    doing so didn't affect performance. So we're not using normalization either.
    """
    super(TCN, self).__init__()
    self.length = length
    self.in_channels = in_channels
    self.out_channels = out_channels
    self.residual_channels = residual_channels
    self.block_channels = block_channels
    self.num_blocks = num_blocks
    self.feedforward_channels = feedforward_channels

    self.first_padding = nn.ConstantPad1d((3, 0), 0)
    self.first_conv = nn.Conv1d(in_channels=in_channels,
                                out_channels=residual_channels,
                                kernel_size=4)  # arbitrarily

    self.blocks = nn.ModuleList()
    dilation = 1
    for _ in range(num_blocks):
        block = nn.Sequential(
            nn.ConstantPad1d((dilation, 0), 0),
            nn.Conv1d(in_channels=residual_channels,
                      out_channels=block_channels,
                      kernel_size=2,
                      dilation=dilation),
            nn.ReLU(),
            nn.Conv1d(in_channels=block_channels,
                      out_channels=residual_channels,
                      kernel_size=1))
        self.blocks.append(block)
        dilation *= 2

    self.final_affine_one = nn.Linear(length * residual_channels, feedforward_channels)
    self.final_affine_two = nn.Linear(feedforward_channels, out_channels)
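
# A small arithmetic sketch (an assumption, not part of the original code): with
# the kernel-4 causal input conv plus num_blocks kernel-2 convs at dilations
# 1, 2, 4, ..., each dilated conv adds `dilation` steps of history, so the
# receptive field works out to 4 + (2**num_blocks - 1) time steps.
def tcn_receptive_field(num_blocks: int) -> int:
    receptive_field = 4  # first causal conv, kernel_size=4
    dilation = 1
    for _ in range(num_blocks):
        receptive_field += dilation  # kernel-2 conv with this dilation
        dilation *= 2
    return receptive_field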
def __init__(self, channels, d, k=3, dropout=None, casual=False, use_bias=False,
             final_layer=False):
    super(TemOpResBlock, self).__init__()
    self.channel = channels  # input features
    self.d = d               # dilation size
    self.k = k               # "kernel size"
    self.drop = nn.Dropout(dropout)
    self.final_layer = final_layer
    ub = use_bias
    self.relu = nn.ReLU(inplace=True)
    self.conv1 = nn.Conv1d(channels, channels // 2, kernel_size=1, bias=ub)
    self.bn1 = nn.BatchNorm1d(channels // 2, momentum=0.1)
    if casual:
        padding = (_same_pad(k, d), 0)
    else:
        p = _same_pad(k, d)
        if p % 2 == 1:
            padding = [p // 2 + 1, p // 2]
        else:
            padding = (p // 2, p // 2)
    self.pad = nn.ConstantPad1d(padding, 0.)
    if final_layer:
        self.dconv1 = nn.Conv1d(channels // 2, channels // 2, kernel_size=k, stride=1, bias=ub)
    else:
        self.dconv1 = nn.Conv1d(channels // 2, channels // 2, kernel_size=k, stride=2, bias=ub)
    self.op_pad_1 = nn.ConstantPad1d((1, 1), 0.)
    self.bn2 = nn.BatchNorm1d(channels // 2, momentum=0.1)
    self.conv2 = nn.Conv1d(channels // 2, channels, kernel_size=1, bias=ub)
    self.bn3 = nn.BatchNorm1d(channels, momentum=0.1)
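
# `_same_pad` is referenced above but not defined in this snippet. A hypothetical
# sketch, assuming it returns the total padding that keeps the length of a
# stride-1 dilated convolution unchanged:
def _same_pad(k: int, d: int) -> int:
    return d * (k - 1)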
def __init__(self, model_config, device):
    super(ConvRNN, self).__init__()
    self.config = model_config
    self.device = device
    self.ar = False

    # receptive_field
    rec_field = self.config.dil_conv.kernel_size**self.config.dil_conv.n_convs
    self.padding = nn.ConstantPad1d(int((rec_field - 1) / 2), 0)

    # DilConv
    self.dil_conv = DilConv(self.config.dil_conv)

    # Rnn
    self.rnn = RNN(self.config.rnn, self.config.dil_conv, device=self.device)
    self.h_size = self.rnn.h_size  # , self.rnn2.h_size]

    # out_dim
    self.conv = nn.Conv1d(
        self.config.rnn.h_units * (self.config.rnn.bidirectional + 1),
        self.config.rnn.out_dim, 1)

    # AR model
    if self.config.rnn.model_arch == 'ar':
        self.rnn.add_conv(self.conv)
        self.ar = True
    elif self.config.rnn.model_arch == "rnn":
        self.conv2 = nn.Conv1d(self.rnn.in_dim + self.config.rnn.out_dim,
                               self.config.rnn.out_dim, 1)
    else:
        raise ValueError('model arch %s is not supported.' % self.config.rnn.model_arch)
def decode(self, data_loader):
    self.model.eval()
    with torch.no_grad():
        for i, data in enumerate(data_loader):
            # predict phones using AM
            xs, frame_lens, filenames = data
            if self.use_cuda:
                xs = xs.cuda(non_blocking=True)
            ys_hat = self.model(xs)
            ys_hat = ys_hat.unsqueeze(dim=0).transpose(1, 2)
            pos = torch.cat((torch.zeros((1, ), dtype=torch.long),
                             torch.cumsum(frame_lens, dim=0)))
            ys_hats = [
                ys_hat.narrow(2, p, l).clone()
                for p, l in zip(pos[:-1], frame_lens)
            ]
            max_len = torch.max(frame_lens)
            ys_hats = [
                nn.ConstantPad1d((0, max_len - yh.size(2)), 0)(yh)
                for yh in ys_hats
            ]
            ys_hat = torch.cat(ys_hats).transpose(1, 2)
            # latgen decoding
            if self.use_cuda:
                ys_hat = ys_hat.cpu()
            words, alignment, w_sizes, a_sizes = self.decoder(ys_hat, frame_lens)
            # print results
            ys_hat = [y[:s] for y, s in zip(ys_hat, frame_lens)]
            words = [w[:s] for w, s in zip(words, w_sizes)]
            for results in zip(filenames, ys_hat, words):
                self.print_result(*results)
def __init__(self, kernels: int, gate_channels: int, residual_channels: int) -> None:
    """
    :param kernels: Kernel size of the signal and gate convolution layers.
    :param gate_channels: Number of channels in the signal and gate convolution layers.
    :param residual_channels: Number of channels at the input and along the residual (bypass) path.
    """
    super().__init__()
    self.signal_gate_pad = nn.ConstantPad1d(padding=(kernels - 1, 0), value=0.0)
    self.signal_conv = nn.Conv1d(
        in_channels=residual_channels,
        out_channels=gate_channels,
        kernel_size=kernels,
        stride=1,
    )
    self.gate_conv = nn.Conv1d(
        in_channels=residual_channels,
        out_channels=gate_channels,
        kernel_size=kernels,
        stride=1,
    )
    self.output_conv = nn.Conv1d(in_channels=gate_channels,
                                 out_channels=residual_channels,
                                 kernel_size=1)
def __init__(self, in_features, K=16, conv_bank_features=128,
             conv_projections=[128, 128], highway_features=128,
             gru_features=128, num_highways=4):
    super(CBHG, self).__init__()
    self.in_features = in_features
    self.conv_bank_features = conv_bank_features
    self.highway_features = highway_features
    self.gru_features = gru_features
    self.conv_projections = conv_projections
    self.relu = nn.ReLU()
    # list of conv1d bank with filter size k=1...K
    # TODO: try dilational layers instead
    self.conv1d_banks = nn.ModuleList([
        BatchNormConv1d(in_features,
                        conv_bank_features,
                        kernel_size=k,
                        stride=1,
                        padding=[(k - 1) // 2, k // 2],
                        activation=self.relu) for k in range(1, K + 1)
    ])
    # max pooling of conv bank, with padding
    # TODO: try average pooling OR larger kernel size
    self.max_pool1d = nn.Sequential(
        nn.ConstantPad1d([0, 1], value=0),
        nn.MaxPool1d(kernel_size=2, stride=1, padding=0))
    out_features = [K * conv_bank_features] + conv_projections[:-1]
    activations = [self.relu] * (len(conv_projections) - 1)
    activations += [None]
    # setup conv1d projection layers
    layer_set = []
    for (in_size, out_size, ac) in zip(out_features, conv_projections, activations):
        layer = BatchNormConv1d(in_size,
                                out_size,
                                kernel_size=3,
                                stride=1,
                                padding=[1, 1],
                                activation=ac)
        layer_set.append(layer)
    self.conv1d_projections = nn.ModuleList(layer_set)
    # setup Highway layers
    if self.highway_features != conv_projections[-1]:
        self.pre_highway = nn.Linear(conv_projections[-1],
                                     highway_features,
                                     bias=False)
    self.highways = nn.ModuleList([
        Highway(highway_features, highway_features)
        for _ in range(num_highways)
    ])
    # bi-directional GRU layer
    self.gru = nn.GRU(gru_features,
                      gru_features,
                      1,
                      batch_first=True,
                      bidirectional=True)
def forward(self, tokens: torch.Tensor, mask: torch.Tensor):  # pylint: disable=arguments-differ
    if mask is not None:
        tokens = tokens * mask.unsqueeze(-1).float()

    # Our input is expected to have shape `(batch_size, num_tokens, embedding_dim)`. The
    # convolution layers expect input of shape `(batch_size, in_channels, sequence_length)`,
    # where the conv layer `in_channels` is our `embedding_dim`. We thus need to transpose the
    # tensor first.
    tokens = torch.transpose(tokens, 1, 2)
    # Each convolution layer returns output of size `(batch_size, num_filters, pool_length)`,
    # where `pool_length = num_tokens - ngram_size + 1`. We then do an activation function,
    # then do max pooling over each filter for the whole input sequence. Because our max
    # pooling is simple, we just use `torch.max`. The resultant tensor has shape
    # `(batch_size, num_conv_layers * num_filters)`, which then gets projected using the
    # projection layer, if requested.
    filter_outputs = []
    for i in range(len(self._convolution_layers)):
        ngram_filter_size = self._ngram_filter_sizes[i]
        padder = nn.ConstantPad1d((0, ngram_filter_size - 1), 0)
        tokens_input = padder(tokens)
        convolution_layer = getattr(self, 'conv_layer_{}'.format(i))
        filter_outputs.append(self._activation(convolution_layer(tokens_input)))

    filter_outputs_transformed = [
        torch.transpose(e, 1, 2) for e in filter_outputs
    ]
    result = torch.cat(filter_outputs_transformed, dim=-1)
    return result
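
# A standalone shape check (illustrative only, with made-up sizes): the
# (0, ngram_filter_size - 1) right padding keeps the convolved length equal to
# num_tokens for every n-gram size, so each filter output spans the full sequence.
import torch
import torch.nn as nn

batch_size, num_tokens, embedding_dim, num_filters = 2, 7, 16, 8
tokens = torch.randn(batch_size, num_tokens, embedding_dim).transpose(1, 2)
for ngram_filter_size in (2, 3, 5):
    padder = nn.ConstantPad1d((0, ngram_filter_size - 1), 0)
    conv = nn.Conv1d(embedding_dim, num_filters, ngram_filter_size)
    out = conv(padder(tokens))
    assert out.shape == (batch_size, num_filters, num_tokens)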
def __init__(self, embedding, params):
    super(CNN_Text_Attn, self).__init__()
    self.args = params
    n_words = embedding.shape[0]
    emb_dim = embedding.shape[1]
    filters = params.filters
    if params.kernels <= 3:
        Ks = [i for i in range(1, params.kernels + 1)]
    else:
        Ks = [i for i in range(1, params.kernels + 1, 2)]
    self.embed = nn.Embedding(n_words, emb_dim)
    self.embed.weight = nn.Parameter(torch.from_numpy(embedding), requires_grad=False)
    self.convs = nn.ModuleList([nn.Conv1d(emb_dim, filters, K) for K in Ks])
    self.U = nn.Linear(filters, 1, bias=False)
    self.fc = nn.Linear(filters, 1)
    self.dropout = nn.Dropout(p=self.args.dropout)
    self.embed_dropout = nn.Dropout(p=self.args.embed_dropout)
    self.padding = nn.ModuleList([nn.ConstantPad1d((0, K - 1), 0) for K in Ks])
def __init__(self, device, hop_len=256, upsample_kernel=512, n_mels=80, caus_ks=256,
             r=120, s=240, a=256, n_blocks=16):
    super(WaveNet, self).__init__()
    self.hop_len = hop_len
    self.a = a
    self.upsample = nn.ConvTranspose1d(n_mels,
                                       n_mels,
                                       kernel_size=upsample_kernel,
                                       stride=hop_len,
                                       padding=upsample_kernel // 2)
    self.left_pad = nn.ConstantPad1d((caus_ks - 1, 0), 0)
    self.causal = nn.Conv1d(1, r, kernel_size=caus_ks)
    dilations = [2**i for i in range(8)]
    dilations.extend(dilations)
    self.blocks = nn.ModuleList(
        [Block(n_mels=n_mels, dilation=i, r=r, s=s) for i in dilations])
    self.relu1 = nn.ReLU(True)
    self.out = nn.Conv1d(s, a, kernel_size=1)
    self.relu2 = nn.ReLU(True)
    self.end = nn.Conv1d(a, a, kernel_size=1)
    self.device = device
def unit_test(self, data, target_test=False):
    xs, ys, frame_lens, label_lens, filenames, texts = data
    if not target_test:
        if self.use_cuda:
            xs = xs.cuda(non_blocking=True)
        ys_hat = self.model(xs)
        if self.fp16:
            ys_hat = ys_hat.float()
        ys_hat = ys_hat.unsqueeze(dim=0).transpose(1, 2)
        pos = torch.cat((torch.zeros((1, ), dtype=torch.long),
                         torch.cumsum(frame_lens, dim=0)))
        ys_hats = [
            ys_hat.narrow(2, p, l).clone()
            for p, l in zip(pos[:-1], frame_lens)
        ]
        max_len = torch.max(frame_lens)
        ys_hats = [
            nn.ConstantPad1d((0, max_len - yh.size(2)), 0)(yh)
            for yh in ys_hats
        ]
        ys_hat = torch.cat(ys_hats).transpose(1, 2)
    else:
        ys_hat = self.target_to_loglikes(ys, label_lens)
    # latgen decoding
    if self.use_cuda:
        ys_hat = ys_hat.cpu()
    words, alignment, w_sizes, a_sizes = self.decoder(ys_hat, frame_lens)
    w2i = self.labeler.word2idx
    num_words = self.labeler.get_num_words()
    words.masked_fill_(words.ge(num_words), w2i('<unk>'))
    words.masked_fill_(words.lt(0), w2i('<unk>'))
    hyps = [w[:s] for w, s in zip(words, w_sizes)]
    # convert target texts to word indices
    refs = [[w2i(w.strip()) for w in t.strip().split()] for t in texts]
    return hyps, refs
def __init__(self, in_channels=5, out_channels=5, kernel_size=3, dilation=1,
             activation="relu"):
    super().__init__()
    self.in_channels = in_channels
    self.out_channels = out_channels
    self.kernel_size = kernel_size
    self.dilation = dilation
    self.activation = activation
    self.padding = (self.kernel_size +
                    (self.kernel_size - 1) * (self.dilation - 1) - 1) // 2
    self.layers = nn.Sequential(
        nn.ConstantPad1d(padding=(self.padding, self.padding), value=0),
        nn.Conv1d(
            in_channels=self.in_channels,
            out_channels=self.out_channels,
            kernel_size=self.kernel_size,
            dilation=self.dilation,
            bias=True,
        ),
        nn.ReLU(),
        nn.BatchNorm1d(self.out_channels),
    )
    nn.init.xavier_uniform_(self.layers[1].weight)
    nn.init.zeros_(self.layers[1].bias)