def submissions_separation(input_tensor, separator_int, padding_int=1):
    rel_idx = [x[0] for x in (input_tensor[0] == separator_int).nonzero().tolist()]
    rel_idx_len = len(rel_idx)
    # special case if the input tensor includes a single case
    if rel_idx_len == 1:
        return torch.narrow(input_tensor, 1, 0, rel_idx[0] - 1)
    sent_length = [j - i - 1 for i, j in zip(rel_idx[:-1], rel_idx[1:])]
    new_tensors_list = []
    max_sent_len = max(sent_length)
    # special case for the first appearance
    new_tensor = torch.narrow(input_tensor, 1, 0, rel_idx[0])
    padding_obj = ConstantPad1d((0, max_sent_len - len(new_tensor[0])), padding_int)
    new_tensor = padding_obj(new_tensor)
    new_tensors_list.append(new_tensor)
    if rel_idx_len > 1:
        for loop_num, (i, sl) in enumerate(zip(range(rel_idx_len - 1), sent_length)):
            new_tensor = torch.narrow(input_tensor, 1, rel_idx[i] + 1, sl)
            padding_obj = ConstantPad1d((0, max_sent_len - len(new_tensor[0])), padding_int)
            new_tensor = padding_obj(new_tensor)
            new_tensors_list.append(new_tensor)
    # special case for the last sentence - not needed, since an ending sign is
    # expected at the end of the last sentence
    return torch.cat(new_tensors_list)
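# Hedged usage sketch (illustration only, not from the original source; assumes
# torch and ConstantPad1d are imported in the defining module): split one batch
# row on the separator token 99; segments are right-padded with padding_int.
import torch

demo = torch.tensor([[5, 6, 99, 7, 8, 9, 99]])
segments = submissions_separation(demo, separator_int=99)
# segments -> tensor([[5, 6, 1],
#                     [7, 8, 9]])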
def __init__(self, dilation, in_filters: int, out_filters: int):
    super().__init__()
    self.dilatation = dilation
    self.in_filters = in_filters
    self.out_filters = out_filters
    self.causal_conv1 = Sequential(
        ConstantPad1d((dilation, 0), 0),
        Conv1d(in_filters, out_filters, kernel_size=2, dilation=dilation))
    self.causal_conv2 = Sequential(
        ConstantPad1d((dilation, 0), 0),
        Conv1d(in_filters, out_filters, kernel_size=2, dilation=dilation))
    self.tanh = Tanh()
    self.sigmoid = Sigmoid()
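# Hedged sketch (illustration, not from the source): left-only padding of size
# `dilation` before a kernel-2 dilated Conv1d keeps the sequence length unchanged,
# and each output step only sees the current and past inputs, i.e. it is causal.
import torch
from torch.nn import ConstantPad1d, Conv1d, Sequential

causal = Sequential(ConstantPad1d((4, 0), 0), Conv1d(1, 1, kernel_size=2, dilation=4))
print(causal(torch.randn(1, 1, 16)).shape)  # torch.Size([1, 1, 16])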
def __init__(self, num_input, num_embeddings, hp):
    super().__init__()
    # Parameters ######################################################################
    self.num_months_hx = hp.num_months_hx - 1
    self.num_rnn_layers = hp.num_rnn_layers
    self.embedding_dim = hp.embedding_dim
    # Embedding layers ################################################################
    self.embed_codes = nn.Embedding(num_embeddings=num_embeddings,
                                    embedding_dim=hp.embedding_dim, padding_idx=0)
    self.embed_diagt = nn.Embedding(num_embeddings=5,
                                    embedding_dim=hp.embedding_dim, padding_idx=0)
    # RNN #############################################################################
    self.embedding_dim = self.embedding_dim + 1
    self.pad_fw = ConstantPad1d((1, 0), 0.)
    self.rnn_fw = GRU(input_size=self.embedding_dim, hidden_size=self.embedding_dim,
                      num_layers=self.num_rnn_layers, batch_first=True,
                      dropout=hp.dropout, bidirectional=False)
    self.attention_fw = Attention(embedding_dim=self.embedding_dim)
    # Fully connected layers ##########################################################
    fc_size = num_input + self.embedding_dim
    layers = []
    layers.append(nn.Linear(fc_size, fc_size))
    layers.append(nn.ELU())
    layers.append(nn.Linear(fc_size, 1))
    self.mlp = nn.Sequential(*layers)
def __init__(self, input_channels, output_channels, kernel, dropout=0.0,
             activation='identity', dilation=1, groups=1, batch_norm=True):
    super(ConvBlock, self).__init__()
    self._groups = groups
    p = (kernel - 1) * dilation // 2
    padding = p if kernel % 2 != 0 else (p, p + 1)
    layers = [
        ConstantPad1d(padding, 0.0),
        Conv1d(input_channels, output_channels, kernel, padding=0,
               dilation=dilation, groups=groups, bias=(not batch_norm))
    ]
    if batch_norm:
        layers += [BatchNorm1d(output_channels)]
    layers += [get_activation(activation)]
    layers += [Dropout(dropout)]
    self._block = Sequential(*layers)
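# Hedged sketch (illustration, not from the source): the padding rule above keeps
# the time dimension unchanged for odd kernels and adds one extra pad element on
# the right for even kernels; shown here with a bare ConstantPad1d + Conv1d pair.
import torch
from torch.nn import ConstantPad1d, Conv1d, Sequential

kernel, dilation = 4, 1
p = (kernel - 1) * dilation // 2
padding = p if kernel % 2 != 0 else (p, p + 1)
same_conv = Sequential(ConstantPad1d(padding, 0.0),
                       Conv1d(8, 16, kernel, dilation=dilation))
print(same_conv(torch.randn(2, 8, 100)).shape)  # torch.Size([2, 16, 100])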
def __init__(self, embedding_dim, bottleneck_dim, input_channels, output_channels,
             kernel, dropout=0.0, activation='identity', dilation=1, groups=1,
             batch_norm=True):
    super(ConvBlockGenerated, self).__init__()
    self._groups = groups
    p = (kernel - 1) * dilation // 2
    padding = p if kernel % 2 != 0 else (p, p + 1)
    self._padding = ConstantPad1d(padding, 0.0)
    self._convolution = Conv1dGenerated(embedding_dim, bottleneck_dim,
                                        input_channels, output_channels, kernel,
                                        padding=0, dilation=dilation, groups=groups,
                                        bias=(not batch_norm))
    self._regularizer = BatchNorm1dGenerated(embedding_dim, bottleneck_dim,
                                             output_channels,
                                             groups=groups) if batch_norm else None
    self._activation = Sequential(get_activation(activation), Dropout(dropout))
def coll(batch):
    xs, ys = [], []
    for text, label in batch:
        ys.append(torch.tensor([label], dtype=torch.float32))
        x = torch.tensor([vocab[token] for token in tokenizer(text)], dtype=torch.long)
        if pad_to_length is not None:
            # right-pad every sequence to the fixed length with the <pad> index
            PAD_IDX = vocab["<pad>"]
            pad = ConstantPad1d((0, pad_to_length - len(x)), PAD_IDX)
            x = pad(x).to(torch.long)
        xs.append(x)
    xs = torch.stack(xs)
    ys = torch.stack(ys)
    return xs.to(device), ys.to(device)
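# Hedged usage sketch (assumed, not from the source): `coll` is intended as a
# DataLoader collate_fn and relies on `vocab`, `tokenizer`, `pad_to_length` and
# `device` being defined in the enclosing scope; `text_dataset` is hypothetical.
from torch.utils.data import DataLoader

loader = DataLoader(text_dataset, batch_size=32, shuffle=True, collate_fn=coll)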
def const_pad_tensors_dim1(tensors, max_len=None, padding_value=0):
    """
    tensors: list of tensors with shape [L, W] (or a tensor with shape [B, L, W]);
    pad along dim 1 and concatenate to [B, MAX_L, W].
    """
    max_len = max([t.shape[0] for t in tensors]) if max_len is None else max_len
    for i, t in enumerate(tensors):
        pad_len = max_len - t.shape[0]
        t = t.unsqueeze(dim=0)       # [1, L, W]
        t = t.transpose(1, 2)        # [1, W, L] so ConstantPad1d pads the L axis
        pad = ConstantPad1d((0, pad_len), padding_value)
        tensors[i] = pad(t).transpose(1, 2)  # back to [1, MAX_L, W]
    tensors = torch.cat(tensors, dim=0)
    return tensors
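# Hedged usage sketch (assumed, not from the source): pad two [L, W] tensors to a
# common length and stack them into a [B, MAX_L, W] batch.
import torch

a, b = torch.ones(3, 4), torch.ones(5, 4)
batch = const_pad_tensors_dim1([a, b])
print(batch.shape)  # torch.Size([2, 5, 4])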
def pad1D(x, max_seq_count):
    '''pad the first dimension (sequence count) of documents'''
    # current sentence count
    seq_count = x.shape[0]
    if seq_count < max_seq_count:
        # append zeros if the sequence count is too low
        padding_back = max_seq_count - seq_count
        pad = ConstantPad1d((0, padding_back), 0)
        x = pad(x)
    elif seq_count > max_seq_count:
        # truncate the document
        x = x[:max_seq_count]
    return x
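# Hedged usage sketch (assumed, not from the source): fix a 1-D document
# representation to exactly 4 entries by zero-padding or truncating.
import torch

print(pad1D(torch.tensor([3, 7, 9]), 4))   # tensor([3, 7, 9, 0])
print(pad1D(torch.arange(6), 4))           # tensor([0, 1, 2, 3])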
def __process_partition(episodes, p, return_ys=True):
    res = zip(*[(ep[p][0],
                 ep[p][1],
                 ep[p][0].shape[1],
                 len(ep[p][1])) for ep in episodes])
    xs, ys, dims_in, lens = res
    device = xs[0].device
    if min(dims_in) != max(dims_in):
        m = max(dims_in)
        xs = [ConstantPad1d((0, m - x.shape[1]), 0)(x) for x in xs]
    n_max = max(lens)
    mask = torch.FloatTensor([([1] * len(x)) + ([0] * (n_max - len(x)))
                              for x in xs]).to(device)
    xs = pad_sequence(xs, batch_first=True)
    ys = pad_sequence(ys, batch_first=True)
    lens = torch.LongTensor(list(lens)).to(device)
    if return_ys:
        res = (xs, ys, lens, mask)
    else:
        res = (xs, lens, mask)
    return res
def __init__(self, input_dim, output_dim, bank_size, bank_channels,
             projection_channels, projection_kernel_size, highway_dim, gru_dim,
             dropout):
    super(PostnetCBHG, self).__init__()
    assert gru_dim % 2 == 0, 'Bidirectional GRU dimension must be divisible by 2.'
    self._bank = ModuleList([
        ConvBlock(input_dim, bank_channels, k, dropout, 'relu')
        for k in range(1, bank_size + 1)
    ])
    self._pool_and_project = Sequential(
        ConstantPad1d((0, 1), 0.0),
        MaxPool1d(2, stride=1),
        ConvBlock(bank_channels * bank_size, projection_channels,
                  projection_kernel_size, dropout, 'relu'),
        ConvBlock(projection_channels, input_dim,
                  projection_kernel_size, dropout, 'identity'))
    highways = [HighwayLayer(highway_dim) for _ in range(4)]
    self._highway_layers = Sequential(Linear(input_dim, highway_dim), ReLU(),
                                      *highways)
    self._gru = GRU(highway_dim, gru_dim // 2, batch_first=True, bidirectional=True)
    self._output_layer = Linear(gru_dim, output_dim)
def data_pre():
    if not os.path.isfile('./tensor_dict.pt'):
        print('data needs processing')
        rate = 48000
        names = [file.split('/')[1][0:5] for file in sorted(glob('audio/*'))]
        # resample every waveform to `rate` and overwrite the audio file
        for name in tqdm(names):
            waveform_, sample_rate = torchaudio.load('audio/' + name + '.mp3')
            if sample_rate != rate:
                waveform_ = torchaudio.compliance.kaldi.resample_waveform(
                    waveform=waveform_, orig_freq=sample_rate, new_freq=rate)
                torchaudio.save('audio/' + name + '.mp3', waveform_, sample_rate=48000)

        rate = 48000
        names = [file.split('/')[1][0:5] for file in sorted(glob('audio/*'))]
        window_size = 6   # sec
        shift_size = 3    # sec
        pad_len = window_size * rate
        typs = ['intro', 'verse', 'bridge', 'outro', 'break', 'Refrain', 'silece', 'other']
        dic = {k: v for v, k in enumerate(typs)}
        xs = torch.Tensor([])   # waveform windows are float
        ys = torch.LongTensor([])
        g = 0
        group = np.array([])
        for name in tqdm(names):
            waveform_, sample_rate = torchaudio.load('audio/' + name + '.mp3')
            # zero-pad window_size/2 seconds at the start and the end
            pad_len = (window_size / 2) * rate
            pad = ConstantPad1d(int(pad_len), 0)
            waveform = pad(waveform_[0])
            # make a sliding window
            x = waveform.unfold(dimension=0, size=window_size * rate,
                                step=shift_size * rate).unsqueeze(1)
            # get labels
            f = open('Labels/' + name + '.txt')
            txts = f.readlines()
            c = []
            i = 0
            for txt in txts:
                tmp = txt.split()
                typs = dic.get(tmp[2].replace('\n', ''))
                start = float(tmp[0])
                end = float(tmp[1])
                while start <= i < end:
                    c.append(typs)
                    i += shift_size
            y = torch.LongTensor(c)
            f.close()
            # make label and audio lengths match
            if x.shape[0] > y.shape[0]:
                x = x[:y.shape[0]]
            if y.shape[0] > x.shape[0]:
                y = y[:x.shape[0]]
            # build a group index for GroupShuffleSplit
            group = np.append(group, np.repeat(g, y.shape[0]))
            g += 1
            # extend
            xs = torch.cat((xs, x))
            ys = torch.cat((ys, y))
        # save
        dic = {'xs': xs, 'ys': ys, 'group': group}
        torch.save(dic, 'tensor_dict.pt')
    else:
        print('data does not need processing')

    dic = torch.load('tensor_dict.pt')
    xs, ys, group = [dic[c] for c in dic]
    gss = GroupShuffleSplit(n_splits=2, train_size=.8, random_state=42)
    for train_index, test_index in gss.split(xs, ys, group):
        X_train, X_test = xs[train_index], xs[test_index]
        y_train, y_test = ys[train_index], ys[test_index]
    dataloader_X_train = DataLoader(X_train, batch_size=64, shuffle=False,
                                    num_workers=0, drop_last=True)
    dataloader_y_train = DataLoader(y_train, batch_size=64, shuffle=False,
                                    num_workers=0, drop_last=True)
    dataloader_X_test = DataLoader(X_test, batch_size=64, shuffle=False,
                                   num_workers=0, drop_last=True)
    dataloader_y_test = DataLoader(y_test, batch_size=64, shuffle=False,
                                   num_workers=0, drop_last=True)
    return dataloader_X_train, dataloader_y_train, dataloader_X_test, dataloader_y_test
def __init__(self, num_input, num_embeddings, hp):
    super().__init__()
    # Parameters ######################################################################
    self.nonprop_hazards = hp.nonprop_hazards
    self.add_diagt = hp.add_diagt
    self.add_month = hp.add_month
    self.num_months_hx = hp.num_months_hx - 1
    self.rnn_type = hp.rnn_type
    self.num_rnn_layers = hp.num_rnn_layers
    self.embedding_dim = hp.embedding_dim
    self.summarize = hp.summarize
    # Embedding layers ################################################################
    self.embed_codes = nn.Embedding(num_embeddings=num_embeddings,
                                    embedding_dim=hp.embedding_dim, padding_idx=0)
    if self.add_month == 'embedding':
        self.embed_month = nn.Embedding(num_embeddings=hp.num_months_hx,
                                        embedding_dim=hp.embedding_dim, padding_idx=0)
    if self.add_diagt:
        self.embed_diagt = nn.Embedding(num_embeddings=5,
                                        embedding_dim=hp.embedding_dim, padding_idx=0)
    # RNN #############################################################################
    if self.add_month == 'concat':
        self.embedding_dim = self.embedding_dim + 1
    self.pad_fw = ConstantPad1d((1, 0), 0.)
    self.pad_bw = ConstantPad1d((0, 1), 0.)
    if self.rnn_type == 'LSTM':
        self.rnn_fw = LSTM(input_size=self.embedding_dim, hidden_size=self.embedding_dim,
                           num_layers=self.num_rnn_layers, batch_first=True,
                           dropout=hp.dropout, bidirectional=False)
        self.rnn_bw = LSTM(input_size=self.embedding_dim, hidden_size=self.embedding_dim,
                           num_layers=self.num_rnn_layers, batch_first=True,
                           dropout=hp.dropout, bidirectional=False)
    else:
        self.rnn_fw = GRU(input_size=self.embedding_dim, hidden_size=self.embedding_dim,
                          num_layers=self.num_rnn_layers, batch_first=True,
                          dropout=hp.dropout, bidirectional=False)
        self.rnn_bw = GRU(input_size=self.embedding_dim, hidden_size=self.embedding_dim,
                          num_layers=self.num_rnn_layers, batch_first=True,
                          dropout=hp.dropout, bidirectional=False)
    if self.summarize == 'output_attention':
        self.attention_fw = Attention(embedding_dim=self.embedding_dim)
        self.attention_bw = Attention(embedding_dim=self.embedding_dim)
    # Fully connected layers ##########################################################
    fc_size = num_input + 2 * self.embedding_dim
    layers = []
    for i in range(hp.num_mlp_layers):
        layers.append(nn.Linear(fc_size, fc_size))
        layers.append(nn.ELU())
    layers.append(nn.Linear(fc_size, 1))
    self.mlp = nn.Sequential(*layers)
def speech_collate(batch, pad_val=0.0):
    r"""Puts each data field into a tensor with outer dimension batch size."""
    # split features and keys
    utt_keys = []
    inpt_batch = []
    target_batch = []
    speaker_ints = []
    for b in batch:
        # append values
        utt_keys.append(b["utt_key"])
        inpt_batch.append(b["inpt_feat"])
        if "target_feat" in b:
            target_batch.append(b["target_feat"])
        speaker_ints.append(b["speaker_int"])
    # max sequence length
    seq_len = [b.size(0) for b in inpt_batch]
    max_seq = max(seq_len)
    # pad to max length
    inpt_batch = [
        ConstantPad1d((0, int(max_seq - b.size(0))), value=pad_val)(b.transpose(0, 1))
        for b in inpt_batch
    ]
    # sort sequences and get the sorted indices
    indices = torch.argsort(torch.tensor(seq_len), descending=True)
    seq_len.sort(reverse=True)
    # sort batch (descending order) for torch.rnn compatibility
    inpt_batch = [inpt_batch[i] for i in indices]
    inpt_batch = torch.stack(inpt_batch, dim=0)
    # (B, f, T) -> (B, T, f)
    inpt_batch = inpt_batch.permute(0, 2, 1)
    # rearrange speaker ints and utt_keys to match batches
    speaker_ints = torch.tensor([speaker_ints[i] for i in indices])
    utt_keys = [utt_keys[i] for i in indices]
    # batch dict
    batch_dict = {
        "utt_keys": utt_keys,
        "seq_len": seq_len,
        "input_batch": inpt_batch,
        "speaker_ints": speaker_ints
    }
    if "target_feat" in batch[0]:
        target_batch = [
            ConstantPad1d((0, int(max_seq - b.size(0))), value=pad_val)(b.transpose(0, 1))
            for b in target_batch
        ]
        target_batch = [target_batch[i] for i in indices]
        target_batch = torch.stack(target_batch, dim=0)
        # (B, f, T) -> (B, T, f)
        batch_dict["target_batch"] = target_batch.permute(0, 2, 1)
    return batch_dict
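# Hedged usage sketch (assumed, not from the source): each dataset item is a dict
# with an "utt_key", a (T, f) "inpt_feat" tensor, an optional "target_feat" and a
# "speaker_int"; speech_collate can then be passed to a DataLoader as collate_fn.
import torch

items = [{"utt_key": "utt_001", "inpt_feat": torch.randn(120, 40), "speaker_int": 3},
         {"utt_key": "utt_002", "inpt_feat": torch.randn(95, 40), "speaker_int": 1}]
out = speech_collate(items)
print(out["input_batch"].shape)  # torch.Size([2, 120, 40])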
def wav_to_padded_mspec_flat_tensor(wav, length):
    assert length <= max_len
    p_len = max_len - length
    padding = ConstantPad1d((0, p_len), 0)
    return torch.Tensor(get_mspec(padding(wav).flatten().data.numpy())).view(1, -1)