Example #1
import torch
from torch.nn import ConstantPad1d


def submissions_separation(input_tensor, separator_int, padding_int=1):
    rel_idx = [
        x[0] for x in (input_tensor[0] == separator_int).nonzero().tolist()
    ]
    rel_idx_len = len(rel_idx)
    # special case: a single separator means the input holds one sentence
    if rel_idx_len == 1:
        return torch.narrow(input_tensor, 1, 0, rel_idx[0])
    sent_length = [j - i - 1 for i, j in zip(rel_idx[:-1], rel_idx[1:])]
    new_tensors_list = []
    max_sent_len = max(sent_length)
    # the first sentence runs from the start to the first separator
    new_tensor = torch.narrow(input_tensor, 1, 0, rel_idx[0])
    padding_obj = ConstantPad1d((0, max_sent_len - len(new_tensor[0])),
                                padding_int)
    new_tensor = padding_obj(new_tensor)
    new_tensors_list.append(new_tensor)
    for i, sl in enumerate(sent_length):
        new_tensor = torch.narrow(input_tensor, 1, rel_idx[i] + 1, sl)
        padding_obj = ConstantPad1d((0, max_sent_len - len(new_tensor[0])),
                                    padding_int)
        new_tensor = padding_obj(new_tensor)
        new_tensors_list.append(new_tensor)
    # no special case needed for the last sentence: the sequence should end
    # with a separator
    return torch.cat(new_tensors_list)
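A minimal usage sketch (not from the original project; the separator and padding ids are assumptions):

import torch

seq = torch.tensor([[5, 6, 3, 7, 3, 8, 9, 10, 3]])  # three sentences, separator id 3
print(submissions_separation(seq, separator_int=3, padding_int=1))
# tensor([[ 5,  6,  1],
#         [ 7,  1,  1],
#         [ 8,  9, 10]])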
Example #2
 def __init__(self, dilation, in_filters: int, out_filters: int):
     super().__init__()
     self.dilatation = dilation
     self.in_filters = in_filters
     self.out_filters = out_filters
     self.causal_conv1 = Sequential(
         ConstantPad1d((dilation, 0), 0),
         Conv1d(in_filters, out_filters, kernel_size=2, dilation=dilation)
     )
     self.causal_conv2 = Sequential(
         ConstantPad1d((dilation, 0), 0),
         Conv1d(in_filters, out_filters, kernel_size=2, dilation=dilation)
     )
     self.tanh = Tanh()
     self.sigmoid = Sigmoid()
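A quick sketch (shapes are assumptions) of why the left-only padding makes these convolutions causal and length-preserving: a kernel-2 convolution with dilation d looks back exactly d steps, so padding d zeros on the left keeps the output aligned with the input while hiding future timesteps.

import torch
from torch.nn import ConstantPad1d, Conv1d, Sequential

causal = Sequential(ConstantPad1d((2, 0), 0),
                    Conv1d(1, 1, kernel_size=2, dilation=2))
x = torch.randn(1, 1, 10)   # (batch, channels, time)
print(causal(x).shape)      # torch.Size([1, 1, 10])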
Example #3
 def __init__(self, num_input, num_embeddings, hp):
     super().__init__()
     # Parameters ######################################################################################################################
     self.num_months_hx = hp.num_months_hx - 1
     self.num_rnn_layers = hp.num_rnn_layers
     self.embedding_dim = hp.embedding_dim
     # Embedding layers ################################################################################################################
     self.embed_codes = nn.Embedding(num_embeddings=num_embeddings,
                                     embedding_dim=hp.embedding_dim,
                                     padding_idx=0)
     self.embed_diagt = nn.Embedding(num_embeddings=5,
                                     embedding_dim=hp.embedding_dim,
                                     padding_idx=0)
     # RNN #############################################################################################################################
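     # widen the RNN input by one feature; cf. the add_month == 'concat' branch in Example #12, which concatenates a month value here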
     self.embedding_dim = self.embedding_dim + 1
     self.pad_fw = ConstantPad1d((1, 0), 0.)
     self.rnn_fw = GRU(input_size=self.embedding_dim,
                       hidden_size=self.embedding_dim,
                       num_layers=self.num_rnn_layers,
                       batch_first=True,
                       dropout=hp.dropout,
                       bidirectional=False)
     self.attention_fw = Attention(embedding_dim=self.embedding_dim)
     # Fully connected layers ##########################################################################################################
     fc_size = num_input + self.embedding_dim
     layers = []
     layers.append(nn.Linear(fc_size, fc_size))
     layers.append(nn.ELU())
     layers.append(nn.Linear(fc_size, 1))
     self.mlp = nn.Sequential(*layers)
Example #4
    def __init__(self,
                 input_channels,
                 output_channels,
                 kernel,
                 dropout=0.0,
                 activation='identity',
                 dilation=1,
                 groups=1,
                 batch_norm=True):
        super(ConvBlock, self).__init__()

        self._groups = groups

        p = (kernel - 1) * dilation // 2
        padding = p if kernel % 2 != 0 else (p, p + 1)
        layers = [
            ConstantPad1d(padding, 0.0),
            Conv1d(input_channels,
                   output_channels,
                   kernel,
                   padding=0,
                   dilation=dilation,
                   groups=groups,
                   bias=(not batch_norm))
        ]

        if batch_norm:
            layers += [BatchNorm1d(output_channels)]

        layers += [get_activation(activation)]
        layers += [Dropout(dropout)]

        self._block = Sequential(*layers)
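A sketch (with assumed sizes) checking the "same"-length padding arithmetic above: the total padding is always (kernel - 1) * dilation, split symmetrically for odd kernels and as (p, p + 1) for even ones, so the convolution preserves sequence length.

import torch
from torch.nn import ConstantPad1d, Conv1d, Sequential

# even kernel: kernel=4, dilation=1 -> p = (4 - 1) * 1 // 2 = 1, padding = (1, 2)
block = Sequential(ConstantPad1d((1, 2), 0.0),
                   Conv1d(8, 8, 4, padding=0, dilation=1))
print(block(torch.randn(2, 8, 50)).shape)  # torch.Size([2, 8, 50])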
Example #5
    def __init__(self,
                 embedding_dim,
                 bottleneck_dim,
                 input_channels,
                 output_channels,
                 kernel,
                 dropout=0.0,
                 activation='identity',
                 dilation=1,
                 groups=1,
                 batch_norm=True):
        super(ConvBlockGenerated, self).__init__()

        self._groups = groups

        p = (kernel - 1) * dilation // 2
        padding = p if kernel % 2 != 0 else (p, p + 1)

        self._padding = ConstantPad1d(padding, 0.0)
        self._convolution = Conv1dGenerated(embedding_dim,
                                            bottleneck_dim,
                                            input_channels,
                                            output_channels,
                                            kernel,
                                            padding=0,
                                            dilation=dilation,
                                            groups=groups,
                                            bias=(not batch_norm))
        self._regularizer = BatchNorm1dGenerated(
            embedding_dim, bottleneck_dim, output_channels,
            groups=groups) if batch_norm else None
        self._activation = Sequential(get_activation(activation),
                                      Dropout(dropout))
Example #6
    def coll(batch):
        xs, ys = [], []

        for text, label in batch:
            ys.append(torch.tensor([label], dtype=torch.float32))
            x = torch.tensor([vocab[token] for token in tokenizer(text)],
                             dtype=torch.long)
            if pad_to_length is not None:
                PAD_IDX = vocab["<pad>"]
                pad = ConstantPad1d((0, pad_to_length - len(x)), PAD_IDX)
                x = pad(x)
            xs.append(x)

        xs = torch.stack(xs)
        ys = torch.stack(ys)
        return xs.to(device), ys.to(device)
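Hypothetical wiring (the dataset name is an assumption): coll closes over vocab, tokenizer, pad_to_length and device, so it is typically defined inside a factory function and handed to a DataLoader as its collate_fn.

from torch.utils.data import DataLoader

loader = DataLoader(train_dataset, batch_size=32, collate_fn=coll)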
Example #7
import torch
from torch.nn import ConstantPad1d


def const_pad_tensors_dim1(tensors, max_len=None, padding_value=0):
    """
    tensors: list of tensor in shape: [L, W]
        or tensor with shape: [B, L, W]
    padding and cat to [B, MAX_L, W]
    """
    if max_len is None:
        max_len = max(t.shape[0] for t in tensors)
    for i, t in enumerate(tensors):
        pad_len = max_len - t.shape[0]
        t = t.unsqueeze(dim=0)
        t = t.transpose(1, 2)
        pad = ConstantPad1d((0, pad_len), padding_value)
        tensors[i] = pad(t).transpose(1, 2)
    tensors = torch.cat(tensors, dim=0)
    return tensors
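A minimal sketch (shapes are assumptions):

import torch

a = torch.ones(2, 4)                         # [L=2, W=4]
b = torch.ones(3, 4)                         # [L=3, W=4]
print(const_pad_tensors_dim1([a, b]).shape)  # torch.Size([2, 3, 4])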
Example #8
from torch.nn import ConstantPad1d


def pad1D(x, max_seq_count):
    '''
    pad the first dimension (sequence count) of documents;
    note: ConstantPad1d pads the last dimension, so this assumes a 1-D input
    '''

    # current sentence count
    seq_count = x.shape[0]

    # append zeros, if sequence count too low
    if seq_count < max_seq_count:
        padding_back = max_seq_count - seq_count
        pad = ConstantPad1d((0, padding_back), 0)
        x = pad(x)

    # truncate document
    elif seq_count > max_seq_count:
        x = x[:max_seq_count]

    return x
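A minimal sketch (values are assumptions), using a 1-D tensor so the last dimension is the sequence dimension:

import torch

doc = torch.tensor([4, 8, 15])
print(pad1D(doc, 5))   # tensor([ 4,  8, 15,  0,  0])
print(pad1D(doc, 2))   # tensor([4, 8])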
Example #9
import torch
from torch.nn import ConstantPad1d
from torch.nn.utils.rnn import pad_sequence


def __process_partition(episodes, p, return_ys=True):
    res = zip(*[(
        ep[p][0],
        ep[p][1],
        ep[p][0].shape[1],
        len(ep[p][1]),
    ) for ep in episodes])
    xs, ys, dims_in, lens = res
    device = xs[0].device
    if min(dims_in) != max(dims_in):
        # right-pad the feature dimension to a common width
        m = max(dims_in)
        xs = [ConstantPad1d((0, m - x.shape[1]), 0)(x) for x in xs]
    n_max = max(lens)
    # 1 marks real timesteps, 0 marks padding
    mask = torch.FloatTensor([([1] * len(x)) + ([0] * (n_max - len(x)))
                              for x in xs]).to(device)
    xs = pad_sequence(xs, batch_first=True)
    ys = pad_sequence(ys, batch_first=True)
    lens = torch.LongTensor(list(lens)).to(device)
    if return_ys:
        res = (xs, ys, lens, mask)
    else:
        res = (xs, lens, mask)
    return res
Example #10
 def __init__(self, input_dim, output_dim, bank_size, bank_channels,
              projection_channels, projection_kernel_size, highway_dim,
              gru_dim, dropout):
     super(PostnetCBHG, self).__init__()
     assert gru_dim % 2 == 0, (
         'Bidirectional GRU dimension must be divisible by 2.')
     self._bank = ModuleList([
         ConvBlock(input_dim, bank_channels, k, dropout, 'relu')
         for k in range(1, bank_size + 1)
     ])
     self._pool_and_project = Sequential(
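         # pad one step on the right so MaxPool1d(2, stride=1) preserves sequence length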
         ConstantPad1d((0, 1), 0.0), MaxPool1d(2, stride=1),
         ConvBlock(bank_channels * bank_size, projection_channels,
                   projection_kernel_size, dropout, 'relu'),
         ConvBlock(projection_channels, input_dim, projection_kernel_size,
                   dropout, 'identity'))
     highways = [HighwayLayer(highway_dim) for _ in range(4)]
     self._highway_layers = Sequential(Linear(input_dim, highway_dim),
                                       ReLU(), *highways)
     self._gru = GRU(highway_dim,
                     gru_dim // 2,
                     batch_first=True,
                     bidirectional=True)
     self._output_layer = Linear(gru_dim, output_dim)
Example #11
import os
from glob import glob

import numpy as np
import torch
import torchaudio
from sklearn.model_selection import GroupShuffleSplit
from torch.nn import ConstantPad1d
from torch.utils.data import DataLoader
from tqdm import tqdm


def data_pre():
    if not os.path.isfile('./tensor_dict.pt'):
        print('data needs processing')
        rate = 48000

        names = [file.split('/')[1][0:5] for file in sorted(glob('audio/*'))]

        for name in tqdm(names):
            waveform_, sample_rate = torchaudio.load('audio/'+name+'.mp3')

            #resample waveform to rate
            if sample_rate != rate:
                waveform_ = torchaudio.compliance.kaldi.resample_waveform(
                                                    waveform=waveform_,
                                                    orig_freq=sample_rate,
                                                    new_freq=rate)
                torchaudio.save('audio/'+name+'.mp3',waveform_,sample_rate=48000)

        rate = 48000
        names = [file.split('/')[1][0:5] for file in sorted(glob('audio/*'))]
        window_size = 6  # sec
        shift_size = 3  # sec
        pad_len = window_size * rate
        # label strings must match the Labels/*.txt files verbatim (including 'silece')
        typs = ['intro', 'verse', 'bridge', 'outro', 'break', 'Refrain', 'silece', 'other']
        dic = {k: v for v, k in enumerate(typs)}
        xs = torch.FloatTensor([])  # waveform windows are float
        ys = torch.LongTensor([])

        g = 0
        group = np.array([])
        for name in tqdm(names):
            waveform_, sample_rate = torchaudio.load('audio/'+name+'.mp3')
            # pad zeros of size window_size/2 at the start & end
            pad_len = (window_size / 2) * rate
            pad = ConstantPad1d(int(pad_len), 0)
            waveform = pad(waveform_[0])

            # make a sliding window
            x = waveform.unfold(dimension=0,
                                size=window_size * rate,
                                step=shift_size * rate).unsqueeze(1)

            #get labels
            f = open('Labels/'+name+'.txt')
            txts = f.readlines()
            c = []
            i = 0
            for txt in txts:
                tmp = txt.split()
                typ = dic.get(tmp[2].replace('\n', ''))
                start = float(tmp[0])
                end = float(tmp[1])
                while start <= i < end:
                    c.append(typ)
                    i += shift_size
            y = torch.LongTensor(c)
            f.close()

            # make the number of windows and labels match
            if x.shape[0] > y.shape[0]:
                x = x[:y.shape[0]]
            if y.shape[0] > x.shape[0]:
                y = y[:x.shape[0]]

            #make a group for GroupShuffleSplit
            group = np.append(group,np.repeat(g,y.shape[0]))
            g += 1

            # extend
            xs = torch.cat((xs, x))
            ys = torch.cat((ys, y))

            # save incrementally after each file (use a new name so the label
            # map `dic` is not clobbered for the next iteration)
            tensor_dict = {'xs': xs, 'ys': ys, 'group': group}
            torch.save(tensor_dict, 'tensor_dict.pt')
        
    else:
        print('data does not need processing')
    dic = torch.load('tensor_dict.pt')
    xs, ys, group = dic['xs'], dic['ys'], dic['group']
    gss = GroupShuffleSplit(n_splits=2, train_size=.8, random_state=42)
    for train_index, test_index in gss.split(xs, ys, group):
        X_train, X_test = xs[train_index], xs[test_index]
        y_train, y_test = ys[train_index], ys[test_index]

    dataloader_X_train = DataLoader(X_train, batch_size=64, shuffle=False, num_workers=0, drop_last=True)
    dataloader_y_train = DataLoader(y_train, batch_size=64, shuffle=False, num_workers=0, drop_last=True)

    dataloader_X_test = DataLoader(X_test, batch_size=64, shuffle=False, num_workers=0, drop_last=True)
    dataloader_y_test = DataLoader(y_test, batch_size=64, shuffle=False, num_workers=0, drop_last=True)

    return dataloader_X_train, dataloader_y_train, dataloader_X_test, dataloader_y_test
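A sketch of the pad-then-unfold windowing used above (the toy sizes are assumptions): padding half a window on each side, then sliding a window of window_size*rate samples with a hop of shift_size*rate.

import torch
from torch.nn import ConstantPad1d

rate, window_size, shift_size = 4, 2, 1      # 4 Hz, 2 s windows, 1 s hop
wave = torch.arange(16.)
padded = ConstantPad1d(int(window_size / 2 * rate), 0)(wave)
windows = padded.unfold(dimension=0, size=window_size * rate, step=shift_size * rate)
print(windows.shape)                         # torch.Size([5, 8])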
Example #12
 def __init__(self, num_input, num_embeddings, hp):
     super().__init__()
     # Parameters ######################################################################################################################
     self.nonprop_hazards = hp.nonprop_hazards
     self.add_diagt = hp.add_diagt
     self.add_month = hp.add_month
     self.num_months_hx = hp.num_months_hx - 1
     self.rnn_type = hp.rnn_type
     self.num_rnn_layers = hp.num_rnn_layers
     self.embedding_dim = hp.embedding_dim
     self.summarize = hp.summarize
     # Embedding layers ################################################################################################################
     self.embed_codes = nn.Embedding(num_embeddings=num_embeddings,
                                     embedding_dim=hp.embedding_dim,
                                     padding_idx=0)
     if self.add_month == 'embedding':
         self.embed_month = nn.Embedding(num_embeddings=hp.num_months_hx,
                                         embedding_dim=hp.embedding_dim,
                                         padding_idx=0)
     if self.add_diagt:
         self.embed_diagt = nn.Embedding(num_embeddings=5,
                                         embedding_dim=hp.embedding_dim,
                                         padding_idx=0)
     # RNN #############################################################################################################################
     if self.add_month == 'concat':
         self.embedding_dim = self.embedding_dim + 1
         self.pad_fw = ConstantPad1d((1, 0), 0.)
         self.pad_bw = ConstantPad1d((0, 1), 0.)
     if self.rnn_type == 'LSTM':
         self.rnn_fw = LSTM(input_size=self.embedding_dim,
                            hidden_size=self.embedding_dim,
                            num_layers=self.num_rnn_layers,
                            batch_first=True,
                            dropout=hp.dropout,
                            bidirectional=False)
         self.rnn_bw = LSTM(input_size=self.embedding_dim,
                            hidden_size=self.embedding_dim,
                            num_layers=self.num_rnn_layers,
                            batch_first=True,
                            dropout=hp.dropout,
                            bidirectional=False)
     else:
         self.rnn_fw = GRU(input_size=self.embedding_dim,
                           hidden_size=self.embedding_dim,
                           num_layers=self.num_rnn_layers,
                           batch_first=True,
                           dropout=hp.dropout,
                           bidirectional=False)
         self.rnn_bw = GRU(input_size=self.embedding_dim,
                           hidden_size=self.embedding_dim,
                           num_layers=self.num_rnn_layers,
                           batch_first=True,
                           dropout=hp.dropout,
                           bidirectional=False)
     if self.summarize == 'output_attention':
         self.attention_fw = Attention(embedding_dim=self.embedding_dim)
         self.attention_bw = Attention(embedding_dim=self.embedding_dim)
     # Fully connected layers ##########################################################################################################
     fc_size = num_input + 2 * self.embedding_dim
     layers = []
     for i in range(hp.num_mlp_layers):
         layers.append(nn.Linear(fc_size, fc_size))
         layers.append(nn.ELU())
     layers.append(nn.Linear(fc_size, 1))
     self.mlp = nn.Sequential(*layers)
Example #13
import torch
from torch.nn import ConstantPad1d


def speech_collate(batch, pad_val=0.0):
    r"""Puts each data field into a tensor with outer dimension batch size."""

    # split features and keys
    utt_keys = []
    inpt_batch = []
    target_batch = []
    speaker_ints = []

    for b in batch:
        # append values
        utt_keys.append(b["utt_key"])
        inpt_batch.append(b["inpt_feat"])
        if "target_feat" in b:
            target_batch.append(b["target_feat"])
        speaker_ints.append(b["speaker_int"])

    # max seq length
    seq_len = [b.size(0) for b in inpt_batch]
    max_seq = max(seq_len)

    # pad to max length
    inpt_batch = [
        ConstantPad1d((0, int(max_seq - b.size(0))),
                      value=pad_val)(b.transpose(0, 1)) for b in inpt_batch
    ]

    # sort seq & get sorted indices
    indices = torch.argsort(torch.tensor(seq_len), descending=True)
    seq_len.sort(reverse=True)

    # sort batch (descending order) for torch.rnn compatibility
    inpt_batch = [inpt_batch[i] for i in indices]

    inpt_batch = torch.stack(inpt_batch, dim=0)

    # (B, f, T) -> (B, T, f)
    inpt_batch = inpt_batch.permute(0, 2, 1)

    # rearrange speaker ints and utt_keys to match batches
    speaker_ints = torch.tensor([speaker_ints[i] for i in indices])

    utt_keys = [utt_keys[i] for i in indices]

    # Batch Dict
    batch_dict = {
        "utt_keys": utt_keys,
        "seq_len": seq_len,
        "input_batch": inpt_batch,
        "speaker_ints": speaker_ints
    }

    if "target_feat" in batch[0]:
        target_batch = [
            ConstantPad1d((0, int(max_seq - b.size(0))),
                          value=pad_val)(b.transpose(0, 1))
            for b in target_batch
        ]

        target_batch = [target_batch[i] for i in indices]

        target_batch = torch.stack(target_batch, dim=0)

        # (B, f, T) -> (B, T, f)
        batch_dict["target_batch"] = target_batch.permute(0, 2, 1)

    return batch_dict
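Hypothetical wiring (the dataset name is an assumption):

from functools import partial
from torch.utils.data import DataLoader

loader = DataLoader(speech_dataset, batch_size=16,
                    collate_fn=partial(speech_collate, pad_val=0.0))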
Example #14
import torch
from torch.nn import ConstantPad1d


def wav_to_padded_mspec_flat_tensor(wav, length):
    # note: max_len and get_mspec are module-level globals in the source project
    assert length <= max_len
    p_len = max_len - length
    padding = ConstantPad1d((0, p_len), 0)
    return torch.Tensor(get_mspec(padding(wav).flatten().detach().numpy())).view(1, -1)