def augmentation(self):
        """ Apply Spec-Augmentation """
        augment_end_idx = int(len(self.audio_paths) * self.augment_ratio)
        logger.info("Applying Augmentation...")

        for idx in range(augment_end_idx):
            self.augment_flags.append(True)
            self.audio_paths.append(self.audio_paths[idx])
            self.label_paths.append(self.label_paths[idx])
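This method only duplicates entries and flags them; the masking itself is presumably applied when features are loaded. A minimal sketch of SpecAugment-style time/frequency masking on a (time, freq) feature tensor — the function name and default mask widths here are illustrative, not the repo's:

import random

def spec_augment(feat, time_mask_num=2, freq_mask_num=2,
                 time_mask_width=40, freq_mask_width=12):
    """ Zero out random time steps and frequency channels (SpecAugment sketch) """
    time_len, freq_len = feat.size(0), feat.size(1)

    for _ in range(time_mask_num):
        t = random.randint(0, time_mask_width)
        t0 = random.randint(0, max(0, time_len - t))
        feat[t0:t0 + t, :] = 0

    for _ in range(freq_mask_num):
        f = random.randint(0, freq_mask_width)
        f0 = random.randint(0, max(0, freq_len - f))
        feat[:, f0:f0 + f] = 0

    return feat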
Example no. 2
def load_pickle(filepath, message=""):
    """
    load pickle file

    Args:
        filepath (str): Path to pickle file to load
        message (str): message to print

    Returns: load_result
        -**load_result** : load result of pickle
    """
    with open(filepath, "rb") as f:
        load_result = pickle.load(f)
        logger.info(message)
        return load_result
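For reference, the save_pickle counterpart used later on this page is assumed to mirror this API; a quick round-trip sketch (the save_pickle signature is inferred from its usage below, not shown in the source):

import pickle
from package.definition import logger  # as in the other snippets

def save_pickle(save_result, filepath, message=""):
    """ Save an object to a pickle file (signature assumed from usage) """
    with open(filepath, "wb") as f:
        pickle.dump(save_result, f)
    logger.info(message)

# round-trip
save_pickle({"n_mels": 128}, "./data/pickle/meta", message="save complete !!")
meta = load_pickle("./data/pickle/meta", message="load complete !!")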
Example no. 3
def evaluate(model, queue, criterion, device):
    r"""
    Args:
        model (torch.nn.Module): Model to be evaluated
        queue (queue): queue for threading
        criterion (torch.nn): one of PyTorch's loss functions. Refer to http://pytorch.org/docs/master/nn.html#loss-functions for a list of them.
        device (torch.device): device to run on ('cuda' or 'cpu')

    Returns: loss, cer
        - **loss** (float): loss of evaluation
        - **cer** (float): character error rate
    """
    logger.info('evaluate() start')
    total_loss = 0.
    total_num = 0
    total_dist = 0
    total_length = 0
    total_sentence_num = 0

    model.eval()

    with torch.no_grad():
        while True:
            feats, scripts, feat_lengths, script_lengths = queue.get()
            if feats.shape[0] == 0:
                break

            feats = feats.to(device)
            scripts = scripts.to(device)
            target = scripts[:, 1:]

            model.module.flatten_parameters()
            y_hat, logit = model(feats,
                                 scripts,
                                 teacher_forcing_ratio=0.0,
                                 use_beam_search=False)
            loss = criterion(logit.contiguous().view(-1, logit.size(-1)),
                             target.contiguous().view(-1))
            total_loss += loss.item()
            total_num += sum(feat_lengths)

            dist, length = get_distance(target, y_hat, id2char, EOS_TOKEN)
            total_dist += dist
            total_length += length
            total_sentence_num += target.size(0)

    logger.info('evaluate() completed')
    return total_loss / total_num, total_dist / total_length
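get_distance and id2char come from elsewhere in the repository; a minimal sketch of the CER bookkeeping they imply — decode label ids to text up to EOS, then accumulate character-level edit distance — might look like this (a sketch, not the repo's exact implementation):

import Levenshtein  # pip install python-Levenshtein

def label_to_string(labels, id2char, eos_id):
    """ Convert a sequence of label ids into a string, stopping at EOS """
    sentence = ""
    for label in labels:
        if label.item() == eos_id:
            break
        sentence += id2char[label.item()]
    return sentence

def get_distance(targets, y_hats, id2char, eos_id):
    """ Sum edit distance and reference length over a batch (sketch) """
    total_dist, total_length = 0, 0
    for target, y_hat in zip(targets, y_hats):
        script = label_to_string(target, id2char, eos_id)
        pred = label_to_string(y_hat, id2char, eos_id)
        total_dist += Levenshtein.distance(script, pred)
        total_length += len(script)
    return total_dist, total_length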
Example no. 4
def evaluate(model, queue, perplexity, device):
    logger.info('evaluate() start')

    total_loss = 0
    total_num = 0

    model.eval()

    with torch.no_grad():
        while True:
            loss = perplexity

            inputs, targets, input_lengths, target_lengths = queue.get()

            if inputs.shape[0] == 0:
                break

            inputs = inputs.to(device)
            targets = targets.to(device)

            model.module.flatten_parameters()
            outputs = model(inputs, teacher_forcing_ratio=0.0)

            loss.reset()
            for step, step_output in enumerate(outputs):
                batch_size = targets.size(0)
                loss.eval_batch(step_output.contiguous().view(batch_size, -1),
                                targets[:, step])

            loss = loss.get_loss()

            total_loss += loss
            total_num += sum(input_lengths)

    logger.info('evaluate() completed')

    return total_loss / total_num
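The Perplexity object follows the reset / eval_batch / get_loss interface familiar from the pytorch-seq2seq loss classes; a minimal sketch of that interface, assuming it wraps a summed NLLLoss over log-probability outputs (an assumption, not the repo's exact code):

import math
import torch.nn as nn

class Perplexity:
    """ Accumulating NLL loss reported as perplexity (interface sketch) """
    def __init__(self, weight=None, mask=None):
        if mask is None:
            self.criterion = nn.NLLLoss(weight=weight, reduction='sum')
        else:
            self.criterion = nn.NLLLoss(weight=weight, ignore_index=mask, reduction='sum')
        self.acc_loss, self.norm_term = 0, 0

    def reset(self):
        self.acc_loss, self.norm_term = 0, 0

    def eval_batch(self, outputs, target):
        # outputs are expected to be per-step log-probabilities
        self.acc_loss = self.acc_loss + self.criterion(outputs, target)
        self.norm_term += target.size(0)

    def backward(self):
        self.acc_loss.backward()

    def get_loss(self):
        nll = self.acc_loss.item() / self.norm_term
        return math.exp(min(nll, 100))  # cap the exponent to avoid overflow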
Example no. 5
def supervised_train(model, config, epoch, total_time_step, queue,
                     criterion, optimizer, device, train_begin, worker_num,
                     print_every=10, teacher_forcing_ratio=0.90):
    r"""
    Args:
        train_begin: train begin time
        total_time_step: total time step in epoch
        epoch (int): present epoch
        config (Config): configuration
        model (torch.nn.Module): Model to be trained
        optimizer (torch.optim): optimizer for training
        teacher_forcing_ratio (float): probability that teacher forcing will be used (default: 0.90)
        print_every (int): how often (in steps) to log training progress
        queue (Queue.queue): queue for threading
        criterion (torch.nn): one of PyTorch's loss functions.
          Refer to http://pytorch.org/docs/master/nn.html#loss-functions for a list of them.
        device (torch.device): device to run on ('cuda' or 'cpu')
        worker_num (int): the number of CPU cores used

    Returns: loss, cer
        - **loss** (float): loss of present epoch
        - **cer** (float): character error rate
    """
    epoch_loss_total = 0.
    print_loss_total = 0.
    total_num = 0
    total_dist = 0
    total_length = 0
    time_step = 0
    decay_speed = 1.0

    RAMPUP_POWER = 3
    RAMPUP_PERIOD = 3000
    EXP_DECAY_PERIOD = total_time_step * 3

    model.train()
    begin = epoch_begin = time.time()

    while True:
        # LR Warm-Up
        if config.use_multistep_lr and epoch == 0 and time_step < RAMPUP_PERIOD:
            set_lr(optimizer, lr=config.high_plateau_lr * ((time_step + 1) / RAMPUP_PERIOD) ** RAMPUP_POWER)

        # LR Exponential-Decay
        if config.use_multistep_lr and epoch in (1, 2, 3):
            decay_rate = config.low_plateau_lr / config.high_plateau_lr
            decay_speed *= decay_rate ** (1 / EXP_DECAY_PERIOD)
            set_lr(optimizer, config.high_plateau_lr * decay_speed)

        feats, scripts, feat_lens, target_lens = queue.get()

        if feats.shape[0] == 0:
            # empty feats means closing one loader
            worker_num -= 1
            logger.debug('left train_loader: %d' % worker_num)

            if worker_num == 0:
                break
            else:
                continue

        inputs = feats.to(device)
        scripts = scripts.to(device)
        targets = scripts[:, 1:]

        model.module.flatten_parameters()
        y_hat, logit = model(inputs, scripts, teacher_forcing_ratio=teacher_forcing_ratio)

        loss = criterion(logit.contiguous().view(-1, logit.size(-1)), targets.contiguous().view(-1))
        epoch_loss_total += loss.item()
        print_loss_total += loss.item()

        total_num += sum(feat_lens)
        dist, length = get_distance(targets, y_hat, id2char, EOS_TOKEN)
        total_dist += dist
        total_length += length

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        time_step += 1
        torch.cuda.empty_cache()

        if time_step % print_every == 0:
            current = time.time()
            elapsed = current - begin
            epoch_elapsed = (current - epoch_begin) / 60.0
            train_elapsed = (current - train_begin) / 3600.0

            logger.info('timestep: {:4d}/{:4d}, loss: {:.4f}, cer: {:.2f}, elapsed: {:.2f}s {:.2f}m {:.2f}h'.format(
                time_step,
                total_time_step,
                print_loss_total / print_every,
                total_dist / total_length,
                elapsed, epoch_elapsed, train_elapsed)
            )
            print_loss_total = 0
            begin = time.time()

        if time_step % 1000 == 0:
            save_step_result(train_step_result, epoch_loss_total / total_num, total_dist / total_length)

        if time_step % 10000 == 0:
            torch.save(model, "./data/weight_file/epoch_%s_step_%s.pt" % (str(epoch), str(time_step)))

    logger.info('train() completed')

    return epoch_loss_total / total_num, total_dist / total_length
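set_lr is not shown on this page; the standard PyTorch idiom it presumably wraps is:

def set_lr(optimizer, lr):
    """ Set the learning rate of every parameter group """
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr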
Example no. 6
def supervised_train(model,
                     hparams,
                     epoch,
                     total_time_step,
                     queue,
                     criterion,
                     optimizer,
                     device,
                     train_begin,
                     worker_num,
                     print_time_step=10,
                     teacher_forcing_ratio=0.90):
    """
    Args:
        model (torch.nn.Module): Model to be trained
        optimizer (torch.optim): optimizer for training
        teacher_forcing_ratio (float): probability that teacher forcing will be used (default: 0.90)
        print_time_step (int): how often (in steps) to log training progress
        queue (Queue.queue): queue for threading
        criterion (torch.nn): one of PyTorch's loss functions. Refer to http://pytorch.org/docs/master/nn.html#loss-functions for a list of them.
        device (torch.device): device to run on ('cuda' or 'cpu')
        worker_num (int): the number of CPU cores used

    Returns: loss, cer
        - **loss** (float): loss of present epoch
        - **cer** (float): character error rate
    """
    total_loss = 0.
    total_num = 0
    total_dist = 0
    total_length = 0
    total_sent_num = 0
    time_step = 0

    model.train()
    begin = epoch_begin = time.time()

    while True:
        if hparams.use_multistep_lr and epoch == 0 and time_step < 1000:
            ramp_up(optimizer, time_step, hparams)
        if hparams.use_multistep_lr and epoch == 1:
            exp_decay(optimizer, total_time_step, hparams)
        feats, targets, feat_lengths, label_lengths = queue.get()
        if feats.shape[0] == 0:
            # empty feats means closing one loader
            worker_num -= 1
            logger.debug('left train_loader: %d' % (worker_num))

            if worker_num == 0:
                break
            else:
                continue
        optimizer.zero_grad()

        feats = feats.to(device)
        targets = targets.to(device)
        target = targets[:, 1:]
        model.module.flatten_parameters()

        y_hat, logit = model(feats,
                             targets,
                             teacher_forcing_ratio=teacher_forcing_ratio)
        loss = criterion(logit.contiguous().view(-1, logit.size(-1)),
                         target.contiguous().view(-1))

        total_loss += loss.item()
        total_num += sum(feat_lengths)
        dist, length = get_distance(target, y_hat, id2char, EOS_TOKEN)
        total_dist += dist
        total_length += length
        total_sent_num += target.size(0)
        loss.backward()
        optimizer.step()

        if time_step % print_time_step == 0:
            current = time.time()
            elapsed = current - begin
            epoch_elapsed = (current - epoch_begin) / 60.0
            train_elapsed = (current - train_begin) / 3600.0

            logger.info(
                'timestep: {:4d}/{:4d}, loss: {:.4f}, cer: {:.2f}, elapsed: {:.2f}s {:.2f}m {:.2f}h'
                .format(time_step, total_time_step, total_loss / total_num,
                        total_dist / total_length, elapsed, epoch_elapsed,
                        train_elapsed))
            begin = time.time()

        if time_step % 1000 == 0:
            save_step_result(train_step_result, total_loss / total_num,
                             total_dist / total_length)

        if time_step % 10000 == 0:
            torch.save(model, "model.pt")
            torch.save(
                model, "./data/weight_file/epoch_%s_step_%s.pt" %
                (str(epoch), str(time_step)))

        time_step += 1
        supervised_train.cumulative_batch_count += 1
        torch.cuda.empty_cache()  # free cached GPU memory; if you have enough GPU memory, delete this line

    loss = total_loss / total_num
    cer = total_dist / total_length
    logger.info('train() completed')
    return loss, cer
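Note that this variant counts batches on a function attribute, which must be initialized once before the first call:

supervised_train.cumulative_batch_count = 0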
Example no. 7
def logger_hparams(self):
    """ print information of hyperparameters """
    logger.info("use_bidirectional : %s" % str(self.use_bidirectional))
    logger.info("use_attention : %s" % str(self.use_attention))
    logger.info("use_pickle : %s" % str(self.use_pickle))
    logger.info("use_augment : %s" % str(self.use_augment))
    logger.info("use_pyramidal : %s" % str(self.use_pyramidal))
    logger.info("augment_ratio : %0.2f" % self.augment_ratio)
    logger.info("input_reverse : %s" % str(self.input_reverse))
    logger.info("hidden_size : %d" % self.hidden_size)
    logger.info("listener_layer_size : %d" % self.listener_layer_size)
    logger.info("speller_layer_size : %d" % self.speller_layer_size)
    logger.info("dropout : %0.2f" % self.dropout)
    logger.info("batch_size : %d" % self.batch_size)
    logger.info("worker_num : %d" % self.worker_num)
    logger.info("max_epochs : %d" % self.max_epochs)
    logger.info("initial learning rate : %0.4f" % self.init_lr)
    if self.use_multistep_lr:
        logger.info("high plateau learning rate : %0.4f" % self.high_plateau_lr)
        logger.info("low plateau learning rate : %0.4f" % self.low_plateau_lr)
    logger.info("teacher_forcing_ratio : %0.2f" % self.teacher_forcing)
    logger.info("seed : %d" % self.seed)
    logger.info("max_len : %d" % self.max_len)
    logger.info("use_cuda : %s" % str(self.use_cuda))
def split_dataset(config,
                  audio_paths,
                  label_paths,
                  valid_ratio=0.05,
                  target_dict=None):
    """
    Split the dataset into training and validation sets.

    Args:
        valid_ratio (float): ratio of the dataset held out for validation
        config (package.config.HyperParams): set of configures
        audio_paths (list): set of audio path
        label_paths (list): set of label path
        target_dict (dict): dictionary of filename and target

    Returns: train_time_step, trainset_list, validset
        - **train_time_step** (int): number of training batches per epoch
        - **trainset_list** (list): list of training datasets (one per worker)
        - **validset** (utils.dataset.BaseDataset): validation dataset
    """
    logger.info("split dataset start !!")

    trainset_list = list()
    train_num = math.ceil(len(audio_paths) * (1 - valid_ratio))
    total_time_step = math.ceil(len(audio_paths) / config.batch_size)
    valid_time_step = math.ceil(total_time_step * valid_ratio)
    train_time_step = total_time_step - valid_time_step

    if config.use_augment:
        train_time_step = int(train_time_step * (1 + config.augment_ratio))

    train_num_per_worker = math.ceil(train_num / config.worker_num)

    # audio_paths & label_paths shuffled in the same order
    # for separating train & validation
    data_paths = list(zip(audio_paths, label_paths))
    random.shuffle(data_paths)
    audio_paths, label_paths = zip(*data_paths)

    # separating the train dataset by the number of workers
    for idx in range(config.worker_num):
        train_begin_idx = train_num_per_worker * idx
        train_end_idx = min(train_num_per_worker * (idx + 1), train_num)

        trainset_list.append(
            CustomDataset(
                audio_paths=audio_paths[train_begin_idx:train_end_idx],
                label_paths=label_paths[train_begin_idx:train_end_idx],
                sos_id=SOS_TOKEN,
                eos_id=EOS_TOKEN,
                target_dict=target_dict,
                input_reverse=config.input_reverse,
                use_augment=config.use_augment,
                batch_size=config.batch_size,
                augment_ratio=config.augment_ratio))

    validset = CustomDataset(audio_paths=audio_paths[train_num:],
                             label_paths=label_paths[train_num:],
                             sos_id=SOS_TOKEN,
                             eos_id=EOS_TOKEN,
                             batch_size=config.batch_size,
                             target_dict=target_dict,
                             input_reverse=config.input_reverse,
                             use_augment=False)

    save_pickle(trainset_list, './data/pickle/trainset_list')
    save_pickle(validset, './data/pickle/validset')

    logger.info("split dataset complete !!")

    return train_time_step, trainset_list, validset
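For concreteness, the time-step bookkeeping works out as follows for a hypothetical run (all numbers below are chosen purely for illustration):

import math

# hypothetical: 100,000 clips, batch_size=32, valid_ratio=0.05, augment_ratio=1.0
total_time_step = math.ceil(100_000 / 32)            # 3125 batches in total
valid_time_step = math.ceil(total_time_step * 0.05)  # 157 batches held out
train_time_step = total_time_step - valid_time_step  # 2968 training batches
train_time_step = int(train_time_step * (1 + 1.0))   # 5936 after augmentation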
Example no. 9
import os

import torch
from torch import optim
from package.config import Config
from package.definition import char2id, logger, SOS_token, EOS_token, PAD_token
from package.data_loader import CustomDataset, load_corpus, CustomDataLoader
from package.evaluator import evaluate
from package.loss import Perplexity
from package.trainer import supervised_train
from model import LanguageModel

# Character-level Recurrent Neural Network Language Model implemented in PyTorch
# https://github.com/sooftware/char-rnnlm

if __name__ == '__main__':
    os.environ["CUDA_LAUNCH_BLOCKING"] = "1"  # if you use Multi-GPU, delete this line
    logger.info("device : %s" % torch.cuda.get_device_name(0))
    logger.info("CUDA is available : %s" % (torch.cuda.is_available()))
    logger.info("CUDA version : %s" % torch.version.cuda)
    logger.info("PyTorch version : %s" % torch.__version__)

    config = Config(use_cuda=True,
                    hidden_size=512,
                    dropout_p=0.5,
                    n_layers=4,
                    batch_size=16,
                    max_epochs=40,
                    lr=0.0001,
                    teacher_forcing_ratio=1.0,
                    seed=1,
                    max_len=428,
                    worker_num=1)
def get_librosa_mfcc(filepath,
                     n_mfcc=40,
                     del_silence=False,
                     input_reverse=True):
    r""":
    Mel-frequency cepstral coefficients (MFCCs)

    Args:
        filepath (str): specific path of audio file
        n_mfcc (int): number of MFCC coefficients to return
        del_silence (bool): flag indicating whether to delete silence (default: False)
        input_reverse (bool): flag indicating whether to reverse the input (default: True)

    Feature Parameters:
        - **sample rate**: the A.I. Hub dataset's sample rate is 16,000 Hz
        - **frame length**: 25ms
        - **stride**: 10ms
        - **overlap**: 15ms
        - **window**: Hamming Window

    .. math::
        \begin{array}{ll}
        NFFT = sr \times frame\_length \\
        HopLength = sr \times stride \\
        \end{array}

    Returns: mfcc
        - **mfcc** (torch.Tensor): return mel frequency cepstral coefficient feature

    Examples::
        Generate mfccs from a time series

        >>> get_librosa_mfcc("KaiSpeech_021458.pcm", n_mfcc=40, input_reverse=True)
        Tensor([[ -5.229e+02,  -4.944e+02, ...,  -5.229e+02,  -5.229e+02],
                [  7.105e-15,   3.787e+01, ...,  -7.105e-15,  -7.105e-15],
                ...,
                [  1.066e-14,  -7.500e+00, ...,   1.421e-14,   1.421e-14],
                [  3.109e-14,  -5.058e+00, ...,   2.931e-14,   2.931e-14]])
    """
    if filepath.split('.')[-1] == 'pcm':
        try:
            pcm = np.memmap(filepath, dtype='h', mode='r')
        except Exception:  # e.g. a corrupted or unreadable file
            logger.info("%s Error Occur !!" % filepath)
            return None
        signal = np.array([float(x) for x in pcm])
    elif filepath.split('.')[-1] == 'wav':
        signal, _ = librosa.core.load(filepath, sr=16000)
    else:
        raise ValueError("Invalid format !!")

    if del_silence:
        non_silence_ids = librosa.effects.split(signal, top_db=30)
        signal = np.concatenate(
            [signal[start:end] for start, end in non_silence_ids])

    mfcc = librosa.feature.mfcc(y=signal,
                                sr=16000,
                                hop_length=160,
                                n_mfcc=n_mfcc,
                                n_fft=400,
                                window='hamming')
    if input_reverse:
        mfcc = mfcc[:, ::-1]
    mfcc = torch.FloatTensor(np.ascontiguousarray(np.swapaxes(mfcc, 0, 1)))
    return mfcc
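As a quick sanity check, n_fft=400 and hop_length=160 follow directly from the documented 25 ms frame and 10 ms stride at a 16 kHz sample rate:

sr = 16000
n_fft = round(sr * 0.025)       # 25 ms frame  -> 400 samples
hop_length = round(sr * 0.010)  # 10 ms stride -> 160 samples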
def get_librosa_melspectrogram(filepath,
                               n_mels=128,
                               del_silence=False,
                               input_reverse=True,
                               mel_type='log_mel'):
    r"""
    Compute a mel-scaled spectrogram (or log-mel spectrogram).

    Args:
        filepath (str): specific path of audio file
        n_mels (int): number of mel filterbanks
        del_silence (bool): flag indicating whether to delete silence (default: False)
        mel_type (str): if 'log_mel', return the log-mel spectrogram (default: 'log_mel')
        input_reverse (bool): flag indicating whether to reverse the input (default: True)

    Feature Parameters:
        - **sample rate**: the A.I. Hub dataset's sample rate is 16,000 Hz
        - **frame length**: 25ms
        - **stride**: 10ms
        - **overlap**: 15ms
        - **window**: Hamming Window

    .. math::
        \begin{array}{ll}
        NFFT = sr \times frame\_length \\
        HopLength = sr \times stride \\
        \end{array}

    Returns: mel_spectrogram
        - **mel_spectrogram** (torch.Tensor): return Mel-Spectrogram (or Log-Mel) feature

    Examples::
        Generate mel spectrogram from a time series

        >>> get_librosa_melspectrogram("KaiSpeech_021458.pcm", n_mels=128, input_reverse=True)
        Tensor([[  2.891e-07,   2.548e-03, ...,   8.116e-09,   5.633e-09],
                [  1.986e-07,   1.162e-02, ...,   9.332e-08,   6.716e-09],
                ...,
                [  3.668e-09,   2.029e-08, ...,   3.208e-09,   2.864e-09],
                [  2.561e-10,   2.096e-09, ...,   7.543e-10,   6.101e-10]])
    """
    if filepath.split('.')[-1] == 'pcm':
        try:
            pcm = np.memmap(filepath, dtype='h', mode='r')
        except Exception:  # e.g. a corrupted or unreadable file
            logger.info("%s Error Occur !!" % filepath)
            return None
        signal = np.array([float(x) for x in pcm])
    elif filepath.split('.')[-1] == 'wav':
        signal, _ = librosa.core.load(filepath, sr=16000)
    else:
        raise ValueError("Invalid format !!")

    if del_silence:
        non_silence_ids = librosa.effects.split(y=signal, top_db=30)
        signal = np.concatenate(
            [signal[start:end] for start, end in non_silence_ids])

    mel_spectrogram = librosa.feature.melspectrogram(y=signal,
                                                     sr=16000,
                                                     n_mels=n_mels,
                                                     n_fft=400,
                                                     hop_length=160,
                                                     window='hamming')

    if mel_type == 'log_mel':
        mel_spectrogram = librosa.amplitude_to_db(mel_spectrogram, ref=np.max)
    if input_reverse:
        mel_spectrogram = mel_spectrogram[:, ::-1]
    mel_spectrogram = torch.FloatTensor(
        np.ascontiguousarray(np.swapaxes(mel_spectrogram, 0, 1)))
    return mel_spectrogram
Example no. 12
def supervised_train(model, queue, perplexity, optimizer, device, print_every,
                     epoch, teacher_forcing_ratio, worker_num, total_time_step,
                     train_begin):
    print_loss_total = 0  # Reset every print_every
    epoch_loss_total = 0  # Reset every epoch
    total_num = 0
    time_step = 0

    model.train()
    begin = epoch_begin = time.time()

    while True:
        loss = perplexity
        inputs, targets, input_lens, target_lens = queue.get()

        if inputs.shape[0] == 0:
            # empty feats means closing one loader
            worker_num -= 1
            logger.debug('left train_loader: %d' % worker_num)

            if worker_num == 0:
                break
            else:
                continue

        inputs = inputs.to(device)
        targets = targets.to(device)

        model.module.flatten_parameters()
        outputs = model(inputs, teacher_forcing_ratio=teacher_forcing_ratio)

        # Get loss
        loss.reset()
        for step, step_output in enumerate(outputs):
            batch_size = targets.size(0)
            loss.eval_batch(step_output.contiguous().view(batch_size, -1),
                            targets[:, step])
        # Backpropagation
        model.zero_grad()
        loss.backward()
        optimizer.step()
        loss = loss.get_loss()

        epoch_loss_total += loss
        print_loss_total += loss
        total_num += sum(input_lens)

        time_step += 1
        torch.cuda.empty_cache()

        if time_step % print_every == 0:
            current = time.time()
            elapsed = current - begin
            epoch_elapsed = (current - epoch_begin) / 60.0
            train_elapsed = (current - train_begin) / 3600.0

            logger.info(
                'timestep: {:4d}/{:4d}, perplexity: {:.4f}, elapsed: {:.2f}s {:.2f}m {:.2f}h'
                .format(time_step, total_time_step,
                        print_loss_total / print_every, elapsed, epoch_elapsed,
                        train_elapsed))
            print_loss_total = 0
            begin = time.time()

        if time_step % 50000 == 0:
            torch.save(model,
                       "./data/epoch%s_%s.pt" % (str(epoch), str(time_step)))

    logger.info('train() completed')

    return epoch_loss_total / total_num