#!/usr/bin/env python

# Copyright 2020 Jian Wu
# License: Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)

import math

from typing import List, Optional
from torch.optim import lr_scheduler as lr, Optimizer
from aps.libs import Register

LrScheduler = Register("lr_scheduler")


@LrScheduler.register("reduce_lr")
class ReduceLROnPlateau(lr.ReduceLROnPlateau):
    """
    Subclass of lr.ReduceLROnPlateau, decorated with
    LrScheduler.register("reduce_lr"). It adds no behaviour of its own.
    """

    def __init__(self, *args, **kwargs):
        # Hand every positional/keyword argument to the parent scheduler as is
        super().__init__(*args, **kwargs)


@LrScheduler.register("step_lr")
class StepLR(lr.StepLR):
    """
    Subclass of lr.StepLR, decorated with LrScheduler.register("step_lr").
    It adds no behaviour of its own.
    """

    def __init__(self, *args, **kwargs):
        # Hand every positional/keyword argument to the parent scheduler as is
        super().__init__(*args, **kwargs)
#!/usr/bin/env python # Copyright 2019 Jian Wu # License: Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) import torch as th import torch.nn as nn import torch.nn.functional as tf from typing import Optional, Tuple from aps.libs import Register AsrAtt = Register("asr_att") def padding_mask(vec: th.Tensor, device: th.device = None) -> th.Tensor: """ Generate padding masks In [1]: a = th.tensor([5, 3, 2, 6, 1]) In [2]: padding_mask(a) Out[2]: tensor([[False, False, False, False, False, True], [False, False, False, True, True, True], [False, False, True, True, True, True], [False, False, False, False, False, False], [False, True, True, True, True, True]]) """ N = vec.nelement() # vector may not in sorted order
# Copyright 2020 Jian Wu # License: Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) """ Implementation of multi-head attention & transformer encoder variants """ import copy import torch as th import torch.nn as nn import torch.nn.functional as tf from typing import Optional, Tuple from aps.libs import Register from aps.asr.xfmr.pose import digit_shift TransformerEncoderLayers = Register("xfmr_encoder_layer") MHSAReturnType = Tuple[th.Tensor, Optional[th.Tensor]] class Swish(nn.Module): """ Swish activation """ def __init__(self): super(Swish, self).__init__() def forward(self, inp: th.Tensor) -> th.Tensor: return inp * th.sigmoid(inp) def _get_activation_fn(activation: str) -> nn.Module:
#!/usr/bin/env python

# Copyright 2020 Jian Wu
# License: Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)

import torch as th
import torch.nn as nn
import torch.nn.functional as tf

from aps.asr.base.layer import Conv1d, Conv2d, Normalize1d
from aps.libs import Register
from typing import Optional, NoReturn, Dict

XfmrProjLayer = Register("xfmr_proj_layer")


def get_xfmr_proj(proj_name: str,
                  in_features: int,
                  att_dim: int,
                  kwargs: Optional[Dict] = None) -> nn.Module:
    """
    Build the projection layer stored in XfmrProjLayer under proj_name
    Args:
        proj_name (str): key looked up in the XfmrProjLayer registry
        in_features (int): first positional argument given to the layer
        att_dim (int): second positional argument given to the layer
        kwargs (dict or None): extra keyword arguments for the layer;
            None means no extra keyword arguments are passed
    Return:
        the object produced by calling the registered entry
    Raises:
        ValueError: if proj_name is not found in XfmrProjLayer
    """
    if proj_name in XfmrProjLayer:
        proj_cls = XfmrProjLayer[proj_name]
        # Unpacking an empty dict is the same as passing no keyword arguments
        extra_args = {} if kwargs is None else kwargs
        return proj_cls(in_features, att_dim, **extra_args)
    raise ValueError(f"Unsupported projection layer: {proj_name}")
# Copyright 2019 Jian Wu # License: Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) import torch as th import torch.nn as nn import torch.nn.functional as tf from typing import Optional, Tuple, Union, List, Dict from aps.asr.base.layer import VariantRNN, FSMN, Conv1d, Conv2d, PyTorchRNN from aps.asr.base.layer import var_len_rnn_forward, rnn_output_nonlinear from aps.asr.base.jit import LSTM from aps.libs import Register BaseEncoder = Register("base_encoder") EncRetType = Tuple[th.Tensor, Optional[th.Tensor]] def encoder_instance(enc_type: str, inp_features: int, out_features: int, enc_kwargs: Dict) -> nn.Module: """ Return encoder instance """ def encoder(enc_type, inp_features, **kwargs): if enc_type not in BaseEncoder: raise RuntimeError(f"Unknown encoder type: {enc_type}") enc_cls = BaseEncoder[enc_type] return enc_cls(inp_features, **kwargs) if enc_type != "concat":
#!/usr/bin/env python # Copyright 2020 Jian Wu # License: Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) import math import torch as th import torch.nn as nn import torch.nn.functional as tf from aps.libs import Register PosEncodings = Register("pos_encodings") def get_xfmr_pose(enc_type: str, dim: int, nhead: int = 4, radius: int = 16, dropout: float = 0.1, scale_embed: bool = False) -> nn.Module: """ Return position encodings layer Args: enc_type (str): transformer encoder type, {xfmr|cfmr}_{abs|rel|xl} """ pose = enc_type.split("_")[-1] if pose not in PosEncodings: raise ValueError(f"Unsupported enc_type: {enc_type}") pose_cls = PosEncodings[pose] if pose == "abs":
""" Convolution based multi-channel front-end processing """ import torch as th import torch.nn as nn import torch.nn.functional as tf from typing import Optional, Union from aps.transform.utils import mel_filter from aps.asr.base.encoder import PyTorchRNNEncoder from aps.libs import Register from aps.cplx import ComplexTensor EnhFrontEnds = Register("enh_filter") class ComplexConvXd(nn.Module): """ Complex convolution layer """ def __init__(self, conv_ins, *args, **kwargs): super(ComplexConvXd, self).__init__() self.real = conv_ins(*args, **kwargs) self.imag = conv_ins(*args, **kwargs) def forward(self, x: ComplexTensor, add_abs: bool = False,