def add_args(parser, args): group = parser.add_argument_group("RNN encoder") parser = ConvEncoder.add_args(parser, args) group.add_argument('--enc_n_units', type=int, default=512, help='number of units in each encoder RNN layer') group.add_argument( '--enc_n_projs', type=int, default=0, help= 'number of units in the projection layer after each encoder RNN layer' ) group.add_argument( '--bidirectional_sum_fwd_bwd', type=strtobool, default=False, help='sum forward and backward RNN outputs for dimension reduction' ) # streaming group.add_argument( '--lc_chunk_size_left', type=str, default="0", help='left chunk size for latency-controlled RNN encoder') group.add_argument( '--lc_chunk_size_right', type=str, default="0", help='right chunk size for latency-controlled RNN encoder') return parser
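# A minimal usage sketch (an assumption, not part of the repository): add_args
# hooks like the one above extend a top-level argparse parser, one argument
# group per module. ConvEncoderStub is a hypothetical stand-in for the real
# ConvEncoder, included only so the example is self-contained and runnable.
import argparse


class ConvEncoderStub:
    """Hypothetical stand-in for the repository's ConvEncoder."""

    @staticmethod
    def add_args(parser, args):
        group = parser.add_argument_group("CNN encoder")
        group.add_argument('--conv_channels', type=str, default="",
                           help='channels in CNN layers')
        return parser


parser = argparse.ArgumentParser()
parser.add_argument('--enc_type', type=str, default='blstm')
# Pre-parse known args so add_args hooks can branch on args.enc_type.
args, _ = parser.parse_known_args([])
parser = ConvEncoderStub.add_args(parser, args)
args = parser.parse_args(['--conv_channels', '32_32'])
print(args.conv_channels)  # -> '32_32'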
def add_args(parser, args): """Add arguments.""" group = parser.add_argument_group("Transformer encoder") if 'conv' in args.enc_type: parser = ConvEncoder.add_args(parser, args) # Transformer common if not hasattr(args, 'transformer_layer_norm_eps'): group.add_argument('--transformer_ffn_bottleneck_dim', type=int, default=0, help='bottleneck dimension in the FFN layer') group.add_argument('--transformer_input_bottleneck_dim', type=int, default=0, help='bottleneck dimension in the FFN layer') group.add_argument('--transformer_layer_norm_eps', type=float, default=1e-12, help='epsilon value for layer normalization') group.add_argument('--transformer_ffn_activation', type=str, default='relu', choices=['relu', 'gelu', 'gelu_accurate', 'glu', 'swish'], help='nonlinear activation for the FFN layer') group.add_argument('--transformer_param_init', type=str, default='xavier_uniform', choices=['xavier_uniform', 'pytorch'], help='parameter initialization') # Transformer encoder specific group.add_argument('--transformer_enc_d_model', type=int, default=256, help='number of units in the MHA layer for Transformer encoder') group.add_argument('--transformer_enc_d_ff', type=int, default=2048, help='number of units in the FFN layer for Transformer encoder') group.add_argument('--transformer_enc_n_heads', type=int, default=4, help='number of heads in the MHA layer for Transformer encoder') group.add_argument('--transformer_enc_pe_type', type=str, default='add', choices=['add', 'none', 'relative', 'relative_xl'], help='type of positional encoding for Transformer encoder') group.add_argument('--dropout_enc_layer', type=float, default=0.0, help='LayerDrop probability for Transformer encoder layers') group.add_argument('--transformer_enc_clamp_len', type=int, default=-1, help='maximum length for relative positional encoding. -1 means infinite length.') # streaming group.add_argument('--transformer_enc_lookaheads', type=str, default="0_0_0_0_0_0_0_0_0_0_0_0", help='lookahead frames per layer for unidirectional Transformer encoder') group.add_argument('--lc_chunk_size_left', type=str, default="0", help='left chunk size for latency-controlled Transformer encoder') group.add_argument('--lc_chunk_size_current', type=str, default="0", help='current chunk size (and hop size) for latency-controlled Transformer encoder') group.add_argument('--lc_chunk_size_right', type=str, default="0", help='right chunk size for latency-controlled Transformer encoder') group.add_argument('--lc_type', type=str, default='reshape', choices=['reshape', 'mask'], help='implementation methods of latency-controlled Transformer encoder') return parser
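# The streaming options above take underscore-joined strings (see the
# '--transformer_enc_lookaheads' default of "0_0_0_0_0_0_0_0_0_0_0_0").
# A minimal sketch of how such a string could be expanded into per-layer
# integers; the helper name parse_per_layer and the broadcasting of a single
# value to all layers are assumptions, not the repository's actual logic.
def parse_per_layer(value, n_layers):
    """Split an underscore-joined string such as '1_1_0' into per-layer ints."""
    items = [int(v) for v in value.split('_')]
    if len(items) == 1:
        items = items * n_layers  # broadcast a single value to all layers
    assert len(items) == n_layers, f'expected {n_layers} values, got {len(items)}'
    return items


print(parse_per_layer("0_0_0_0_0_0_0_0_0_0_0_0", 12))  # twelve zeros
print(parse_per_layer("64", 12))                       # [64] * 12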
def add_args(parser, args): """Add arguments.""" group = parser.add_argument_group("Transformer encoder") if 'conv' in args.enc_type: parser = ConvEncoder.add_args(parser, args) # Transformer common if not hasattr(args, 'transformer_d_model'): group.add_argument('--transformer_d_model', type=int, default=256, help='number of units in the MHA layer') if not hasattr(args, 'transformer_d_ff'): group.add_argument('--transformer_d_ff', type=int, default=2048, help='number of units in the FFN layer') if not hasattr(args, 'transformer_d_ff_bottleneck_dim'): group.add_argument('--transformer_d_ff_bottleneck_dim', type=int, default=0, help='bottleneck dimension in the FFN layer') if not hasattr(args, 'transformer_n_heads'): group.add_argument('--transformer_n_heads', type=int, default=4, help='number of heads in the MHA layer') if not hasattr(args, 'transformer_layer_norm_eps'): group.add_argument('--transformer_layer_norm_eps', type=float, default=1e-12, help='epsilon value for layer normalization') if not hasattr(args, 'transformer_ffn_activation'): group.add_argument('--transformer_ffn_activation', type=str, default='relu', choices=['relu', 'gelu', 'gelu_accurate', 'glu', 'swish'], help='nonlinear activation for the FFN layer') if not hasattr(args, 'transformer_param_init'): group.add_argument('--transformer_param_init', type=str, default='xavier_uniform', choices=['xavier_uniform', 'pytorch'], help='parameter initializatin') # NOTE: These checks are important to avoid conflict with args in Transformer decoder # Conformer encoder specific group.add_argument('--transformer_enc_pe_type', type=str, default='relative', choices=['relative'], help='type of positional encoding for the Transformer encoder') group.add_argument('--conformer_kernel_size', type=int, default=32, help='kernel size for depthwise convolution in convolution module for Conformer encoder layers') group.add_argument('--dropout_enc_layer', type=float, default=0.0, help='LayerDrop probability for Conformer encoder layers') # streaming group.add_argument('--lc_chunk_size_left', type=int, default=0, help='left chunk size for latency-controlled Conformer encoder') group.add_argument('--lc_chunk_size_current', type=int, default=0, help='current chunk size (and hop size) for latency-controlled Conformer encoder') group.add_argument('--lc_chunk_size_right', type=int, default=0, help='right chunk size for latency-controlled Conformer encoder') return parser
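# The hasattr guards above prevent the same flag from being registered twice
# when several modules (e.g. this encoder and a Transformer decoder) share
# common options: argparse raises ArgumentError on a duplicate option string.
# A minimal, self-contained sketch of the failure mode and the guard:
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--transformer_d_model', type=int, default=256)
try:
    parser.add_argument('--transformer_d_model', type=int, default=256)
except argparse.ArgumentError as e:
    print(f'duplicate flag rejected: {e}')

# The guard: skip registration if an earlier add_args call already
# contributed the flag (it then shows up in the parsed namespace).
args, _ = parser.parse_known_args([])
if not hasattr(args, 'transformer_d_model'):
    parser.add_argument('--transformer_d_model', type=int, default=256)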
def add_args(parser, args): group = parser.add_argument_group("RNN encoder") parser = ConvEncoder.add_args(parser, args) group.add_argument('--enc_n_units', type=int, default=512, help='number of units in each encoder RNN layer') group.add_argument( '--enc_n_projs', type=int, default=0, help= 'number of units in the projection layer after each encoder RNN layer' ) group.add_argument( '--bidirectional_sum_fwd_bwd', type=strtobool, default=False, help='sum forward and backward RNN outputs for dimension reduction' ) # streaming group.add_argument( '--lc_chunk_size_left', type=str, default="-1", help='current chunk size for latency-controlled RNN encoder') group.add_argument( '--lc_chunk_size_right', type=str, default="0", help='right chunk size for latency-controlled RNN encoder') group.add_argument('--cnn_lookahead', type=strtobool, default=True, help='disable lookahead frames in CNN layers') group.add_argument('--rsp_prob_enc', type=float, default=0.0, help='probability for Random State Passing (RSP)') return parser
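# strtobool here is typically distutils.util.strtobool (removed along with
# distutils in Python 3.12), which maps 'y'/'yes'/'true'/'1' to 1 and
# 'n'/'no'/'false'/'0' to 0, so type=strtobool lets boolean flags be set
# from the command line, e.g. '--cnn_lookahead false'.
from distutils.util import strtobool

print(strtobool('true'), strtobool('false'))  # -> 1 0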
def add_args(parser, args):
    """Add arguments."""
    # group = parser.add_argument_group("TDS encoder")
    parser = ConvEncoder.add_args(parser, args)
    return parser