Example no. 1
 def add_args(parser, args):
     """Add arguments."""
     group = parser.add_argument_group("RNN encoder")
     parser = ConvEncoder.add_args(parser, args)
     group.add_argument('--enc_n_units',
                        type=int,
                        default=512,
                        help='number of units in each encoder RNN layer')
     group.add_argument('--enc_n_projs',
                        type=int,
                        default=0,
                        help='number of units in the projection layer '
                             'after each encoder RNN layer')
     group.add_argument(
         '--bidirectional_sum_fwd_bwd',
         type=strtobool,
         default=False,
         help='sum forward and backward RNN outputs for dimension reduction'
     )
     # streaming
     group.add_argument(
         '--lc_chunk_size_left',
         type=str,
         default="0",
         help='left chunk size for latency-controlled RNN encoder')
     group.add_argument(
         '--lc_chunk_size_right',
         type=str,
         default="0",
         help='right chunk size for latency-controlled RNN encoder')
     return parser
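
These add_args helpers are written as static methods on the encoder classes, so a training script can stack the options of whichever encoders it instantiates onto a single argparse parser; each helper also chains to ConvEncoder.add_args for the shared CNN front-end flags. Below is a minimal, runnable sketch of that composition pattern. The ConvEncoder and RNNEncoder bodies here are simplified stand-ins written for this illustration, not the real class definitions (the snippets appear to come from a speech toolkit such as neural_sp):

import argparse

class ConvEncoder:
    """Simplified stand-in for the shared CNN front-end encoder."""
    @staticmethod
    def add_args(parser, args):
        group = parser.add_argument_group("CNN encoder")
        group.add_argument('--conv_channels', type=str, default='',
                           help='delimited list of channels in CNN layers')
        return parser

class RNNEncoder:
    """Simplified stand-in mirroring Example no. 1."""
    @staticmethod
    def add_args(parser, args):
        group = parser.add_argument_group("RNN encoder")
        parser = ConvEncoder.add_args(parser, args)  # chain the shared flags
        group.add_argument('--enc_n_units', type=int, default=512,
                           help='number of units in each encoder RNN layer')
        return parser

parser = argparse.ArgumentParser()
args, _ = parser.parse_known_args([])       # first pass over known options
parser = RNNEncoder.add_args(parser, args)  # register encoder options
args = parser.parse_args(['--enc_n_units', '1024'])
assert args.enc_n_units == 1024

Returning the parser lets callers keep chaining add_args calls, while the argument groups keep --help output organized per encoder.
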
Example no. 2
    def add_args(parser, args):
        """Add arguments."""
        group = parser.add_argument_group("Transformer encoder")
        if 'conv' in args.enc_type:
            parser = ConvEncoder.add_args(parser, args)
        # Transformer common
        if not hasattr(args, 'transformer_layer_norm_eps'):
            group.add_argument('--transformer_ffn_bottleneck_dim', type=int, default=0,
                               help='bottleneck dimension in the FFN layer')
            group.add_argument('--transformer_input_bottleneck_dim', type=int, default=0,
                               help='bottleneck dimension in the input layer')
            group.add_argument('--transformer_layer_norm_eps', type=float, default=1e-12,
                               help='epsilon value for layer normalization')
            group.add_argument('--transformer_ffn_activation', type=str, default='relu',
                               choices=['relu', 'gelu', 'gelu_accurate', 'glu', 'swish'],
                               help='nonlinear activation for the FFN layer')
            group.add_argument('--transformer_param_init', type=str, default='xavier_uniform',
                               choices=['xavier_uniform', 'pytorch'],
                               help='parameter initialization')

        # Transformer encoder specific
        group.add_argument('--transformer_enc_d_model', type=int, default=256,
                           help='number of units in the MHA layer for Transformer encoder')
        group.add_argument('--transformer_enc_d_ff', type=int, default=2048,
                           help='number of units in the FFN layer for Transformer encoder')
        group.add_argument('--transformer_enc_n_heads', type=int, default=4,
                           help='number of heads in the MHA layer for Transformer encoder')
        group.add_argument('--transformer_enc_pe_type', type=str, default='add',
                           choices=['add', 'none', 'relative', 'relative_xl'],
                           help='type of positional encoding for Transformer encoder')
        group.add_argument('--dropout_enc_layer', type=float, default=0.0,
                           help='LayerDrop probability for Transformer encoder layers')
        group.add_argument('--transformer_enc_clamp_len', type=int, default=-1,
                           help='maximum length for relative positional encoding. -1 means infinite length.')
        # streaming
        group.add_argument('--transformer_enc_lookaheads', type=str, default="0_0_0_0_0_0_0_0_0_0_0_0",
                           help='lookahead frames per layer for unidirectional Transformer encoder')
        group.add_argument('--lc_chunk_size_left', type=str, default="0",
                           help='left chunk size for latency-controlled Transformer encoder')
        group.add_argument('--lc_chunk_size_current', type=str, default="0",
                           help='current chunk size (and hop size) for latency-controlled Transformer encoder')
        group.add_argument('--lc_chunk_size_right', type=str, default="0",
                           help='right chunk size for latency-controlled Transformer encoder')
        group.add_argument('--lc_type', type=str, default='reshape',
                           choices=['reshape', 'mask'],
                           help='implementation methods of latency-controlled Transformer encoder')
        return parser
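
Note that the streaming options above are strings rather than ints: the default "0_0_0_0_0_0_0_0_0_0_0_0" for --transformer_enc_lookaheads suggests an underscore-delimited per-layer encoding. The consuming side is not shown in these snippets, but a plausible decoding helper would look like the following sketch (the parse_per_layer name and the single-value broadcast rule are assumptions for illustration):

def parse_per_layer(value, n_layers):
    """Expand '4_4_4' into [4, 4, 4]; broadcast a single '4' to all layers."""
    parts = [int(x) for x in str(value).split('_')]
    if len(parts) == 1:
        parts = parts * n_layers  # assumed: one value applies to every layer
    assert len(parts) == n_layers, 'need one value per encoder layer'
    return parts

print(parse_per_layer('0_0_0_0_0_0_0_0_0_0_0_0', n_layers=12))  # twelve zeros
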
Example no. 3
    def add_args(parser, args):
        """Add arguments."""
        group = parser.add_argument_group("Conformer encoder")
        if 'conv' in args.enc_type:
            parser = ConvEncoder.add_args(parser, args)
        # Transformer common
        if not hasattr(args, 'transformer_d_model'):
            group.add_argument('--transformer_d_model', type=int, default=256,
                               help='number of units in the MHA layer')
        if not hasattr(args, 'transformer_d_ff'):
            group.add_argument('--transformer_d_ff', type=int, default=2048,
                               help='number of units in the FFN layer')
        if not hasattr(args, 'transformer_d_ff_bottleneck_dim'):
            group.add_argument('--transformer_d_ff_bottleneck_dim', type=int, default=0,
                               help='bottleneck dimension in the FFN layer')
        if not hasattr(args, 'transformer_n_heads'):
            group.add_argument('--transformer_n_heads', type=int, default=4,
                               help='number of heads in the MHA layer')
        if not hasattr(args, 'transformer_layer_norm_eps'):
            group.add_argument('--transformer_layer_norm_eps', type=float, default=1e-12,
                               help='epsilon value for layer normalization')
        if not hasattr(args, 'transformer_ffn_activation'):
            group.add_argument('--transformer_ffn_activation', type=str, default='relu',
                               choices=['relu', 'gelu', 'gelu_accurate', 'glu', 'swish'],
                               help='nonlinear activation for the FFN layer')
        if not hasattr(args, 'transformer_param_init'):
            group.add_argument('--transformer_param_init', type=str, default='xavier_uniform',
                               choices=['xavier_uniform', 'pytorch'],
                               help='parameter initialization')
        # NOTE: These checks are important to avoid conflict with args in Transformer decoder

        # Conformer encoder specific
        group.add_argument('--transformer_enc_pe_type', type=str, default='relative',
                           choices=['relative'],
                           help='type of positional encoding for the Conformer encoder')
        group.add_argument('--conformer_kernel_size', type=int, default=32,
                           help='kernel size for depthwise convolution in convolution module for Conformer encoder layers')
        group.add_argument('--dropout_enc_layer', type=float, default=0.0,
                           help='LayerDrop probability for Conformer encoder layers')
        # streaming
        group.add_argument('--lc_chunk_size_left', type=int, default=0,
                           help='left chunk size for latency-controlled Conformer encoder')
        group.add_argument('--lc_chunk_size_current', type=int, default=0,
                           help='current chunk size (and hop size) for latency-controlled Conformer encoder')
        group.add_argument('--lc_chunk_size_right', type=int, default=0,
                           help='right chunk size for latency-controlled Conformer encoder')
        return parser
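
The hasattr guards (see the NOTE in the snippet) matter because argparse raises argparse.ArgumentError when the same option string is registered twice, which would otherwise happen when both the encoder and the decoder add the shared --transformer_* options to one parser. A minimal sketch of the failure mode and the guard:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--transformer_d_model', type=int, default=256)

# Registering the same option string again raises argparse.ArgumentError
# ("conflicting option string: --transformer_d_model"):
# parser.add_argument('--transformer_d_model', ...)  # would fail

# The guard from Example no. 3: if an earlier add_args call already put
# the attribute on args, skip re-registering the option.
args, _ = parser.parse_known_args([])
if not hasattr(args, 'transformer_d_model'):
    parser.add_argument('--transformer_d_model', type=int, default=256)
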
Example no. 4
 def add_args(parser, args):
     """Add arguments."""
     group = parser.add_argument_group("RNN encoder")
     parser = ConvEncoder.add_args(parser, args)
     group.add_argument('--enc_n_units',
                        type=int,
                        default=512,
                        help='number of units in each encoder RNN layer')
     group.add_argument('--enc_n_projs',
                        type=int,
                        default=0,
                        help='number of units in the projection layer '
                             'after each encoder RNN layer')
     group.add_argument(
         '--bidirectional_sum_fwd_bwd',
         type=strtobool,
         default=False,
         help='sum forward and backward RNN outputs for dimension reduction'
     )
     # streaming
     group.add_argument(
         '--lc_chunk_size_left',
         type=str,
         default="-1",
         help='left chunk size for latency-controlled RNN encoder')
     group.add_argument(
         '--lc_chunk_size_right',
         type=str,
         default="0",
         help='right chunk size for latency-controlled RNN encoder')
     group.add_argument('--cnn_lookahead',
                        type=strtobool,
                        default=True,
                        help='use lookahead frames in CNN layers (set to False to disable)')
     group.add_argument('--rsp_prob_enc',
                        type=float,
                        default=0.0,
                        help='probability for Random State Passing (RSP)')
     return parser
Example no. 5
 def add_args(parser, args):
     """Add arguments."""
     # The TDS encoder defines no options of its own; it reuses the
     # ConvEncoder arguments unchanged.
     # group = parser.add_argument_group("TDS encoder")
     parser = ConvEncoder.add_args(parser, args)
     return parser