import torch


def test_sequential():
    # Identity module with two inputs: checks that MultiSequential keeps
    # the full argument tuple intact through every submodule.
    class Masked(torch.nn.Module):
        def forward(self, x, m):
            return x, m

    from espnet.nets.pytorch_backend.transformer.repeat import MultiSequential

    f = MultiSequential(Masked(), Masked())
    x = torch.randn(2, 3)
    m = torch.randn(2, 3) > 0

    assert len(f(x, m)) == 2

    if torch.cuda.is_available():
        f = torch.nn.DataParallel(f)
        f.cuda()
        assert len(f(x.cuda(), m.cuda())) == 2
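# For context: MultiSequential differs from torch.nn.Sequential in that it
# threads all positional arguments through each submodule, which is what the
# test above relies on. The class below is a sketch paraphrasing
# espnet/nets/pytorch_backend/transformer/repeat.py, not a verbatim copy.
class MultiSequentialSketch(torch.nn.Sequential):
    """Multi-input multi-output torch.nn.Sequential (illustrative sketch)."""

    def forward(self, *args):
        # Each submodule receives the full argument tuple and must return a
        # tuple of the same arity for the next submodule.
        for module in self:
            args = module(*args)
        return args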
def build_blocks(
    net_part,
    idim,
    input_layer,
    blocks_arch,
    repeat_block=0,
    self_attn_type="self_attn",
    positional_encoding_type="abs_pos",
    positionwise_layer_type="linear",
    positionwise_activation_type="relu",
    conv_mod_activation_type="relu",
    dropout_rate_embed=0.0,
    padding_idx=-1,
):
    """Build blocks for transformer-based models.

    Args:
        net_part (str): Either 'encoder' or 'decoder'.
        idim (int): Dimension of inputs.
        input_layer (str): Input layer type.
        blocks_arch (list): List of blocks for the network part (type and parameters).
        repeat_block (int): Repeat the provided blocks N times if N > 1.
        self_attn_type (str): Self-attention module type.
        positional_encoding_type (str): Positional encoding layer type.
        positionwise_layer_type (str): Positionwise layer type.
        positionwise_activation_type (str): Positionwise activation type.
        conv_mod_activation_type (str): Convolutional module activation type.
        dropout_rate_embed (float): Dropout rate for embedding.
        padding_idx (int): Padding index for the embedding input layer (if specified).

    Returns:
        in_layer (torch.nn.*): Input layer.
        all_blocks (MultiSequential): All blocks for the network part.
        out_dim (int): Dimension of the last block's output.

    """
    fn_modules = []

    (
        input_layer,
        input_layer_odim,
        input_dropout_rate,
        input_pos_dropout_rate,
        out_dim,
    ) = check_and_prepare(net_part, blocks_arch, input_layer)

    pos_enc_class, self_attn_class = get_pos_enc_and_att_class(
        net_part, positional_encoding_type, self_attn_type
    )

    in_layer = build_input_layer(
        input_layer,
        idim,
        input_layer_odim,
        pos_enc_class,
        dropout_rate_embed,
        input_dropout_rate,
        input_pos_dropout_rate,
        padding_idx,
    )

    for i in range(len(blocks_arch)):
        # Block types are assumed to have been validated by check_and_prepare,
        # so exactly one branch below matches.
        block_type = blocks_arch[i]["type"]

        if block_type == "tdnn":
            module = build_tdnn_block(blocks_arch[i])
        elif block_type == "transformer":
            module = build_transformer_block(
                net_part,
                blocks_arch[i],
                positionwise_layer_type,
                positionwise_activation_type,
            )
        elif block_type == "conformer":
            module = build_conformer_block(
                blocks_arch[i],
                self_attn_class,
                pos_enc_class,
                positionwise_layer_type,
                positionwise_activation_type,
                conv_mod_activation_type,
            )
        elif block_type == "causal-conv1d":
            module = build_causal_conv1d_block(blocks_arch[i])

        fn_modules.append(module)

    if repeat_block > 1:
        fn_modules = fn_modules * repeat_block

    return in_layer, MultiSequential(*[fn() for fn in fn_modules]), out_dim
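# One subtlety worth noting: each build_*_block helper returns a zero-argument
# factory rather than a module, so fn_modules * repeat_block duplicates
# factories and fn() then instantiates a fresh module per entry; repeated
# blocks therefore do not share weights. A minimal sketch of the pattern (the
# factory shape is inferred from the call site above, not copied from espnet):
def build_linear_block_sketch(dim):
    # Defer construction so each repeat gets its own parameters.
    return lambda: torch.nn.Linear(dim, dim)


fn_modules_sketch = [build_linear_block_sketch(4)] * 3
layers = [fn() for fn in fn_modules_sketch]
assert layers[0] is not layers[1]  # distinct modules, no weight sharing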
from typing import Any, Dict, List, Tuple, Union


def build_blocks(
    net_part: str,
    idim: int,
    input_layer_type: str,
    blocks: List[Dict[str, Any]],
    repeat_block: int = 0,
    self_attn_type: str = "self_attn",
    positional_encoding_type: str = "abs_pos",
    positionwise_layer_type: str = "linear",
    positionwise_activation_type: str = "relu",
    conv_mod_activation_type: str = "relu",
    input_layer_dropout_rate: float = 0.0,
    input_layer_pos_enc_dropout_rate: float = 0.0,
    padding_idx: int = -1,
) -> Tuple[
    Union[Conv2dSubsampling, VGG2L, torch.nn.Sequential], MultiSequential, int, int
]:
    """Build custom model blocks.

    Args:
        net_part: Network part, either 'encoder' or 'decoder'.
        idim: Input dimension.
        input_layer_type: Input layer type.
        blocks: Blocks parameters for the network part.
        repeat_block: Number of times the provided blocks are repeated.
        self_attn_type: Self-attention module type.
        positional_encoding_type: Positional encoding layer type.
        positionwise_layer_type: Positionwise layer type.
        positionwise_activation_type: Positionwise activation type.
        conv_mod_activation_type: Convolutional module activation type.
        input_layer_dropout_rate: Dropout rate for input layer.
        input_layer_pos_enc_dropout_rate: Dropout rate for input layer pos. enc.
        padding_idx: Padding symbol ID for embedding layer.

    Returns:
        in_layer: Input layer.
        all_blocks: Encoder/Decoder network.
        out_dim: Network output dimension.
        conv_subsampling_factor: Subsampling factor in frontend CNN.

    """
    fn_modules = []

    pos_enc_class, self_attn_class = get_pos_enc_and_att_class(
        net_part, positional_encoding_type, self_attn_type
    )

    input_block = prepare_input_layer(
        input_layer_type,
        idim,
        blocks,
        input_layer_dropout_rate,
        input_layer_pos_enc_dropout_rate,
    )

    out_dim = prepare_body_model(net_part, blocks)

    input_layer, conv_subsampling_factor = build_input_layer(
        input_block,
        pos_enc_class,
        padding_idx,
    )

    for i in range(len(blocks)):
        # Block types are assumed to have been validated by prepare_body_model,
        # so exactly one branch below matches.
        block_type = blocks[i]["type"]

        if block_type in ("causal-conv1d", "conv1d"):
            module = build_conv1d_block(blocks[i], block_type)
        elif block_type == "conformer":
            module = build_conformer_block(
                blocks[i],
                self_attn_class,
                positionwise_layer_type,
                positionwise_activation_type,
                conv_mod_activation_type,
            )
        elif block_type == "transformer":
            module = build_transformer_block(
                net_part,
                blocks[i],
                positionwise_layer_type,
                positionwise_activation_type,
            )

        fn_modules.append(module)

    if repeat_block > 1:
        fn_modules = fn_modules * repeat_block

    return (
        input_layer,
        MultiSequential(*[fn() for fn in fn_modules]),
        out_dim,
        conv_subsampling_factor,
    )
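# Sketch of how a caller might use the conv_subsampling_factor return value to
# estimate sequence lengths after the frontend CNN. The floor division is an
# assumption about typical stride-2 subsampling frontends (e.g.
# Conv2dSubsampling); exact output lengths depend on kernel sizes and padding.
ilens = torch.tensor([120, 97])  # example input lengths, in frames
conv_subsampling_factor = 4  # e.g. two stride-2 convolutions
olens_approx = ilens // conv_subsampling_factor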
def build_blocks(
    net_part: str,
    idim: int,
    input_layer_type: str,
    blocks_arch: List,
    repeat_block: int = 0,
    self_attn_type: str = "self_attn",
    positional_encoding_type: str = "abs_pos",
    positionwise_layer_type: str = "linear",
    positionwise_activation_type: str = "relu",
    conv_mod_activation_type: str = "relu",
    dropout_rate_embed: float = 0.0,
    padding_idx: int = -1,
) -> Tuple[
    Union[Conv2dSubsampling, VGG2L, torch.nn.Sequential], MultiSequential, int, int
]:
    """Build blocks for a customizable architecture.

    Args:
        net_part: Network part, either 'encoder' or 'decoder'.
        idim: Input dimension.
        input_layer_type: Input layer type.
        blocks_arch: Block architecture (types and parameters) for the network part.
        repeat_block: Number of times blocks_arch is repeated.
        self_attn_type: Self-attention module type.
        positional_encoding_type: Positional encoding layer type.
        positionwise_layer_type: Positionwise layer type.
        positionwise_activation_type: Positionwise activation type.
        conv_mod_activation_type: Convolutional module activation type.
        dropout_rate_embed: Dropout rate for embedding layer.
        padding_idx: Padding symbol ID for embedding layer.

    Returns:
        in_layer: Input layer.
        all_blocks: (Encoder or Decoder) network.
        out_dim: Network output dimension.
        conv_subsampling_factor: Subsampling factor in frontend CNN.

    """
    fn_modules = []

    (
        input_layer_type,
        input_layer_odim,
        input_dropout_rate,
        input_pos_dropout_rate,
        out_dim,
    ) = check_and_prepare(net_part, blocks_arch, input_layer_type)

    pos_enc_class, self_attn_class = get_pos_enc_and_att_class(
        net_part, positional_encoding_type, self_attn_type
    )

    in_layer, conv_subsampling_factor = build_input_layer(
        input_layer_type,
        idim,
        input_layer_odim,
        pos_enc_class,
        dropout_rate_embed,
        input_dropout_rate,
        input_pos_dropout_rate,
        padding_idx,
    )

    for i in range(len(blocks_arch)):
        # Block types are assumed to have been validated by check_and_prepare,
        # so exactly one branch below matches.
        block_type = blocks_arch[i]["type"]

        if block_type == "tdnn":
            module = build_tdnn_block(blocks_arch[i])
        elif block_type == "transformer":
            module = build_transformer_block(
                net_part,
                blocks_arch[i],
                positionwise_layer_type,
                positionwise_activation_type,
            )
        elif block_type == "conformer":
            module = build_conformer_block(
                blocks_arch[i],
                self_attn_class,
                positionwise_layer_type,
                positionwise_activation_type,
                conv_mod_activation_type,
            )
        elif block_type == "causal-conv1d":
            module = build_causal_conv1d_block(blocks_arch[i])

        fn_modules.append(module)

    if repeat_block > 1:
        fn_modules = fn_modules * repeat_block

    return (
        in_layer,
        MultiSequential(*[fn() for fn in fn_modules]),
        out_dim,
        conv_subsampling_factor,
    )
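# Finally, a hypothetical end-to-end call to the variant above. Only the
# "type" key in each block dict is confirmed by the code; the remaining
# per-block keys (idim, odim, ctx_size, d_hidden, d_ff, heads) are assumed
# placeholders, and the real names should be taken from the corresponding
# build_*_block helpers.
blocks_arch = [
    {"type": "tdnn", "idim": 80, "odim": 256, "ctx_size": 3},  # keys assumed
    {"type": "transformer", "d_hidden": 256, "d_ff": 1024, "heads": 4},  # keys assumed
]

in_layer, body, out_dim, subsampling_factor = build_blocks(
    "encoder",
    80,  # idim, e.g. number of filterbank features
    "linear",  # input_layer_type
    blocks_arch,
    repeat_block=2,  # instantiate the two blocks above twice, without weight sharing
)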