def test_Encoder_forward_backward(
    input_layer,
    positionwise_layer_type,
    interctc_layer_idx,
    interctc_use_conditioning,
):
    """Run one forward pass through TransformerEncoder and backprop the sum.

    Covers both embedding and dense-feature inputs, and optionally exercises
    the intermediate-CTC path (with and without conditioning on CTC posteriors).
    """
    encoder = TransformerEncoder(
        20,
        output_size=40,
        input_layer=input_layer,
        positionwise_layer_type=positionwise_layer_type,
        interctc_layer_idx=interctc_layer_idx,
        interctc_use_conditioning=interctc_use_conditioning,
    )

    # "embed" expects integer token ids; other input layers take float features.
    if input_layer == "embed":
        inputs = torch.randint(0, 10, [2, 10])
    else:
        inputs = torch.randn(2, 10, 20, requires_grad=True)
    input_lengths = torch.LongTensor([10, 8])

    if not interctc_layer_idx:
        out, _, _ = encoder(inputs, input_lengths)
    else:
        aux_ctc = None
        if interctc_use_conditioning:
            # Conditioning feeds CTC posteriors back into the encoder, so a
            # CTC head and a matching projection layer must be attached.
            vocab_size = 5
            enc_dim = encoder.output_size()
            aux_ctc = CTC(odim=vocab_size, encoder_output_size=enc_dim)
            encoder.conditioning_layer = torch.nn.Linear(vocab_size, enc_dim)
        out, _, _ = encoder(inputs, input_lengths, ctc=aux_ctc)
        # With intermediate CTC enabled the first return is (hidden, intermediates).
        out = out[0]

    out.sum().backward()
def test_Encoder_output_size():
    """output_size() must report the size the encoder was configured with."""
    expected = 256
    enc = TransformerEncoder(20, output_size=expected)
    assert enc.output_size() == expected
hop_length=16, n_mels=32, ) diar_encoder = TransformerEncoder( input_layer="linear", num_blocks=1, linear_units=32, output_size=16, attention_heads=2, input_size=tcn_separator.output_dim + diar_frontend.output_size(), ) diar_decoder = LinearDecoder( num_spk=2, encoder_output_size=diar_encoder.output_size(), ) @pytest.mark.parametrize("label_aggregator", [label_aggregator]) @pytest.mark.parametrize("enh_encoder, enh_decoder", [(enh_encoder, enh_decoder)]) @pytest.mark.parametrize("enh_separator", [tcn_separator]) @pytest.mark.parametrize("mask_module", [mask_module]) @pytest.mark.parametrize("training", [True, False]) @pytest.mark.parametrize("loss_wrappers", [[fix_order_solver]]) @pytest.mark.parametrize("diar_frontend", [diar_frontend]) @pytest.mark.parametrize("diar_encoder, diar_decoder", [(diar_encoder, diar_decoder)]) def test_enh_diar_model( enh_encoder,
hop_length=16, n_mels=10, ) encoder = TransformerEncoder( input_size=10, input_layer="linear", num_blocks=1, linear_units=32, output_size=16, attention_heads=2, ) decoder = LinearDecoder( num_spk=2, encoder_output_size=encoder.output_size(), ) rnn_attractor = RnnAttractor(unit=16, encoder_output_size=encoder.output_size()) label_aggregator = LabelAggregate( win_length=32, hop_length=16, ) @pytest.mark.parametrize( "frontend, encoder, decoder, label_aggregator", [(frontend, encoder, decoder, label_aggregator)], )