コード例 #1
0
ファイル: jasper.py プロジェクト: yoks/NeMo
    def __init__(
        self,
        feat_in,
        num_classes,
        emb_sizes=(1024, 1024),
        pool_mode='xvector',
        init_mode="xavier_uniform",
    ):
        """Speaker decoder: stats pooling, embedding layers, and a classifier.

        Args:
            feat_in: feature dimension fed to the stats-pooling layer.
            num_classes: number of speaker classes for the final linear layer.
            emb_sizes: embedding layer widths — any iterable of ints or a
                comma-separated string such as "1024,1024". The default is a
                tuple (was a list) to avoid the shared mutable-default pitfall;
                behavior is unchanged since it is converted with list() below.
            pool_mode: pooling mode forwarded to StatsPoolLayer.
            init_mode: weight-init mode passed through to init_weights.
        """
        super().__init__()

        # Accept "a,b,c" strings as well as any iterable of sizes.
        if isinstance(emb_sizes, str):
            emb_sizes = emb_sizes.split(',')
        else:
            emb_sizes = list(emb_sizes)

        self._num_classes = num_classes
        self._pooling = StatsPoolLayer(feat_in=feat_in, pool_mode=pool_mode)
        # The pooling layer may expand the feature dimension; trust its value.
        self._feat_in = self._pooling.feat_in

        # Layer-width chain: pooled feature size followed by each embedding size.
        shapes = [self._feat_in]
        for size in emb_sizes:
            shapes.append(int(size))

        # One affine embedding layer per consecutive (in, out) width pair.
        emb_layers = []
        for shape_in, shape_out in zip(shapes[:-1], shapes[1:]):
            layer = self.affineLayer(shape_in, shape_out, learn_mean=False)
            emb_layers.append(layer)

        self.emb_layers = nn.ModuleList(emb_layers)

        self.final = nn.Linear(shapes[-1], self._num_classes)

        self.apply(lambda x: init_weights(x, mode=init_mode))
        self.to(self._device)
コード例 #2
0
ファイル: conv_asr.py プロジェクト: vadam5/NeMo
    def __init__(
        self,
        feat_in: int,
        num_classes: int,
        init_mode: Optional[str] = "xavier_uniform",
        return_logits: bool = True,
        pooling_type='avg',
    ):
        """Classification decoder: global pooling followed by one linear layer."""
        super().__init__()

        self._feat_in = feat_in
        self._return_logits = return_logits
        self._num_classes = num_classes

        # Collapse the time axis to a single frame with the requested pooling.
        pool_factories = {
            'avg': torch.nn.AdaptiveAvgPool1d,
            'max': torch.nn.AdaptiveMaxPool1d,
        }
        if pooling_type not in pool_factories:
            raise ValueError(
                'Pooling type chosen is not valid. Must be either `avg` or `max`'
            )
        self.pooling = pool_factories[pooling_type](1)

        # Single fully-connected projection from pooled features to class scores.
        self.decoder_layers = torch.nn.Sequential(
            torch.nn.Linear(self._feat_in, self._num_classes, bias=True)
        )
        self.apply(lambda x: init_weights(x, mode=init_mode))
コード例 #3
0
ファイル: jasper.py プロジェクト: yoks/NeMo
    def __init__(self,
                 *,
                 feat_in,
                 num_classes,
                 init_mode="xavier_uniform",
                 return_logits=True,
                 pooling_type='avg',
                 **kwargs):
        """Classification decoder head: global pooling plus a linear classifier."""
        TrainableNM.__init__(self, **kwargs)

        self._feat_in = feat_in
        self._return_logits = return_logits
        self._num_classes = num_classes

        # Pick the pooling class first, then instantiate it once below.
        if pooling_type == 'avg':
            pool_cls = nn.AdaptiveAvgPool1d
        elif pooling_type == 'max':
            pool_cls = nn.AdaptiveMaxPool1d
        else:
            raise ValueError(
                'Pooling type chosen is not valid. Must be either `avg` or `max`'
            )
        # Output length 1: reduces the whole time axis to a single frame.
        self.pooling = pool_cls(1)

        # Project pooled features onto class logits.
        self.decoder_layers = nn.Sequential(
            nn.Linear(self._feat_in, self._num_classes, bias=True))
        self.apply(lambda x: init_weights(x, mode=init_mode))
        self.to(self._device)
コード例 #4
0
ファイル: conv_asr.py プロジェクト: kssteven418/Q-ASR
    def __init__(self,
                 feat_in,
                 num_classes,
                 init_mode="xavier_uniform",
                 vocabulary=None,
                 quant_mode='none',
                 quant_bit=8):
        """Quantization-aware CTC decoder: a 1x1 conv wrapped in quant modules."""
        super().__init__()
        self.quant_mode = quant_mode

        # A supplied vocabulary must line up one-to-one with the classes.
        if vocabulary is not None:
            if num_classes != len(vocabulary):
                raise ValueError(
                    f"If vocabulary is specified, it's length should be equal to the num_classes. Instead got: num_classes={num_classes} and len(vocabulary)={len(vocabulary)}"
                )
            self.__vocabulary = vocabulary
        self._feat_in = feat_in
        # Add 1 for blank char
        self._num_classes = num_classes + 1

        # Activation quantizer applied ahead of the decoder projection.
        self.act = QuantAct(
            quant_bit, quant_mode=self.quant_mode, per_channel=False)

        # Build the float 1x1 conv, then transfer its parameters into the
        # quantized wrapper that actually runs in the decoder.
        conv = torch.nn.Conv1d(
            self._feat_in, self._num_classes, kernel_size=1, bias=True)
        qconv = QuantConv1d(
            quant_bit, bias_bit=32, quant_mode=self.quant_mode, per_channel=True)
        qconv.set_param(conv)

        self.decoder_layers = torch.nn.Sequential(qconv)
        self.apply(lambda x: init_weights(x, mode=init_mode))
コード例 #5
0
ファイル: conv_asr.py プロジェクト: wgfi110/NeMo
    def __init__(self, feat_in, num_classes, init_mode="xavier_uniform", vocabulary=None):
        """CTC decoder: one 1x1 convolution mapping features to classes + blank."""
        super().__init__()

        # A supplied vocabulary must match the class count exactly.
        if vocabulary is not None:
            if num_classes != len(vocabulary):
                raise ValueError(
                    f"If vocabulary is specified, it's length should be equal to the num_classes. Instead got: num_classes={num_classes} and len(vocabulary)={len(vocabulary)}"
                )
            self.__vocabulary = vocabulary
        self._feat_in = feat_in
        # Add 1 for blank char
        self._num_classes = num_classes + 1

        # Pointwise (kernel_size=1) conv acts as a per-frame linear projection.
        projection = torch.nn.Conv1d(
            self._feat_in, self._num_classes, kernel_size=1, bias=True)
        self.decoder_layers = torch.nn.Sequential(projection)
        self.apply(lambda x: init_weights(x, mode=init_mode))
コード例 #6
0
ファイル: conv_asr.py プロジェクト: vadam5/NeMo
    def __init__(
        self,
        feat_in,
        num_classes,
        emb_sizes=None,
        pool_mode='xvector',
        angular=False,
        init_mode="xavier_uniform",
    ):
        """Speaker decoder with optional angular-margin final layer.

        Args:
            feat_in: feature dimension fed to the stats-pooling layer.
            num_classes: number of speaker classes for the final linear layer.
            emb_sizes: embedding widths — None (defaults to [512, 512]), an
                int, a comma-separated string, or any iterable of ints.
            pool_mode: pooling mode forwarded to StatsPoolLayer.
            angular: if True, the final layer has no bias (angular-margin
                losses operate on normalized weights).
            init_mode: weight-init mode passed through to init_weights.
        """
        super().__init__()
        self.angular = angular
        self.emb_id = 2
        # Angular-margin training drops the bias on the classification layer.
        if self.angular:
            bias = False
        else:
            bias = True

        # Normalize emb_sizes into a list of sizes.
        # BUG FIX: previously any list/tuple argument fell through to the
        # default branch and was silently replaced by [512, 512]; sequences
        # are now honored. None/str/int inputs behave exactly as before.
        if emb_sizes is None:
            emb_sizes = [512, 512]
        elif isinstance(emb_sizes, str):
            emb_sizes = emb_sizes.split(',')
        elif isinstance(emb_sizes, int):
            emb_sizes = [emb_sizes]
        else:
            emb_sizes = list(emb_sizes)

        self.input_feat_in = feat_in
        self._num_classes = num_classes
        self._pooling = StatsPoolLayer(feat_in=feat_in, pool_mode=pool_mode)
        # The pooling layer may expand the feature dimension; trust its value.
        self._feat_in = self._pooling.feat_in

        # Layer-width chain: pooled feature size followed by each embedding size.
        shapes = [self._feat_in]
        for size in emb_sizes:
            shapes.append(int(size))

        # One affine embedding layer per consecutive (in, out) width pair.
        emb_layers = []
        for shape_in, shape_out in zip(shapes[:-1], shapes[1:]):
            layer = self.affineLayer(shape_in, shape_out, learn_mean=False)
            emb_layers.append(layer)

        self.emb_layers = nn.ModuleList(emb_layers)

        self.final = nn.Linear(shapes[-1], self._num_classes, bias=bias)

        self.apply(lambda x: init_weights(x, mode=init_mode))
コード例 #7
0
ファイル: jasper.py プロジェクト: yidong72/NeMo
    def __init__(self,
                 feat_in,
                 num_classes,
                 emb_sizes=(1024, 1024),
                 pool_mode='xvector',
                 init_mode="xavier_uniform"):
        """Speaker decoder with gram/super-vector pooling and two embeddings.

        Args:
            feat_in: base feature dimension entering the pooling layer.
            num_classes: number of speaker classes for the final linear layer.
            emb_sizes: widths of the two embedding layers; only the first two
                entries are used. The default is a tuple (was a list) to avoid
                the shared mutable-default pitfall; it is only indexed, so
                behavior is unchanged.
            pool_mode: 'gram', 'superVector', or anything else for plain
                mean+std stats pooling.
            init_mode: weight-init mode passed through to init_weights.
        """
        TrainableNM.__init__(self)
        self._feat_in = 0
        # Decode pool_mode into the two pooling flags.
        if pool_mode == 'gram':
            gram = True
            super_vector = False
        elif pool_mode == 'superVector':
            gram = True
            super_vector = True
        else:
            gram = False
            super_vector = False

        # Gram pooling yields feat_in^2 features; stats pooling yields
        # 2*feat_in (mean + std).
        if gram:
            self._feat_in += feat_in**2
        else:
            self._feat_in += 2 * feat_in

        # Super-vector mode concatenates the stats on top of the gram features.
        if super_vector and gram:
            self._feat_in += 2 * feat_in

        self._midEmbd1 = int(emb_sizes[0])  # Spkr Vector Embedding Shape
        self._midEmbd2 = int(emb_sizes[1]) if len(
            emb_sizes) > 1 else 0  # Spkr Vector Embedding Shape

        self._num_classes = num_classes
        self._pooling = StatsPoolLayer(gram=gram, super_vector=super_vector)

        self.mid1 = self.affineLayer(self._feat_in,
                                     self._midEmbd1,
                                     learn_mean=False)
        self.mid2 = self.affineLayer(self._midEmbd1,
                                     self._midEmbd2,
                                     learn_mean=False)
        self.final = nn.Linear(self._midEmbd2, self._num_classes)

        self.apply(lambda x: init_weights(x, mode=init_mode))
        self.to(self._device)
コード例 #8
0
ファイル: conv_asr.py プロジェクト: vadam5/NeMo
    def __init__(
        self,
        jasper,
        activation: str,
        feat_in: int,
        normalization_mode: str = "batch",
        residual_mode: str = "add",
        norm_groups: int = -1,
        conv_mask: bool = True,
        frame_splicing: int = 1,
        init_mode: Optional[str] = 'xavier_uniform',
        quantize: bool = False,
    ):
        """Build a convolutional ASR encoder from a list of JasperBlock configs.

        Args:
            jasper: per-block configuration, a list of dicts (or an OmegaConf
                ListConfig, converted below). Required keys per entry:
                'filters', 'repeat', 'kernel', 'stride', 'dilation',
                'dropout', 'residual'; the rest are read with defaults.
            activation: key into the jasper_activations registry.
            feat_in: input feature dimension (multiplied by frame_splicing).
            normalization_mode: normalization passed to every JasperBlock.
            residual_mode: default residual mode; each block config may
                override it via its own 'residual_mode' key.
            norm_groups: group count for group normalization (-1 = default).
            conv_mask: whether blocks apply masked convolutions.
            frame_splicing: number of spliced frames; scales feat_in.
            init_mode: weight-init mode passed through to init_weights.
            quantize: enable quantization-aware layers inside each block.
        """
        super().__init__()
        # Convert OmegaConf containers to plain lists/dicts so .get() works.
        if isinstance(jasper, ListConfig):
            jasper = OmegaConf.to_container(jasper)

        activation = jasper_activations[activation]()
        # Frame splicing stacks frames, multiplying the feature dimension.
        feat_in = feat_in * frame_splicing

        self._feat_in = feat_in

        # residual_panes accumulates widths for dense-residual connections;
        # blocks flagged 'residual_dense' receive the list built so far.
        residual_panes = []
        encoder_layers = []
        self.dense_residual = False
        for lcfg in jasper:
            dense_res = []
            if lcfg.get('residual_dense', False):
                residual_panes.append(feat_in)
                # NOTE: dense_res aliases residual_panes, so later appends are
                # visible to earlier blocks' configs — presumably intentional.
                dense_res = residual_panes
                self.dense_residual = True
            groups = lcfg.get('groups', 1)
            separable = lcfg.get('separable', False)
            heads = lcfg.get('heads', -1)
            # A block-level residual_mode overrides the constructor default
            # for this and all subsequent blocks (the local name is rebound).
            residual_mode = lcfg.get('residual_mode', residual_mode)
            se = lcfg.get('se', False)
            se_reduction_ratio = lcfg.get('se_reduction_ratio', 8)
            # Config key here is 'se_context_size' (unlike the sibling
            # encoder, which reads 'se_context_window').
            se_context_window = lcfg.get('se_context_size', -1)
            se_interpolation_mode = lcfg.get('se_interpolation_mode',
                                             'nearest')
            kernel_size_factor = lcfg.get('kernel_size_factor', 1.0)
            stride_last = lcfg.get('stride_last', False)
            future_context = lcfg.get('future_context', -1)
            encoder_layers.append(
                JasperBlock(
                    feat_in,
                    lcfg['filters'],
                    repeat=lcfg['repeat'],
                    kernel_size=lcfg['kernel'],
                    stride=lcfg['stride'],
                    dilation=lcfg['dilation'],
                    dropout=lcfg['dropout'],
                    residual=lcfg['residual'],
                    groups=groups,
                    separable=separable,
                    heads=heads,
                    residual_mode=residual_mode,
                    normalization=normalization_mode,
                    norm_groups=norm_groups,
                    activation=activation,
                    residual_panes=dense_res,
                    conv_mask=conv_mask,
                    se=se,
                    se_reduction_ratio=se_reduction_ratio,
                    se_context_window=se_context_window,
                    se_interpolation_mode=se_interpolation_mode,
                    kernel_size_factor=kernel_size_factor,
                    stride_last=stride_last,
                    future_context=future_context,
                    quantize=quantize,
                ))
            # Each block's output width feeds the next block.
            feat_in = lcfg['filters']

        self._feat_out = feat_in

        self.encoder = torch.nn.Sequential(*encoder_layers)
        self.apply(lambda x: init_weights(x, mode=init_mode))
コード例 #9
0
 def __init__(self, feat_in=128 * 8, emb_size=128, init_mode="xavier_uniform"):
     """Single linear projection from flattened features to an embedding."""
     super().__init__()
     self.emb_size = emb_size
     self.linear = nn.Linear(feat_in, emb_size)
     self.apply(lambda x: init_weights(x, mode=init_mode))
     self.to(self._device)
コード例 #10
0
ファイル: jasper.py プロジェクト: yoks/NeMo
    def __init__(
        self,
        jasper,
        activation,
        feat_in,
        normalization_mode="batch",
        residual_mode="add",
        norm_groups=-1,
        conv_mask=True,
        frame_splicing=1,
        init_mode='xavier_uniform',
    ):
        """Build a Jasper encoder as a sequence of JasperBlocks.

        Args:
            jasper: per-block configuration list of dicts. Required keys per
                entry: 'filters', 'repeat', 'kernel', 'stride', 'dilation',
                'dropout', 'residual'; the rest are read with defaults.
            activation: key into the jasper_activations registry.
            feat_in: input feature dimension (multiplied by frame_splicing).
            normalization_mode: normalization passed to every JasperBlock.
            residual_mode: default residual mode; each block config may
                override it via its own 'residual_mode' key.
            norm_groups: group count for group normalization (-1 = default).
            conv_mask: whether blocks apply masked convolutions.
            frame_splicing: number of spliced frames; scales feat_in.
            init_mode: weight-init mode passed through to init_weights.
        """
        super().__init__()

        activation = jasper_activations[activation]()
        # Frame splicing stacks frames, multiplying the feature dimension.
        feat_in = feat_in * frame_splicing

        self.__feat_in = feat_in

        # residual_panes accumulates widths for dense-residual connections;
        # blocks flagged 'residual_dense' receive the list built so far.
        residual_panes = []
        encoder_layers = []
        self.dense_residual = False
        for lcfg in jasper:
            dense_res = []
            if lcfg.get('residual_dense', False):
                residual_panes.append(feat_in)
                # NOTE: dense_res aliases residual_panes, so later appends are
                # visible to earlier blocks' configs — presumably intentional.
                dense_res = residual_panes
                self.dense_residual = True
            groups = lcfg.get('groups', 1)
            separable = lcfg.get('separable', False)
            heads = lcfg.get('heads', -1)
            # A block-level residual_mode overrides the constructor default
            # for this and all subsequent blocks (the local name is rebound).
            residual_mode = lcfg.get('residual_mode', residual_mode)
            se = lcfg.get('se', False)
            se_reduction_ratio = lcfg.get('se_reduction_ratio', 8)
            se_context_window = lcfg.get('se_context_window', -1)
            se_interpolation_mode = lcfg.get('se_interpolation_mode',
                                             'nearest')
            kernel_size_factor = lcfg.get('kernel_size_factor', 1.0)
            stride_last = lcfg.get('stride_last', False)
            encoder_layers.append(
                JasperBlock(
                    feat_in,
                    lcfg['filters'],
                    repeat=lcfg['repeat'],
                    kernel_size=lcfg['kernel'],
                    stride=lcfg['stride'],
                    dilation=lcfg['dilation'],
                    dropout=lcfg['dropout'],
                    residual=lcfg['residual'],
                    groups=groups,
                    separable=separable,
                    heads=heads,
                    residual_mode=residual_mode,
                    normalization=normalization_mode,
                    norm_groups=norm_groups,
                    activation=activation,
                    residual_panes=dense_res,
                    conv_mask=conv_mask,
                    se=se,
                    se_reduction_ratio=se_reduction_ratio,
                    se_context_window=se_context_window,
                    se_interpolation_mode=se_interpolation_mode,
                    kernel_size_factor=kernel_size_factor,
                    stride_last=stride_last,
                ))
            # Each block's output width feeds the next block.
            feat_in = lcfg['filters']

        self.encoder = nn.Sequential(*encoder_layers)
        self.apply(lambda x: init_weights(x, mode=init_mode))
        self.to(self._device)