def make_aggregator():
    # `cfg`, `embed`, and `activation` are captured from the enclosing scope.
    if cfg.aggregator == "cnn":
        # conv_aggregator_layers is a string such as "[(512, 3, 1), ...]";
        # each tuple is (dim, kernel_size, stride).
        agg_layers = eval(cfg.conv_aggregator_layers)
        agg_dim = agg_layers[-1][0]
        feature_aggregator = ConvAggegator(
            conv_layers=agg_layers,
            embed=embed,
            dropout=cfg.dropout,
            skip_connections=cfg.skip_connections_agg,
            residual_scale=cfg.residual_scale,
            non_affine_group_norm=cfg.non_affine_group_norm,
            conv_bias=not cfg.no_conv_bias,
            zero_pad=cfg.agg_zero_pad,
            activation=activation,
        )
    elif cfg.aggregator == "gru":
        agg_dim = cfg.gru_dim
        feature_aggregator = nn.Sequential(
            TransposeLast(),
            nn.GRU(
                input_size=embed,
                hidden_size=agg_dim,
                num_layers=1,
                # note: with num_layers=1, GRU-internal dropout is a no-op
                dropout=cfg.dropout,
            ),
            TransposeLast(deconstruct_idx=0),
        )
    else:
        raise Exception("unknown aggregator type " + cfg.aggregator)
    return feature_aggregator, agg_dim
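# A minimal, self-contained sketch (not from the source) of the "gru" branch
# above: the feature extractor emits (batch, channels, time), TransposeLast
# swaps the last two dims before the GRU, and TransposeLast(deconstruct_idx=0)
# picks the output tensor out of the GRU's (output, h_n) tuple and swaps back.
# The `embed` and `agg_dim` values here are illustrative, not from any config;
# it assumes TransposeLast is importable from fairseq.modules.
import torch
import torch.nn as nn
from fairseq.modules import TransposeLast

embed, agg_dim = 512, 256
aggregator = nn.Sequential(
    TransposeLast(),  # (B, C, T) -> (B, T, C)
    nn.GRU(input_size=embed, hidden_size=agg_dim, num_layers=1),
    TransposeLast(deconstruct_idx=0),  # tuple[0], then back to (B, agg_dim, T)
)
features = torch.randn(2, embed, 100)
aggregated = aggregator(features)
assert aggregated.shape == (2, agg_dim, 100)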
def __init__(self, input_dim, output_dim, cfg: Wav2vec_UConfig):
    super().__init__()

    self.cfg = cfg
    self.output_dim = output_dim
    self.stride = cfg.generator_stride
    self.dropout = nn.Dropout(cfg.generator_dropout)
    self.batch_norm = cfg.generator_batch_norm != 0
    self.residual = cfg.generator_residual

    padding = (
        cfg.generator_kernel // 2 if cfg.generator_pad < 0 else cfg.generator_pad
    )
    self.proj = nn.Sequential(
        TransposeLast(),
        nn.Conv1d(
            input_dim,
            output_dim,
            kernel_size=cfg.generator_kernel,
            stride=cfg.generator_stride,
            dilation=cfg.generator_dilation,
            padding=padding,
            bias=cfg.generator_bias,
        ),
        TransposeLast(),
    )

    if self.batch_norm:
        # generator_batch_norm doubles as the initial scale of the affine weight
        self.bn = nn.BatchNorm1d(input_dim)
        self.bn.weight.data.fill_(cfg.generator_batch_norm)
    if self.residual:
        self.in_proj = nn.Linear(input_dim, input_dim)
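# A hedged sketch (an assumption, not the source's actual forward, which is
# not shown here) of how the modules built above would typically compose:
# normalize, optionally add a projected residual, then apply the strided
# Conv1d projection. The TransposeLast pair inside self.proj implies the
# generator consumes (batch, time, channels) features.
def forward_sketch(self, dense_x):
    if self.batch_norm:
        # BatchNorm1d wants (B, C, T); dense_x is (B, T, C)
        dense_x = self.bn(dense_x.transpose(1, 2)).transpose(1, 2)
    if self.residual:
        dense_x = dense_x + self.in_proj(self.dropout(dense_x))
    dense_x = self.dropout(dense_x)
    return self.proj(dense_x)  # (B, T', output_dim)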
def norm_block(is_layer_norm, dim, affine=True):
    if is_layer_norm:
        mod = nn.Sequential(
            TransposeLast(),
            Fp32LayerNorm(dim, elementwise_affine=affine),
            TransposeLast(),
        )
    else:
        mod = Fp32GroupNorm(1, dim, affine=affine)
    return mod
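# A quick shape sketch (illustrative sizes; assumes the imports used by
# norm_block above are in scope). Fp32LayerNorm acts on the last dim, hence
# the TransposeLast pair in the layer-norm path; Fp32GroupNorm(1, dim) instead
# normalizes each sample over all channels and time steps jointly, so no
# transpose is needed for a (B, C, T) tensor.
import torch

x = torch.randn(4, 64, 50)  # (batch, channels, time)
ln = norm_block(is_layer_norm=True, dim=64)
gn = norm_block(is_layer_norm=False, dim=64)
assert ln(x).shape == gn(x).shape == x.shape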
def block(
    n_in,
    n_out,
    k,
    stride,
    padding,
    is_layer_norm=False,
    is_group_norm=False,
    conv_bias=False,
):
    # `dropout` is captured from the enclosing scope.
    def make_conv():
        assert len(k) == 2
        conv = nn.Conv2d(
            n_in, n_out, k, stride=stride, bias=conv_bias, padding=padding
        )
        nn.init.kaiming_normal_(conv.weight)
        return conv

    assert not (
        is_layer_norm and is_group_norm
    ), "layer norm and group norm are exclusive"

    if is_layer_norm:
        assert False  # JCh: didn't check the shapes
        return nn.Sequential(
            make_conv(),
            nn.Dropout(p=dropout),
            nn.Sequential(
                TransposeLast(),
                Fp32LayerNorm(n_out, elementwise_affine=True),
                TransposeLast(),
            ),
            nn.GELU(),
        )
    elif is_group_norm:
        return nn.Sequential(
            make_conv(),
            nn.Dropout(p=dropout),
            Fp32GroupNorm(n_out, n_out, affine=True),
            nn.GELU(),
        )
    else:
        return nn.Sequential(make_conv(), nn.Dropout(p=dropout), nn.GELU())
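# Illustrative call (sizes are made up) exercising the group-norm path on a
# 2D input. Since `dropout` is a free variable in `block` above, this sketch
# assumes it is bound in the scope where `block` was defined.
import torch

dropout = 0.1  # stands in for the enclosing scope's value
layer = block(1, 32, k=(3, 3), stride=(2, 2), padding=1, is_group_norm=True)
spec = torch.randn(8, 1, 80, 100)  # (batch, channels, freq, time)
out = layer(spec)
assert out.shape == (8, 32, 40, 50)  # both spatial dims halved by stride 2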
def make_conv_block(e, k, g, l):
    return nn.Sequential(
        *[
            nn.Sequential(
                nn.Conv1d(
                    e,
                    e,
                    kernel_size=k,
                    padding=k // 2,
                    groups=g,
                ),
                SamePad(k),
                TransposeLast(),
                LayerNorm(e, elementwise_affine=False),
                TransposeLast(),
                nn.GELU(),
            )
            for _ in range(l)
        ]
    )
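# Shape-preservation sketch (hypothetical sizes): padding=k // 2 combined with
# SamePad(k) keeps the time dimension intact for odd and even kernels alike,
# because SamePad trims the one extra frame that even-kernel padding produces.
import torch

blk = make_conv_block(e=256, k=4, g=1, l=2)
x = torch.randn(2, 256, 99)  # (batch, channels, time)
assert blk(x).shape == x.shape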
def __init__(self, input_dim, output_dim, cfg: Wav2vec_UConfig):
    super().__init__()

    self.cfg = cfg
    self.output_dim = output_dim
    self.stride = cfg.generator_stride
    self.dropout = nn.Dropout(cfg.generator_dropout)

    padding = cfg.generator_kernel // 2
    self.proj = nn.Sequential(
        TransposeLast(),
        nn.Conv1d(
            input_dim,
            output_dim,
            kernel_size=cfg.generator_kernel,
            stride=cfg.generator_stride,
            dilation=cfg.generator_dilation,
            padding=padding,
            bias=cfg.generator_bias,
        ),
        TransposeLast(),
    )
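# A minimal shape sketch of the projection stack above (all sizes are
# illustrative, not from a real config): the TransposeLast pair lets the
# generator consume (B, T, C) features while the Conv1d itself runs in
# (B, C, T) layout, and stride downsamples the time axis.
import torch
import torch.nn as nn
from fairseq.modules import TransposeLast

proj = nn.Sequential(
    TransposeLast(),
    nn.Conv1d(512, 40, kernel_size=4, stride=2, dilation=1, padding=2, bias=False),
    TransposeLast(),
)
x = torch.randn(2, 100, 512)  # (batch, time, input_dim)
y = proj(x)
assert y.shape == (2, 51, 40)  # time roughly halved by stride=2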
def conv_1d_block(
    n_in,
    n_out,
    k,
    stride,
    dropout=0.0,
    is_layer_norm=False,
    is_group_norm=False,
    conv_bias=False,
):
    def make_conv():
        conv = nn.Conv1d(n_in, n_out, k, stride=stride, bias=conv_bias)
        nn.init.kaiming_normal_(conv.weight)
        return conv

    assert not (
        is_layer_norm and is_group_norm
    ), "layer norm and group norm are exclusive"

    if is_layer_norm:
        return nn.Sequential(
            make_conv(),
            nn.Dropout(p=dropout),
            nn.Sequential(
                TransposeLast(),
                Fp32LayerNorm(n_out, elementwise_affine=True),
                TransposeLast(),
            ),
            nn.GELU(),
        )
    elif is_group_norm:
        return nn.Sequential(
            make_conv(),
            nn.Dropout(p=dropout),
            Fp32GroupNorm(n_out, n_out, affine=True),
            nn.GELU(),
        )
    else:
        return nn.Sequential(make_conv(), nn.Dropout(p=dropout), nn.GELU())
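# Usage sketch (sizes are illustrative, in the style of a wav2vec 2.0 first
# feature-extractor layer; assumes the imports used above are in scope): an
# un-padded strided Conv1d shrinks time as floor((T - k) / stride) + 1, so a
# kernel-10 stride-5 group-norm block maps 1 s of 16 kHz audio to 3199 frames.
import torch

layer = conv_1d_block(1, 512, k=10, stride=5, dropout=0.0, is_group_norm=True)
wav = torch.randn(4, 1, 16000)  # (batch, 1, samples)
feats = layer(wav)
assert feats.shape == (4, 512, 3199)  # (16000 - 10) // 5 + 1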