Code Example #1
    def __init__(self,
                 n_fft=2048,
                 hop_length=None,
                 win_length=None,
                 window='hann',
                 center=True,
                 pad_mode='reflect',
                 freeze_parameters=True):
        """Calculate spectrogram using pytorch. The STFT is implemented with 
        Conv1d. The function has the same output of librosa.core.stft
        """
        super(ISTFT, self).__init__()

        assert pad_mode in ['constant', 'reflect']

        # By default, use the entire frame
        if win_length is None:
            win_length = n_fft

        # Set the default hop, if it's not already specified
        if hop_length is None:
            hop_length = int(win_length // 4)

        self.n_fft = n_fft
        self.hop_length = hop_length
        self.win_length = win_length
        self.window = window
        self.center = center
        self.pad_mode = pad_mode

        ifft_window = librosa.filters.get_window(window,
                                                 win_length,
                                                 fftbins=True)

        # Pad the window out to n_fft size
        ifft_window = librosa.util.pad_center(ifft_window, size=n_fft)

        # DFT & IDFT matrix
        self.W = self.idft_matrix(n_fft) / n_fft

        self.conv_real = nn.Conv1d(in_channels=n_fft,
                                   out_channels=n_fft,
                                   kernel_size=1,
                                   stride=1,
                                   padding=0,
                                   dilation=1,
                                   groups=1,
                                   bias=False)

        self.conv_imag = nn.Conv1d(in_channels=n_fft,
                                   out_channels=n_fft,
                                   kernel_size=1,
                                   stride=1,
                                   padding=0,
                                   dilation=1,
                                   groups=1,
                                   bias=False)

        self.conv_real.weight.data = torch.Tensor(
            np.real(self.W * ifft_window[None, :]).T)[:, :, None]
        # (n_fft, n_fft, 1)

        self.conv_imag.weight.data = torch.Tensor(
            np.imag(self.W * ifft_window[None, :]).T)[:, :, None]
        # (n_fft, n_fft, 1)

        if freeze_parameters:
            for param in self.parameters():
                param.requires_grad = False
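Why a kernel_size=1 Conv1d can implement the IDFT: with in_channels = out_channels = n_fft, the convolution is a fixed matrix multiply applied independently to every frame. A minimal, self-contained sketch (illustrative names, not from the project above) checking this against numpy:

import numpy as np
import torch
import torch.nn as nn

n = 8
# IDFT matrix, including the 1/n factor that np.fft.ifft also applies
W = np.exp(2j * np.pi * np.outer(np.arange(n), np.arange(n)) / n) / n

conv = nn.Conv1d(n, n, kernel_size=1, bias=False)
conv.weight.data = torch.tensor(np.real(W), dtype=torch.float32)[:, :, None]

x = torch.randn(1, n, 5)                        # (batch, n_fft, frames)
y = conv(x)                                     # Re(IDFT) of each frame
ref = np.real(np.fft.ifft(x[0].numpy(), axis=0))
assert np.allclose(y[0].detach().numpy(), ref, atol=1e-5)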
Code Example #2
File: frontend.py  Project: dvisockas/leaf
  def __init__(
      self,
      learn_pooling: bool = True,
      learn_filters: bool = True,
      conv1d_cls=convolution.GaborConv1D,
      activation=activations.SquaredModulus(),
      pooling_cls=pooling.GaussianLowpass,
      n_filters: int = 40,
      sample_rate: int = 16000,
      window_len: float = 25.,
      window_stride: float = 10.,
      compression_fn = postprocessing.PCEN(
          alpha=0.96,
          smooth_coef=0.04,
          delta=2.0,
          floor=1e-12,
          trainable=True,
          learn_smooth_coef=True,
          per_channel_smooth_coef=True),
      preemp: bool = False,
      preemp_init = initializers.PreempInit,
      complex_conv_init = initializers.GaborInit(
          sample_rate=16000, min_freq=60.0, max_freq=7800.0),
      pooling_init = initializers.ConstInit(0.4),
      regularizer_fn = None,
      mean_var_norm: bool = False,
      spec_augment: bool = False):
    super(Leaf, self).__init__()

    window_size = int(sample_rate * window_len // 1000 + 1)
    window_stride = int(sample_rate * window_stride // 1000)

    #TODO: All tf 'SAME' paddings are set to 0, check if it's ok
    if preemp:
      self._preemp_conv = nn.Conv1d(
        in_channels=1,
        out_channels=1,
        kernel_size=2,
        stride=1,
        padding=0,
        bias=False,
      )

      for parameter in self._preemp_conv.parameters():
        parameter.requires_grad = learn_filters

    self._complex_conv = conv1d_cls(
        filters=2 * n_filters,
        kernel_size=window_size,
        strides=1,
        padding=0,
        use_bias=False,
        #input_shape=(None, None, 1),
        kernel_initializer=complex_conv_init,
        kernel_regularizer=regularizer_fn if learn_filters else None,
        trainable=learn_filters)

    self._activation = activation
    self._pooling = pooling_cls(
        kernel_size=window_size,
        strides=window_stride,
        padding=0,
        use_bias=False,
        kernel_initializer=pooling_init,
        kernel_regularizer=regularizer_fn if learn_pooling else None,
        trainable=learn_pooling)

    if mean_var_norm:
      self._instance_norm = nn.InstanceNorm1d(n_filters, affine=True, eps=1e-6)

    self._compress_fn = compression_fn if compression_fn else torch.clone
    self._spec_augment_fn = postprocessing.SpecAugment() if spec_augment else torch.clone

    self._preemp = preemp
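The kernel_size=2 _preemp_conv above is a pre-emphasis filter in disguise. A hedged sketch of the weights that PreempInit presumably installs (the 0.97 coefficient is an assumption, not taken from the project):

import torch
import torch.nn as nn

preemp = nn.Conv1d(1, 1, kernel_size=2, bias=False)
with torch.no_grad():
    # y[t] = x[t + 1] - 0.97 * x[t]; weight shape is (out, in, kernel)
    preemp.weight.copy_(torch.tensor([[[-0.97, 1.0]]]))

x = torch.randn(1, 1, 16000)  # (batch, channel, samples)
y = preemp(x)                 # one sample shorter, since padding=0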
Code Example #3
File: model.py  Project: CookiePPP/VocoderComparisons
 def __init__(self, input_size, output_size):
   super().__init__()
   self.encoding = PositionalEncoding(input_size)
   self.input_conv = nn.Conv1d(input_size, input_size, 3, padding=1)
   self.output_conv = nn.Conv1d(input_size, output_size * 2, 3, padding=1)
   self.reset_parameters()
Code Example #4
 def __init__(self, input_size, use_stn=False, use_attention=False):
     super(PPG2ECG, self).__init__()
     self.use_stn = use_stn
     self.use_attention = use_attention
     # build main transformer
     self.main = nn.Sequential(
         # encoder
         nn.Conv1d(1, 32, kernel_size=31, stride=2, padding=15),
         nn.PReLU(32),
         nn.Conv1d(32, 64, 31, 1, 15),
         nn.PReLU(64),
         nn.Conv1d(64, 128, 31, 2, 15),
         nn.PReLU(128),
         nn.Conv1d(128, 256, 31, 1, 15),
         nn.PReLU(256),
         nn.Conv1d(256, 512, 31, 2, 15),
         nn.PReLU(512),
         # decoder
         nn.ConvTranspose1d(
             512, 256, kernel_size=31, stride=2,
             padding=15, output_padding=1),
         nn.PReLU(256),
         nn.ConvTranspose1d(256, 128, 31, 1, 15),
         nn.PReLU(128),
         nn.ConvTranspose1d(128, 64, 31, 2, 15, 1),
         nn.PReLU(64),
         nn.ConvTranspose1d(64, 32, 31, 1, 15),
         nn.PReLU(32),
         nn.ConvTranspose1d(32, 1, 31, 2, 15, 1),
         nn.Tanh(),
     )
     # build stn (optional)
     if use_stn:
         # pylint: disable=not-callable
         self.restriction = torch.tensor(
             [1, 0, 0, 0], dtype=torch.float, requires_grad=False)
         self.register_buffer('restriction_const', self.restriction)
         self.stn_conv = nn.Sequential(
             nn.Conv1d(
                 in_channels=1, out_channels=8, kernel_size=7, stride=1),
             nn.MaxPool1d(kernel_size=2, stride=2),
             nn.Conv1d(
                 in_channels=8, out_channels=10, kernel_size=5, stride=1),
             nn.MaxPool1d(kernel_size=2, stride=2),
         )
         n_stn_conv = self.get_stn_conv_out(input_size)
         self.stn_fc = nn.Sequential(
             Flatten(),
             nn.Linear(n_stn_conv, 32),
             nn.ReLU(True),
             nn.Linear(32, 4)
         )
         self.stn_fc[3].weight.data.zero_()
         self.stn_fc[3].bias.data = torch.FloatTensor([1, 0, 1, 0])
     # build attention network (optional)
     if use_attention:
         self.attn = nn.Sequential(
             nn.Linear(input_size, input_size),
             nn.ReLU(),
             nn.Linear(input_size, input_size)
         )
         self.attn_len = input_size
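Why the stride-2 transposed convolutions use output_padding=1: a stride-2 Conv1d floors the length formula, so the decoder needs one extra output sample to mirror the encoder exactly. A quick check:

import torch
import torch.nn as nn

down = nn.Conv1d(1, 1, kernel_size=31, stride=2, padding=15)
up = nn.ConvTranspose1d(1, 1, kernel_size=31, stride=2,
                        padding=15, output_padding=1)

x = torch.randn(1, 1, 1000)
print(down(x).shape)      # torch.Size([1, 1, 500])
print(up(down(x)).shape)  # torch.Size([1, 1, 1000]), length restored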
Code Example #5
 def __init__(self, in_c, out_c, ks=3, stride=1, padding=1, bias=False):
     super(Conv, self).__init__()
     self.conv = nn.Conv1d(in_channels=in_c, out_channels=out_c, kernel_size=ks, stride=stride, bias=bias, padding=padding)
     self.act = nn.LeakyReLU()
     self.in_size = in_c
     self.out_size = out_c
Code Example #6
 def __init__(self):
     super(ImageNet, self).__init__()
     self.conv1 = nn.Conv1d(in_channels=51, out_channels=1, kernel_size=1)
     self.fc1 = nn.Linear(2053, 128)
     self.tanh = torch.nn.Tanh()
Code Example #7
File: model.py  Project: ArthLeu/beta-capsnet
 def __init__(self, latent_caps_size, latent_vec_size, num_classes):
     super(CapsSegNet, self).__init__()
     self.num_classes = num_classes
     self.latent_caps_size = latent_caps_size
     self.seg_convs = nn.Conv1d(latent_vec_size + 16, num_classes, 1)
Code Example #8
File: model.py  Project: Xirzag/demucs
    def __init__(self,
                 sources=4,
                 audio_channels=2,
                 channels=64,
                 depth=6,
                 rewrite=True,
                 glu=True,
                 upsample=False,
                 rescale=0.1,
                 kernel_size=8,
                 stride=4,
                 growth=2.,
                 lstm_layers=2,
                 context=3):
        """
        Args:
            sources (int): number of sources to separate
            audio_channels (int): stereo or mono
            channels (int): first convolution channels
            depth (int): number of encoder/decoder layers
            rewrite (bool): add 1x1 convolution to each encoder layer
                and a convolution to each decoder layer.
                For the decoder layer, `context` gives the kernel size.
            glu (bool): use glu instead of ReLU
            upsample (bool): use linear upsampling with convolutions
                Wave-U-Net style, instead of transposed convolutions
            rescale (float): rescale initial weights of convolutions
                to get their standard deviation closer to `rescale`
            kernel_size (int): kernel size for convolutions
            stride (int): stride for convolutions
            growth (float): multiply (resp divide) number of channels by that
                for each layer of the encoder (resp decoder)
            lstm_layers (int): number of lstm layers, 0 = no lstm
            context (int): kernel size of the convolution in the
                decoder before the transposed convolution. If > 1,
                will provide some context from neighboring time
                steps.
        """

        super().__init__()

        # Debug print of the hyperparameters this model was built with
        names = [
            'sources', 'audio_channels', 'channels', 'depth', 'rewrite', 'glu',
            'upsample', 'rescale', 'kernel_size', 'stride', 'growth',
            'lstm_layers', 'context'
        ]
        values = [
            sources, audio_channels, channels, depth, rewrite, glu, upsample,
            rescale, kernel_size, stride, growth, lstm_layers, context
        ]
        for name, value in zip(names, values):
            print(name, value, '\n')

        self.audio_channels = audio_channels
        self.sources = sources
        self.kernel_size = kernel_size
        self.context = context
        self.stride = stride
        self.depth = depth
        self.upsample = upsample
        self.channels = channels

        self.encoder = nn.ModuleList()
        self.decoder = nn.ModuleList()

        self.final = None
        if upsample:
            self.final = nn.Conv1d(channels + audio_channels,
                                   sources * audio_channels, 1)
            stride = 1

        if glu:
            activation = nn.GLU(dim=1)
            ch_scale = 2
        else:
            activation = nn.ReLU()
            ch_scale = 1
        in_channels = audio_channels
        for index in range(depth):
            encode = []
            encode += [
                nn.Conv1d(in_channels, channels, kernel_size, stride),
                nn.ReLU()
            ]
            if rewrite:
                encode += [
                    nn.Conv1d(channels, ch_scale * channels, 1), activation
                ]
            self.encoder.append(nn.Sequential(*encode))

            decode = []
            if index > 0:
                out_channels = in_channels
            else:
                if upsample:
                    out_channels = channels
                else:
                    out_channels = sources * audio_channels
            if rewrite:
                decode += [
                    nn.Conv1d(channels, ch_scale * channels, context),
                    activation
                ]
            if upsample:
                decode += [
                    nn.Conv1d(channels, out_channels, kernel_size, stride=1),
                ]
            else:
                decode += [
                    nn.ConvTranspose1d(channels, out_channels, kernel_size,
                                       stride)
                ]
            if index > 0:
                decode.append(nn.ReLU())
            self.decoder.insert(0, nn.Sequential(*decode))
            in_channels = channels
            channels = int(growth * channels)

        channels = in_channels

        if lstm_layers:
            self.lstm = BLSTM(channels, lstm_layers)
        else:
            self.lstm = None

        if rescale:
            rescale_module(self, reference=rescale)
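The ch_scale = 2 above exists because nn.GLU(dim=1) consumes half of its input channels as gates:

import torch
import torch.nn as nn

glu = nn.GLU(dim=1)
x = torch.randn(2, 128, 100)  # e.g. 2 * channels from the 1x1 rewrite conv
print(glu(x).shape)           # torch.Size([2, 64, 100])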
Code Example #9
File: wav2letter.py  Project: yoyololicon/audio
    def __init__(self,
                 num_classes: int = 40,
                 input_type: str = "waveform",
                 num_features: int = 1) -> None:
        super(Wav2Letter, self).__init__()

        acoustic_num_features = 250 if input_type == "waveform" else num_features
        acoustic_model = nn.Sequential(
            nn.Conv1d(in_channels=acoustic_num_features,
                      out_channels=250,
                      kernel_size=48,
                      stride=2,
                      padding=23), nn.ReLU(inplace=True),
            nn.Conv1d(in_channels=250,
                      out_channels=250,
                      kernel_size=7,
                      stride=1,
                      padding=3), nn.ReLU(inplace=True),
            nn.Conv1d(in_channels=250,
                      out_channels=250,
                      kernel_size=7,
                      stride=1,
                      padding=3), nn.ReLU(inplace=True),
            nn.Conv1d(in_channels=250,
                      out_channels=250,
                      kernel_size=7,
                      stride=1,
                      padding=3), nn.ReLU(inplace=True),
            nn.Conv1d(in_channels=250,
                      out_channels=250,
                      kernel_size=7,
                      stride=1,
                      padding=3), nn.ReLU(inplace=True),
            nn.Conv1d(in_channels=250,
                      out_channels=250,
                      kernel_size=7,
                      stride=1,
                      padding=3), nn.ReLU(inplace=True),
            nn.Conv1d(in_channels=250,
                      out_channels=250,
                      kernel_size=7,
                      stride=1,
                      padding=3), nn.ReLU(inplace=True),
            nn.Conv1d(in_channels=250,
                      out_channels=250,
                      kernel_size=7,
                      stride=1,
                      padding=3), nn.ReLU(inplace=True),
            nn.Conv1d(in_channels=250,
                      out_channels=2000,
                      kernel_size=32,
                      stride=1,
                      padding=16), nn.ReLU(inplace=True),
            nn.Conv1d(in_channels=2000,
                      out_channels=2000,
                      kernel_size=1,
                      stride=1,
                      padding=0), nn.ReLU(inplace=True),
            nn.Conv1d(in_channels=2000,
                      out_channels=num_classes,
                      kernel_size=1,
                      stride=1,
                      padding=0), nn.ReLU(inplace=True))

        if input_type == "waveform":
            waveform_model = nn.Sequential(
                nn.Conv1d(in_channels=num_features,
                          out_channels=250,
                          kernel_size=250,
                          stride=160,
                          padding=45), nn.ReLU(inplace=True))
            self.acoustic_model = nn.Sequential(waveform_model, acoustic_model)

        if input_type in ["power_spectrum", "mfcc"]:
            self.acoustic_model = acoustic_model
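The waveform front end (kernel_size=250, stride=160) frames raw 16 kHz audio at one step per 10 ms, much like a spectrogram hop:

import torch
import torch.nn as nn

frontend = nn.Conv1d(1, 250, kernel_size=250, stride=160, padding=45)
x = torch.randn(1, 1, 16000)  # one second of 16 kHz audio
print(frontend(x).shape)      # torch.Size([1, 250, 100])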
Code Example #10
File: model.py  Project: monster912/CycleGAN-VC3
    def __init__(self):
        super(Generator, self).__init__()

        # 2D Conv Layer
        self.conv1 = nn.Conv2d(
            in_channels=1,  # TODO 1 ?
            out_channels=128,
            kernel_size=(5, 15),
            stride=(1, 1),
            padding=(2, 7))

        self.conv1_gates = nn.Conv2d(
            in_channels=1,  # TODO 1 ?
            out_channels=128,
            kernel_size=(5, 15),
            stride=1,
            padding=(2, 7))

        # 2D Downsample Layer
        self.downSample1 = downSample_Generator(in_channels=128,
                                                out_channels=256,
                                                kernel_size=5,
                                                stride=2,
                                                padding=2)

        self.downSample2 = downSample_Generator(in_channels=256,
                                                out_channels=256,
                                                kernel_size=5,
                                                stride=2,
                                                padding=2)

        # 2D -> 1D Conv
        self.conv2dto1dLayer = nn.Sequential(
            nn.Conv1d(in_channels=2304,
                      out_channels=256,
                      kernel_size=1,
                      stride=1,
                      padding=0),
            nn.InstanceNorm1d(num_features=256, affine=True))

        # Residual Blocks
        self.residualLayer1 = ResidualLayer(in_channels=256,
                                            out_channels=512,
                                            kernel_size=3,
                                            stride=1,
                                            padding=1)
        self.residualLayer2 = ResidualLayer(in_channels=256,
                                            out_channels=512,
                                            kernel_size=3,
                                            stride=1,
                                            padding=1)
        self.residualLayer3 = ResidualLayer(in_channels=256,
                                            out_channels=512,
                                            kernel_size=3,
                                            stride=1,
                                            padding=1)
        self.residualLayer4 = ResidualLayer(in_channels=256,
                                            out_channels=512,
                                            kernel_size=3,
                                            stride=1,
                                            padding=1)
        self.residualLayer5 = ResidualLayer(in_channels=256,
                                            out_channels=512,
                                            kernel_size=3,
                                            stride=1,
                                            padding=1)
        self.residualLayer6 = ResidualLayer(in_channels=256,
                                            out_channels=512,
                                            kernel_size=3,
                                            stride=1,
                                            padding=1)

        # 1D -> 2D Conv
        self.conv1dto2dLayer = nn.Sequential(
            nn.Conv1d(in_channels=256,
                      out_channels=2304,
                      kernel_size=1,
                      stride=1,
                      padding=0),
            nn.InstanceNorm1d(num_features=2304, affine=True))

        # UpSample Layer
        self.upSample1 = self.upSample(in_channels=256,
                                       out_channels=1024,
                                       kernel_size=5,
                                       stride=1,
                                       padding=2)

        self.upSample2 = self.upSample(in_channels=256,
                                       out_channels=512,
                                       kernel_size=5,
                                       stride=1,
                                       padding=2)

        self.lastConvLayer = nn.Conv2d(in_channels=128,
                                       out_channels=1,
                                       kernel_size=(5, 15),
                                       stride=(1, 1),
                                       padding=(2, 7))
Code Example #11
 def __init__(self, dropout=0.5):
     super(Conv, self).__init__()
     self.dropout = nn.Dropout(dropout)
     self.conv = nn.Conv1d(256, 256, 5, padding=2)
Code Example #12
    def __init__(self, args):
        super(HPLFlowNetShallow, self).__init__()
        self.scales_filter_map = args.scales_filter_map
        assert len(self.scales_filter_map) == 5

        conv_module = Conv1dReLU

        self.conv1 = nn.Sequential(
            conv_module(args.dim, 32, use_leaky=args.use_leaky),
            conv_module(32, 32, use_leaky=args.use_leaky),
            conv_module(32, 64, use_leaky=args.use_leaky), )

        self.bcn1 = BilateralConvFlex(args.dim, self.scales_filter_map[0][1],
                                      64 + args.dim + 1, [64],
                                      args.DEVICE,
                                      use_bias=args.bcn_use_bias,
                                      use_leaky=args.use_leaky,
                                      use_norm=args.bcn_use_norm,
                                      do_splat=True,
                                      do_slice=False,
                                      last_relu=args.last_relu)

        self.bcn1_ = BilateralConvFlex(args.dim, self.scales_filter_map[0][1],
                                       args.dim + 1 + 64 + 64, [128],
                                       args.DEVICE,
                                       use_bias=args.bcn_use_bias,
                                       use_leaky=args.use_leaky,
                                       use_norm=args.bcn_use_norm,
                                       do_splat=False,
                                       do_slice=True,
                                       last_relu=args.last_relu)

        self.bcn2 = BilateralConvFlex(args.dim, self.scales_filter_map[1][1],
                                      64 + args.dim + 1, [64],
                                      args.DEVICE,
                                      use_bias=args.bcn_use_bias,
                                      use_leaky=args.use_leaky,
                                      use_norm=args.bcn_use_norm,
                                      do_splat=True,
                                      do_slice=False,
                                      last_relu=args.last_relu)

        self.bcn2_ = BilateralConvFlex(args.dim, self.scales_filter_map[1][1],
                                       args.dim + 1 + 64 + 64, [64],
                                       args.DEVICE,
                                       use_bias=args.bcn_use_bias,
                                       use_leaky=args.use_leaky,
                                       use_norm=args.bcn_use_norm,
                                       do_splat=False,
                                       do_slice=True,
                                       last_relu=args.last_relu)

        self.bcn3 = BilateralConvFlex(args.dim, self.scales_filter_map[2][1],
                                      64 + args.dim + 1, [64],
                                      args.DEVICE,
                                      use_bias=args.bcn_use_bias,
                                      use_leaky=args.use_leaky,
                                      use_norm=args.bcn_use_norm,
                                      do_splat=True,
                                      do_slice=False,
                                      last_relu=args.last_relu)

        self.bcn3_ = BilateralConvFlex(args.dim, self.scales_filter_map[2][1],
                                       args.dim + 1 + 64 * 2 + 64, [64],
                                       args.DEVICE,
                                       use_bias=args.bcn_use_bias,
                                       use_leaky=args.use_leaky,
                                       use_norm=args.bcn_use_norm,
                                       do_splat=False,
                                       do_slice=True,
                                       last_relu=args.last_relu)

        self.corr1 = BilateralCorrelationFlex(args.dim,
                                              self.scales_filter_map[2][2], self.scales_filter_map[2][3],
                                              64, [32], [32],
                                              args.DEVICE,
                                              use_bias=args.bcn_use_bias,
                                              use_leaky=args.use_leaky,
                                              use_norm=args.bcn_use_norm,
                                              prev_corr_dim=0,
                                              last_relu=args.last_relu)

        self.corr1_refine = nn.Sequential(conv_module(32 + args.dim + 1, 64, use_leaky=args.use_leaky),
                                          conv_module(64, 64, use_leaky=args.use_leaky),
                                          conv_module(64, 64, use_leaky=args.use_leaky),
                                          )

        self.bcn4 = BilateralConvFlex(args.dim, self.scales_filter_map[3][1],
                                      64 + args.dim + 1, [64], args.DEVICE,
                                      use_bias=args.bcn_use_bias,
                                      use_leaky=args.use_leaky,
                                      use_norm=args.bcn_use_norm,
                                      do_splat=True,
                                      do_slice=False,
                                      last_relu=args.last_relu)

        self.bcn4_ = BilateralConvFlex(args.dim, self.scales_filter_map[3][1],
                                       args.dim + 1 + 64 * 2 + 64, [64],
                                       args.DEVICE,
                                       use_bias=args.bcn_use_bias,
                                       use_leaky=args.use_leaky,
                                       use_norm=args.bcn_use_norm,
                                       do_splat=False,
                                       do_slice=True,
                                       last_relu=args.last_relu)

        self.corr2 = BilateralCorrelationFlex(args.dim,
                                              self.scales_filter_map[3][2], self.scales_filter_map[3][3],
                                              64, [32], [32],
                                              args.DEVICE,
                                              use_bias=args.bcn_use_bias,
                                              use_leaky=args.use_leaky,
                                              use_norm=args.bcn_use_norm,
                                              prev_corr_dim=64,
                                              last_relu=args.last_relu)

        self.corr2_refine = nn.Sequential(conv_module(32 + args.dim + 1, 64, use_leaky=args.use_leaky),
                                          conv_module(64, 64, use_leaky=args.use_leaky),
                                          conv_module(64, 64, use_leaky=args.use_leaky),
                                          )

        self.bcn5 = BilateralConvFlex(args.dim, self.scales_filter_map[4][1],
                                      64 + args.dim + 1, [64],
                                      args.DEVICE,
                                      use_bias=args.bcn_use_bias,
                                      use_leaky=args.use_leaky,
                                      use_norm=args.bcn_use_norm,
                                      do_splat=True,
                                      do_slice=False,
                                      last_relu=args.last_relu)

        self.bcn5_ = BilateralConvFlex(args.dim, self.scales_filter_map[4][1],
                                       64 + 64, [64],
                                       args.DEVICE,
                                       use_bias=args.bcn_use_bias,
                                       use_leaky=args.use_leaky,
                                       use_norm=args.bcn_use_norm,
                                       do_splat=False,
                                       do_slice=True,
                                       last_relu=args.last_relu)

        self.corr3 = BilateralCorrelationFlex(args.dim,
                                              self.scales_filter_map[4][2], self.scales_filter_map[4][3],
                                              64, [32], [32],
                                              args.DEVICE,
                                              use_bias=args.bcn_use_bias,
                                              use_leaky=args.use_leaky,
                                              use_norm=args.bcn_use_norm,
                                              prev_corr_dim=64,
                                              last_relu=args.last_relu)
        self.corr3_refine = nn.Sequential(conv_module(32, 64, use_leaky=args.use_leaky),
                                          conv_module(64, 64, use_leaky=args.use_leaky),
                                          conv_module(64, 64, use_leaky=args.use_leaky),
                                          )

        self.conv2 = conv_module(128, 1024, use_leaky=args.use_leaky)
        self.conv3 = conv_module(1024, 512, use_leaky=args.use_leaky)
        self.conv4 = nn.Conv1d(512, 3, kernel_size=1)
Code Example #13
File: lenet_1d.py  Project: zhanwenchen/beam_nn
    def __init__(self, input_channel, output_size, batch_norm, use_pooling,
                 pooling_method, conv1_kernel_size, conv1_num_kernels,
                 conv1_stride, conv1_dropout, pool1_kernel_size, pool1_stride,
                 conv2_kernel_size, conv2_num_kernels, conv2_stride,
                 conv2_dropout, pool2_kernel_size, pool2_stride,
                 fcs_hidden_size, fcs_num_hidden_layers, fcs_dropout):

        super(LeNet_1D, self).__init__()

        # Instance attributes for use in self.forward() later.
        self.input_channel = input_channel
        self.batch_norm = batch_norm
        self.output_size = output_size

        input_size = output_size / self.input_channel
        if input_size.is_integer():
            input_size = int(input_size)
        else:
            raise ValueError(
                'output_size / input_channel = {} / {} = {} is not an '
                'integer'.format(output_size, input_channel, input_size))

        # If not using pooling, set all pooling operations to 1 by 1.
        if use_pooling is False:
            # warnings.warn('lenet: not using pooling')
            pool1_kernel_size = 1
            pool1_stride = 1
            pool2_kernel_size = 1
            pool2_stride = 1

        # Conv1
        conv1_output_size = (conv1_num_kernels,
                             (input_size - conv1_kernel_size) / conv1_stride +
                             1)

        self.conv1 = nn.Conv1d(
            input_channel,
            conv1_num_kernels,
            conv1_kernel_size,
            stride=conv1_stride
        )  # NOTE: conv parameters depend only on channels, not input length.
        nn.init.kaiming_normal_(self.conv1.weight.data)
        self.conv1.bias.data.fill_(0)

        self.conv1_drop = nn.Dropout2d(p=conv1_dropout)
        if batch_norm is True:
            self.batch_norm1 = nn.BatchNorm1d(conv1_num_kernels)

        # Pool1
        pool1_output_size = (
            conv1_num_kernels,
            (conv1_output_size[1] - pool1_kernel_size) / pool1_stride + 1)

        self.pool1 = nn.MaxPool1d(
            pool1_kernel_size,
            stride=pool1_stride)  # stride=pool1_kernel_size by default

        # Conv2
        conv2_output_size = (
            conv2_num_kernels,
            (pool1_output_size[1] - conv2_kernel_size) / conv2_stride + 1)

        self.conv2 = nn.Conv1d(
            conv1_num_kernels,
            conv2_num_kernels,
            conv2_kernel_size,
            stride=conv2_stride
        )  # NOTE: conv parameters depend only on channels, not input length.
        nn.init.kaiming_normal_(self.conv2.weight.data)
        self.conv2.bias.data.fill_(0)

        self.conv2_drop = nn.Dropout2d(p=conv2_dropout)
        if batch_norm is True:
            self.batch_norm2 = nn.BatchNorm1d(conv2_num_kernels)

        # Pool2
        pool2_output_size = (
            conv2_num_kernels,
            (conv2_output_size[1] - pool2_kernel_size) / pool2_stride + 1)

        self.pool2 = nn.MaxPool1d(
            pool2_kernel_size,
            stride=pool2_stride)  # stride=pool1_kernel_size by default

        # FCs
        fcs_input_size = pool2_output_size[0] * pool2_output_size[1]
        self.fcs = FullyConnectedNet(fcs_input_size, output_size, fcs_dropout,
                                     batch_norm, fcs_hidden_size,
                                     fcs_num_hidden_layers)
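The size arithmetic above is the no-padding special case of the general Conv1d length formula. A small helper for sanity-checking such computations (illustrative, not from the project):

def conv1d_out_len(l_in, kernel_size, stride=1, padding=0, dilation=1):
    """Output length of nn.Conv1d, per the PyTorch documentation."""
    return (l_in + 2 * padding - dilation * (kernel_size - 1) - 1) // stride + 1

assert conv1d_out_len(100, kernel_size=5, stride=2) == 48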
Code Example #14
    def __init__(self,
                 class_size,
                 style_size,
                 hidden_size=128,
                 n_out=1,
                 emb_style=0):
        super(CountCNN, self).__init__()

        self.cnn = nn.Sequential(
            nn.Conv1d(class_size + style_size,
                      hidden_size,
                      kernel_size=3,
                      stride=1,
                      padding=1),
            nn.GroupNorm(getGroupSize(hidden_size), hidden_size),
            nn.Dropout2d(0.1),
            nn.ReLU(inplace=True),
            nn.Conv1d(hidden_size,
                      hidden_size // 2,
                      kernel_size=3,
                      stride=1,
                      padding=1),
            nn.GroupNorm(getGroupSize(hidden_size // 2), hidden_size // 2),
            nn.Dropout2d(0.1),
            nn.ReLU(inplace=True),
            nn.Conv1d(hidden_size // 2,
                      hidden_size // 4,
                      kernel_size=3,
                      stride=1,
                      padding=1),
            nn.GroupNorm(getGroupSize(hidden_size // 4), hidden_size // 4),
            nn.ReLU(inplace=True),
            nn.Conv1d(hidden_size // 4,
                      n_out,
                      kernel_size=1,
                      stride=1,
                      padding=0),
        )

        if n_out == 1 or n_out > 2:
            self.mean = nn.Parameter(torch.FloatTensor(1, n_out).fill_(2))
            self.std = nn.Parameter(torch.FloatTensor(1, n_out).fill_(1))
        else:
            self.mean = nn.Parameter(
                torch.FloatTensor([2.0, 0.0])
            )  #These are educated guesses to give the net a good place to start
            self.std = nn.Parameter(torch.FloatTensor([1.5, 0.5]))

        if emb_style > 0:
            if type(emb_style) is float:
                drop = 0.125
            else:
                drop = 0.5
            layers = [PixelNorm()]
            for i in range(int(emb_style)):
                layers.append(nn.Linear(style_size, style_size))
                layers.append(nn.Dropout(drop, True))
                layers.append(nn.LeakyReLU(0.2, True))
            self.emb_style = nn.Sequential(*layers)
        else:
            self.emb_style = None
Code Example #15
File: modeling_squeezebert.py  Project: vuiseng9/lpot
 def __init__(self, cin, cout, groups, act):
     super().__init__()
     self.conv1d = nn.Conv1d(in_channels=cin, out_channels=cout, kernel_size=1, groups=groups)
     self.act = ACT2FN[act]
Code Example #16
File: models.py  Project: GokuMohandas/MLOps
    def __init__(
        self,
        embedding_dim: int,
        vocab_size: int,
        num_filters: int,
        filter_sizes: list,
        hidden_dim: int,
        dropout_p: float,
        num_classes: int,
        padding_idx: int = 0,
    ) -> None:
        """A [convolutional neural network](https://madewithml.com/courses/foundations/convolutional-neural-networks/){:target="_blank"} architecture
        created for natural language processing tasks where filters convolve across the given text inputs.

        ![text CNN](https://raw.githubusercontent.com/GokuMohandas/MadeWithML/main/images/foundations/embeddings/model.png)

        Usage:

        ```python
        # Initialize model
        filter_sizes = list(range(1, int(params.max_filter_size) + 1))
        model = models.CNN(
            embedding_dim=int(params.embedding_dim),
            vocab_size=int(vocab_size),
            num_filters=int(params.num_filters),
            filter_sizes=filter_sizes,
            hidden_dim=int(params.hidden_dim),
            dropout_p=float(params.dropout_p),
            num_classes=int(num_classes),
        )
        model = model.to(device)
        ```

        Args:
            embedding_dim (int): Embedding dimension for tokens.
            vocab_size (int): Number of unique tokens in vocabulary.
            num_filters (int): Number of filters per filter size.
            filter_sizes (list): List of filter sizes for the CNN.
            hidden_dim (int): Hidden dimension for fully-connected (FC) layers.
            dropout_p (float): Dropout proportion for FC layers.
            num_classes (int): Number of unique classes to classify into.
            padding_idx (int, optional): Index representing the `<PAD>` token. Defaults to 0.
        """
        super().__init__()

        # Initialize embeddings
        self.embeddings = nn.Embedding(
            embedding_dim=embedding_dim,
            num_embeddings=vocab_size,
            padding_idx=padding_idx,
        )

        # Conv weights
        self.filter_sizes = filter_sizes
        self.conv = nn.ModuleList(
            [
                nn.Conv1d(
                    in_channels=embedding_dim,
                    out_channels=num_filters,
                    kernel_size=f,
                )
                for f in filter_sizes
            ]
        )

        # FC weights
        self.dropout = nn.Dropout(dropout_p)
        self.fc1 = nn.Linear(num_filters * len(filter_sizes), hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, num_classes)
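One detail the constructor leaves implicit: nn.Conv1d expects input as (batch, channels, length), so the embedded text presumably gets transposed in forward() before reaching these filters. A hedged sketch:

import torch
import torch.nn as nn

x = torch.randint(0, 1000, (8, 50))  # (batch, tokens)
emb = nn.Embedding(num_embeddings=1000, embedding_dim=128)
z = emb(x).transpose(1, 2)           # (8, 128, 50): channels = embedding_dim
conv = nn.Conv1d(in_channels=128, out_channels=64, kernel_size=3)
print(conv(z).shape)                 # torch.Size([8, 64, 48])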
Code Example #17
    def __init__(self, n_mel_channels, max_seq_len, n_symbols, padding_idx,
                 symbols_embedding_dim, in_fft_n_layers, in_fft_n_heads,
                 in_fft_d_head,
                 in_fft_conv1d_kernel_size, in_fft_conv1d_filter_size,
                 in_fft_output_size,
                 p_in_fft_dropout, p_in_fft_dropatt, p_in_fft_dropemb,
                 out_fft_n_layers, out_fft_n_heads, out_fft_d_head,
                 out_fft_conv1d_kernel_size, out_fft_conv1d_filter_size,
                 out_fft_output_size,
                 p_out_fft_dropout, p_out_fft_dropatt, p_out_fft_dropemb,
                 dur_predictor_kernel_size, dur_predictor_filter_size,
                 p_dur_predictor_dropout, dur_predictor_n_layers,
                 pitch_predictor_kernel_size, pitch_predictor_filter_size,
                 p_pitch_predictor_dropout, pitch_predictor_n_layers,
                 pitch_embedding_kernel_size, n_speakers, speaker_emb_weight):
        super(FastPitch, self).__init__()
        del max_seq_len  # unused

        self.encoder = FFTransformer(
            n_layer=in_fft_n_layers, n_head=in_fft_n_heads,
            d_model=symbols_embedding_dim,
            d_head=in_fft_d_head,
            d_inner=in_fft_conv1d_filter_size,
            kernel_size=in_fft_conv1d_kernel_size,
            dropout=p_in_fft_dropout,
            dropatt=p_in_fft_dropatt,
            dropemb=p_in_fft_dropemb,
            embed_input=True,
            d_embed=symbols_embedding_dim,
            n_embed=n_symbols,
            padding_idx=padding_idx)

        if n_speakers > 1:
            self.speaker_emb = nn.Embedding(n_speakers, symbols_embedding_dim)
        else:
            self.speaker_emb = None
        self.speaker_emb_weight = speaker_emb_weight

        self.duration_predictor = TemporalPredictor(
            in_fft_output_size,
            filter_size=dur_predictor_filter_size,
            kernel_size=dur_predictor_kernel_size,
            dropout=p_dur_predictor_dropout, n_layers=dur_predictor_n_layers
        )

        self.decoder = FFTransformer(
            n_layer=out_fft_n_layers, n_head=out_fft_n_heads,
            d_model=symbols_embedding_dim,
            d_head=out_fft_d_head,
            d_inner=out_fft_conv1d_filter_size,
            kernel_size=out_fft_conv1d_kernel_size,
            dropout=p_out_fft_dropout,
            dropatt=p_out_fft_dropatt,
            dropemb=p_out_fft_dropemb,
            embed_input=False,
            d_embed=symbols_embedding_dim
        )

        self.pitch_predictor = TemporalPredictor(
            in_fft_output_size,
            filter_size=pitch_predictor_filter_size,
            kernel_size=pitch_predictor_kernel_size,
            dropout=p_pitch_predictor_dropout, n_layers=pitch_predictor_n_layers
        )

        self.pitch_emb = nn.Conv1d(
            1, symbols_embedding_dim,
            kernel_size=pitch_embedding_kernel_size,
            padding=int((pitch_embedding_kernel_size - 1) / 2))

        # Store values precomputed for training data within the model
        self.register_buffer('pitch_mean', torch.zeros(1))
        self.register_buffer('pitch_std', torch.zeros(1))

        self.proj = nn.Linear(out_fft_output_size, n_mel_channels, bias=True)
Code Example #18
    def __init__(
        self,
        device,
        num_nodes,
        dropout=0.3,
        supports=None,
        gcn_bool=True,
        att_bool=True,
        addaptadj=True,
        aptinit=None,
        in_dim=2,
        out_dim=12,
        residual_channels=32,
        dilation_channels=32,
        skip_channels=256,
        end_channels=512,
        kernel_size=2,
        blocks=4,
        layers=2,
    ):
        super(gwnet, self).__init__()
        self.dropout = dropout
        self.blocks = blocks
        self.layers = layers
        self.gcn_bool = gcn_bool
        self.att_bool = att_bool
        self.addaptadj = addaptadj

        self.supports = supports
        self.filter_convs = nn.ModuleList()
        self.gate_convs = nn.ModuleList()

        self.residual_convs = nn.ModuleList()
        self.skip_convs = nn.ModuleList()

        self.bn = nn.ModuleList()
        self.avwgconv = nn.ModuleList()
        self.att_conv = nn.ModuleList()

        self.start_conv = nn.Conv2d(in_channels=in_dim,
                                    out_channels=residual_channels,
                                    kernel_size=(1, 1))

        self.supports_len = 0
        if supports is not None:
            self.supports_len += len(supports)

        receptive_field = 1

        if self.gcn_bool and self.addaptadj:
            self.node_embedding = nn.Parameter(torch.randn(num_nodes, 10),
                                               requires_grad=True)

        for b in range(blocks):
            additional_scope = kernel_size - 1
            new_dilation = 1
            for i in range(layers):
                # dilated convolutions
                self.filter_convs.append(
                    nn.Conv2d(
                        in_channels=residual_channels,
                        out_channels=dilation_channels,
                        kernel_size=(1, kernel_size),
                        dilation=new_dilation,
                    ))

                # These layers use 2D (1, k) kernels on 4D inputs (note the
                # BatchNorm2d below), so Conv2d is the correct module here;
                # the original's Conv1d calls relied on a PyTorch quirk.
                self.gate_convs.append(
                    nn.Conv2d(
                        in_channels=residual_channels,
                        out_channels=dilation_channels,
                        kernel_size=(1, kernel_size),
                        dilation=new_dilation,
                    ))

                # 1x1 convolution for residual connection
                self.residual_convs.append(
                    nn.Conv2d(
                        in_channels=dilation_channels,
                        out_channels=residual_channels,
                        kernel_size=(1, 1),
                    ))

                # 1x1 convolution for skip connection
                self.skip_convs.append(
                    nn.Conv2d(
                        in_channels=dilation_channels,
                        out_channels=skip_channels,
                        kernel_size=(1, 1),
                    ))
                self.bn.append(nn.BatchNorm2d(residual_channels))
                new_dilation *= 2
                receptive_field += additional_scope
                additional_scope *= 2
                if self.gcn_bool:
                    if (i + 1) % 2 == 1:
                        self.avwgconv.append(
                            AVWGCN(
                                dilation_channels,
                                residual_channels,
                                dropout,
                                support_len=self.supports_len,
                            ))
                        self.att_conv.append(
                            ST_Attention(2, residual_channels,
                                         residual_channels))
                    else:
                        self.avwgconv.append(
                            AVWGCN(dilation_channels, residual_channels,
                                   dropout))

        self.end_conv_1 = nn.Conv2d(
            in_channels=skip_channels,
            out_channels=end_channels,
            kernel_size=(1, 1),
            bias=True,
        )

        self.end_conv_2 = nn.Conv2d(
            in_channels=end_channels,
            out_channels=out_dim,
            kernel_size=(1, 1),
            bias=True,
        )

        self.receptive_field = receptive_field
Code Example #19
File: sincnet.py  Project: ShreeshaN/AlcoAudio
    def __init__(self, options):
        super(SincNet, self).__init__()
        # self.saved_model = \
        #     torch.load(options['sincnet_saved_model'], map_location='gpu' if torch.cuda.is_available() else 'cpu')[
        #         'CNN_model_par']
        # print(self.saved_model.keys())
        # exit()
        self.batch_size = options['batch_size']
        self.cnn_N_filt = options['cnn_N_filt']
        self.cnn_len_filt = options['cnn_len_filt']
        self.cnn_max_pool_len = options['cnn_max_pool_len']

        self.cnn_act = options['cnn_act']
        self.cnn_drop = options['cnn_drop']

        self.cnn_use_laynorm = options['cnn_use_laynorm']
        self.cnn_use_batchnorm = options['cnn_use_batchnorm']
        self.cnn_use_laynorm_inp = options['cnn_use_laynorm_inp']
        self.cnn_use_batchnorm_inp = options['cnn_use_batchnorm_inp']

        self.input_dim = options['input_dim']

        self.fs = options['sampling_rate']

        self.N_cnn_lay = len(options['cnn_N_filt'])
        self.conv = nn.ModuleList([])
        self.bn = nn.ModuleList([])
        self.ln = nn.ModuleList([])
        self.act = nn.ModuleList([])
        self.drop = nn.ModuleList([])

        if self.cnn_use_laynorm_inp:
            self.ln0 = LayerNorm(self.input_dim)
            # self.ln0.beta = nn.Parameter(self.saved_model['ln0.beta'])
            # self.ln0.gamma = nn.Parameter(self.saved_model['ln0.gamma'])

        if self.cnn_use_batchnorm_inp:
            self.bn0 = nn.BatchNorm1d(self.input_dim, momentum=0.05)
            # self.bn0.weight = nn.Parameter(self.saved_model['bn0.weight'])
            # self.bn0.bias = nn.Parameter(self.saved_model['bn0.bias'])
            # self.bn0.running_mean = nn.Parameter(self.saved_model['bn0.running_mean'])
            # self.bn0.running_var = nn.Parameter(self.saved_model['bn0.running_var'])
            # self.bn0.num_batches_tracked = nn.Parameter(self.saved_model['bn0.num_batches_tracked'])

        current_input = self.input_dim

        for i in range(self.N_cnn_lay):

            N_filt = int(self.cnn_N_filt[i])
            len_filt = int(self.cnn_len_filt[i])

            # dropout
            self.drop.append(nn.Dropout(p=self.cnn_drop[i]))

            # activation
            self.act.append(act_fun(self.cnn_act[i]))

            # layer norm initialization
            ln = LayerNorm([N_filt, int((current_input - self.cnn_len_filt[i] + 1) / self.cnn_max_pool_len[i])])
            # ln.beta = self.saved_model['ln' + str(i) + '.beta']
            # ln.gamma = self.saved_model['ln' + str(i) + '.gamma']
            self.ln.append(ln)

            # BatchNorm1d only needs the channel count; the original passed the
            # layer's output length as the second positional argument (eps).
            bn = nn.BatchNorm1d(N_filt, momentum=0.05)
            # bn.weight = nn.Parameter(self.saved_model['bn' + str(i) + '.weight'])
            # bn.bias = nn.Parameter(self.saved_model['bn' + str(i) + '.bias'])
            # bn.running_mean = nn.Parameter(self.saved_model['bn' + str(i) + '.running_mean'])
            # bn.running_var = nn.Parameter(self.saved_model['bn' + str(i) + '.running_var'])
            # bn.num_batches_tracked = nn.Parameter(self.saved_model['bn' + str(i) + '.num_batches_tracked'])
            self.bn.append(bn)

            if i == 0:
                self.conv.append(SincConv_fast(self.cnn_N_filt[0], self.cnn_len_filt[0], self.fs))

            else:
                self.conv.append(nn.Conv1d(self.cnn_N_filt[i - 1], self.cnn_N_filt[i], self.cnn_len_filt[i]))

            current_input = int((current_input - self.cnn_len_filt[i] + 1) / self.cnn_max_pool_len[i])

        self.out_dim = current_input * N_filt

        # self.conv1 = nn.Conv1d(1, 40, 4, 3)
        # self.bn1 = nn.BatchNorm1d(40)
        # self.pool1 = nn.MaxPool1d(2, 2)
        # self.conv2 = nn.Conv1d(40, 40, 4, 3)
        # self.bn2 = nn.BatchNorm1d(40)
        # self.pool2 = nn.MaxPool1d(2, 2)

        self.conv1 = nn.Conv2d(1, 30, (2, 3), 2)
        self.bn1 = nn.BatchNorm2d(30)
        self.pool1 = nn.MaxPool2d((1, 2), 2)
        self.conv2 = nn.Conv2d(30, 30, (2, 3), 2)
        self.bn2 = nn.BatchNorm2d(30)
        self.pool2 = nn.MaxPool2d((1, 2), 1)

        self.fc1 = nn.Linear(55650, 4096)
        self.ffn_bn1 = nn.BatchNorm1d(4096)
        self.drp1 = nn.Dropout(0.3)
        self.fc2 = nn.Linear(4096, 512)
        self.ffn_bn2 = nn.BatchNorm1d(512)
        self.drp2 = nn.Dropout(0.3)
        self.fc3 = nn.Linear(512, 1)
Code Example #20
 def __init__(self, input_dim, conv_dim=64):
     super(Inception3, self).__init__()
     self.cnn = nn.Sequential(nn.Conv1d(input_dim, conv_dim, kernel_size=1),
                              nn.ReLU(),
                              nn.Conv1d(conv_dim, conv_dim, kernel_size=3),
                              nn.ReLU())
Code Example #21
layer_after = nn.Linear(2, 2)
model1 = nn.Sequential(layer_before, op_layer, layer_after)

print('Composed model 1:')
print(model1)
print('')

inp = torch.ones(3)[None, ...]
print('Model 1 evaluated on a 1x3 tensor:')
print(model1(inp))

# We can also use convolutional layers with extra channel axes. Since
# convolutions without padding reduce the size of the input by
# `kernel_size - 1`, the input has to have size 4 here, and the output
# will have size 1.
layer_before = nn.Conv1d(1, 2, 2)
layer_after = nn.Conv1d(2, 1, 2)
model2 = nn.Sequential(layer_before, op_layer, layer_after)

print('Composed model 2:')
print(model2)
print('')

# Add extra batch and channel axes
inp = torch.ones(4)[None, None, ...]
print('Model 2 evaluated on a 1x1x4 tensor:')
print(model2(inp))

# --- Backward --- #

# Define a loss function and targets to compare against
Code Example #22
File: __init__.py  Project: ZhongxiaYan/util
 def __init__(self, in_depth, out_depth, kernel_size, dilation=1, stride=1, groups=1):
     super(CausalConv1d, self).__init__()
     self.padding = (kernel_size - 1) * dilation
     self.conv = nn.Conv1d(in_depth, out_depth, kernel_size, stride=stride, dilation=dilation, groups=groups)
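The Conv1d here is created without padding; the stored self.padding = (kernel_size - 1) * dilation is presumably applied to the left side in forward() so that no output depends on future samples. A minimal sketch of that pattern:

import torch
import torch.nn as nn
import torch.nn.functional as F

conv = nn.Conv1d(1, 1, kernel_size=3, dilation=2)
padding = (3 - 1) * 2

x = torch.randn(1, 1, 20)
y = conv(F.pad(x, (padding, 0)))  # left-pad only, so the conv stays causal
print(y.shape)                    # torch.Size([1, 1, 20]), same length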
Code Example #23
 def __init__(self):
     super(SimpleConvolutionalEncoder, self).__init__()
     self.c1 = nn.Conv1d(1, 5, kernel_size=3)
     self.pool = nn.AdaptiveMaxPool1d(10)
     self.act = nn.LeakyReLU(negative_slope=0.3)
     self.out = nn.Linear(50, 3)
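nn.AdaptiveMaxPool1d(10) is what lets this encoder accept variable-length inputs: whatever length c1 produces, the pool reduces it to exactly 10 steps, so nn.Linear(50, 3) (5 channels x 10 steps) always fits:

import torch
import torch.nn as nn

pool = nn.AdaptiveMaxPool1d(10)
for length in (32, 100, 977):
    x = torch.randn(1, 5, length)
    print(pool(x).shape)  # torch.Size([1, 5, 10]) every time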
Code Example #24
    def __init__(self):
        super(ConvAE, self).__init__()

        self.encoder = nn.Sequential(
            nn.Conv1d(1, 32, 3, stride=2, padding=1, dilation=1),
            nn.ReLU(True),
            # nn.LeakyReLU(True),
            # nn.MaxPool1d(2, stride=2)
            nn.Conv1d(32, 64, 5, stride=2, padding=1, dilation=1),
            nn.ReLU(True),
            # nn.LeakyReLU(True),
            # nn.MaxPool1d(2, stride=1)
            nn.Conv1d(64, 64, 4, stride=2, padding=1, dilation=1),
            nn.ReLU(True),
            # nn.LeakyReLU(True),
            nn.Conv1d(64, 128, 9, stride=2, padding=1, dilation=1),
            nn.ReLU(True),
            # nn.LeakyReLU(True),
            nn.Conv1d(128, 128, 9, stride=2, padding=1, dilation=1),
            nn.ReLU(True),
            # nn.LeakyReLU(True),
            nn.Conv1d(128, 256, 9, stride=2, padding=1, dilation=1),
            nn.ReLU(True),
            # nn.LeakyReLU(True),
            nn.Conv1d(256, 256, 9, stride=2, padding=1, dilation=1),
            nn.ReLU(True),
            # nn.LeakyReLU(True),
            nn.Conv1d(256, 512, 9, stride=2, padding=1, dilation=1),
            nn.ReLU(True),
            # nn.LeakyReLU(True),
            nn.Conv1d(512, 512, 9, stride=2, padding=1, dilation=1),
            nn.ReLU(True),
            # nn.LeakyReLU(True),
            #nn.MaxPool1d(2)
        )

        self.decoder = nn.Sequential(
            nn.ConvTranspose1d(512, 512, 9, stride=2, padding=1, dilation=1),
            nn.ReLU(True),
            # nn.LeakyReLU(True),
            nn.ConvTranspose1d(512, 256, 9, stride=2, padding=1, dilation=1),
            nn.ReLU(True),
            # nn.LeakyReLU(True),
            nn.ConvTranspose1d(256, 256, 9, stride=2, padding=1, dilation=1),
            nn.ReLU(True),
            # nn.LeakyReLU(True),
            nn.ConvTranspose1d(256, 128, 10, stride=2, padding=1, dilation=1),
            nn.ReLU(True),
            # nn.LeakyReLU(True),
            nn.ConvTranspose1d(128, 64, 20, stride=2, padding=1, dilation=1),
            nn.ReLU(True),
            # nn.LeakyReLU(True),
            nn.ConvTranspose1d(64, 64, 20, stride=2, padding=1, dilation=1),
            nn.ReLU(True),
            # nn.LeakyReLU(True),
            nn.ConvTranspose1d(64, 32, 30, stride=2, padding=1, dilation=1),
            nn.ReLU(True),
            # nn.LeakyReLU(True),
            nn.ConvTranspose1d(32, 32, 40, stride=2, padding=1, dilation=1),
            nn.ReLU(True),
            # nn.LeakyReLU(True),
            nn.ConvTranspose1d(32, 1, 40, stride=2, padding=1, dilation=1),
        )
Code Example #25
    def __init__(self, args_dict=wavenet_default_settings):

        super(WaveNetModel, self).__init__()

        self.layers = args_dict["layers"]
        self.blocks = args_dict["blocks"]
        self.dilation_channels = args_dict["dilation_channels"]
        self.residual_channels = args_dict["residual_channels"]
        self.skip_channels = args_dict["skip_channels"]
        self.end_channels = args_dict["end_channels"]
        self.output_channels = args_dict["output_channels"]
        self.output_length = args_dict["output_length"]
        self.kernel_size = args_dict["kernel_size"]
        self.dilation_factor = args_dict["dilation_factor"]
        self.dtype = args_dict["dtype"]
        self.use_bias = args_dict["bias"]

        # build model
        receptive_field = 1
        init_dilation = 1

        self.dilations = []
        self.residual_convs = nn.ModuleList()
        self.skip_convs = nn.ModuleList()
        self.end_layers = nn.ModuleList()

        # 1x1 convolution to create channels
        self.start_conv = nn.Conv1d(
            in_channels=1,  #self.in_classes,
            out_channels=self.residual_channels,
            kernel_size=1,
            bias=self.use_bias)

        for b in range(self.blocks):
            additional_scope = self.kernel_size - 1
            new_dilation = 1
            for i in range(self.layers):
                # dilations of this layer
                self.dilations.append((new_dilation, init_dilation))

                # 1x1 convolution for residual connection
                self.residual_convs.append(
                    nn.Conv1d(in_channels=self.dilation_channels,
                              out_channels=self.residual_channels,
                              kernel_size=1,
                              bias=self.use_bias))

                # 1x1 convolution for skip connection
                self.skip_convs.append(
                    nn.Conv1d(in_channels=self.dilation_channels,
                              out_channels=self.skip_channels,
                              kernel_size=1,
                              bias=self.use_bias))

                receptive_field += additional_scope
                additional_scope *= self.dilation_factor
                init_dilation = new_dilation
                new_dilation *= self.dilation_factor

        in_channels = self.skip_channels
        for end_channel in self.end_channels:
            self.end_layers.append(
                nn.Conv1d(in_channels=in_channels,
                          out_channels=end_channel,
                          kernel_size=1,
                          bias=True))
            in_channels = end_channel

        self.end_layers.append(
            nn.Conv1d(in_channels=in_channels,
                      out_channels=self.output_channels,
                      kernel_size=1,
                      bias=True))

        # self.output_length = 2 ** (layers - 1)
        self.receptive_field = receptive_field
        self.activation_unit_init()
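For dilation_factor=2, the receptive_field accumulated in the loop has a closed form, blocks * (2**layers - 1) * (kernel_size - 1) + 1:

# Each block contributes (k - 1) * (1 + 2 + ... + 2**(layers - 1)) samples
blocks, layers, kernel_size = 4, 10, 2
print(blocks * (2 ** layers - 1) * (kernel_size - 1) + 1)  # 4093 samples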
Code Example #26
File: deepgmr.py  Project: zivzone/learning3d
	def __init__(self, in_planes, out_planes):
		super(Conv1dBNReLU, self).__init__(
			nn.Conv1d(in_planes, out_planes, kernel_size=1, bias=False),
			nn.BatchNorm1d(out_planes),
			nn.ReLU(inplace=True))
Code Example #27
File: submodules.py  Project: swstarlab/DAN-VisDial
 def __init__(self, d_in, d_hid, dropout=0.1):
     super().__init__()
     self.w_1 = nn.Conv1d(d_in, d_hid, 1)  # position-wise
     self.w_2 = nn.Conv1d(d_hid, d_in, 1)  # position-wise
     self.layer_norm = LayerNorm(d_in)
     self.dropout = nn.Dropout(dropout)
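The '# position-wise' comments hold because a kernel_size=1 Conv1d is exactly a Linear layer applied independently at every position; copying the weights makes the two match:

import torch
import torch.nn as nn

conv = nn.Conv1d(4, 8, kernel_size=1)
lin = nn.Linear(4, 8)
with torch.no_grad():
    lin.weight.copy_(conv.weight.squeeze(-1))  # (8, 4, 1) -> (8, 4)
    lin.bias.copy_(conv.bias)

x = torch.randn(2, 4, 10)  # (batch, d_in, positions)
assert torch.allclose(conv(x),
                      lin(x.transpose(1, 2)).transpose(1, 2), atol=1e-5)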
Code Example #28
File: modeling_squeezebert.py  Project: vuiseng9/lpot
    def __init__(self, cin, cout, groups, dropout_prob):
        super().__init__()

        self.conv1d = nn.Conv1d(in_channels=cin, out_channels=cout, kernel_size=1, groups=groups)
        self.layernorm = SqueezeBertLayerNorm(cout)
        self.dropout = nn.Dropout(dropout_prob)
Code Example #29
    def __init__(self,
                 num_classes=50,
                 points=2048,
                 embed_dim=128,
                 normal_channel=True,
                 pre_blocks=[2, 2, 2, 2],
                 pos_blocks=[2, 2, 2, 2],
                 k_neighbors=[32, 32, 32, 32],
                 reducers=[2, 2, 2, 2],
                 **kwargs):
        super(get_model, self).__init__()
        # self.stages = len(pre_blocks)
        self.num_classes = num_classes
        self.points = points
        input_channel = 6 if normal_channel else 3
        self.embedding = nn.Sequential(FCBNReLU1D(input_channel, embed_dim),
                                       FCBNReLU1D(embed_dim, embed_dim))

        self.encoder_stage1 = encoder_stage(anchor_points=points // 4,
                                            channel=128,
                                            reduce=False,
                                            pre_blocks=3,
                                            pos_blocks=3,
                                            k_neighbor=32)
        self.encoder_stage2 = encoder_stage(anchor_points=points // 8,
                                            channel=256,
                                            reduce=True,
                                            pre_blocks=3,
                                            pos_blocks=3,
                                            k_neighbor=32)
        self.encoder_stage3 = encoder_stage(anchor_points=points // 16,
                                            channel=256,
                                            reduce=False,
                                            pre_blocks=3,
                                            pos_blocks=3,
                                            k_neighbor=32)
        self.encoder_stage4 = encoder_stage(anchor_points=points // 32,
                                            channel=512,
                                            reduce=True,
                                            pre_blocks=3,
                                            pos_blocks=3,
                                            k_neighbor=32)

        self.fp4 = PointNetFeaturePropagation(in_channel=(512 + 512),
                                              mlp=[512, 256, 256])
        self.fp3 = PointNetFeaturePropagation(in_channel=256 + 256,
                                              mlp=[512, 256, 256])
        self.fp2 = PointNetFeaturePropagation(in_channel=256 + 256,
                                              mlp=[256, 256])
        self.fp1 = PointNetFeaturePropagation(in_channel=256 + 128 + 128,
                                              mlp=[256, 256])

        self.info_encoder = nn.Sequential(
            FCBNReLU1D(16 + 3 + input_channel, 128),
            FCBNReLU1D(128, 128),
        )
        self.global_encoder = nn.Sequential(
            FCBNReLU1D(512, 256),
            FCBNReLU1D(256, 128),
        )

        self.conv0 = nn.Conv1d(256, 256, 1)
        self.bn0 = nn.BatchNorm1d(256)
        self.drop0 = nn.Dropout(0.4)
        self.conv1 = nn.Conv1d(256, 128, 1)
        self.bn1 = nn.BatchNorm1d(128)
        self.drop1 = nn.Dropout(0.4)
        self.conv2 = nn.Conv1d(128, num_classes, 1)
Code Example #30
 def get_layer(in_size, out_size, conv=False):
     if conv:
         return nn.Conv1d(in_size, out_size, 1)
     else:
         return nn.Linear(in_size, out_size)