def __init__(self, config):
        """Create every layer of the ResNet-18 network.

        :param config: mapping providing ``'num_classes'`` (output width of
            the final linear layer) and ``'input_shape'`` (stored unchanged on
            ``self.in_shape``; not otherwise used in this constructor).
        """
        super(ResNet18, self).__init__()

        self.classes = config['num_classes']
        self.in_shape = config['input_shape']
        self.channels = 3  # input channel count of the stem convolution

        # channel widths of stages 2 to 5
        w2, w3, w4, w5 = 64, 128, 256, 512

        # stage 1 (stem): pad 3 -> 7x7 stride-2 conv -> batch norm -> pad 1
        # -> 3x3 stride-2 max pool
        self.pad1 = ZeroPad2d(padding=3)
        self.conv1 = Conv2d(self.channels, w2, kernel_size=(7, 7), stride=2)
        self.bn1 = BatchNorm2d(w2)
        self.pad2 = ZeroPad2d(padding=1)
        self.pool1 = MaxPool2d(kernel_size=3, stride=2)

        # stage 2 -- the only stage whose conv block is given an explicit stride
        self.conv_block_s2 = ResNetConv(w2, filters=w2, stride=1)
        self.identity_s2_1 = ResNetIdentity(w2, filters=w2)
        self.identity_s2_2 = ResNetIdentity(w2, filters=w2)

        # stage 3
        self.conv_block_s3 = ResNetConv(w2, filters=w3)
        self.identity_s3_1 = ResNetIdentity(w3, filters=w3)
        self.identity_s3_2 = ResNetIdentity(w3, filters=w3)

        # stage 4
        self.conv_block_s4 = ResNetConv(w3, filters=w4)
        self.identity_s4_1 = ResNetIdentity(w4, filters=w4)
        self.identity_s4_2 = ResNetIdentity(w4, filters=w4)

        # stage 5
        self.conv_block_s5 = ResNetConv(w4, filters=w5)
        self.identity_s5_1 = ResNetIdentity(w5, filters=w5)
        self.identity_s5_2 = ResNetIdentity(w5, filters=w5)

        # classification head
        self.linear = Linear(w5, self.classes)
예제 #2
0
    def __init__(self, base_filters=16):
        """Build the ROI discriminator: conv trunk, RoI-align pooling, classifier.

        :param base_filters: output channels of the first convolution; the
            second and third convolutions produce 2x and 4x this many.
        """
        super(DiscriminatorROI, self).__init__()

        f1, f2, f4 = base_filters, 2 * base_filters, 4 * base_filters

        def asym_pad():
            # ZeroPad2d takes (left, right, top, bottom)
            return ZeroPad2d((1, 2, 1, 2))

        trunk = [
            asym_pad(),
            Conv2d(3, f1, kernel_size=4, stride=2, bias=False),
            LeakyReLU(0.2, inplace=True),
            asym_pad(),
            Conv2d(f1, f2, kernel_size=4, stride=2, bias=False),
            BatchNorm2d(f2, momentum=0.8),
            LeakyReLU(0.2, inplace=True),
            asym_pad(),
            Conv2d(f2, f4, kernel_size=4, stride=2, bias=False),
            BatchNorm2d(f4, momentum=0.8),
            LeakyReLU(0.2, inplace=True),
        ]
        self.conv_layers = Sequential(*trunk)

        # 0.125 = 1/8, consistent with the three stride-2 convolutions above
        self.roi_pool = RoIAlign(output_size=(3, 3),
                                 spatial_scale=0.125,
                                 sampling_ratio=-1)

        # an unpadded 3x3 convolution collapses each 3x3 pooled ROI to one value
        head = Conv2d(f4, 1, kernel_size=3, padding=0, bias=False)
        self.classifier = Sequential(head)
예제 #3
0
 def __init__(self):
     """Build the UpConv_7 stack: six unpadded 3x3 convs and a transposed conv.

     A single ``LeakyReLU(0.1)`` instance is shared by all activation slots,
     and a ``ZeroPad2d`` of ``self.offset`` pixels is kept on ``self.pad``.
     """
     super(UpConv_7, self).__init__()
     self.act_fn = nn.LeakyReLU(0.1, inplace=False)
     self.offset = 7  # because of 0 padding
     from torch.nn import ZeroPad2d
     self.pad = ZeroPad2d(self.offset)

     # channel count before and after each 3x3 convolution
     widths = (3, 16, 32, 64, 128, 128, 256)
     layers = []
     for c_in, c_out in zip(widths[:-1], widths[1:]):
         # positional args: kernel_size=3, stride=1, padding=0
         layers.append(nn.Conv2d(c_in, c_out, 3, 1, 0))
         layers.append(self.act_fn)
     layers.append(
         nn.ConvTranspose2d(256,
                            3,
                            kernel_size=4,
                            stride=2,
                            padding=3,
                            bias=False))
     self.Sequential = nn.Sequential(*layers)
    def __init__(self, 
                 shape,
                 list_zernike_ft,
                 list_zernike_direct,
                 padding_coeff = 0., 
                 deformation = 'single',
                 features = None):
        """Configure the aberration model: padding, deformation and Zernike layers.

        :param shape: stored on ``self.nxy``; multiplied by ``padding_coeff``
            to get the padding width.
        :param list_zernike_ft: either a sequence (list, tuple, range or
            ndarray) of polynomial indices, or a count ``n`` meaning
            ``range(0, n)``. One ``ComplexZernike(j=index + 1)`` is created
            per index for ``self.zernike_ft``.
        :param list_zernike_direct: same convention, for ``self.zernike_direct``.
        :param padding_coeff: fraction of ``shape`` used as zero padding on
            each side.
        :param deformation: ``'single'`` selects ``ComplexDeformation``,
            ``'scaling'`` selects ``ComplexScaling``, anything else ``Identity``.
        :param features: accepted but not used by this constructor.
        """
        super(Aberration, self).__init__()

        # Sequences of indices are used as given; anything else is taken to be
        # a count. isinstance (rather than an exact type() match) also lets
        # tuples, ranges and list/ndarray subclasses through instead of
        # crashing inside range().
        index_containers = (list, tuple, range, np.ndarray)
        if not isinstance(list_zernike_direct, index_containers):
            list_zernike_direct = range(0, list_zernike_direct)
        if not isinstance(list_zernike_ft, index_containers):
            list_zernike_ft = range(0, list_zernike_ft)

        self.nxy = shape

        # padding layer, to have a good FFT resolution
        # (requires to crop after IFFT)
        padding = int(padding_coeff * self.nxy)
        self.pad = ZeroPad2d(padding)

        # deformation applied to x, y
        if deformation == 'single':
            self.deformation = ComplexDeformation()
        elif deformation == 'scaling':
            self.deformation = ComplexScaling()
        else:
            self.deformation = Identity()

        # indices are 0-based here, ComplexZernike receives j = index + 1
        self.zernike_ft = Sequential(*(ComplexZernike(j=j + 1) for j in list_zernike_ft))
        self.zernike_direct = Sequential(*(ComplexZernike(j=j + 1) for j in list_zernike_direct))
예제 #5
0
	def __init__(self, N_side_in, **kwargs):
		"""Set up canvas geometry, the operation table and the canvas noise model.

		:param N_side_in: side length of the square canvas.
		:param kwargs: optional ``canv_dist``, either ``'bernoulli'`` (default)
			or ``'beta'``; any other value fails the assertion below.
		"""
		self.N_side = N_side_in
		self.canv_shape = (self.N_side, self.N_side)

		# operation name -> callable implementing it
		operations = {
			'union' : union,
			'rect' : self.primitive_rect
		}
		self.op_dict = operations

		self.op_str_list = list(operations)
		self.N_ops = len(self.op_str_list)
		self.N_non_primitive_ops = 1
		self.N_params = 4

		self.zero_pad = ZeroPad2d(1)
		# Beta distribution with per-pixel concentrations 0.05 and 0.45
		concentration1 = 0.05*torch.ones(self.canv_shape)
		concentration0 = 0.45*torch.ones(self.canv_shape)
		self.peaky_noise = Beta(concentration1, concentration0)

		self.canv_dist = kwargs.get('canv_dist', 'bernoulli')

		assert self.canv_dist in ['bernoulli', 'beta'], 'Canv dist must be either bernoulli or beta!'

		# canvas distribution -> name of the matching noise method
		self.noise_method = {
			'bernoulli' : 'bern',
			'beta' : 'peaky_blur',
		}[self.canv_dist]
예제 #6
0
파일: util.py 프로젝트: Dajamante/event_cnn
    def __init__(self, width, height, num_encoders, safety_margin=0):
        """Compute padding and crop indices between the raw and the padded size.

        The padded ("crop") size of each axis comes from
        ``optimal_crop_size``. The extra pixels are split between both sides,
        the top/left side getting the larger half when the difference is odd.

        :param width: original width.
        :param height: original height.
        :param num_encoders: forwarded to ``optimal_crop_size``.
        :param safety_margin: forwarded to ``optimal_crop_size``.
        """
        self.height = height
        self.width = width
        self.num_encoders = num_encoders
        self.width_crop_size = optimal_crop_size(self.width, num_encoders,
                                                 safety_margin)
        self.height_crop_size = optimal_crop_size(self.height, num_encoders,
                                                  safety_margin)

        # half of the extra rows / columns that have to be added
        half_extra_h = 0.5 * (self.height_crop_size - self.height)
        half_extra_w = 0.5 * (self.width_crop_size - self.width)

        self.padding_top = ceil(half_extra_h)
        self.padding_bottom = floor(half_extra_h)
        self.padding_left = ceil(half_extra_w)
        self.padding_right = floor(half_extra_w)
        # ZeroPad2d expects (left, right, top, bottom)
        pad_spec = (self.padding_left, self.padding_right,
                    self.padding_top, self.padding_bottom)
        self.pad = ZeroPad2d(pad_spec)

        # centre of the padded frame
        self.cx = floor(self.width_crop_size / 2)
        self.cy = floor(self.height_crop_size / 2)

        # bounds of a width x height window centred on (cx, cy)
        self.ix0 = self.cx - floor(self.width / 2)
        self.ix1 = self.cx + ceil(self.width / 2)
        self.iy0 = self.cy - floor(self.height / 2)
        self.iy1 = self.cy + ceil(self.height / 2)
예제 #7
0
    def forward(self, phonemes, spectrograms, len_phonemes, training=False):
        """Run text encoder, audio encoder, attention and audio decoder.

        The spectrograms are shifted by one position along dim 1 (a zero row
        is prepended and the last row dropped) before being encoded.

        :param phonemes: (batch, alphabet, time), padded phonemes
        :param spectrograms: (batch, freq, time), padded spectrograms
            NOTE(review): the shift below acts on dim 1 -- confirm the axis
            order stated here.
        :param len_phonemes: list of phoneme lengths
        :param training: not used by this method
        :return: decoded_spectrograms, attention_weights
        """
        shift_one = ZeroPad2d((0, 0, 1, 0))
        spectrs = shift_one(spectrograms)[:, :-1, :]  # move this to encoder?

        keys, values = self.txt_encoder(phonemes)
        queries = self.audio_encoder(spectrs)

        mask_shape = (len(keys), queries.shape[1], keys.shape[1])
        att_mask = mask(shape=mask_shape, lengths=len_phonemes, dim=-1)
        att_mask = att_mask.to(self.device)

        if hp.positional_encoding:
            # added in place, as in the original implementation
            key_pe = positional_encoding(keys.shape[-1], keys.shape[1], w=hp.w)
            keys += key_pe.to(self.device)
            query_pe = positional_encoding(queries.shape[-1],
                                           queries.shape[1],
                                           w=1)
            queries += query_pe.to(self.device)

        attended, weights = self.attention(queries, keys, values, mask=att_mask)
        # residual connection around the attention block
        decoded = self.audio_decoder(attended + queries)
        return decoded, weights
예제 #8
0
    def __init__(self, file_shape: tuple, output_filters=8, output_channels=2):
        """Build the eight-stage down / seven-stage up generator and its output head.

        :param file_shape: input shape; only ``file_shape[0]`` is read, as the
            input channel count of the first down block.
        :param output_filters: base channel width ``f``; deeper stages use
            multiples of it.
        :param output_channels: channel count produced by the final convolution.
        """
        super(Generator, self).__init__()

        f = output_filters

        # DownSampling: widths f, 2f, 4f, then 8f for the remaining stages
        self.down1 = UNetDown(file_shape[0], f, normalize=False)
        self.down2 = UNetDown(f, f * 2)
        self.down3 = UNetDown(f * 2, f * 4)
        self.down4 = UNetDown(f * 4, f * 8)
        self.down5 = UNetDown(f * 8, f * 8)
        self.down6 = UNetDown(f * 8, f * 8)
        self.down7 = UNetDown(f * 8, f * 8)
        self.down8 = UNetDown(f * 8, f * 8)

        # UpSampling: from up2 on, each input is twice the previous output
        # width (presumably skip concatenation in forward -- TODO confirm)
        self.up1 = UNetUp(f * 8, f * 8)
        self.up2 = UNetUp(f * 16, f * 8)
        self.up3 = UNetUp(f * 16, f * 8)
        self.up4 = UNetUp(f * 16, f * 8)
        self.up5 = UNetUp(f * 16, f * 4)
        self.up6 = UNetUp(f * 8, f * 2)
        self.up7 = UNetUp(f * 4, f)

        # output head: upsample, pad one row on top, 4x4 conv, sigmoid
        head = [
            Upsample(scale_factor=(2, 4)),
            ZeroPad2d((0, 0, 1, 0)),
            Conv2d(f * 2,
                   output_channels,
                   kernel_size=(4, 4),
                   stride=1,
                   padding=(1, 0)),
            Sigmoid(),
        ]
        self.last = nn.Sequential(*head)
예제 #9
0
    def forward(self, data):
        """Score each row of ``data`` with the CNN + LSTM classifier.

        :param data: tensor whose dim 1 has exactly 5000 entries. The first
            1000 go through ``self.embedder1`` and the three ``cnn1_*``
            branches, the remaining 4000 through ``self.embedder2`` and
            ``self.cnn2``. (Presumably integer token ids -- TODO confirm.)
        :return: sigmoid of ``self.lin3`` applied to the final hidden state of
            the last LSTM layer.
        """
        x_name, x_behavior = data.split([1000, 4000], 1)

        x_name = self.embedder1(x_name)
        x_behavior = self.embedder2(x_behavior)

        # add a singleton axis at dim 1 so the 2-D convolutions can be applied
        x_name = x_name.unsqueeze(1)
        x_behavior = x_behavior.unsqueeze(1)

        # only the second branch is padded here: 2 rows on top, 1 at the bottom
        pad = ZeroPad2d(padding=(0, 0, 2, 1))
        x_name_pad = pad(x_name)

        x_name_cnn1 = F.relu(self.cnn1_1(x_name)).squeeze(-1).permute(0, 2, 1)
        x_name_cnn2 = F.relu(self.cnn1_2(x_name_pad)).squeeze(-1).permute(
            0, 2, 1)
        x_name_cnn3 = F.relu(self.cnn1_3(x_name)).squeeze(-1).permute(0, 2, 1)

        x_behavior = F.relu(self.cnn2(x_behavior)).squeeze(-1).permute(0, 2, 1)

        # join the four branches along the last axis
        x = torch.cat([x_name_cnn1, x_name_cnn2, x_name_cnn3, x_behavior],
                      dim=-1)

        # only the final hidden state is used; sequence output and cell state
        # are discarded
        _, (h_n, _) = self.lstm(x)
        x = h_n[-1, :, :]

        x = F.relu(self.lin1(x))
        x = F.dropout(x, p=0.2, training=self.training)
        x = F.relu(self.lin2(x))
        x = F.dropout(x, p=0.2, training=self.training)
        # torch.sigmoid instead of F.sigmoid, which warns as deprecated on
        # older PyTorch releases
        x = torch.sigmoid(self.lin3(x))

        return x
예제 #10
0
    def __init__(self, in_ch, out_ch):
        """Build the up-sampling block: 2x nearest upsample, pad, 4x4 conv, ReLU, instance norm.

        :param in_ch: input channels of the convolution.
        :param out_ch: output channels of the convolution and width of the norm.
        """
        super(Upward, self).__init__()

        stages = [
            UpsamplingNearest2d(scale_factor=2),
            # (left, right, top, bottom): 3 extra pixels per axis for the
            # stride-1 4x4 convolution
            ZeroPad2d((1, 2, 1, 2)),
            Conv2d(in_ch, out_ch, kernel_size=4, stride=1),
            ReLU(inplace=True),
            InstanceNorm2d(out_ch),
        ]
        self.depool = Sequential(*stages)
예제 #11
0
 def end_block(self):
     """Return the final block: depthwise (1, 8) conv, width padding, 1x1 conv, activation, gate, dropout."""
     stages = [
         # groups == channels, i.e. one (1, 8) filter per channel
         Conv2d(in_channels=256, out_channels=256, kernel_size=(1, 8), groups=256),
         # 4 columns on the left and 3 on the right, no padding on the rows
         ZeroPad2d((4, 3, 0, 0)),
         Conv2d(in_channels=256, out_channels=512, kernel_size=(1, 1)),
         self.activation(),
         Gate(),
         Dropout(p=0.4),
     ]
     return Sequential(*stages)
예제 #12
0
    def __init__(self, in_ch, out_ch, stride=2, normalise=True):
        """Build a pad -> 4x4 conv -> LeakyReLU block with optional instance norm.

        :param in_ch: input channels of the convolution.
        :param out_ch: output channels of the convolution.
        :param stride: stride of the convolution.
        :param normalise: when truthy, an ``InstanceNorm2d`` is appended under
            the name ``'instance_norm'``.
        """
        super(ConvBlock, self).__init__()

        stages = [
            ZeroPad2d((1, 2, 1, 2)),  # (left, right, top, bottom)
            Conv2d(in_ch, out_ch, kernel_size=4, stride=stride),
            LeakyReLU(negative_slope=0.2, inplace=True),
        ]
        self.pool = Sequential(*stages)

        if normalise:
            self.pool.add_module('instance_norm', InstanceNorm2d(out_ch))
예제 #13
0
    def __init__(self, in_ch, out_ch, normalise=True):
        """Build a pad -> 4x4 stride-2 conv -> LeakyReLU block with optional batch norm.

        :param in_ch: input channels of the convolution.
        :param out_ch: output channels of the convolution.
        :param normalise: when truthy, a ``BatchNorm2d(momentum=0.8)`` is
            appended under the name ``'batch_norm'``.
        """
        super(Downward, self).__init__()

        stages = [
            ZeroPad2d((1, 2, 1, 2)),  # (left, right, top, bottom)
            Conv2d(in_ch, out_ch, kernel_size=4, stride=2),
            LeakyReLU(negative_slope=0.2, inplace=True),
        ]
        self.pool = Sequential(*stages)

        if normalise:
            norm = BatchNorm2d(out_ch, momentum=0.8)
            self.pool.add_module('batch_norm', norm)
예제 #14
0
    def __init__(self, base_filters=64):
        """Build the PatchGAN: four ``ConvBlock`` stages and a 1-channel validity head.

        :param base_filters: output channels of the first block; each later
            block doubles the width, up to 8x.
        """
        super(PatchGAN, self).__init__()

        f = base_filters

        # the first block takes a single input channel and skips normalisation
        self.down1 = ConvBlock(1, f, normalise=False)
        self.down2 = ConvBlock(f, 2 * f)
        self.down3 = ConvBlock(2 * f, 4 * f)
        self.down4 = ConvBlock(4 * f, 8 * f)

        head = [
            ZeroPad2d((1, 2, 1, 2)),
            Conv2d(8 * f, 1, kernel_size=4, stride=1),
        ]
        self.validity = Sequential(*head)
예제 #15
0
    def __init__(self, nb_classes, nb_channels, base_filters=16):
        """Build the PatchGAN: three ``Downward`` stages, a padding layer and a sigmoid head.

        :param nb_classes: added to ``nb_channels`` to get the input channel
            count of the first block.
        :param nb_channels: see ``nb_classes``.
        :param base_filters: output channels of the first block; later blocks
            use 2x and 4x this value.
        """
        super(PatchGAN, self).__init__()

        f = base_filters
        in_ch = nb_classes + nb_channels

        # the first stage is built without normalisation
        self.down1 = Downward(in_ch, f, normalise=False)
        self.down2 = Downward(f, 2 * f)
        self.down3 = Downward(2 * f, 4 * f)

        # kept as its own attribute, i.e. not part of ``self.validity``
        self.padding = ZeroPad2d((1, 2, 1, 2))
        head = [Conv2d(4 * f, 1, kernel_size=4, stride=1), Sigmoid()]
        self.validity = Sequential(*head)
def get_transform():
    """Return a function that tiles an image into a batch and augments it.

    The returned ``transform_fn(image, batch_size)`` repeats ``image``
    ``batch_size`` times along dim 0, then applies a 150-pixel zero padding
    followed by a random affine transform (rotation within +/-20 degrees,
    translation up to 0.25, scale between 1.1 and 1.5).
    """
    pipeline = nn.Sequential(
        ZeroPad2d(150),
        RandomAffine(degrees=(-20, 20),
                     translate=(0.25, 0.25),
                     scale=(1.1, 1.5)),
    )

    def transform_fn(image, batch_size):
        # repeat only along dim 0; the other three dims keep their size
        batch = image.repeat(batch_size, 1, 1, 1)
        return pipeline(batch)

    return transform_fn
예제 #17
0
	def __init__(self, *args, **kwargs):
		"""Load the dataset through the parent class, then preprocess it in memory.

		After the parent constructor has filled ``self.data`` and
		``self.targets``, the data gets an extra axis at dim 1, is converted
		to float and divided by 255, zero-padded by 2 on every side of its
		last two dims, and finally standardised with its own mean and
		standard deviation. Targets are cast to ``long``.

		:param args: forwarded unchanged to the parent constructor.
		:param kwargs: forwarded unchanged to the parent constructor.
		"""
		super().__init__(*args, **kwargs)

		self.data = self.data.unsqueeze(1).float().div(255)
		from torch.nn import ZeroPad2d
		# Pad the whole batch in one call; this gives the same tensor as
		# padding each sample separately and stacking the results, without
		# the per-sample Python loop.
		self.data = ZeroPad2d(2)(self.data)

		self.targets = self.targets.long()

		# statistics are taken over the padded data
		self.data = self.data.sub_(self.data.mean()).div_(self.data.std())
		# self.data = self.data.sub_(0.1307).div_(0.3081)
		print('MNIST data shape {}, targets shape {}'.format(self.data.shape, self.targets.shape))
예제 #18
0
    def __init__(self, file_shape: tuple, output_filters=8):
        """Build the discriminator: four ``build_block`` groups and a 1-channel conv head.

        :param file_shape: input shape; only ``file_shape[0]`` is read, and
            it is doubled to get the input channel count of the first block.
        :param output_filters: base channel width ``f``; the blocks produce
            ``f``, ``2f``, ``4f`` and ``8f`` channels.
        """
        super(Discriminator, self).__init__()

        f = output_filters

        layers = []
        # the first block is built without normalisation
        layers.extend(self.build_block(file_shape[0] * 2, f, normalization=False))
        layers.extend(self.build_block(f, f * 2))
        layers.extend(self.build_block(f * 2, f * 4))
        layers.extend(self.build_block(f * 4, f * 8))
        # one extra row on top, then a (4, 1) convolution down to one channel
        layers.append(ZeroPad2d((0, 0, 1, 0)))
        layers.append(
            Conv2d(f * 8, 1, kernel_size=(4, 1), padding=(1, 0), stride=1))

        self.model = Sequential(*layers)
예제 #19
0
    def __init__(self, nb_classes, nb_channels, base_filters=32):
        """Build the generator: four ``Downward`` stages, three ``Upward`` stages, output conv.

        :param nb_classes: input channel count of the first down stage.
        :param nb_channels: channel count produced by the output convolution.
        :param base_filters: base channel width ``f`` of the network.
        """
        super(Generator, self).__init__()

        f = base_filters

        # encoder: f, 2f, 4f, 8f channels
        self.down1 = Downward(nb_classes, f, normalise=False)
        self.down2 = Downward(f, 2 * f)
        self.down3 = Downward(2 * f, 4 * f)
        self.down4 = Downward(4 * f, 8 * f)

        # decoder: up2 and up3 take twice the previous stage's output width
        # (presumably skip concatenation in forward -- TODO confirm)
        self.up1 = Upward(8 * f, 4 * f)
        self.up2 = Upward(8 * f, 2 * f)
        self.up3 = Upward(4 * f, f)

        head = [
            UpsamplingNearest2d(scale_factor=2),
            ZeroPad2d((1, 1, 1, 1)),
            Conv2d(2 * f, nb_channels, kernel_size=3, stride=1),
            Tanh(),
        ]
        self.out_conv = Sequential(*head)
    def forward(ctx, input, selector, weight, bias, stride=1):
        """Forward pass of the switched convolution through the naive CUDA kernel.

        :param ctx: autograd context; receives ``stride``, ``breadth`` and the
            tensors saved for backward.
        :param input: input tensor; zero-padded by ``weight.shape[-1] // 2``
            on every side of its last two dims before the kernel call.
        :param selector: 4-D tensor; its argmax over dim 1 forms the integer
            mask handed to the kernel.
        :param weight: passed to the kernel; its last dim sets the padding.
        :param bias: passed to the kernel unchanged.
        :param stride: passed to the kernel and stored on ``ctx``.
        :return: output of ``switched_conv_cuda_naive.forward``.
        """
        # Pre-pad the input.
        half_kernel = weight.shape[-1] // 2
        padded = ZeroPad2d(half_kernel)(input)

        # Build hard attention mask from selector input. The unpacking also
        # requires the selector to have exactly four dims.
        _, breadth, _, _ = selector.shape
        mask = selector.argmax(dim=1).int()

        import switched_conv_cuda_naive
        output = switched_conv_cuda_naive.forward(padded, mask, weight, bias,
                                                  stride)

        ctx.stride = stride
        ctx.breadth = breadth
        # the padded input and a detached copy of the output are saved
        ctx.save_for_backward(padded, output.detach().clone(), mask, weight,
                              bias)
        return output
예제 #21
0
    def create_module(self):
        """Build the stack of padded 2-D convolutions described by this config.

        ``self.kernel_size``, ``self.activation`` and ``self.dropout`` may each
        be a single value (reused for every layer) or an iterable with one
        entry per layer. A ``None`` entry in ``self.dims`` inherits the width
        of the preceding layer; the input width is ``prod(self.pre_dim)``.

        Each layer is: permute to channels-first, zero padding of
        ``kernel_size - 1`` pixels per axis (extra pixel at the bottom/right
        when that is odd), convolution, permute back to channels-last,
        optional activation, dropout.

        :return: a ``Sequential`` holding all layers.
        """
        dims = list(self.dims)  # convert or copy
        n_layers = len(dims)

        def per_layer(setting):
            # an iterable is used as given, a single value is repeated
            if isinstance(setting, Iterable):
                return setting
            return [setting] * n_layers

        kernel_sizes = per_layer(self.kernel_size)
        activations = per_layer(self.activation)
        dropouts = per_layer(self.dropout)

        dims.insert(0, prod(self.pre_dim))
        # resolve None widths from left to right
        for pos in range(1, len(dims)):
            if dims[pos] is None:
                dims[pos] = dims[pos - 1]

        layers = []
        layer_specs = zip(dims[:-1], dims[1:], kernel_sizes, activations,
                          dropouts)
        for c_in, c_out, k_size, act, p_drop in layer_specs:
            half = (float(k_size) - 1) / 2
            pad_lo = int(np.floor(half))  # top and left
            pad_hi = int(np.ceil(half))  # bottom and right
            layers += [
                Permute(0, 3, 1, 2),  # (b,l,l,c) -> (b,c,l,l)
                ZeroPad2d(padding=(pad_lo, pad_hi, pad_lo, pad_hi)),
                Conv2d(in_channels=c_in,
                       out_channels=c_out,
                       kernel_size=k_size),
                Permute(0, 2, 3, 1),  # (b,c,l,l) -> (b,l,l,c)
            ]
            if act is not None:
                layers.append(act)
            layers.append(Dropout(p_drop))

        return Sequential(*layers)
예제 #22
0
    def __init__(self, input_size: int, output_filters: int, dropout=0.0):
        """Build the up-sampling block: upsample rows 2x, pad, (4, 1) conv, ReLU, batch norm.

        :param input_size: input channels of the convolution.
        :param output_filters: output channels of the convolution.
        :param dropout: when truthy, a ``Dropout`` with this probability is
            appended under the name ``"Dropout"``.
        """
        super(UNetUp, self).__init__()

        stages = [
            Upsample(scale_factor=(2, 1)),
            ZeroPad2d((0, 0, 1, 0)),  # one extra row on top
            Conv2d(input_size,
                   output_filters,
                   kernel_size=(4, 1),
                   stride=1,
                   padding=(1, 0),
                   bias=False),
            ReLU(inplace=True),
            BatchNorm2d(output_filters, momentum=0.8),
        ]
        self.model = Sequential(*stages)

        if dropout:
            self.model.add_module("Dropout", Dropout(dropout))
    def __init__(self, pad_size, pre_pad=False, pad_mode='zero'):
        """
            Select the padding module (reflection or zero) used when padding
            and mapping to complex.
            Parameters
            ----------
            pad_size : int
                size of padding to apply.
            pre_pad : boolean
                stored on ``self.pre_pad``; per the original documentation,
                when true there is no padding and only the imaginary part is
                added (not enforced in this constructor).
            pad_mode : str
                the exact string 'Reflect' selects reflection padding; any
                other value, including the default 'zero', selects zero
                padding.
        """
        self.pre_pad = pre_pad
        pad_cls = ReflectionPad2d if pad_mode == 'Reflect' else ZeroPad2d
        self.padding_module = pad_cls(pad_size)
예제 #24
0
    def init_network(self):
        """Create the actor-critic layers, the image encoder and the padding layer.

        The recurrent cell, the actor layer and both critic layers have all
        their parameters re-drawn uniformly from
        ``[self.uniform_init[0], self.uniform_init[1]]`` right after creation.
        The encoder keeps its own initialisation.
        """

        def uniformly_initialised(module):
            # overwrite every parameter of ``module`` in place, then hand it back
            for param in module.parameters():
                uniform_(param, self.uniform_init[0], self.uniform_init[1])
            return module

        hidden = self.hidden_size

        self.rnn = uniformly_initialised(LSTMCell(self.action_space, hidden))
        self.actor = uniformly_initialised(Linear(hidden, self.action_space))
        # the critic is two linear layers: hidden -> hidden // 2 -> 1
        self.middle_critic = uniformly_initialised(Linear(hidden, hidden // 2))
        self.critic = uniformly_initialised(Linear(hidden // 2, 1))

        self.encoder = resnet34(num_classes=self.embedding)

        # 30 columns on the left, 20 on the right, no padding on the rows
        self.padding = ZeroPad2d((30, 20, 0, 0))
예제 #25
0
def data(device="cpu"):
    """Create the ``ZeroPad2d`` test fixture: input, extended module, output and random vectors.

    :param device: device on which all tensors and the module are placed.
    :return: dict with the input ``X``, the extended ``module``, its
        ``output``, and random vectors shaped like the input (``vout_*``) and
        like the output (``vin_*``); the ``*_bp`` variants are reshaped to
        ``(N, -1, 1)``.
    """
    N, C, Hin, Win = 100, 10, 32, 32
    padding = [1, 2, 3, 4]
    # ZeroPad2d order is (left, right, top, bottom)
    pad_left, pad_right, pad_top, pad_bottom = padding
    Hout = Hin + pad_top + pad_bottom
    Wout = Win + pad_left + pad_right

    X = randn(N, C, Hin, Win, requires_grad=True, device=device)
    module = extend(ZeroPad2d(padding)).to(device=device)
    out = module(X)

    vout = randn(N, C, Hin, Win, device=device)
    vin = randn(N, C, Hout, Wout, device=device)

    fixture = {
        "X": X,
        "module": module,
        "output": out,
        "vout_ag": vout,
        "vout_bp": vout.view(N, -1, 1),
        "vin_ag": vin,
        "vin_bp": vin.view(N, -1, 1),
    }
    return fixture
예제 #26
0
    def __init__(self, base_filters=32):
        """Build the generator: input convs, four down stages, four up stages, output head.

        :param base_filters: base channel width ``f`` of the network.
        """
        super(Generator, self).__init__()

        f = base_filters

        # two stride-1 blocks lift the single input channel to f channels
        self.in_conv = Sequential(
            ConvBlock(1, f, stride=1),
            ConvBlock(f, f, stride=1))

        # encoder: 2f, 4f, 8f, 8f channels
        self.down1 = ConvBlock(f, 2 * f)
        self.down2 = ConvBlock(2 * f, 4 * f)
        self.down3 = ConvBlock(4 * f, 8 * f)
        self.down4 = ConvBlock(8 * f, 8 * f)

        # decoder: from up2 on, the input is twice the previous output width
        # (presumably skip concatenation in forward -- TODO confirm)
        self.up1 = Upward(8 * f, 8 * f)
        self.up2 = Upward(16 * f, 4 * f)
        self.up3 = Upward(8 * f, 2 * f)
        self.up4 = Upward(4 * f, f)

        head = [
            ConvBlock(2 * f, f, stride=1),
            ZeroPad2d((1, 2, 1, 2)),
            Conv2d(f, 1, kernel_size=4, stride=1),
            Tanh(),
        ]
        self.out_conv = Sequential(*head)
예제 #27
0
    def build_layer_dict(self) -> OrderedDict:
        """Assemble the ordered ``name -> module`` mapping for this block.

        Layer ``0`` is a convolution followed by the non-linearity. Every later
        layer ``ii`` is the same pair, preceded by an explicit ``padding_{ii}``
        ``ZeroPad2d`` when its padding is a 4-element list and
        ``self.padding_mode`` is ``'zeros'``; the convolution then gets
        ``padding=0``. Layer ``0`` never gets such a padding entry.

        This could be overwritten by derived layers
        (e.g. to get a 'BatchNormalizedConvolutionBlock').

        :return: Ordered dictionary of torch modules [str, nn.Module]
        """
        layers = OrderedDict()
        act_name = self.non_lin.__name__

        def add_conv_and_activation(idx, in_channels, padding):
            # one convolution plus its non-linearity, keyed by layer index
            layers[f"conv_{idx}"] = self.convolution_nn(
                in_channels=in_channels,
                out_channels=self.hidden_channels[idx],
                kernel_size=self.hidden_kernels[idx],
                stride=self.hidden_strides[idx],
                padding=padding,
                dilation=self.hidden_dilations[idx],
                padding_mode=self.padding_mode)
            layers[f"{act_name}_{idx}"] = self.non_lin()

        # the first layer reads from the block input
        add_conv_and_activation(0, self.input_channels, self.hidden_padding[0])

        # the remaining layers read from the previous hidden layer
        for ii in range(1, len(self.hidden_channels)):
            padding = self.hidden_padding[ii]
            needs_explicit_pad = (isinstance(padding, list)
                                  and len(padding) == 4
                                  and self.padding_mode == 'zeros')
            if needs_explicit_pad:
                layers[f'padding_{ii}'] = ZeroPad2d(padding)
                padding = 0
            add_conv_and_activation(ii, self.hidden_channels[ii - 1], padding)

        return layers
예제 #28
0
    def generate(self,
                 phonemes,
                 len_phonemes,
                 steps=False,
                 window=3,
                 spectrograms=None):
        """Sequentially generate spectrogram from phonemes

        If spectrograms are provided, they are used on input instead of self-generated frames (teacher forcing)
        If steps are provided with spectrograms, only 'steps' frames will be generated in supervised fashion
        Uses layer-level caching for faster inference.

        On entry the method calls ``self.generating(True)`` and
        ``self.train(False)``; before returning it calls
        ``self.generating(False)``. ``self.train`` is not called again, so the
        train flag stays off. All computation runs under ``torch.no_grad()``.

        :param phonemes: Padded phoneme indices
        :param len_phonemes: Length of each sentence in `phonemes` (list of lengths);
            only read when `window` is None
        :param steps: How many steps to generate; when falsy, ``spectrograms.shape[1]`` is used
        :param window: Window size for attention masking; None selects a
            length-based mask that stays fixed for all steps
        :param spectrograms: Padded spectrograms
        :return: Tuple ``(decoded, weights)``: the per-step decoder outputs and
            attention weights, each concatenated along dim 1
        :raises AssertionError: if `steps` is falsy and `spectrograms` is None
        """
        self.generating(True)
        self.train(False)

        # at least one of the two must fix the number of frames
        assert steps or (spectrograms is not None)
        steps = steps if steps else spectrograms.shape[1]

        with torch.no_grad():
            phonemes = torch.as_tensor(phonemes)
            keys, values = self.txt_encoder(phonemes)

            if hp.positional_encoding:
                # added to the keys in place
                keys += positional_encoding(keys.shape[-1],
                                            keys.shape[1],
                                            w=hp.w).to(self.device)
                # encoding for the queries, indexed per step inside the loop;
                # `pe` only exists when hp.positional_encoding is set
                pe = positional_encoding(hp.channels, steps,
                                         w=1).to(self.device)

            if spectrograms is None:
                # free-running mode: start from one all-zero frame per sentence
                dec = torch.zeros(len(phonemes),
                                  1,
                                  hp.out_channels,
                                  device=self.device)
            else:
                # teacher forcing: prepend a zero row along dim 1 and drop the
                # last one, so step i is fed frame i - 1
                input = ZeroPad2d((0, 0, 1, 0))(spectrograms)[:, :-1, :]

            weights, decoded = None, None

            if window is not None:
                # windowed mask built from an index tensor that starts at zero;
                # `shape` only exists on this branch and is reused in the loop
                shape = (len(phonemes), 1, phonemes.shape[-1])
                idx = torch.zeros(len(phonemes), 1,
                                  phonemes.shape[-1]).to(phonemes.device)
                att_mask = idx_mask(shape, idx, window)
            else:
                # mask derived from the sentence lengths
                att_mask = mask(shape=(len(phonemes), 1, keys.shape[1]),
                                lengths=len_phonemes,
                                dim=-1).to(self.device)

            for i in range(steps):
                # query comes from the previous output (free-running) or from
                # the shifted ground-truth frame (teacher forcing)
                if spectrograms is None:
                    queries = self.audio_encoder(dec)
                else:
                    queries = self.audio_encoder(input[:, i:i + 1, :])

                if hp.positional_encoding:
                    queries += pe[i]

                att, w = self.attention(queries, keys, values, att_mask)
                # residual connection around the attention block
                dec = self.audio_decoder(att + queries)
                # accumulate attention weights and decoded frames along dim 1
                weights = w if weights is None else torch.cat(
                    (weights, w), dim=1)
                decoded = dec if decoded is None else torch.cat(
                    (decoded, dec), dim=1)
                if window is not None:
                    # rebuild the mask from the argmax of the current weights
                    idx = torch.argmax(w, dim=-1).unsqueeze(2).float()
                    att_mask = idx_mask(shape, idx, window)

        # NOTE(review): not reached if an exception is raised above, which
        # would skip this reset
        self.generating(False)
        return decoded, weights
예제 #29
0
         in_channels=32,
         out_channels=64,
         kernel_size=2,
         stride=2,
         max_alpha=0.),  # 1
]


def make_info_layer(idx):
    """Return ``(output_size, layer)`` for entry ``idx`` of ``INFO_ARGS``.

    The layer is an ``InformationDropoutLayer`` wrapping ``Conv2d``, built
    with all keyword arguments of that entry (``'output_size'`` included).
    """
    layer_kwargs = INFO_ARGS[idx]
    layer = InformationDropoutLayer(Conv2d, **layer_kwargs)
    return (layer_kwargs['output_size'], layer)


# Network definition as an ordered table: each entry pairs the expected output
# shape with the layer that produces it. The first entry has no layer and only
# records the input shape.
SHAPES_LAYERS = [  # (output_shape, layer) tuples. Cross-checked in `.forward`.
    ((2, 75, 75), None),  # Input layer
    ((2, 76, 76), ZeroPad2d((0, 1, 0, 1))),  # one extra column on the right, one extra row at the bottom
    make_info_layer(0),  # shape and layer both come from INFO_ARGS[0]
    # ((32, 36, 72), ZeroPad2d((0, 1, 0, 1))),
    ((32, 19, 19), MaxPool2d(kernel_size=2, stride=2)),
    ((32, 20, 20), ZeroPad2d((0, 1, 0, 1))),  # pad 19x19 up to 20x20
    make_info_layer(1),  # shape and layer both come from INFO_ARGS[1]
    ((64, 5, 5), MaxPool2d(kernel_size=2, stride=2)),
    ((1600, ), Flatten()),  # 64 * 5 * 5 = 1600
    ((2, ), Linear(in_features=1600, out_features=2)),  # two output values
]


class BentesModel(Module):
    """Implementation of Bentes et.al 2016"""
    def __init__(self) -> None:
        # initialise the base ``Module`` before anything else is attached
        super(BentesModel, self).__init__()
예제 #30
0
 def __init__(self, in_f, f):
     super(ConvStride2NormRelu, self).__init__()
     
     self.zero = ZeroPad2d((1, 0, 1, 0))
     self.conv = Conv2d(in_f, f, 3, stride=2, bias=False)
     self.bnorm = BatchNorm2d(f, eps=1e-03)