Example #1
    def __init__(self,
                 size_vocab,
                 size,
                 depth=1,
                 recur_depth=1,
                 bidirectional=False,
                 filter_length=6,
                 filter_size=64,
                 stride=2,
                 drop_i=0.75,
                 drop_s=0.25):
        super(Encoder, self).__init__()
        util.autoassign(locals())
        self.h0 = torch.autograd.Variable(torch.zeros(self.depth, 1,
                                                      self.size))

        self.Conv = conv.Convolution1D(self.size_vocab,
                                       self.filter_length,
                                       self.filter_size,
                                       stride=self.stride)
        # self.RNN = nn.GRU(self.filter_size, self.size, self.depth, batch_first=True)
        self.RNN = stacked_gru.StackedGRU(self.filter_size,
                                          self.size,
                                          self.depth,
                                          bidirectional=bidirectional,
                                          residual=True,
                                          batch_first=True)
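
A minimal, self-contained sketch of the same conv-then-GRU pattern using plain torch.nn (the project's conv.Convolution1D and stacked_gru.StackedGRU wrappers are not part of this listing, so the input layout and default sizes here are assumptions):

    import torch
    import torch.nn as nn

    class EncoderSketch(nn.Module):
        def __init__(self, size_vocab=13, size=512, depth=1,
                     filter_length=6, filter_size=64, stride=2):
            super().__init__()
            # nn.Conv1d expects (batch, channels, time)
            self.conv = nn.Conv1d(size_vocab, filter_size, filter_length,
                                  stride=stride)
            self.rnn = nn.GRU(filter_size, size, depth, batch_first=True)

        def forward(self, x):                 # x: (batch, time, size_vocab)
            x = self.conv(x.transpose(1, 2)).transpose(1, 2)
            out, _ = self.rnn(x)              # (batch, time', size)
            return out

    enc = EncoderSketch()
    print(enc(torch.randn(2, 100, 13)).shape)   # torch.Size([2, 48, 512])
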
Example #2
    def __init__(self,
                 size_vocab,
                 size,
                 depth=1,
                 recur_depth=1,
                 filter_length=6,
                 filter_size=64,
                 stride=2,
                 drop_i=0.75,
                 drop_s=0.25,
                 residual=False,
                 seed=1):
        super(Encoder, self).__init__()
        util.autoassign(locals())
        #self.h0 = torch.autograd.Variable(torch.zeros(self.depth, 1, self.size))

        self.Conv = conv.Convolution1D(self.size_vocab,
                                       self.filter_length,
                                       self.filter_size,
                                       stride=self.stride)
        self.RHN = rhn.StackedRHNH0(self.filter_size,
                                    self.size,
                                    depth=self.depth,
                                    recur_depth=self.recur_depth,
                                    drop_i=self.drop_i,
                                    drop_s=self.drop_s,
                                    residual=self.residual,
                                    seed=self.seed)
Example #3
    def __init__(self,
                 input_size,
                 hidden_size,
                 num_layers,
                 residual=False,
                 bidirectional=False,
                 **kwargs):
        super(StackedGRU, self).__init__()
        assert num_layers > 0
        util.autoassign(locals())
        self.bottom = nn.GRU(input_size,
                             hidden_size,
                             1,
                             bidirectional=bidirectional,
                             **kwargs)
        self.layers = nn.ModuleList()
        if bidirectional:
            self.downscale = nn.Linear(hidden_size * 2, hidden_size)

        for i in range(num_layers - 1):
            layer = nn.GRU(hidden_size,
                           hidden_size,
                           1,
                           bidirectional=self.bidirectional,
                           **kwargs)
            self.layers.append(layer)
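
The forward pass is not shown here; a plausible wiring, stated as an assumption rather than the project's code, is that each bidirectional layer's 2*hidden_size output is projected back to hidden_size by self.downscale before entering the next layer, with an optional residual sum:

    import torch

    def stacked_gru_forward(self, x):
        """Hypothetical StackedGRU.forward matching the constructor above."""
        out, _ = self.bottom(x)
        if self.bidirectional:
            out = self.downscale(out)        # 2*hidden_size -> hidden_size
        for layer in self.layers:
            new, _ = layer(out)
            if self.bidirectional:
                new = self.downscale(new)
            out = out + new if self.residual else new
        return out
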
Example #4
 def __init__(self, config):
     super(Segmatch, self).__init__()
     util.autoassign(locals())
      self.Encode = Encoder(**config['encoder'])
     self.ProjBeg = nn.Linear(config['segmatch']['size'], config['segmatch']['size_target'])
     self.ProjEnd = nn.Linear(config['segmatch']['size'], config['segmatch']['size_target'])
     self.optimizer = optim.Adam(self.parameters(), lr=config['segmatch']['lr'])
Example #5
 def __init__(self, size_feature, size, depth=1):
     super(Decoder, self).__init__()
     util.autoassign(locals())
     self.h0 = torch.autograd.Variable(torch.zeros(self.depth, 1,
                                                   self.size))
     self.RNN = nn.GRU(self.size, self.size, self.depth, batch_first=True)
     self.Proj = nn.Linear(self.size, self.size_feature)
Example #6
 def __init__(self, size_in, length, size, stride=1, padding=None):
     super(Convolution1D, self).__init__()
     util.autoassign(locals())
     padding = padding if padding is not None else self.length
     self.Conv = nn.Conv1d(self.size_in, self.size, self.length, stride=self.stride, padding=padding, bias=False)
     # use Glorot uniform initialization
     self.Conv.weight.data = init.glorot_uniform((self.size, self.size_in, self.length, 1)).squeeze()
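
Glorot uniform initialization exists in PyTorch as Xavier uniform, so an equivalent without the custom init module (assuming init.glorot_uniform implements the standard scheme; the channel counts below are placeholders) is:

    import torch.nn as nn

    conv = nn.Conv1d(39, 64, 6, stride=2, padding=6, bias=False)
    nn.init.xavier_uniform_(conv.weight)   # Glorot uniform over (out, in, length)
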
Example #7
 def __init__(self, size):
     super(FixedZeros, self).__init__()
     util.autoassign(locals())
      self.zeros = torch.autograd.Variable(torch.zeros(self.size),
                                           requires_grad=True)
      if torch.cuda.is_available():
          # .cuda() returns a non-leaf copy, so this tensor's .grad stays None
          self.zeros = self.zeros.cuda()
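
In current PyTorch the Variable wrapper and the manual .cuda() call are unnecessary; if the zeros are meant to be trainable, declaring them as an nn.Parameter keeps them a leaf tensor and lets .to()/.cuda() on the module move them correctly. A sketch:

    import torch
    import torch.nn as nn

    class FixedZerosSketch(nn.Module):
        def __init__(self, size):
            super().__init__()
            # a Parameter moves with the module and accumulates gradients in .grad
            self.zeros = nn.Parameter(torch.zeros(size))

    m = FixedZerosSketch(8)
    m = m.to('cuda' if torch.cuda.is_available() else 'cpu')
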
Example #8
 def __init__(self,
              size_in,
              length,
              size,
              stride=1,
              padding=None,
              maxpool=False,
              relu=False):
     super(Convolution2D, self).__init__()
     util.autoassign(locals())
     padding = padding if padding is not None else self.length
     #self.Conv = nn.Conv2d(self.size_in, self.size, self.length,
     #                      stride=self.stride, padding=padding, bias=False)
     self.Conv = nn.Conv2d(self.size_in,
                           self.size,
                           self.length,
                           stride=self.stride,
                           padding=padding)
     # use Glorot uniform initialization
     # TODO: decide which initialization to use
     #self.Conv.weight.data = init.glorot_uniform((self.size, self.size_in,
     #                                             self.length, self.length))
     if self.relu:
         self.Relu = nn.ReLU(True)
     #FIXME what is the correct padding???
     if self.maxpool:
         self.Maxpool = nn.MaxPool2d(2, 2, ceil_mode=True)
Example #9
 def __init__(self, in_size, out_size, bias_init=None, init_scale=0.04):
     super(Linear, self).__init__()
     util.autoassign(locals())
     self.w = torch.nn.Parameter(
         self.make_param((self.in_size, self.out_size), 'uniform'))
     if bias_init is not None:
         self.b = torch.nn.Parameter(
             self.make_param((self.out_size, ), self.bias_init))
Example #10
 def __init__(self, size_in, size, depth=2, residual=False, fixed=False, **kwargs):
     super(StackedRHN, self).__init__()
     util.autoassign(locals())
      f = lambda x: Residual(x) if self.residual else x
      self.layers = torch.nn.ModuleList(
          [f(RHNH0(self.size, self.size, fixed=self.fixed, **self.kwargs))
           for _ in range(1, self.depth)])
      self.bottom = RHN(self.size_in, self.size, **self.kwargs)
      # fold the layers into a single callable, applied bottom-up
      self.stack = reduce(lambda z, x: Compose(x, z), self.layers, Identity())
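
reduce folds the layer list into a single callable applied bottom-up; the project's Compose and Identity classes are not in this listing, but the pattern itself is easy to reproduce (a standalone sketch):

    from functools import reduce   # reduce lives in functools on Python 3

    class Identity:
        def __call__(self, x):
            return x

    class Compose:
        def __init__(self, f, g):
            self.f, self.g = f, g
        def __call__(self, x):
            return self.f(self.g(x))   # apply g first, then f

    layers = [lambda x: x + 1, lambda x: x * 2]
    stack = reduce(lambda z, x: Compose(x, z), layers, Identity())
    print(stack(3))   # (3 + 1) * 2 == 8
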
Example #11
 def __init__(self, size, size_target_vocab, size_embed=64, depth=1):
     super(DecoderWithAttn, self).__init__()
     util.autoassign(locals())
     self.Decoder = SimpleDecoder(self.size_target_vocab,
                                  self.size,
                                  size_embed=self.size_embed,
                                  depth=self.depth)
     self.BAttn = BilinearAttention(self.size)
     self.Proj = nn.Linear(self.size * 2, self.size_target_vocab)
Example #12
 def __init__(self, mapper, pad_end=False, visual=True, erasure=(5,5), sigma=None, noise_tied=False, midpoint=False):
     autoassign(locals())
     self.BEG = self.mapper.BEG_ID
     self.END = self.mapper.END_ID
      try:
          self.gap_low = self.erasure[0]
          self.gap_high = self.erasure[1]
      except TypeError:
          # erasure was given as a single int rather than a (low, high) pair
          self.gap_low = self.erasure
          self.gap_high = self.erasure + 1
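
The try/except normalizes erasure so a single int n behaves like the pair (n, n + 1); the same logic as a small standalone helper (hypothetical, not part of the project):

    def normalize_erasure(erasure):
        """Return (gap_low, gap_high) from either an int or a (low, high) pair."""
        if isinstance(erasure, int):
            return erasure, erasure + 1
        return erasure[0], erasure[1]

    assert normalize_erasure(5) == (5, 6)
    assert normalize_erasure((3, 7)) == (3, 7)
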
Example #13
 def __init__(self, size_vocab, size, depth=1, dropout_p=0.0):
     super(SpeechEncoderBottomBidi, self).__init__()
     util.autoassign(locals())
     if self.depth > 0:
         self.Dropout = nn.Dropout(p=self.dropout_p)
         self.RNN = nn.GRU(self.size_vocab,
                           self.size,
                           self.depth,
                           batch_first=True,
                           bidirectional=True)
         self.Down = nn.Linear(self.size * 2, self.size)
Example #14
 def __init__(self, size_vocab, size, depth=1, dropout_p=0.0):
     super(SpeechEncoderBottomNoConv, self).__init__()
     util.autoassign(locals())
     if self.depth > 0:
         self.h0 = torch.autograd.Variable(
             torch.zeros(self.depth, 1, self.size))
         self.Dropout = nn.Dropout(p=self.dropout_p)
         self.RNN = nn.GRU(self.size_vocab,
                           self.size,
                           self.depth,
                           batch_first=True)
Example #15
 def __init__(self, size_in, length, size, stride=1):
     super(Convolution1D, self).__init__()
     util.autoassign(locals())
     self.Conv = nn.Conv1d(self.size_in,
                           self.size,
                           self.length,
                           stride=self.stride,
                           padding=self.length)
     # use Glorot uniform initialization
     self.Conv.weight.data = init.glorot_uniform(
         (self.size, self.size_in, self.length))
Example #16
 def __init__(self,
              size_input,
              size,
              depth=1,
              size_attn=512,
              dropout_p=0.0):
     super(SpeechEncoderTopStack, self).__init__()
     util.autoassign(locals())
     if self.depth > 0:
         self.Dropout = nn.Dropout(p=self.dropout_p)
         self.RNN = GRUStack(self.size_input, self.size, self.depth)
     self.Attn = attention.SelfAttention(self.size, size=self.size_attn)
Example #17
 def __init__(self, size_feature, size, size_embed=64, depth=1):
     super(SimpleDecoder, self).__init__()
     util.autoassign(locals())
     self.Embed = nn.Embedding(
         self.size_feature,
         self.size_embed)  # Why not share embeddings with encoder?
     self.h0 = torch.autograd.Variable(torch.zeros(self.depth, 1,
                                                   self.size))
     self.RNN = nn.GRU(self.size_embed,
                       self.size,
                       self.depth,
                       batch_first=True)
Example #18
    def __init__(self, config):
        super(Audio, self).__init__()
        util.autoassign(locals())
        self.Encode = Encoder(**config['encoder'])

        self.Decode1 = Decoder(config['audio']['size_feature'],
                               config['audio']['size'])

        self.Decode3 = Decoder(config['audio']['size_feature'],
                               config['audio']['size'])
        self.optimizer = optim.Adam(self.parameters(),
                                    lr=config['audio']['lr'])
Example #19
    def __init__(self, provider, tokenize=words, min_df=10, scale=True,
                 scale_input=False, scale_utt=False, batch_size=64,
                 shuffle=False, limit=None, limit_val=None, curriculum=False,
                 by_speaker=False, val_vocab=False, visual=True, erasure=5,
                 midpoint=False, sigma=None, noise_tied=False, speakers=None):
        autoassign(locals())
        self.data = {}
        self.mapper = IdMapper(min_df=self.min_df)
        self.scaler = StandardScaler() if scale else NoScaler()
        self.audio_scaler = InputScaler() if scale_input else NoScaler()
        self.speaker_encoder = LabelEncoder()
        parts = insideout(self.shuffled(arrange(
            provider.iterImages(split='train'),
            tokenize=self.tokenize,
            limit=limit,
            speakers=speakers)))
        parts_val = insideout(self.shuffled(arrange(
            provider.iterImages(split='val'),
            tokenize=self.tokenize,
            limit=limit_val)))
        # TRAINING
        if self.val_vocab:
            _ = list(self.mapper.fit_transform(parts['tokens_in'] + parts_val['tokens_in']))
            parts['tokens_in'] = self.mapper.transform(parts['tokens_in']) # FIXME UGLY HACK
        else:
            parts['tokens_in'] = self.mapper.fit_transform(parts['tokens_in'])

        parts['tokens_out'] = self.mapper.transform(parts['tokens_out'])
        parts['img'] = self.scaler.fit_transform(parts['img'])
        self.speaker_encoder.fit(parts['speaker']+parts_val['speaker'])
        parts['speaker_id'] = self.speaker_encoder.transform(parts['speaker'])
        if scale_input:
            parts['audio'] = self.audio_scaler.fit_transform(parts['audio'])
        elif scale_utt:
            parts['audio'] = scale_utterance(parts['audio'])

        self.data['train'] = outsidein(parts)

        # VALIDATION
        parts_val['tokens_in'] = self.mapper.transform(parts_val['tokens_in'])
        parts_val['tokens_out'] = self.mapper.transform(parts_val['tokens_out'])
        if self.visual:
            parts_val['img'] = self.scaler.transform(parts_val['img'])
        if scale_input:
            parts_val['audio'] = self.audio_scaler.transform(parts_val['audio'])
        elif scale_utt:
            parts_val['audio'] = scale_utterance(parts_val['audio'])
        parts_val['speaker_id'] = self.speaker_encoder.transform(parts_val['speaker'])
        self.data['valid'] = outsidein(parts_val)
        self.batcher = Batcher(self.mapper, pad_end=True, visual=visual,
                               erasure=erasure, sigma=sigma,
                               noise_tied=noise_tied, midpoint=midpoint)
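
The constructor follows the usual sklearn discipline: scalers and encoders are fitted on the training split only, then reused to transform validation data. In miniature (the data below is illustrative):

    from sklearn.preprocessing import StandardScaler

    scaler = StandardScaler()
    train_img = [[0.0, 1.0], [2.0, 3.0]]
    val_img = [[1.0, 1.0]]

    train_scaled = scaler.fit_transform(train_img)   # fit statistics on train
    val_scaled = scaler.transform(val_img)           # reuse them on validation
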
Example #20
    def __init__(self,
                 provider,
                 tokenize=words,
                 min_df=10,
                 scale=True,
                 scale_input=False,
                 batch_size=64,
                 shuffle=False,
                 limit=None,
                 curriculum=False,
                 val_vocab=False):
        autoassign(locals())
        self.data = {}
        self.mapper = IdMapper(min_df=self.min_df)
        self.scaler = StandardScaler() if scale else NoScaler()
        self.audio_scaler = InputScaler() if scale_input else NoScaler()

        parts = insideout(
            self.shuffled(
                arrange(provider.iterImages(split='train'),
                        tokenize=self.tokenize,
                        limit=limit)))
        parts_val = insideout(
            self.shuffled(
                arrange(provider.iterImages(split='val'),
                        tokenize=self.tokenize)))
        # TRAINING
        if self.val_vocab:
            _ = list(
                self.mapper.fit_transform(parts['tokens_in'] +
                                          parts_val['tokens_in']))
            parts['tokens_in'] = self.mapper.transform(
                parts['tokens_in'])  # FIXME UGLY HACK
        else:
            parts['tokens_in'] = self.mapper.fit_transform(parts['tokens_in'])

        parts['tokens_out'] = self.mapper.transform(parts['tokens_out'])
        parts['img'] = self.scaler.fit_transform(parts['img'])
        parts['audio'] = self.audio_scaler.fit_transform(parts['audio'])
        self.data['train'] = outsidein(parts)

        # VALIDATION
        parts_val['tokens_in'] = self.mapper.transform(parts_val['tokens_in'])
        parts_val['tokens_out'] = self.mapper.transform(
            parts_val['tokens_out'])
        parts_val['img'] = self.scaler.transform(parts_val['img'])
        parts_val['audio'] = self.audio_scaler.transform(parts_val['audio'])
        self.data['valid'] = outsidein(parts_val)
        self.batcher = Batcher(self.mapper, pad_end=False)
Example #21
 def __init__(self,
              size_vocab,
              size,
              nb_conv_layer=1,
              depth=1,
              filter_length=6,
              filter_size=[64],
              stride=2,
              dropout_p=0.0,
              relu=False,
              maxpool=False,
              bidirectional=False):
     super(SpeechEncoderBottom, self).__init__()
     util.autoassign(locals())
     layers = []
     size_in = self.size_vocab
     for i_conv in range(0, self.nb_conv_layer):
         layers.append(
             conv.Convolution1D(size_in,
                                self.filter_length,
                                self.filter_size[i_conv],
                                stride=self.stride,
                                maxpool=self.maxpool))
         if self.relu:
             layers.append(nn.ReLU(True))
         size_in = self.filter_size[i_conv]
     self.Conv = nn.Sequential(*layers)
     if self.depth > 0:
         # TODO: LSTM/GRU?
         if self.bidirectional:
             self.h0 = torch.autograd.Variable(
                 torch.zeros(self.depth * 2, 1, self.size))
             self.c0 = torch.autograd.Variable(
                 torch.zeros(self.depth * 2, 1, self.size))
         else:
             self.h0 = torch.autograd.Variable(
                 torch.zeros(self.depth, 1, self.size))
             self.c0 = torch.autograd.Variable(
                 torch.zeros(self.depth, 1, self.size))
         self.Dropout = nn.Dropout(p=self.dropout_p)
         # TODO: LSTM/GRU?
         #self.RNN = nn.GRU(self.filter_size[self.nb_conv_layer - 1],
         #                  self.size, self.depth, batch_first=True,
         #                  bidirectional=self.bidirectional)
         self.RNN = nn.LSTM(self.filter_size[self.nb_conv_layer - 1],
                            self.size,
                            self.depth,
                            batch_first=True,
                            bidirectional=self.bidirectional)
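
The h0/c0 shapes follow PyTorch's LSTM convention of (num_layers * num_directions, batch, hidden_size), which is why the bidirectional branch doubles the first dimension; a quick shape check:

    import torch
    import torch.nn as nn

    depth, size, batch = 2, 16, 4
    rnn = nn.LSTM(8, size, depth, batch_first=True, bidirectional=True)
    h0 = torch.zeros(depth * 2, batch, size)   # two directions per layer
    c0 = torch.zeros(depth * 2, batch, size)
    out, (hn, cn) = rnn(torch.randn(batch, 10, 8), (h0, c0))
    print(out.shape)   # torch.Size([4, 10, 32]); features doubled by direction
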
Example #22
 def __init__(self,
              in_size,
              out_size,
              bias_init='uniform',
              init_scale=0.04):
     super(Linear, self).__init__()
     util.autoassign(locals())
      self.layer = nn.Linear(in_size, out_size, bias=bias_init is not None)
      self.layer.weight.data.uniform_(-init_scale, init_scale)
      if isinstance(self.bias_init, numbers.Number):
          # a numeric bias_init means a constant bias: fill, don't sample
          self.layer.bias.data.fill_(bias_init)
      elif bias_init == 'uniform':
          self.layer.bias.data.uniform_(-init_scale, init_scale)
      elif bias_init is not None:
          raise AssertionError('unsupported bias_init scheme')
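
The distinction the numeric branch relies on: fill_ writes a constant into every entry, while uniform_ samples each entry from an interval:

    import torch

    b = torch.empty(4)
    b.fill_(0.5)             # constant init: tensor([0.5, 0.5, 0.5, 0.5])
    b.uniform_(-0.04, 0.04)  # random init: each entry drawn from [-0.04, 0.04)
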
Example #23
 def __init__(self,
              size_in,
              size,
              depth=2,
              residual=False,
              fixed=False,
              **kwargs):
     super(StackedRHNH0, self).__init__()
     util.autoassign(locals())
     self.layer = WithH0(StackedRHN(size_in,
                                    size,
                                    depth=depth,
                                    residual=residual,
                                    **kwargs),
                         fixed=fixed)
Example #24
 def __init__(self, size_in, size, recur_depth=1, drop_i=0.75, drop_s=0.25,
              init_T_bias=-2.0, init_H_bias='uniform', tied_noise=True,
              init_scale=0.04, seed=1):
     super(RHN, self).__init__()
     util.autoassign(locals())
     hidden_size = self.size
     self.LinearH = Linear(in_size=self.size_in, out_size=hidden_size, bias_init=self.init_H_bias)
     self.LinearT = Linear(in_size=self.size_in, out_size=hidden_size, bias_init=self.init_T_bias)
     self.recurH = nn.ModuleList()
     self.recurT = nn.ModuleList()
     for l in range(self.recur_depth):
         if l == 0:
             self.recurH.append(Linear(in_size=hidden_size, out_size=hidden_size))
             self.recurT.append(Linear(in_size=hidden_size, out_size=hidden_size))
         else:
             self.recurH.append(Linear(in_size=hidden_size, out_size=hidden_size, bias_init=self.init_H_bias))
             self.recurT.append(Linear(in_size=hidden_size, out_size=hidden_size, bias_init=self.init_T_bias))
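
The paired H/T Linear stacks implement the Recurrent Highway Network update (Zilly et al.): a transform gate t interpolates between a candidate h and the carried state s. One recurrence step, sketched under the standard coupled-gate formulation (an assumption; the forward pass is not in this listing):

    import torch

    def rhn_step(x, s, LinearH, LinearT, recurH, recurT):
        # only the first micro-layer sees the input x; deeper ones see s alone
        for l, (RH, RT) in enumerate(zip(recurH, recurT)):
            if l == 0:
                h = torch.tanh(LinearH(x) + RH(s))
                t = torch.sigmoid(LinearT(x) + RT(s))
            else:
                h = torch.tanh(RH(s))
                t = torch.sigmoid(RT(s))
            s = h * t + s * (1 - t)   # highway interpolation, carry = 1 - t
        return s
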
Example #25
 def __init__(self,
              size_feature,
              size,
              size_embed=64,
              depth=1,
              dropout_p=0.0):
     super(TextEncoderBottom, self).__init__()
     util.autoassign(locals())
     self.h0 = torch.autograd.Variable(torch.zeros(self.depth, 1,
                                                   self.size))
     self.Embed = nn.Embedding(self.size_feature, self.size_embed)
     self.Dropout = nn.Dropout(p=self.dropout_p)
     self.RNN = nn.GRU(self.size_embed,
                       self.size,
                       self.depth,
                       batch_first=True)
Example #26
 def __init__(self, config):
     super(Audio, self).__init__()
     util.autoassign(locals())
     self.margin_size = config.get('margin_size', 0.2)
     # FIXME FIXME ADD gradient clipping!
     #self.make_updater = lambda: optim.Adam(self.parameters(), lr=config['lr'])
     self.max_norm = config['max_norm']
     self.Encode = Encoder(config['size_vocab'],
                           config['size'],
                           filter_length=config.get('filter_length', 6),
                           filter_size=config.get('filter_size', 1024),
                           stride=config.get('stride', 3),
                           depth=config.get('depth', 1))
     self.Attn = attention.SelfAttention(config['size'],
                                         size=config.get('size_attn', 512))
     self.ProjBeg = nn.Linear(config['size'], config['size_target'])
     self.ProjEnd = nn.Linear(config['size'], config['size_target'])
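
margin_size suggests a max-margin ranking objective over matched pairs of projected embeddings; the loss itself is not in this listing, so the following cosine-based version is only a guess at the design:

    import torch
    import torch.nn.functional as F

    def ranking_loss(u, v, margin=0.2):
        """Bidirectional max-margin loss over cosine similarities (assumed)."""
        u, v = F.normalize(u, dim=1), F.normalize(v, dim=1)
        scores = u @ v.t()                   # (batch, batch) similarity grid
        pos = scores.diag()
        cost = (margin - pos.unsqueeze(1) + scores).clamp(min=0) \
             + (margin - pos.unsqueeze(0) + scores).clamp(min=0)
        cost.fill_diagonal_(0)               # ignore the matched pairs
        return cost.mean()
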
Example #27
 def __init__(self,
              size_feature,
              size,
              depth=1,
              size_attn=512,
              dropout_p=0.0):
     super(TextEncoderTop, self).__init__()
     util.autoassign(locals())
     if self.depth > 0:
         self.h0 = torch.autograd.Variable(
             torch.zeros(self.depth, 1, self.size))
         self.Dropout = nn.Dropout(p=self.dropout_p)
         self.RNN = nn.GRU(self.size_feature,
                           self.size,
                           self.depth,
                           batch_first=True)
     self.Attn = attention.SelfAttention(self.size, size=self.size_attn)
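
attention.SelfAttention is not included in this listing; a common reading of the size/size_attn signature is additive self-attention pooling over time, sketched here as an assumption:

    import torch
    import torch.nn as nn

    class SelfAttentionSketch(nn.Module):
        def __init__(self, size_in, size=512):
            super().__init__()
            self.score = nn.Sequential(nn.Linear(size_in, size), nn.Tanh(),
                                       nn.Linear(size, 1))

        def forward(self, x):                        # x: (batch, time, size_in)
            w = torch.softmax(self.score(x), dim=1)  # attention over time steps
            return (w * x).sum(dim=1)                # pooled: (batch, size_in)
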
Example #28
 def __init__(self,
              size_vocab,
              size,
              depth=1,
              filter_length=6,
              filter_size=64,
              stride=2,
              dropout_p=0.0):
     super(SpeechEncoderBottomStack, self).__init__()
     util.autoassign(locals())
     self.Conv = conv.Convolution1D(self.size_vocab,
                                    self.filter_length,
                                    self.filter_size,
                                    stride=self.stride)
     if self.depth > 0:
         self.Dropout = nn.Dropout(p=self.dropout_p)
         self.RNN = GRUStack(self.filter_size, self.size, self.depth)
Example #29
 def __init__(self,
              size_input,
              size,
              depth=1,
              size_attn=512,
              dropout_p=0.0):
     super(SpeechEncoderTopBidi, self).__init__()
     util.autoassign(locals())
     if self.depth > 0:
         self.Dropout = nn.Dropout(p=self.dropout_p)
         self.RNN = nn.GRU(self.size_input,
                           self.size,
                           self.depth,
                           batch_first=True,
                           bidirectional=True)
         self.Down = nn.Linear(self.size * 2, self.size)
     self.Attn = attention.SelfAttention(self.size, size=self.size_attn)
Example #30
    def __init__(self, config):
        super(Audio, self).__init__()
        util.autoassign(locals())
        # FIXME FIXME ADD gradient clipping!
        #self.make_updater = lambda: optim.Adam(self.parameters(), lr=config['lr'])
        self.max_norm = config['max_norm']
        self.Encode = Encoder(config['size_vocab'],
                              config['size'],
                              filter_length=config.get('filter_length', 6),
                              filter_size=config.get('filter_size', 1024),
                              stride=config.get('stride', 3),
                              depth=config.get('depth', 1),
                              residual=config.get('residual', False))
        self.Attn = attention.SelfAttention(config['size'],
                                            size=config.get('size_attn', 512))

        self.Decode1 = Decoder(config['size_vocab'], config['size'])

        self.Decode3 = Decoder(config['size_vocab'], config['size'])