Example #1
def iter_valid_batches(self):
    # Take the validation data in chunks of 20 batches, sort each chunk
    # by input length so padding within a batch is minimal, then yield
    # batches of batch_size.
    for bunch in util.grouper(self.data['valid'], self.batch_size * 20):
        bunch_sort = [
            bunch[i]
            for i in numpy.argsort([len(x['tokens_in']) for x in bunch])
        ]
        for item in util.grouper(bunch_sort, self.batch_size):
            yield self.batcher.batch(item)
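Note: every example on this page relies on util.grouper to split an iterable into fixed-size chunks. The helper itself is not shown here; a minimal sketch of the assumed behavior (successive lists of at most n items, with a possibly shorter final chunk, indexable as the examples require) might look like this:

import itertools

def grouper(iterable, n):
    # Yield successive lists of up to n items from iterable; the final
    # list may be shorter than n, which matches how the examples on this
    # page tolerate a ragged last batch.
    it = iter(iterable)
    while True:
        chunk = list(itertools.islice(it, n))
        if not chunk:
            return
        yield chunk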
Example #2
def iter_train_batches(self, reshuffle=False):
    # With curriculum learning enabled, sort the whole training set by
    # input length so shorter examples come first.
    if self.curriculum:
        data = [
            self.data['train'][i] for i in numpy.argsort(
                [len(x['tokens_in']) for x in self.data['train']])
        ]
    else:
        data = self.data['train']
    if self.by_speaker:
        # Yield speaker-homogeneous batches in random order.
        for x in randomized(by_speaker(self.batcher, data)):
            yield x
    else:
        if reshuffle:
            data = randomized(self.data['train'])
        # Sort within chunks of 20 batches to reduce padding per batch.
        for bunch in util.grouper(data, self.batch_size * 20):
            bunch_sort = [
                bunch[i]
                for i in numpy.argsort([len(x['tokens_in']) for x in bunch])
            ]
            for item in util.grouper(bunch_sort, self.batch_size):
                yield self.batcher.batch(item)
Example #3
def encode_sentences_SpeechText(task, audios, batch_size=128):
    # Run padded audio batches through the speech encoder stack
    # (bottom then top) and stack the resulting vectors.
    def predict(x):
        return task.SpeechText.SpeechEncoderTop(
            task.SpeechText.SpeechEncoderBottom(x))
    return numpy.vstack([
        predict(torch.autograd.Variable(torch.from_numpy(
            vector_padder(batch))).cuda()).data.cpu().numpy()
        for batch in util.grouper(audios, batch_size)
    ])
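The speech examples (here and in Examples #6, #8, #9, and #11 through #14) pad variable-length audio with vector_padder before stacking. That helper is also not shown on this page; a hypothetical sketch, assuming each audio is a (time, features) numpy array and that padding goes on the left (which is what the r[-l:] slicing in the later state-stack examples implies), could be:

import numpy

def vector_padder(vectors):
    # Zero-pad (time, features) arrays on the left so they stack into a
    # single (batch, max_time, features) float32 array. Left-padding keeps
    # the real frames at the end, consistent with the r[-l:] slices in
    # Examples #9, #13 and #14.
    max_len = max(len(v) for v in vectors)
    dim = vectors[0].shape[1]
    padded = numpy.zeros((len(vectors), max_len, dim), dtype='float32')
    for i, v in enumerate(vectors):
        padded[i, max_len - len(v):, :] = v
    return padded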
Example #4
def encode_images(task, imgs, batch_size=128):
    """Project imgs to the joint space using model."""
    return numpy.vstack([
        task.encode_images(
            torch.autograd.Variable(torch.from_numpy(
                numpy.vstack(batch))).cuda()).data.cpu().numpy()
        for batch in util.grouper(imgs, batch_size)
    ])
Example #5
def encode_images(model, imgs, batch_size=128):
    """Project imgs to the joint space using model.
    """
    return numpy.vstack([
        model.task.encode_images(batch)
        for batch in util.grouper(imgs, batch_size)
    ])
Example #6
def encode_sentences(task, audios, batch_size=128):
    return numpy.vstack([
        task.predict(
            torch.autograd.Variable(torch.from_numpy(
                vector_padder(batch))).cuda()).data.cpu().numpy()
        for batch in util.grouper(audios, batch_size)
    ])
Example #7
def iter_train_batches(self):
    # sort data by length
    if self.curriculum:
        data = [
            self.data['train'][i] for i in numpy.argsort(
                [len(x['tokens_in']) for x in self.data['train']])
        ]
    else:
        data = self.data['train']
    for bunch in util.grouper(data, self.batch_size * 20):
        bunch_sort = [
            bunch[i]
            for i in numpy.argsort([len(x['tokens_in']) for x in bunch])
        ]
        for item in util.grouper(bunch_sort, self.batch_size):
            yield self.batcher.batch(item)
Example #8
def embed(net, audios, batch_size=32):
    """Return utterance embeddings for audio using the given net."""
    device = next(net.parameters()).device
    out = []
    for batch in util.grouper(audios, batch_size):
        padded = torch.from_numpy(vector_padder(batch)).to(device)
        for result in net.predict(padded).cpu().numpy():
            out.append(result)
    return np.stack(out)
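A hypothetical call, assuming audios is a list of (time, features) numpy arrays and net is a trained model exposing predict:

embeddings = embed(net, audios, batch_size=32)
# One row per utterance: embeddings.shape == (len(audios), embedding_dim)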
Example #9
def iter_layer_states(model, audios, batch_size=128):
    """Pass audios through the model and for each audio return the state of each timestep and each layer."""
    # Number of valid output timesteps per audio after strided convolution.
    lens = (numpy.array([len(a) for a in audios]) +
            model.config['filter_length']) // model.config['stride']
    rs = (r for batch in util.grouper(audios, batch_size)
          for r in model.task.pile(vector_padder(batch)))
    for (r, l) in zip(rs, lens):
        # Keep only the trailing l timesteps; the rest is padding.
        yield r[-l:, :, :]
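The lens arithmetic estimates how many output timesteps the model's strided convolution produces for each input, so that r[-l:] can strip the rows that correspond to padding. For instance, with filter_length 6 and stride 2, a 100-frame audio gives (100 + 6) // 2 = 53 valid states. The exact formula depends on the model's padding convention, so treat this as the assumption encoded in model.config rather than a general rule.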
Example #10
def encode_texts(task, texts, batch_size=128):
    return numpy.vstack([
        task.TextImage.predict(
            torch.autograd.Variable(torch.from_numpy(
                task.batcher.batch_inp(
                    task.mapper.transform(batch)).astype('int64'))).cuda()
        ).data.cpu().numpy()
        for batch in util.grouper(texts, batch_size)
    ])
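Here the batching happens on the CPU side before the tensor is built: task.mapper.transform presumably maps each sentence to integer token ids, task.batcher.batch_inp pads them into a rectangular array, and astype('int64') makes the result a valid index tensor for an embedding layer. Those helper semantics are inferred from the call chain, not shown on this page.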
Example #11
def encode_sentences(model, audios, batch_size=128):
    """Project audios to the joint space using model.

    For each audio returns a vector.
    """
    return numpy.vstack([
        model.task.predict(vector_padder(batch))
        for batch in util.grouper(audios, batch_size)
    ])
Example #12
def encode_sentences(model, audios, batch_size=128):
    """Project audios to the joint space using model.

    For each audio returns a vector.
    """
    return numpy.vstack([
        model.task.predict(
            torch.autograd.Variable(torch.from_numpy(
                vector_padder(batch))).cuda()).data.cpu().numpy()
        for batch in util.grouper(audios, batch_size)
    ])
Example #13
def get_state_stack(net, audios, batch_size=128):
    """Pass audios through the model and for each audio return the state of each timestep and each layer."""
    device = next(net.parameters()).device
    result = []
    lens = inout(np.array(list(map(len, audios))))
    rs = (r for batch in util.grouper(audios, batch_size)
          for r in state_stack(
              net, torch.from_numpy(vector_padder(batch)).to(device)
          ).cpu().numpy())
    for (r, l) in zip(rs, lens):
        result.append(r[-l:, :])
    return result
Example #14
def get_state_stack(net, audios, batch_size=128):
    """Pass audios through the model and for each audio return the state of each timestep and each layer."""
    import onion.util as util
    from vg.simple_data import vector_padder
    result = []
    lens = inout(np.array(list(map(len, audios))))
    rs = (r for batch in util.grouper(audios, batch_size) for r in state_stack(
        net,
        torch.from_numpy(vector_padder(batch)).cuda()).cpu().numpy())
    for (r, l) in zip(rs, lens):
        result.append(r[-l:, :])
    return result
Example #15
def by_speaker(batcher, data, batch_size=32):
    speaker = lambda x: x['speaker']
    for _, bunch in itertools.groupby(sorted(data, key=speaker), speaker):
        for item in util.grouper(bunch, batch_size):
            yield batcher.batch(item)
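Note: because the data is first sorted and then grouped with itertools.groupby on the speaker key, every batch this generator yields contains utterances from a single speaker (the final batch per speaker may be shorter than batch_size). Example #2 wraps it in randomized(...) to shuffle the order of these speaker-homogeneous batches.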