def open(self, maxlens, depth=None):
    """Reset this multibucket and open one Bucket per entry of `maxlens`.

    Per-bucket embedding models are cached in `self._embed_models` and
    reused on later calls; only the models that are still missing are
    created here.

    Args:
        maxlens: iterable with one maximum length per bucket. Presumably
            sorted in ascending order -- TODO confirm against callers.
        depth: forwarded unchanged to `Bucket.open`.

    Returns:
        `self`, so that the call can be chained.
    """
    # Materialise once: `maxlens` is walked twice below, which would silently
    # produce zero buckets for a one-shot iterable.
    maxlens = list(maxlens)

    # Create an embedding model for every bucket that does not have one yet.
    # Topping up (rather than only filling an empty cache) keeps the zip()
    # below from dropping buckets when the cache is shorter than `maxlens`.
    for idx in range(len(self._embed_models), len(maxlens)):
        if self.embed_model is not None:
            # The model is named after the bucket it belongs to.
            model = self.embed_model.from_configurable(
                self, name='%s-%d' % (self.name, idx))
        else:
            model = None
        self._embed_models.append(model)

    self._indices = [(0, 0)]
    self._buckets = []
    self._len2idx = {}
    prevlen = -1
    for idx, (maxlen, bucket_embed_model) in enumerate(
            zip(maxlens, self._embed_models)):
        # Hand the cached embedding model to the bucket instead of letting
        # it create a new one.
        self._buckets.append(
            Bucket.from_configurable(
                self,
                embed_model=bucket_embed_model,
                name='%s-%d' % (self.name, idx)).open(maxlen, depth=depth))
        # Every length in (prevlen, maxlen] is served by bucket `idx`.
        self._len2idx.update(dict.fromkeys(range(prevlen + 1, maxlen + 1), idx))
        prevlen = maxlen
    return self
def open(self, maxlens, depth=None):
    """Reset this multibucket and open one Bucket per entry of `maxlens`.

    Every bucket is configured with the same `self.embed_model`.

    Args:
        maxlens: iterable with one maximum length per bucket. Presumably
            sorted in ascending order -- TODO confirm against callers.
        depth: forwarded unchanged to `Bucket.open`.

    Returns:
        `self`, so that the call can be chained.
    """
    self._indices = [(0, 0)]
    self._buckets = []
    self._len2idx = {}

    last_maxlen = -1
    for bucket_idx, maxlen in enumerate(maxlens):
        opened = Bucket.from_configurable(
            self,
            embed_model=self.embed_model,
            name='%s-%d' % (self.name, bucket_idx),
        ).open(maxlen, depth=depth)
        self._buckets.append(opened)

        # Every length in (last_maxlen, maxlen] is served by this bucket.
        for length in range(last_maxlen + 1, maxlen + 1):
            self._len2idx[length] = bucket_idx
        last_maxlen = maxlen

    return self
def from_dataset(cls, dataset, *args, **kwargs):
    """Build a multibucket from `dataset`, using its last element as template.

    The new object is created with `cls.from_configurable`. Its indices are
    copied from the last multibucket yielded by iterating `dataset`, and it
    gets one `Bucket.from_dataset(dataset, i, ...)` per bucket of that
    element.

    Args:
        dataset: iterable of multibuckets; it is also passed on to
            `cls.from_configurable` and `Bucket.from_dataset`, and its
            `verbose` attribute controls the summary printed to stderr.
        *args: forwarded to `cls.from_configurable` and `Bucket.from_dataset`.
        **kwargs: forwarded to `cls.from_configurable` and
            `Bucket.from_dataset`.

    Returns:
        The newly built multibucket.

    Raises:
        ValueError: if iterating `dataset` yields no multibucket.
    """
    multibucket = cls.from_configurable(dataset, *args, **kwargs)

    # Only the last element of `dataset` is used as the template.
    # NOTE(review): presumably all elements are expected to share identical
    # indices; nothing here verifies that -- confirm with callers.
    missing = object()
    template = missing
    for template in dataset:
        pass
    if template is missing:
        raise ValueError('from_dataset() requires a non-empty dataset')

    multibucket._indices = np.array(template.indices)
    multibucket._buckets = [
        Bucket.from_dataset(dataset, i, *args, **kwargs)
        for i in range(len(template))
    ]

    if dataset.verbose:
        for bucket in multibucket:
            name = bucket.name
            shape = ' x '.join(str(x) for x in bucket.indices.shape)
            print('Bucket {name} is {shape}'.format(
                name=name, shape=ctext(shape, 'bright_blue')),
                file=sys.stderr)
    return multibucket