Example #1
0
    def open(self, maxlens, depth=None):
        """Create one bucket per entry of ``maxlens`` and map lengths to buckets.

        While ``self._embed_models`` is still empty, one embedding model per
        bucket is derived from ``self.embed_model`` via ``from_configurable``
        (or ``None`` is stored when ``self.embed_model`` is ``None``). If the
        list is already populated, the stored models are reused as-is.

        Args:
            maxlens: maximum length of each bucket, one entry per bucket.
                Iterated twice when the embedding models have to be created,
                so it must be re-iterable. Presumably sorted ascending --
                TODO confirm against callers.
            depth: forwarded unchanged to ``Bucket.open``.

        Returns:
            ``self``, so that calls can be chained.
        """

        # Create the per-bucket embedding models only if none exist yet.
        if not self._embed_models:
            for idx, _ in enumerate(maxlens):
                if self.embed_model is not None:
                    # The model is named after the bucket it belongs to.
                    model = self.embed_model.from_configurable(
                        self, name='%s-%d' % (self.name, idx))
                else:
                    model = None
                self._embed_models.append(model)

        self._indices = [(0, 0)]
        self._buckets = []
        self._len2idx = {}
        prevlen = -1
        # NOTE(review): zip() stops at the shorter input, so if
        # self._embed_models was populated for a different number of buckets,
        # the surplus entries of maxlens are silently ignored.
        for (idx, maxlen), bucket_embed_model in zip(enumerate(maxlens),
                                                     self._embed_models):
            # Pass the stored embedding model in rather than creating a new one.
            bucket = Bucket.from_configurable(self,
                                              embed_model=bucket_embed_model,
                                              name='%s-%d' % (self.name, idx))
            self._buckets.append(bucket.open(maxlen, depth=depth))
            # Every length in (prevlen, maxlen] is served by this bucket.
            for length in range(prevlen + 1, maxlen + 1):
                self._len2idx[length] = idx
            prevlen = maxlen
        return self
Example #2
0
    def open(self, maxlens, depth=None):
        """Create one bucket per entry of ``maxlens`` and map lengths to buckets.

        Every bucket is built with ``self.embed_model`` passed as its
        ``embed_model`` and is named ``<self.name>-<bucket index>``.

        Args:
            maxlens: maximum length of each bucket, one entry per bucket
                (presumably sorted ascending -- TODO confirm against callers).
            depth: forwarded unchanged to ``Bucket.open``.

        Returns:
            ``self``, so that calls can be chained.
        """

        self._indices = [(0, 0)]
        self._buckets = []
        self._len2idx = {}

        # First length not yet assigned to any bucket.
        lower = 0
        for bucket_id, upper in enumerate(maxlens):
            bucket = Bucket.from_configurable(
                self,
                embed_model=self.embed_model,
                name='%s-%d' % (self.name, bucket_id))
            self._buckets.append(bucket.open(upper, depth=depth))
            # Lengths lower..upper (inclusive) all resolve to this bucket.
            self._len2idx.update(
                dict.fromkeys(range(lower, upper + 1), bucket_id))
            lower = upper + 1
        return self
Example #3
0
    def from_dataset(cls, dataset, *args, **kwargs):
        """Build a multibucket that mirrors the multibuckets held by ``dataset``.

        The new object is configured from ``dataset`` via
        ``cls.from_configurable``. Its indices are copied from the last
        multibucket that ``dataset`` yields, and it receives one
        ``Bucket.from_dataset`` bucket per bucket of that multibucket.

        Args:
            dataset: iterable of multibuckets; also used as the configuration
                source and read for its ``verbose`` flag.
            *args: forwarded to ``from_configurable`` and ``Bucket.from_dataset``.
            **kwargs: forwarded to ``from_configurable`` and
                ``Bucket.from_dataset``.

        Returns:
            The newly built multibucket.

        Raises:
            ValueError: if ``dataset`` yields no multibuckets.
        """

        multibucket = cls.from_configurable(dataset, *args, **kwargs)

        members = list(dataset)
        if not members:
            # Fail with a clear message instead of an unbound-variable error.
            raise ValueError(
                'Cannot build a multibucket from an empty dataset')
        # The last multibucket is the template for indices and bucket count.
        # NOTE(review): presumably every multibucket in the dataset shares the
        # same indices (an equality assertion was once sketched here but left
        # disabled) -- TODO confirm.
        template = members[-1]

        multibucket._indices = np.array(template.indices)
        multibucket._buckets = [
            Bucket.from_dataset(dataset, i, *args, **kwargs)
            for i in range(len(template))
        ]
        if dataset.verbose:
            for bucket in multibucket:
                print('Bucket {name} is {shape}'.format(
                    name=bucket.name,
                    shape=ctext(
                        ' x '.join(str(x) for x in bucket.indices.shape),
                        'bright_blue')),
                      file=sys.stderr)
        return multibucket