Example #1
class Embedder:
    """Scikit-learn-style transformer that maps rows of X to BERT embeddings.

    NOTE(review): `BertEmbedding` and `np` (numpy) are assumed to be imported
    by the surrounding module — confirm.
    """

    def __init__(self, max_seq_length, batch_size=32):
        # max_seq_length: sequence length handed to BertEmbedding.
        # batch_size: number of rows sent to project_batch per call.
        self.embedder = BertEmbedding(max_seq_length)
        self.batch_size = batch_size

    def fit(self, X, y=None):
        """No-op fit; present only for scikit-learn pipeline compatibility."""
        return self

    def transform(self, X):
        """Embed every row of X in batches; return the stacked numpy array."""
        result = []
        total = len(X)  # hoisted: invariant across iterations
        # Idiomatic batching: step through X in slices of batch_size.
        for start in range(0, total, self.batch_size):
            print("start processing {} / {}".format(start, total))
            batch = X[start:start + self.batch_size]
            # project_batch returns one embedding per row; extend the result.
            result += self.embedder.project_batch(batch)
        return np.array(result)
Example #2
class Embedder:
    """Scikit-learn-style transformer that maps rows of X to BERT embeddings.

    When batch_size == 1 the sequence length is forced to BERT's maximum of
    512 and rows are embedded one at a time; otherwise rows are embedded in
    slices of batch_size.

    NOTE(review): `BertEmbedding` and `np` (numpy) are assumed to be imported
    by the surrounding module — confirm.
    """

    def __init__(self, max_seq_length, batch_size):
        self.batch_size = batch_size
        if batch_size == 1:
            # Single-row mode: use BERT's maximum sequence length instead.
            self.embedder = BertEmbedding(512)
            print("seq length set to Bert maximum 512 when batch size is 1")
        else:
            self.embedder = BertEmbedding(max_seq_length)

    def fit(self, X, y=None):
        """No-op fit; present only for scikit-learn pipeline compatibility."""
        return self

    def transform(self, X):
        """Embed every row of X, dispatching on the batch_size set at init."""
        if self.batch_size == 1:
            return self.__transform_in_single(X)
        return self.__transform_in_batch(X)

    def __transform_in_single(self, X):
        # One embedder call per row; comprehension replaces the append loop.
        return np.array([self.embedder.project(row) for row in X])

    def __transform_in_batch(self, X):
        result = []
        total = len(X)  # hoisted: invariant across iterations
        # Idiomatic batching: step through X in slices of batch_size.
        for start in range(0, total, self.batch_size):
            print("start processing {} / {}".format(start, total))
            batch = X[start:start + self.batch_size]
            # project_batch returns one embedding per row; extend the result.
            result += self.embedder.project_batch(batch)
        return np.array(result)