Ejemplos de WordCodec en Python

Lenguaje de programación: Python

Namespace/Package Name: training.dataprep.codecs.word_codec

Clase / Tipo: WordCodec

Ejemplos en hotexamples.com: 7

Python WordCodec - 7 ejemplos encontrados. Estos son los ejemplos en Python del mundo real mejor valorados de training.dataprep.codecs.word_codec.WordCodec extraídos de proyectos de código abierto. Puedes valorar ejemplos para ayudarnos a mejorar la calidad de los ejemplos.

Métodos usados con frecuencia

Mostrar / Ocultar

WordCodec(5)

encode(5)

decode(4)

Métodos usados con frecuencia

WordCodec (5)

encode (5)

decode (4)

Ejemplo n.º 1

Mostrar archivo

    def test_extra_encoding(self):
        """Check that a second, different string encodes to 1 on the same codec."""
        codec = WordCodec()
        first_text = "this is the string the"
        second_text = "more to the encoding"

        # The result of the first call is deliberately not inspected; it only
        # puts the codec into the state the second call is checked against.
        codec.encode(first_text)

        assert codec.encode(second_text) == 1

Ejemplo n.º 2

Mostrar archivo

    def __init__(self, file_path: str = None):
        """Create the text and label codecs and run every file row through them.

        Args:
            file_path: Path handed to ``text_label_from_file`` and kept on the
                instance as ``self.file_path``.
        """
        self.file_path = file_path
        self.textCodec = TextCodec()
        self.labelCodec = WordCodec()

        # Encode every row up front so both codecs have seen the whole file
        # before the identity matrix is sized.
        for row_text, row_label in self.text_label_from_file(file_path):
            self.textCodec.encode(row_text)
            self.labelCodec.encode(row_label)

        # One row/column per entry in the label codec's encode_map.
        label_count = len(self.labelCodec.encode_map)
        self.matrix_iden = numpy.identity(label_count)

Ejemplo n.º 3

Mostrar archivo

class AirbnbExample(DataSource):
    """Data source that reads (text, label) rows from a two-column CSV file.

    Texts are encoded with a ``TextCodec`` and padded/truncated to ``MAX_LEN``
    entries; labels are encoded with a ``WordCodec`` and mapped to rows of an
    identity matrix.
    """

    # Fixed length of every text representation.
    MAX_LEN = 128

    def __init__(self, file_path: str = None):
        """Create the codecs and pre-encode every row of the file.

        Args:
            file_path: Path of the CSV file to read.
        """
        self.file_path = file_path
        self.textCodec = TextCodec()
        self.labelCodec = WordCodec()

        # build the word space by encoding everything
        for (text, label) in self.text_label_from_file(file_path):
            self.textCodec.encode(text)
            self.labelCodec.encode(label)

        # One row/column per entry in the label codec's encode_map.
        self.matrix_iden = numpy.identity(len(self.labelCodec.encode_map))

    def labels(self):
        """Yield the label representation of every row in ``self.file_path``."""
        for (_, label) in self.text_label_from_file(self.file_path):
            yield self.label_representation(label)

    def texts(self):
        """Yield the text representation of every row in ``self.file_path``."""
        for (text, _) in self.text_label_from_file(self.file_path):
            yield self.text_representation(text)

    def label_representation(self, label: str) -> numpy.ndarray:
        """Return the identity-matrix row selected by the label's code.

        NOTE(review): this is a one-hot vector provided the label codec
        returns an integer index below ``len(encode_map)`` - confirm against
        ``WordCodec``.
        """
        encoded = self.labelCodec.encode(label)
        return self.matrix_iden[encoded]

    def text_representation(self, text: str) -> numpy.ndarray:
        """Return the encoded text as an array of exactly ``MAX_LEN`` entries.

        Longer encodings are truncated; shorter ones are padded at the end
        with ``numpy.pad``'s default fill (constant zeros).
        """
        # Convert once so that both branches return an ndarray; previously the
        # truncating branch returned a plain list slice.
        encoded = numpy.asarray(self.textCodec.encode(text))
        if len(encoded) >= self.MAX_LEN:
            return encoded[:self.MAX_LEN]
        return numpy.pad(encoded, (0, self.MAX_LEN - len(encoded)))

    def text_label_from_file(self, file_path):
        """Yield ``(text, label)`` for each row of the CSV file at ``file_path``.

        Every row must have exactly two columns; any other width makes the
        tuple unpacking raise ``ValueError``.
        """
        # Honour the argument (it used to be ignored in favour of
        # self.file_path) and open with newline="" as the csv module requires
        # for correct handling of embedded newlines.
        with open(file_path, newline="") as csv_file:
            reader = csv.reader(csv_file)
            for (text, label) in reader:
                yield (text, label)

    def read_prediction(self, single_prediction: numpy.ndarray):
        """Return ``(decoded_label, score)`` for the highest-scoring entry.

        Args:
            single_prediction: Array of scores; the index of its maximum is
                decoded through the label codec.
        """
        max_val = single_prediction.max()
        max_index = numpy.argmax(single_prediction)

        return (self.labelCodec.decode(max_index), max_val)

Ejemplo n.º 4

Mostrar archivo

class TextCodec(codecs.Codec):
    """
    Encodes a string of distinct words

    ``encode`` splits the string on single spaces and encodes each piece with
    an internal ``WordCodec``; ``decode`` decodes each item and joins the
    results with single spaces.

    Note: unlike the stock ``codecs.Codec`` contract, both methods return the
    converted value directly rather than an ``(output, length)`` tuple, and
    the ``errors`` argument is accepted but not used.
    """
    PADDING = "<PAD>"

    def __init__(self):
        self.word_codec = WordCodec()
        # Encode the padding token before any real word.
        self.word_codec.encode(self.PADDING)  # 0 is reserved for padding

    def encode(self, input, errors='strict') -> list:
        """Return a list with one ``word_codec`` code per space-separated piece.

        Splitting is on the single-space character, so consecutive spaces
        produce empty-string pieces that are encoded as well.
        """
        return [self.word_codec.encode(word) for word in input.split(' ')]

    def decode(self, input, errors='strict') -> str:
        """Return the decoded items of ``input`` joined by single spaces."""
        return ' '.join(self.word_codec.decode(number) for number in input)

Ejemplo n.º 5

Mostrar archivo

 def test_initial_encoding(self):
     """Check that the first string encoded by a fresh codec gets code 0."""
     codec = WordCodec()
     encoded = codec.encode("this is the string the")
     assert encoded == 0

Ejemplo n.º 6

Mostrar archivo

 def test_initial_decoding(self):
     """Check that decoding an encoded string gives back the original string."""
     original = "this is the string the"
     codec = WordCodec()
     assert codec.decode(codec.encode(original)) == original

Ejemplo n.º 7

Mostrar archivo

 def __init__(self):
     """Create the word codec and encode the padding token before anything else."""
     codec = WordCodec()
     # Encoded first so that code 0 stays reserved for padding.
     codec.encode(self.PADDING)
     self.word_codec = codec