Beispiel #1
0
    def convert(self, data):
        """Build a DataSet from (premise, hypothesis, label) examples.

        Each example becomes one Instance carrying five fields:

        * ``premise`` / ``hypothesis``: the two word lists,
        * ``premise_len`` / ``hypothesis_len``: a list of ones with one entry
          per word of the corresponding sequence,
        * ``truth``: the label, the only field marked as a target.

        :param data: An iterable of examples, each unpacking into exactly
            three items.
            [
                [ [premise_word_11, premise_word_12, ...], [hypothesis_word_11, hypothesis_word_12, ...], label_1 ],
                [ [premise_word_21, premise_word_22, ...], [hypothesis_word_21, hypothesis_word_22, ...], label_2 ],
                ...
            ]
        :return: A DataSet object holding one Instance per example.
        """
        result = DataSet()

        for premise, hypothesis, label in data:
            # Fields are created in the same order in which they are attached.
            named_fields = (
                ("premise", TextField(premise, is_target=False)),
                ("hypothesis", TextField(hypothesis, is_target=False)),
                ("premise_len", TextField([1] * len(premise), is_target=False)),
                ("hypothesis_len", TextField([1] * len(hypothesis), is_target=False)),
                ("truth", LabelField(label, is_target=True)),
            )
            sample = Instance()
            for field_name, field in named_fields:
                sample.add_field(field_name, field)
            result.append(sample)

        return result
Beispiel #2
0
 def convert_for_infer(self, data, vocabs):
     """Append unlabeled word sequences to this data set and index them.

     Each word sequence is wrapped in a non-target TextField stored under
     the "word_seq" field of a fresh Instance. Once every Instance has been
     appended, the "word_seq" field is indexed with ``vocabs["word_vocab"]``.

     :param data: An iterable of word sequences (lists).
     :param vocabs: A mapping that provides the "word_vocab" entry.
     """
     for tokens in data:
         token_field = TextField(tokens, is_target=False)
         sample = Instance()
         sample.add_field("word_seq", token_field)
         self.append(sample)

     # The vocabulary is only looked up after all instances were appended.
     word_vocab = vocabs["word_vocab"]
     self.index_field("word_seq", word_vocab)
Beispiel #3
0
 def convert_with_vocabs(self, data, vocabs):
     """Append labeled examples to this data set and index both fields.

     The first item of each example is stored as a non-target TextField
     named "word_seq"; the second item is stored as a target LabelField
     named "label". After all examples are appended, "word_seq" is indexed
     with ``vocabs["word_vocab"]`` and "label" with ``vocabs["label_vocab"]``.

     :param data: An iterable of examples; items 0 and 1 of each example
         are the word sequence (list) and the label (str).
     :param vocabs: A mapping providing "word_vocab" and "label_vocab".
     """
     for sample in data:
         tokens = sample[0]
         tag = sample[1]
         named_fields = (
             ("word_seq", TextField(tokens, is_target=False)),
             ("label", LabelField(tag, is_target=True)),
         )
         entry = Instance()
         for field_name, field in named_fields:
             entry.add_field(field_name, field)
         self.append(entry)

     # Index the fields one after another; each vocabulary is fetched lazily.
     for field_name, vocab_key in (("word_seq", "word_vocab"), ("label", "label_vocab")):
         self.index_field(field_name, vocabs[vocab_key])
Beispiel #4
0
 def convert(self, data):
     """Append labeled examples, growing the vocabularies along the way.

     For every example, ``self.word_vocab`` is updated with the word
     sequence and ``self.label_vocab`` with the label. The word sequence is
     then stored as a non-target TextField named "word_seq" and the label as
     a target LabelField named "label". After the loop, both fields are
     indexed with the respective vocabulary held on ``self``.

     :param data: An iterable of examples; items 0 and 1 of each example
         are the word sequence (list) and the label (str).
     """
     for sample in data:
         tokens = sample[0]
         tag = sample[1]

         # Vocabularies are updated before any field object is created.
         self.word_vocab.update(tokens)
         self.label_vocab.update(tag)

         named_fields = (
             ("word_seq", TextField(tokens, is_target=False)),
             ("label", LabelField(tag, is_target=True)),
         )
         entry = Instance()
         for field_name, field in named_fields:
             entry.add_field(field_name, field)
         self.append(entry)

     self.index_field("word_seq", self.word_vocab)
     self.index_field("label", self.label_vocab)
Beispiel #5
0
def convert_seq2seq_dataset(data):
    """Turn paired word/label sequences into a DataSet.

    Item 0 of each sample is stored as a non-target TextField named
    "word_seq"; item 1 is stored as a target TextField named "label_seq".

    :param data: list of list of strings, [num_examples, *].
            ::
            [
                [ [word_11, word_12, ...], [label_1, label_1, ...] ],
                [ [word_21, word_22, ...], [label_2, label_1, ...] ],
                ...
            ]

    :return: a DataSet with one Instance per sample.
    """
    result = DataSet()
    for sample in data:
        tokens = sample[0]
        tags = sample[1]
        entry = Instance()
        # Chained on purpose: the second call is made on whatever the first
        # add_field call returns.
        (
            entry
            .add_field("word_seq", TextField(tokens, is_target=False))
            .add_field("label_seq", TextField(tags, is_target=True))
        )
        result.append(entry)
    return result
Beispiel #6
0
    def convert(self, data):
        """Build a DataSet from (premise, hypothesis, label) examples.

        The three items of every example are stored unwrapped under the
        fields "premise", "hypothesis" and "truth". Two further fields,
        "premise_len" and "hypothesis_len", are then derived through
        ``DataSet.apply`` as the ``len`` of the respective sequence field.
        Finally the four sequence/length fields are registered as inputs and
        "truth" as the target.

        :param data: An iterable of examples, each unpacking into exactly
            three items.
            Example::
                [
                    [ [premise_word_11, premise_word_12, ...], [hypothesis_word_11, hypothesis_word_12, ...], [label_1] ],
                    [ [premise_word_21, premise_word_22, ...], [hypothesis_word_21, hypothesis_word_22, ...], [label_2] ],
                    ...
                ]

        :return: A DataSet object.
        """

        def length_of(field_name):
            # Build the per-instance callable handed to DataSet.apply.
            return lambda ins: len(ins[field_name])

        result = DataSet()

        for premise, hypothesis, label in data:
            sample = Instance()
            for field_name, value in (
                ("premise", premise),
                ("hypothesis", hypothesis),
                ("truth", label),
            ):
                sample.add_field(field_name, value)
            result.append(sample)

        for source_field, length_field in (
            ("premise", "premise_len"),
            ("hypothesis", "hypothesis_len"),
        ):
            result.apply(length_of(source_field), new_field_name=length_field)

        result.set_input("premise", "hypothesis", "premise_len", "hypothesis_len")
        result.set_target("truth")
        return result
Beispiel #7
0
 def convert_with_vocabs(self, data, vocabs):
     """Append sequence-labeling examples and index them with given vocabs.

     For every example three non-target fields are attached to a fresh
     Instance: "word_seq" (TextField of the words), "truth" (TextField of
     the label sequence) and "word_seq_origin_len" (LabelField holding the
     number of words). After the loop, "word_seq" is indexed with
     ``vocabs["word_vocab"]`` and "truth" with ``vocabs["label_vocab"]``.

     :param data: An iterable of examples; items 0 and 1 of each example
         are the word sequence (list) and the label sequence (list).
     :param vocabs: A mapping providing "word_vocab" and "label_vocab".
     """
     for sample in data:
         tokens = sample[0]
         tags = sample[1]

         # Creation order (words, length, labels) differs from attach order.
         token_field = TextField(tokens, is_target=False)
         length_field = LabelField(len(tokens), is_target=False)
         tag_field = TextField(tags, is_target=False)

         entry = Instance()
         for field_name, field in (
             ("word_seq", token_field),
             ("truth", tag_field),
             ("word_seq_origin_len", length_field),
         ):
             entry.add_field(field_name, field)
         self.append(entry)

     for field_name, vocab_key in (("word_seq", "word_vocab"), ("truth", "label_vocab")):
         self.index_field(field_name, vocabs[vocab_key])
Beispiel #8
0
    def convert_to_dataset(self, data, vocab, label_vocab):
        """Convert (words, label) examples into an indexed DataSet object.

        ``words`` must be a list and is stored as the non-target field
        "word_seq". ``label`` is stored as a target field: under "label_seq"
        (TextField) when it is a list, or under "label" (LabelField) when it
        is a str. Every field name that was used at least once is indexed
        afterwards: "word_seq" with ``vocab``, the label fields with
        ``label_vocab``.

        :param data: iterable of examples; items 0 and 1 of each example are
            the words and the label.
        :param vocab: a dict, mapping string (token) to index (int).
        :param label_vocab: a dict, mapping string (label) to index (int).
        :return data_set: a DataSet object
        :raises NotImplementedError: if ``words`` is not a list, or ``label``
            is neither a list nor a str.
        """
        # Names of the fields that showed up in at least one example.
        seen_fields = set()

        # construct a DataSet object and fill it with Instances
        data_set = DataSet()
        for example in data:
            words = example[0]
            label = example[1]
            instance = Instance()

            if not isinstance(words, list):
                raise NotImplementedError("words is a {}".format(type(words)))
            instance.add_field("word_seq", TextField(words, is_target=False))
            seen_fields.add("word_seq")

            if isinstance(label, list):
                label_name = "label_seq"
                label_field = TextField(label, is_target=True)
            elif isinstance(label, str):
                label_name = "label"
                label_field = LabelField(label, is_target=True)
            else:
                raise NotImplementedError("label is a {}".format(type(label)))
            instance.add_field(label_name, label_field)
            seen_fields.add(label_name)

            data_set.append(instance)

        # convert strings to indices, always in this fixed order
        indexing_plan = (
            ("word_seq", vocab),
            ("label_seq", label_vocab),
            ("label", label_vocab),
        )
        for field_name, field_vocab in indexing_plan:
            if field_name in seen_fields:
                data_set.index_field(field_name, field_vocab)

        return data_set
Beispiel #9
0
    def convert(self, data):
        """Append sequence-labeling examples, growing the vocabularies.

        For every example, ``self.word_vocab`` is updated with the word
        sequence and ``self.label_vocab`` with the label sequence. Three
        non-target fields are attached to a fresh Instance: "word_seq"
        (TextField of the words), "truth" (TextField of the labels) and
        "word_seq_origin_len" (LabelField holding the number of words).
        A progress bar sized to ``len(data)`` is advanced once per example.
        After the loop, "word_seq" and "truth" are indexed with the
        vocabularies held on ``self``.

        :param data: 3-level lists. Entries are strings.
        """
        progress = ProgressBar(total=len(data))
        for sample in data:
            tokens = sample[0]
            tags = sample[1]

            # Vocabularies are updated before any field object is created.
            self.word_vocab.update(tokens)
            self.label_vocab.update(tags)

            # Creation order (words, length, labels) differs from attach order.
            token_field = TextField(tokens, is_target=False)
            length_field = LabelField(len(tokens), is_target=False)
            tag_field = TextField(tags, is_target=False)

            entry = Instance()
            for field_name, field in (
                ("word_seq", token_field),
                ("truth", tag_field),
                ("word_seq_origin_len", length_field),
            ):
                entry.add_field(field_name, field)
            self.append(entry)
            progress.move()

        self.index_field("word_seq", self.word_vocab)
        self.index_field("truth", self.label_vocab)
Beispiel #10
0
 def test_add_field(self):
     """Check that add_field makes the new field visible in Instance.fields."""
     initial = {"x": [1, 2, 3], "y": [4, 5, 6]}
     ins = Instance(**initial)
     ins.add_field("z", [1, 1, 1])

     # Expected content: the constructor fields plus the field added later.
     expected = dict(initial)
     expected["z"] = [1, 1, 1]
     self.assertEqual(ins.fields, expected)