Python DataSet.apply Examples

Programming Language: Python

Namespace/Package Name: fastNLP.core.dataset

Class/Type: DataSet

Method/Function: apply

Examples at hotexamples.com: 8

Python DataSet.apply - 8 examples found. These are the top-rated real-world Python examples of fastNLP.core.dataset.DataSet.apply, extracted from open-source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

DataSet(30)

append(30)

add_field(11)

set_input(11)

set_target(9)

apply(8)

index_field(5)

get_field(2)

read_csv(2)

rename_field(2)

delete_field(1)

drop(1)

get_all_fields(1)

get_target_name(1)

load(1)

read_tokenize(1)

save(1)

add_seq_len(1)

get_input_name(1)

Example #1

Show file

File: dataset_loader.py Project: shlpu/fastNLP

    def convert(self, data):
        """Build a DataSet from premise/hypothesis/label examples.

        :param data: An iterable of 3-element examples.
            Example::
                [
                    [ [premise_word_11, premise_word_12, ...], [hypothesis_word_11, hypothesis_word_12, ...], [label_1] ],
                    [ [premise_word_21, premise_word_22, ...], [hypothesis_word_21, hypothesis_word_22, ...], [label_2] ],
                    ...
                ]

        :return: A DataSet holding the fields ``premise``, ``hypothesis``,
            ``truth``, ``premise_len`` and ``hypothesis_len``. The two
            sentences and their lengths are registered through ``set_input``;
            ``truth`` is registered through ``set_target``.
        """

        def premise_length(ins):
            return len(ins["premise"])

        def hypothesis_length(ins):
            return len(ins["hypothesis"])

        dataset = DataSet()

        # Each example unpacks into exactly three parts: premise, hypothesis, label.
        for premise, hypothesis, label in data:
            sample = Instance()
            sample.add_field("premise", premise)
            sample.add_field("hypothesis", hypothesis)
            sample.add_field("truth", label)
            dataset.append(sample)

        # Derive one length field per sentence field.
        dataset.apply(premise_length, new_field_name="premise_len")
        dataset.apply(hypothesis_length, new_field_name="hypothesis_len")

        dataset.set_input("premise", "hypothesis", "premise_len", "hypothesis_len")
        dataset.set_target("truth")
        return dataset

Example #2

Show file

File: dataset_loader.py Project: shlpu/fastNLP

 def convert(self, data):
     """Convert tagged sentences into a DataSet.

     :param data: An iterable of items where ``item[0]`` is the word
         sequence and ``item[1]`` is the matching tag sequence.
     :return: A DataSet with the fields ``words``, ``tags`` and ``seq_len``.
         ``words`` and ``seq_len`` are registered through ``set_input``;
         ``tags`` is registered through ``set_target``.
     """
     data_set = DataSet()
     for item in data:
         sent_words, sent_pos_tag = item[0], item[1]
         data_set.append(Instance(words=sent_words, tags=sent_pos_tag))
     # The sequence length is the number of words in the sentence, not the
     # size of the Instance object itself.
     data_set.apply(lambda ins: len(ins["words"]), new_field_name="seq_len")
     data_set.set_target("tags")
     # The Instance above is built with ``words=...``, so that is the field
     # name to register as input ("sent_words" is only a local variable).
     data_set.set_input("words")
     data_set.set_input("seq_len")
     return data_set

Example #3

Show file

    def test_apply(self):
        """Check ``DataSet.apply`` with and without ``new_field_name``.

        Covers three cases: creating a new field, overwriting an existing
        field, and returning the per-instance results as a list when no
        field name is given.
        """
        ds = DataSet({"x": [[1, 2, 3, 4]] * 40, "y": [[5, 6]] * 40})

        # A new field is created from the reversed "x" values.
        ds.apply(lambda ins: ins["x"][::-1], new_field_name="rx")
        self.assertTrue("rx" in ds.field_arrays)
        self.assertEqual(ds.field_arrays["rx"].content[0], [4, 3, 2, 1])

        # Reusing an existing field name replaces that field's content.
        ds.apply(lambda ins: len(ins["y"]), new_field_name="y")
        self.assertEqual(ds.field_arrays["y"].content[0], 2)

        # Without new_field_name the results are returned instead of stored.
        res = ds.apply(lambda ins: len(ins["x"]))
        self.assertTrue(isinstance(res, list) and len(res) > 0)
        # assertEqual, not assertTrue: assertTrue(res[0], 4) would treat 4 as
        # the failure message and pass for any truthy res[0].
        self.assertEqual(res[0], 4)

Example #4

Show file

    def predict(self, content):
        """Tag ``content``, run the loaded pipeline and return the ``output`` field.

        :param content: Passed unchanged to ``self.pos_tagger.predict``.
        :return: The content of the ``output`` field: per instance, a list of
            ``"<arc>/<label>"`` strings with the leading entry dropped.
        :raises ValueError: If this object has no ``pipeline`` attribute.
        """
        if not hasattr(self, 'pipeline'):
            raise ValueError("You have to load model first.")

        def take_words(ins):
            # The part before '/' of every tagged token, prefixed with '<BOS>'.
            return ['<BOS>'] + [pair.split('/')[0] for pair in ins['wp']]

        def take_pos(ins):
            # The part after '/' of every tagged token, prefixed with '<BOS>'.
            return ['<BOS>'] + [pair.split('/')[1] for pair in ins['wp']]

        def stringify_arcs(ins):
            return [str(arc) for arc in ins['arc_pred']]

        def join_arc_label(ins):
            joined = [
                arc + '/' + label
                for arc, label in zip(ins['arc_pred'], ins['label_pred_seq'])
            ]
            # Drop the first entry (the position of the prepended '<BOS>').
            return joined[1:]

        # 1. Use the POS tagger to get segmentation and POS tagging results.
        tagged = self.pos_tagger.predict(content)
        # e.g. tagged = [['word1/NN', 'word2/VB', 'word3/NN', 'word4/NN']]

        # 2. Build the dataset.
        dataset = DataSet()
        dataset.add_field('wp', tagged)
        dataset.apply(take_words, new_field_name='words')
        dataset.apply(take_pos, new_field_name='pos')
        dataset.rename_field("words", "raw_words")

        # 3. Run the pipeline, then post-process its predictions.
        self.pipeline(dataset)
        dataset.apply(stringify_arcs, new_field_name='arc_pred')
        dataset.apply(join_arc_label, new_field_name='output')
        # output like: [['2/top', '0/root', '4/nn', '2/dep']]
        return dataset.field_arrays['output'].content

Example #5

Show file

File: dataset_loader.py Project: wzhystar/fastNLP

 def convert(self, data):
     """Convert raw items into a DataSet with a ``seq_len`` field.

     ``item[0]`` always becomes the ``words`` field. The remaining fields
     depend on ``self.pos`` and ``self.ner`` (each compared with ``is True``):
     both set -> ``pos_tags`` from ``item[1]`` and ``ner`` from ``item[2]``;
     only ``pos`` -> ``pos_tags`` from ``item[1]``;
     only ``ner`` -> ``ner`` from ``item[1]``.

     :param data: An iterable of indexable items laid out as described above.
     :return: A DataSet whose ``seq_len`` field holds ``len(words)`` per instance.
     """
     dataset = DataSet()
     for item in data:
         # Collect the keyword arguments first, then build the Instance once.
         fields = {"words": item[0]}
         if self.pos is True and self.ner is True:
             fields["pos_tags"] = item[1]
             fields["ner"] = item[2]
         elif self.pos is True:
             fields["pos_tags"] = item[1]
         elif self.ner is True:
             fields["ner"] = item[1]
         dataset.append(Instance(**fields))

     def word_count(ins):
         return len(ins["words"])

     dataset.apply(word_count, new_field_name="seq_len")
     return dataset

Example #6

Show file

File: api.py Project: huziye/fastNLP_fork

    def predict(self, content):
        """Run the loaded pipeline on ``content`` and decode the predicted tags.

        :param content: Either a single sentence (``str``) or a list of
            sentences. Each sentence is stored as one entry of the ``words``
            field and may be a ``str`` or a list of ``str`` tokens.
        :return answer: For ``str`` input, the list of ``"<word>/<tag>"``
            strings for that sentence; for ``list`` input, one such list per
            sentence. Any other input type yields ``None``.
        :raises ValueError: If this object has no ``pipeline`` attribute.
        """
        if not hasattr(self, "pipeline"):
            raise ValueError("You have to load model first.")

        sentence_list = []
        # 1. Check the type of the input sentence(s).
        if isinstance(content, str):
            sentence_list.append(content)
        elif isinstance(content, list):
            sentence_list = content

        # 2. Build the dataset.
        dataset = DataSet()
        dataset.add_field("words", sentence_list)

        # 3. Run the pipeline.
        self.pipeline(dataset)

        def decode_tags(ins):
            """Merge characters into ``"<word>/<tag>"`` items at S/E boundaries."""
            pred_tags = ins["tag"]
            chars = ins["words"]
            words = []
            start_idx = 0
            for idx, tag in enumerate(pred_tags):
                # Tags starting with "S" or "E" both close the current word.
                # Joining the slice in both cases keeps the "S" branch from
                # raising TypeError (list + str) when ``chars`` is a list of
                # tokens; for a ``str`` the join returns the slice unchanged.
                if tag[0] in ("S", "E"):
                    words.append("".join(chars[start_idx:idx + 1]) + "/" + tag[2:])
                    start_idx = idx + 1
            return words

        dataset.apply(decode_tags, new_field_name="tag_output")

        output = dataset.field_arrays["tag_output"].content
        if isinstance(content, str):
            return output[0]
        elif isinstance(content, list):
            return output

Example #7

Show file

File: test_sampler.py Project: zhangxt/fastNLP

 def test_BucketSampler(self):
     """Check that BucketSampler returns one index per instance.

     Builds ten instances with randomly sized ``x`` sequences, derives
     ``seq_len`` from them and asserts that the sampler yields ten indices.
     """
     sampler = BucketSampler(num_buckets=3, batch_size=16, seq_lens_field_name="seq_len")
     # Draw a fresh length for every row. ``[[0] * randint(...)] * 10`` would
     # call randint once and repeat the *same* list object ten times, so the
     # sampler would never see sequences of different lengths.
     x_rows = [[0] * random.randint(1, 10) for _ in range(10)]
     data_set = DataSet({"x": x_rows, "y": [[5, 6]] * 10})
     data_set.apply(lambda ins: len(ins["x"]), new_field_name="seq_len")
     indices = sampler(data_set)
     self.assertEqual(len(indices), 10)

Example #8

Show file

File: test_dataset.py Project: dangerousor/fastNLP

 def test_apply(self):
     """Check that ``DataSet.apply`` stores its results in a new field."""
     columns = {"x": [[1, 2, 3, 4]] * 40, "y": [[5, 6]] * 40}
     dataset = DataSet(columns)

     def reversed_x(ins):
         return ins["x"][::-1]

     dataset.apply(reversed_x, new_field_name="rx")

     # The new field must exist and hold the reversed "x" sequence.
     has_rx = "rx" in dataset.field_arrays
     self.assertTrue(has_rx)
     first_rx = dataset.field_arrays["rx"].content[0]
     self.assertEqual(first_rx, [4, 3, 2, 1])