Example #1
    def predict(self, content):
        """

        :param content: list of list of str. Each string is a token(word).
        :return answer: list of list of str. Each string is a tag.
        """
        if not hasattr(self, "pipeline"):
            raise ValueError("You have to load model first.")

        sentence_list = content
        # 1. Check the type of each sentence
        for sentence in sentence_list:
            if not all(isinstance(obj, str) for obj in sentence):
                raise ValueError("Input must be list of list of string.")

        # 2. Build the dataset
        dataset = DataSet()
        dataset.add_field("words", sentence_list)

        # 3. Run the pipeline
        self.pipeline(dataset)

        def merge_tag(words_list, tags_list):
            rtn = []
            for words, tags in zip(words_list, tags_list):
                rtn.append([w + "/" + t for w, t in zip(words, tags)])
            return rtn

        output = dataset.field_arrays["tag"].content
        if isinstance(content, str):
            return output[0]
        elif isinstance(content, list):
            return merge_tag(content, output)
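
Note: merge_tag above only pairs each token with its predicted tag; a minimal standalone sketch of the same logic, using made-up tokens and tags for illustration:

def merge_tag(words_list, tags_list):
    # Pair each token with its tag as "word/tag", sentence by sentence.
    return [[w + "/" + t for w, t in zip(words, tags)]
            for words, tags in zip(words_list, tags_list)]

print(merge_tag([["I", "love", "NLP"]], [["PN", "VV", "NN"]]))
# [['I/PN', 'love/VV', 'NLP/NN']]
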
Example #2
    def test_delete_field(self):
        dd = DataSet()
        dd.add_field("x", [[1, 2, 3]] * 10)
        dd.add_field("y", [[1, 2, 3, 4]] * 10)
        dd.delete_field("x")
        self.assertNotIn("x", dd.field_arrays)
        self.assertIn("y", dd.field_arrays)
Example #3
    def test_add_field(self):
        ds = DataSet({"x": [3, 4]})
        ds.add_field('y', [['hello', 'world'], ['this', 'is', 'a', 'test']],
                     is_input=True,
                     is_target=True)
        # ds.apply(lambda x: [x['x']] * 3, is_input=True, is_target=True, new_field_name='y')
        print(ds)
Example #4
    def predict(self, content):
        """
        分词接口。

        :param content: str或List[str], 例如: "中文分词很重要!", 返回的结果是"中文 分词 很 重要 !"。 如果传入的为List[str],比如
            [ "中文分词很重要!", ...], 返回的结果["中文 分词 很 重要 !", ...]。
        :return: str或List[str], 根据输入的的类型决定。
        """
        if not hasattr(self, 'pipeline'):
            raise ValueError("You have to load model first.")

        sentence_list = []
        # 1. Check the input type
        if isinstance(content, str):
            sentence_list.append(content)
        elif isinstance(content, list):
            sentence_list = content

        # 2. Build the dataset
        dataset = DataSet()
        dataset.add_field('raw_sentence', sentence_list)

        # 3. Run the pipeline
        self.pipeline(dataset)

        output = dataset.get_field('output').content
        if isinstance(content, str):
            return output[0]
        elif isinstance(content, list):
            return output
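
The wrap-then-unwrap pattern above (a bare str becomes a one-element list, and the first element is returned for str input) recurs in most predict methods in this listing; a minimal sketch of just that dispatch, with a stand-in segmenter instead of the real pipeline:

def predict(content, segment=lambda s: " ".join(s)):
    # Normalize a single sentence into a one-element list.
    sentence_list = [content] if isinstance(content, str) else content
    output = [segment(s) for s in sentence_list]
    # Mirror the input type: bare result for str, list for list.
    return output[0] if isinstance(content, str) else output

print(predict("abc"))           # 'a b c'
print(predict(["abc", "def"]))  # ['a b c', 'd e f']
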
Example #5
    def predict(self, content):
        if not hasattr(self, 'pipeline'):
            raise ValueError("You have to load model first.")

        # 1. Run the POS tagger to get segmentation and POS-tagging results
        pos_out = self.pos_tagger.predict(content)
        # pos_out = ['这里/NN 是/VB 分词/NN 结果/NN'.split()]

        # 2. Build the dataset
        dataset = DataSet()
        dataset.add_field('wp', pos_out)
        dataset.apply(lambda x: ['<BOS>'] + [w.split('/')[0] for w in x['wp']],
                      new_field_name='words')
        dataset.apply(lambda x: ['<BOS>'] + [w.split('/')[1] for w in x['wp']],
                      new_field_name='pos')
        dataset.rename_field("words", "raw_words")

        # 3. Run the pipeline
        self.pipeline(dataset)
        dataset.apply(lambda x: [str(arc) for arc in x['arc_pred']],
                      new_field_name='arc_pred')
        dataset.apply(lambda x: [
            arc + '/' + label
            for arc, label in zip(x['arc_pred'], x['label_pred_seq'])
        ][1:],
                      new_field_name='output')
        # output like: [['2/top', '0/root', '4/nn', '2/dep']]
        return dataset.field_arrays['output'].content
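
The two apply calls above split each 'word/tag' token on the slash and prepend a '<BOS>' marker; the same transformation in plain Python, using the sample from the comment:

wp = '这里/NN 是/VB 分词/NN 结果/NN'.split()
words = ['<BOS>'] + [w.split('/')[0] for w in wp]
pos = ['<BOS>'] + [w.split('/')[1] for w in wp]
print(words)  # ['<BOS>', '这里', '是', '分词', '结果']
print(pos)    # ['<BOS>', 'NN', 'VB', 'NN', 'NN']
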
Example #6
    def predict(self, content):
        """

        :param content: list of list of str. Each string is a token(word).
        :return answer: list of list of str. Each string is a tag.
        """
        if not hasattr(self, 'pipeline'):
            raise ValueError("You have to load model first.")

        sentence_list = []
        # 1. Check the input type
        if isinstance(content, str):
            sentence_list.append(content)
        elif isinstance(content, list):
            sentence_list = content

        # 2. Build the dataset
        dataset = DataSet()
        dataset.add_field('words', sentence_list)

        # 3. Run the pipeline
        self.pipeline(dataset)

        output = dataset['word_pos_output'].content
        if isinstance(content, str):
            return output[0]
        elif isinstance(content, list):
            return output
Example #7
File: api.py, Project: huziye/fastNLP_fork
    def predict(self, content):
        """

        :param content: list of list of str. Each string is a token(word).
        :return answer: list of list of str. Each string is a tag.
        """
        if not hasattr(self, "pipeline"):
            raise ValueError("You have to load model first.")

        sentence_list = []
        # 1. Check the input type
        if isinstance(content, str):
            sentence_list.append(content)
        elif isinstance(content, list):
            sentence_list = content

        # 2. Build the dataset
        dataset = DataSet()
        dataset.add_field("words", sentence_list)

        # 3. Run the pipeline
        self.pipeline(dataset)

        def decode_tags(ins):
            pred_tags = ins["tag"]
            chars = ins["words"]
            words = []
            start_idx = 0
            for idx, tag in enumerate(pred_tags):
                if tag[0] == "S":
                    words.append(chars[start_idx:idx + 1] + "/" + tag[2:])
                    start_idx = idx + 1
                elif tag[0] == "E":
                    words.append("".join(chars[start_idx:idx + 1]) + "/" +
                                 tag[2:])
                    start_idx = idx + 1
            return words

        dataset.apply(decode_tags, new_field_name="tag_output")

        output = dataset.field_arrays["tag_output"].content
        if isinstance(content, str):
            return output[0]
        elif isinstance(content, list):
            return output
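
decode_tags reassembles words from character-level BMES-style tags, where the first letter marks the position ('B'egin/'M'iddle/'E'nd of a word, or 'S'ingle-character word) and the part after the hyphen is the label; a standalone run with hand-made tags (illustrative, not real model output):

def decode_tags(chars, pred_tags):
    words = []
    start_idx = 0
    for idx, tag in enumerate(pred_tags):
        # 'S' closes a one-character word, 'E' closes a multi-character one.
        if tag[0] in ('S', 'E'):
            words.append(''.join(chars[start_idx:idx + 1]) + '/' + tag[2:])
            start_idx = idx + 1
    return words

chars = list('中文分词很重要!')
tags = ['B-NN', 'E-NN', 'B-NN', 'E-NN', 'S-AD', 'B-VA', 'E-VA', 'S-PU']
print(decode_tags(chars, tags))
# ['中文/NN', '分词/NN', '很/AD', '重要/VA', '!/PU']
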
Example #8
    def test_add_append(self):
        dd = DataSet()
        dd.add_field("x", [[1, 2, 3]] * 10)
        dd.add_field("y", [[1, 2, 3, 4]] * 10)
        dd.add_field("z", [[5, 6]] * 10)
        self.assertEqual(len(dd), 10)
        self.assertEqual(dd.field_arrays["x"].content, [[1, 2, 3]] * 10)
        self.assertEqual(dd.field_arrays["y"].content, [[1, 2, 3, 4]] * 10)
        self.assertEqual(dd.field_arrays["z"].content, [[5, 6]] * 10)

        with self.assertRaises(RuntimeError):
            dd.add_field("??", [[1, 2]] * 40)
Example #9
    def predict(self, content):
        if not hasattr(self, 'pipeline'):
            raise ValueError("You have to load model first.")

        sentence_list = []
        # 1. Check the input type
        if isinstance(content, str):
            sentence_list.append(content)
        elif isinstance(content, list):
            sentence_list = content

        # 2. Build the dataset
        dataset = DataSet()
        dataset.add_field('words', sentence_list)
        # dataset.add_field('tag', sentence_list)

        # 3. Run the pipeline
        self.pipeline(dataset)
        for ins in dataset:
            ins['heads'] = ins['heads'].tolist()

        return dataset['heads'], dataset['labels']
Example #10
    def predict(self, content):
        if not hasattr(self, 'pipeline'):
            raise ValueError("You have to load model first.")

        sentence_list = []
        # 1. Check the input type
        if isinstance(content, str):
            sentence_list.append(content)
        elif isinstance(content, list):
            sentence_list = content

        # 2. Build the dataset
        dataset = DataSet()
        dataset.add_field('raw_sentence', sentence_list)

        # 3. Run the pipeline
        self.pipeline(dataset)

        output = dataset['output'].content
        if isinstance(content, str):
            return output[0]
        elif isinstance(content, list):
            return output