コード例 #1
0
 def _create_examples(self, data_path, set_type):
     examples = []
     with open(data_path, 'r', encoding='utf-8') as f:
         text = ''
         label = []
         count = 0
         for line in f:
             if '\n' == line:
                 guid = "{0}-{1}-{2}".format(set_type, ''.join(label),
                                             count)
                 text = tokenization.convert_to_unicode(text)
                 label = tokenization.convert_to_unicode(label)
                 examples.append(
                     InputExample(guid=guid,
                                  text=text,
                                  label=label,
                                  seq_lens=len(text)))
                 text = ''
                 label = []
                 count += 1
             else:
                 word, tag = line.replace('\n', '').split('\t')
                 text += word
                 label.append(tag)
         if len(text) > 0:
             guid = "{0}-{1}-{2}".format(set_type, ''.join(label), count)
             text = tokenization.convert_to_unicode(text)
             label = tokenization.convert_to_unicode(label)
             examples.append(
                 InputExample(guid=guid,
                              text=text,
                              label=label,
                              seq_lens=len(text)))
     random.shuffle(examples)
     return examples
コード例 #2
0
 def _create_example(self, lines, set_type):
     examples = []
     for (i, line) in enumerate(lines):
         guid = "%s-%s" % (set_type, i)
         texts = tokenization.convert_to_unicode(line[0])
         labels = tokenization.convert_to_unicode(line[1])
         examples.append(InputExample(guid=guid, text=texts, label=labels))
     return examples
コード例 #3
0
ファイル: practice_clf.py プロジェクト: quanmou/polisis
 def create_examples(cls, lines, set_type):
     """Build ``InputExample`` objects from (text, comma-separated labels) rows.

     The guid is ``"<set_type>-<n>"`` with a one-based row number,
     ``text_b`` is always the empty string, and the label is the list of
     floats parsed from ``line[1]`` split on commas.
     """
     built = []
     for row_no, row in enumerate(lines, start=1):
         example_id = "%s-%s" % (
             set_type, tokenization.convert_to_unicode(str(row_no)))
         primary = tokenization.convert_to_unicode(str(row[0]))
         secondary = tokenization.convert_to_unicode('')
         scores = [float(piece) for piece in row[1].split(',')]
         built.append(InputExample(guid=example_id, text_a=primary,
                                   text_b=secondary, label=scores))
     return built
コード例 #4
0
 def _create_examples(self, data_path, set_type):
     labels, examples = set(), []
     with open(data_path, "r") as f:
         for i, line in enumerate(f):
             label, message = line.strip().split("\t")
             if "#" in label:
                 continue
             labels.add(label)
             guid = "{0}-{1}-{2}".format(set_type, label, i)
             text = tokenization.convert_to_unicode(message)
             label = tokenization.convert_to_unicode(label)
             examples.append(InputExample(guid=guid, text=text,
                                          label=label))
     random.shuffle(examples)
     return examples, list(labels)