コード例 #1
0
 def _create_examples(self, lines, set_type):
     """Build single-sentence InputExamples from pre-split rows.

     Each row is expected as (label, text); the guid is synthesized
     from the set name and the row index.
     """
     return [
         InputExample(
             guid='%s-%s' % (set_type, idx),
             text_a=tokenization.convert_to_unicode(row[1]),
             label=tokenization.convert_to_unicode(row[0]),
         )
         for idx, row in enumerate(lines)
     ]
コード例 #2
0
ファイル: run_classifier.py プロジェクト: aakorolyova/DeSpin
 def _create_examples(self, lines, set_type):
   """Creates examples for the training and dev sets.

   Each row is expected as (text_a, text_b, label); for the test set the
   label column is absent and a placeholder label "0" is used.

   Args:
     lines: iterable of rows (sequences of strings).
     set_type: "train", "dev" or "test"; used in the guid and to decide
       whether a real label is available.

   Returns:
     A list of InputExample objects.
   """
   examples = []
   for (i, line) in enumerate(lines):
     guid = "%s-%s" % (set_type, i)
     text_a = tokenization.convert_to_unicode(line[0])
     text_b = tokenization.convert_to_unicode(line[1])
     if set_type == "test":
       label = "0"
     else:
       # Bug fix: `label` was never assigned for train/dev rows, which
       # raised NameError on the first row (or reused a stale value).
       # The label is read from the third column here -- TODO confirm
       # against the actual data format.
       label = tokenization.convert_to_unicode(line[2])
     examples.append(
         InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label))
   return examples
コード例 #3
0
ファイル: train_ner.py プロジェクト: NCuz/AwesomeNLPBaseline
 def _create_examples(self, lines, set_type):
     """Creates examples for the training and dev sets.

     Test-set lines are JSON objects carrying their own id and no tag;
     train/dev lines are tab-separated "text<TAB>tag" pairs.
     """
     examples = []
     for idx, raw in enumerate(lines):
         if set_type == 'test':
             record = json.loads(raw)
             example = InputExample(
                 guid=record['id'],
                 text_a=tokenization.convert_to_unicode(record['text']),
                 text_b=None,
                 tag=None)
         else:
             parts = raw.split('\t')
             example = InputExample(
                 guid="%s-%s" % (set_type, idx),
                 text_a=tokenization.convert_to_unicode(parts[0]),
                 text_b=None,
                 tag=tokenization.convert_to_unicode(parts[1]))
         examples.append(example)
     return examples
コード例 #4
0
 def _create_examples(self, lines, set_type):
     """Creates examples for the training and dev sets.

     The first row is treated as a header and skipped.  Each remaining
     row's first field is a '***'-delimited "label***text" pair.
     """
     examples = []
     print(len(lines))  # report how many raw rows were read
     for idx, row in enumerate(lines):
         if idx == 0:
             # Skip the header row.
             continue
         label_and_text = row[0].split('***')
         examples.append(
             InputExample(
                 guid="%s-%s" % (set_type, idx),
                 text_a=tokenization.convert_to_unicode(label_and_text[1]),
                 text_b=None,
                 label=tokenization.convert_to_unicode(label_and_text[0])))
     return examples
コード例 #5
0
 def _create_examples(self, lines, set_type):
     """Creates examples for the training and dev sets.

     Train/dev rows are (text, label) pairs; test rows carry only the
     text and get the placeholder label "0".  Only the test file has a
     header row, which is skipped.

     Args:
         lines: iterable of rows (sequences of strings).
         set_type: "train", "dev" or "test".

     Returns:
         A list of InputExample objects.
     """
     examples = []
     for (i, line) in enumerate(lines):
         # Only the test set has a header.  Bug fix: the original used a
         # plain `if set_type == "test": continue`, which skipped *every*
         # test row (producing an empty test set) and made the test
         # branch below unreachable.
         if i == 0 and set_type == "test":
             continue
         guid = "%s-%s" % (set_type, i)
         if set_type == "test":
             text_a = tokenization.convert_to_unicode(line[0])
             label = "0"
         else:
             text_a = tokenization.convert_to_unicode(line[0])
             label = tokenization.convert_to_unicode(line[1])
         examples.append(
             InputExample(guid=guid, text_a=text_a, text_b=None, label=label))
     return examples
コード例 #6
0
 def _create_examples(self, lines, set_type):
     """Creates examples for the training and dev sets.

     Each line is a JSON object with a 'sentence' field.  Train/dev
     records also carry a 'label'; test records carry an 'id' that is
     used as the guid, and their label is None.
     """
     examples = []
     for position, raw_line in enumerate(lines):
         record = json.loads(raw_line)
         sentence = tokenization.convert_to_unicode(record['sentence'])
         if set_type == "test":
             guid = record['id']
             label = None
         else:
             guid = "%s-%s" % (set_type, position)
             label = tokenization.convert_to_unicode(record['label'])
         examples.append(
             InputExample(guid=guid, text_a=sentence, text_b=None,
                          label=label))
     return examples
コード例 #7
0
def read_examples(case):
    """Convert one Shelf case ({'headings': String, 'body': [String]})
    into a list of `InputExample`s, one per sentence.

    Sentences containing " ||| " are split into a text_a/text_b pair;
    all other sentences become single-text examples with text_b=None.
    Unique ids are assigned sequentially starting at 0.
    """
    examples = []
    sentences = [case['headings']] + case['body']
    for uid, sentence in enumerate(sentences):
        stripped = tokenization.convert_to_unicode(sentence).strip()
        match = re.match(r"^(.*) \|\|\| (.*)$", stripped)
        if match:
            first, second = match.group(1), match.group(2)
        else:
            first, second = stripped, None
        examples.append(
            InputExample(unique_id=uid, text_a=first, text_b=second))
    return examples
コード例 #8
0
 def _create_examples(self, lines, set_type):
     """Creates examples for the training and dev sets.

     Each line is a dict whose 'src' field holds a list of sentences, so
     text_a is a *list* of unicode strings.  For train/dev the 'ids'
     field becomes the (list-valued) label; test examples carry no
     label.
     """
     examples = []
     for index, record in enumerate(lines):
         guid = "%s-%s" % (set_type, index)
         sentences = [
             tokenization.convert_to_unicode(sent)
             for sent in record['src']
         ]
         if set_type in ('train', 'dev'):
             examples.append(
                 InputExample(guid=guid, text_a=sentences, text_b=None,
                              label=record['ids']))
         else:
             examples.append(
                 InputExample(guid=guid, text_a=sentences, text_b=None))
     return examples
コード例 #9
0
    def _create_examples(self, emotion_lines, news_lines, nli_lines, set_type):
        """Creates examples for the training and dev sets.

        Builds one mixed list covering three tasks, distinguished by the
        InputExample `task` field: '1' = emotion, '2' = news, '3' = NLI.
        For the test set, labels are None and the guid comes from the
        row's first (id) column instead of being synthesized.

        Args:
            emotion_lines: rows for the emotion task (single sentence).
            news_lines: rows for the news task (single sentence).
            nli_lines: rows for the NLI task (sentence pair).
            set_type: "train", "dev" or "test".

        Returns:
            A list of InputExample objects.
        """
        examples = []

        # Task 1: emotion classification (single sentence).
        for (i, line) in enumerate(emotion_lines):
            guid = "%s-%s" % (set_type, i)
            if set_type == "test":
                text_a = tokenization.convert_to_unicode(line[1])
                label = None
                guid = line[0]
            else:
                text_a = tokenization.convert_to_unicode(line[0])
                label = tokenization.convert_to_unicode(line[1])
            examples.append(
                InputExample(guid=guid, text_a=text_a, text_b=None, label=label, task='1'))

        # Task 2: news classification (single sentence; label column may
        # be an int, hence the str() before conversion).
        for i, line in enumerate(news_lines):
            guid = f'news_{set_type}_{i}'
            if set_type == 'test':
                text_a = tokenization.convert_to_unicode(line[1])
                label = None
                guid = line[0]
            else:
                text_a = tokenization.convert_to_unicode(line[0])
                label = tokenization.convert_to_unicode(str(line[1]))

            examples.append(
                InputExample(guid=guid, text_a=text_a, text_b=None, label=label, task='2'))

        # Task 3: NLI (sentence pair).
        for i, line in enumerate(nli_lines):
            # Bug fix: the guid prefix was 'news_' (copy-pasted from the
            # news loop), so NLI guids collided with news guids.
            guid = f'nli_{set_type}_{i}'
            if set_type == 'test':
                text_a = tokenization.convert_to_unicode(line[1])
                text_b = tokenization.convert_to_unicode(line[2])
                label = None
                guid = line[0]
            else:
                text_a = tokenization.convert_to_unicode(line[0])
                text_b = tokenization.convert_to_unicode(line[1])
                label = tokenization.convert_to_unicode(str(line[2]))

            examples.append(
                InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label, task='3'))

        return examples