def _create_examples(self, lines, set_type):
    """Creates examples for the training and dev sets."""
    examples = []
    for idx, row in enumerate(lines):
        guid = '%s-%s' % (set_type, idx)
        # Column 0 holds the label, column 1 the sentence text.
        sentence = tokenization.convert_to_unicode(row[1])
        target = tokenization.convert_to_unicode(row[0])
        examples.append(InputExample(guid=guid, text_a=sentence, label=target))
    return examples
def _create_examples(self, lines, set_type):
    """Creates examples for the training and dev sets.

    Each row is expected as [text_a, text_b, label]; the test set carries
    no gold label, so a placeholder "0" is used there.

    Args:
      lines: iterable of parsed rows (list-like per line).
      set_type: one of "train", "dev", "test"; used to build guids.

    Returns:
      A list of InputExample sentence-pair objects.
    """
    examples = []
    for (i, line) in enumerate(lines):
        guid = "%s-%s" % (set_type, i)
        text_a = tokenization.convert_to_unicode(line[0])
        text_b = tokenization.convert_to_unicode(line[1])
        if set_type == "test":
            # No gold labels for the test set; use a placeholder.
            label = "0"
        else:
            # BUGFIX: the original never assigned `label` for train/dev,
            # raising UnboundLocalError on the append below. Assume the
            # label is in column 2 (standard pair-classification TSV
            # layout) — TODO confirm against the actual data files.
            label = tokenization.convert_to_unicode(line[2])
        examples.append(
            InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label))
    return examples
def _create_examples(self, lines, set_type):
    """Creates examples for the training and dev sets."""
    examples = []
    for idx, raw in enumerate(lines):
        if set_type == 'test':
            # Test data is JSON-lines: {"id": ..., "text": ...}, unlabeled.
            record = json.loads(raw)
            sentence = tokenization.convert_to_unicode(record['text'])
            tag = None
            guid = record['id']
        else:
            # Train/dev data is tab-separated: text<TAB>tag.
            parts = raw.split('\t')
            guid = "%s-%s" % (set_type, idx)
            sentence = tokenization.convert_to_unicode(parts[0])
            tag = tokenization.convert_to_unicode(parts[1])
        examples.append(
            InputExample(guid=guid, text_a=sentence, text_b=None, tag=tag))
    return examples
def _create_examples(self, lines, set_type):
    """Creates examples for the training and dev sets.

    Rows arrive as single-column records whose first field packs
    "label***text"; the header row (index 0) is skipped.

    Args:
      lines: iterable of parsed rows; each row's first element is a
        '***'-delimited "label***text" string.
      set_type: one of "train", "dev", "test"; used to build guids.

    Returns:
      A list of single-sentence InputExample objects.
    """
    # Removed a stray debug `print(len(lines))` from the original.
    examples = []
    for (i, line) in enumerate(lines):
        if i == 0:
            # Skip the header row.
            continue
        guid = "%s-%s" % (set_type, i)
        # The single column packs label and text separated by '***'.
        fields = line[0].split('***')
        text_a = tokenization.convert_to_unicode(fields[1])
        label = tokenization.convert_to_unicode(fields[0])
        examples.append(
            InputExample(guid=guid, text_a=text_a, text_b=None, label=label))
    return examples
def _create_examples(self, lines, set_type):
    """Creates examples for the training and dev sets.

    Train/dev rows are [text, label]; the test file has a header row and
    unlabeled rows [text], for which a dummy "0" label is used.

    Args:
      lines: iterable of parsed rows (list-like per line).
      set_type: one of "train", "dev", "test"; used to build guids.

    Returns:
      A list of single-sentence InputExample objects.
    """
    examples = []
    for (i, line) in enumerate(lines):
        # Only the test set has a header.
        # BUGFIX: the original used a plain `if set_type == "test": continue`,
        # which skipped *every* test line — yielding zero test examples and
        # making the test branch below unreachable. Skip only the header.
        if set_type == "test" and i == 0:
            continue
        guid = "%s-%s" % (set_type, i)
        if set_type == "test":
            text_a = tokenization.convert_to_unicode(line[0])
            # No gold labels for the test set; use a placeholder.
            label = "0"
        else:
            text_a = tokenization.convert_to_unicode(line[0])
            label = tokenization.convert_to_unicode(line[1])
        examples.append(
            InputExample(guid=guid, text_a=text_a, text_b=None, label=label))
    return examples
def _create_examples(self, lines, set_type):
    """Creates examples for the training and dev sets."""
    examples = []
    for idx, raw in enumerate(lines):
        # Each line is a JSON record with at least a 'sentence' field.
        record = json.loads(raw)
        guid = "%s-%s" % (set_type, idx)
        sentence = tokenization.convert_to_unicode(record['sentence'])
        if set_type == "test":
            # Unlabeled test data carries its own id.
            label = None
            guid = record['id']
        else:
            label = tokenization.convert_to_unicode(record['label'])
        examples.append(
            InputExample(guid=guid, text_a=sentence, text_b=None, label=label))
    return examples
def read_examples(case):
    """Read a list of `InputExample`s from a case from Shelf :: {'headings': String, 'body': [String]}."""
    examples = []
    for uid, raw in enumerate([case['headings']] + case['body']):
        stripped = tokenization.convert_to_unicode(raw).strip()
        # A " ||| " separator splits a line into a sentence pair;
        # otherwise the whole line is a single sentence.
        pair = re.match(r"^(.*) \|\|\| (.*)$", stripped)
        if pair:
            first, second = pair.group(1), pair.group(2)
        else:
            first, second = stripped, None
        examples.append(
            InputExample(unique_id=uid, text_a=first, text_b=second))
    return examples
def _create_examples(self, lines, set_type):
    """Creates examples for the training and dev sets."""
    examples = []
    for idx, record in enumerate(lines):
        # `record` is a dict; 'src' holds a list of sentences.
        guid = "%s-%s" % (set_type, idx)
        sentences = [
            tokenization.convert_to_unicode(sent) for sent in record['src']
        ]
        if set_type in ('train', 'dev'):
            examples.append(
                InputExample(guid=guid, text_a=sentences, text_b=None,
                             label=record['ids']))
        else:
            examples.append(
                InputExample(guid=guid, text_a=sentences, text_b=None))
    # Note: both text_a and label are lists here.
    return examples
def _create_examples(self, emotion_lines, news_lines, nli_lines, set_type):
    """Creates examples for the training and dev sets.

    Builds one multi-task example list from three inputs:
      task '1': emotion classification (single sentence).
      task '2': news classification (single sentence).
      task '3': NLI (sentence pair).

    For the test set each row starts with its own id and carries no label;
    otherwise the label is the trailing column.

    Args:
      emotion_lines: rows for the emotion task.
      news_lines: rows for the news task.
      nli_lines: rows for the NLI task.
      set_type: one of "train", "dev", "test".

    Returns:
      A list of InputExample objects tagged with their task id.
    """
    examples = []
    # Task 1: emotion classification.
    for (i, line) in enumerate(emotion_lines):
        guid = "%s-%s" % (set_type, i)
        if set_type == "test":
            text_a = tokenization.convert_to_unicode(line[1])
            label = None
            guid = line[0]
        else:
            text_a = tokenization.convert_to_unicode(line[0])
            label = tokenization.convert_to_unicode(line[1])
        examples.append(
            InputExample(guid=guid, text_a=text_a, text_b=None,
                         label=label, task='1'))
    # Task 2: news classification.
    for i, line in enumerate(news_lines):
        guid = f'news_{set_type}_{i}'
        if set_type == 'test':
            text_a = tokenization.convert_to_unicode(line[1])
            label = None
            guid = line[0]
        else:
            text_a = tokenization.convert_to_unicode(line[0])
            label = tokenization.convert_to_unicode(str(line[1]))
        examples.append(
            InputExample(guid=guid, text_a=text_a, text_b=None,
                         label=label, task='2'))
    # Task 3: NLI sentence pairs.
    for i, line in enumerate(nli_lines):
        # BUGFIX: the original reused the 'news_' guid prefix here
        # (copy-paste), so train/dev NLI guids collided with the news
        # task's guids. Use a distinct 'nli_' prefix.
        guid = f'nli_{set_type}_{i}'
        if set_type == 'test':
            text_a = tokenization.convert_to_unicode(line[1])
            text_b = tokenization.convert_to_unicode(line[2])
            label = None
            guid = line[0]
        else:
            text_a = tokenization.convert_to_unicode(line[0])
            text_b = tokenization.convert_to_unicode(line[1])
            label = tokenization.convert_to_unicode(str(line[2]))
        examples.append(
            InputExample(guid=guid, text_a=text_a, text_b=text_b,
                         label=label, task='3'))
    return examples