def _create_examples(self, data_path, set_type):
    """Read a tab-separated sequence-labelling file into shuffled examples.

    Every non-blank line of the file must hold exactly two tab-separated
    fields, ``word<TAB>tag``. A line consisting only of a newline ends the
    current sentence. The words of a sentence are concatenated without any
    separator to form the example text; the tags are collected in a list.

    Args:
        data_path: Path of the UTF-8 encoded input file.
        set_type: Prefix used for each example guid (first component of
            ``"<set_type>-<joined tags>-<index>"``).

    Returns:
        A list of ``InputExample`` objects, shuffled in place with
        ``random.shuffle``.

    Raises:
        ValueError: If a non-blank line does not split into exactly two
            tab-separated fields.
    """

    def build_example(text, tags, index):
        # Single place that turns an accumulated sentence into an example,
        # shared by the in-file and end-of-file cases.
        guid = "{0}-{1}-{2}".format(set_type, ''.join(tags), index)
        text = tokenization.convert_to_unicode(text)
        # NOTE(review): the tag *list* is passed to convert_to_unicode as in
        # the original code; how a list is handled depends on the project's
        # tokenization module -- confirm.
        tags = tokenization.convert_to_unicode(tags)
        return InputExample(guid=guid, text=text, label=tags,
                            seq_lens=len(text))

    examples = []
    words, tags = [], []
    with open(data_path, 'r', encoding='utf-8') as f:
        for line in f:
            if line == '\n':
                # Consecutive blank lines carry no sentence; skip them
                # instead of emitting an empty example (mirrors the
                # end-of-file guard below).
                if not tags:
                    continue
                # len(examples) equals the running example counter.
                examples.append(
                    build_example(''.join(words), tags, len(examples)))
                words, tags = [], []
            else:
                word, tag = line.replace('\n', '').split('\t')
                words.append(word)
                tags.append(tag)

    # Flush the last sentence when the file does not end with a blank line.
    text = ''.join(words)
    if len(text) > 0:
        examples.append(build_example(text, tags, len(examples)))

    random.shuffle(examples)
    return examples
def _create_example(self, lines, set_type):
    """Build one ``InputExample`` for every entry of ``lines``.

    Args:
        lines: Iterable of indexable rows; element 0 is used as the text and
            element 1 as the label, each passed through
            ``tokenization.convert_to_unicode``.
        set_type: Prefix of the guid, which has the form
            ``"<set_type>-<zero-based row index>"``.

    Returns:
        A list of ``InputExample`` objects in the same order as ``lines``.
    """
    return [
        InputExample(
            guid="%s-%s" % (set_type, index),
            text=tokenization.convert_to_unicode(row[0]),
            label=tokenization.convert_to_unicode(row[1]),
        )
        for index, row in enumerate(lines)
    ]
def create_examples(cls, lines, set_type):
    """Creates examples for the training and dev sets.

    Args:
        lines: Iterable of indexable rows. Element 0 is stringified and used
            as ``text_a``; element 1 is a comma-separated string of numbers
            that is parsed into a list of floats for the label.
        set_type: Prefix of the guid, which has the form
            ``"<set_type>-<one-based row index>"``.

    Returns:
        A list of ``InputExample`` objects in the same order as ``lines``;
        ``text_b`` is always the converted empty string.

    Raises:
        ValueError: If a label field contains a piece that ``float`` cannot
            parse.
    """

    def to_example(position, row):
        # Conversions run in the same order as before: guid, text_a, text_b,
        # and only then the label parsing.
        guid = "%s-%s" % (
            set_type, tokenization.convert_to_unicode(str(position)))
        text_a = tokenization.convert_to_unicode(str(row[0]))
        text_b = tokenization.convert_to_unicode('')
        label = [float(piece) for piece in row[1].split(',')]
        return InputExample(
            guid=guid, text_a=text_a, text_b=text_b, label=label)

    return [
        to_example(position, row)
        for position, row in enumerate(lines, start=1)
    ]
def _create_examples(self, data_path, set_type):
    """Read a ``label<TAB>message`` file into shuffled examples.

    Each non-blank line is stripped of surrounding whitespace and must split
    into exactly two tab-separated fields. Lines whose label contains ``#``
    are skipped. Blank lines are skipped as well.

    Args:
        data_path: Path of the UTF-8 encoded input file.
        set_type: Prefix used for each example guid, which has the form
            ``"<set_type>-<label>-<zero-based line index>"``.

    Returns:
        A tuple ``(examples, labels)``: ``examples`` is a list of
        ``InputExample`` objects shuffled in place with ``random.shuffle``;
        ``labels`` is the sorted list of distinct labels seen.

    Raises:
        ValueError: If a non-blank line does not split into exactly two
            tab-separated fields.
    """
    labels, examples = set(), []
    # Explicit encoding so decoding does not depend on the platform default.
    with open(data_path, "r", encoding="utf-8") as f:
        # Enumerate the raw lines so guids keep using the file line index
        # even when lines are skipped.
        for i, line in enumerate(f):
            stripped = line.strip()
            if not stripped:
                # A blank line (e.g. at end of file) has no fields to unpack.
                continue
            label, message = stripped.split("\t")
            if "#" in label:
                continue
            labels.add(label)
            guid = "{0}-{1}-{2}".format(set_type, label, i)
            text = tokenization.convert_to_unicode(message)
            label = tokenization.convert_to_unicode(label)
            examples.append(InputExample(guid=guid, text=text, label=label))
    random.shuffle(examples)
    # Set iteration order for strings varies between interpreter runs;
    # sorting gives callers a reproducible label order.
    return examples, sorted(labels)