def _create_examples(self, lines, set_type): """Creates examples for the training and dev sets.""" examples = [] print( "UDC dataset is too big, loading data spent a long time, please wait patiently.................." ) for (i, line) in enumerate(lines): if len(line) < 3: print("data format error: %s" % "\t".join(line)) print( "data row contains at least three parts: label\tconv1\t.....\tresponse" ) continue guid = "%s-%d" % (set_type, i) text_a = "\t".join(line[1:-1]) text_a = tokenization.convert_to_unicode(text_a) text_a = text_a.split('\t') text_b = line[-1] text_b = tokenization.convert_to_unicode(text_b) label = tokenization.convert_to_unicode(line[0]) examples.append( InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label)) return examples
def _create_examples(self, lines, set_type): """Creates examples for the training and dev sets.""" examples = [] for (i, line) in enumerate(lines): if len(line) != 2: print("data format error: %s" % "\t".join(line)) print("data row contains two parts: label \t conversation_content") continue guid = "%s-%d" % (set_type, i) text_a = line[1] text_a = tokenization.convert_to_unicode(text_a) label = tokenization.convert_to_unicode(line[0]) examples.append( InputExample( guid=guid, text_a=text_a, label=label)) return examples