def _create_examples(self, f, k):
    """Create train and test `InputExample`s from the split produced by `f(k)`.

    Args:
      f: callable returning ((train_x, test_x), (train_y, test_y)) for key k,
         where *_x are iterables of raw text lines and *_y are parallel
         label sequences.
      k: key/fold argument passed straight through to `f`.

    Returns:
      A tuple (examples_train, examples_test) of `InputExample` lists.
    """
    (train_x, test_x), (train_y, test_y) = f(k)

    def _build(texts, labels):
        # One example per line; guid is the constant placeholder "0-0"
        # (presumably unused downstream — TODO confirm).
        return [
            InputExample(
                guid="0-0",
                text_a=tokenization.convert_to_unicode(line),
                text_b=None,
                label=tokenization.convert_to_unicode(str(labels[i])),
            )
            for i, line in enumerate(texts)
        ]

    # The original also built an (always-empty) dev list from commented-out
    # code and never returned it; that dead code is removed here.
    return _build(train_x, train_y), _build(test_x, test_y)
def read_examples(input_file):
    """Read a list of `InputExample`s from an input file.

    Each non-empty line yields one example. A line of the form
    "text_a ||| text_b" is split into a sentence pair; any other line
    becomes text_a alone. Unique ids are assigned sequentially from 0.
    Reading stops at the first empty readline() (end of file).
    """
    examples = []
    unique_id = 0
    with tf.gfile.GFile(input_file, "r") as reader:
        while True:
            raw = tokenization.convert_to_unicode(reader.readline())
            if not raw:
                break
            stripped = raw.strip()
            match = re.match(r"^(.*) \|\|\| (.*)$", stripped)
            if match:
                text_a, text_b = match.group(1), match.group(2)
            else:
                text_a, text_b = stripped, None
            examples.append(
                InputExample(unique_id=unique_id, text_a=text_a, text_b=text_b))
            unique_id += 1
    return examples