예제 #1
0
    def _create_examples(self, f, k):
        """Builds the train and test `InputExample` lists from a data loader.

        No dev split is produced; only the train and test lists are returned.

        Args:
          f: Callable invoked as `f(k)`. It must return a nested pair
            `((train_x, test_x), (train_y, test_y))`.
          k: Argument forwarded unchanged to `f`.

        Returns:
          A tuple `(examples_train, examples_test)`, each a list of
          `InputExample` with `text_b=None`.
        """
        def _to_examples(texts, labels):
            # Labels are looked up by position, so `labels` must be indexable
            # with every integer index produced while enumerating `texts`.
            # NOTE(review): every example shares the constant guid "0-0";
            # confirm that no consumer needs unique guids.
            return [
                InputExample(
                    guid="0-0",
                    text_a=tokenization.convert_to_unicode(text),
                    text_b=None,
                    label=tokenization.convert_to_unicode(str(labels[i])))
                for i, text in enumerate(texts)
            ]

        (train_x, test_x), (train_y, test_y) = f(k)
        examples_train = _to_examples(train_x, train_y)
        examples_test = _to_examples(test_x, test_y)
        return examples_train, examples_test
def read_examples(input_file):
    """Parses `input_file` into a list of `InputExample`s, one per line.

    Each line is converted to unicode and stripped of surrounding whitespace.
    A line of the form `left ||| right` yields an example with `text_a=left`
    and `text_b=right`; any other line yields `text_a=<line>` and
    `text_b=None`. `unique_id` is the zero-based position of the example in
    the returned list.

    Args:
      input_file: Path passed to `tf.gfile.GFile`, opened in text read mode.

    Returns:
      The list of parsed `InputExample`s, in file order.
    """
    pair_pattern = re.compile(r"^(.*) \|\|\| (.*)$")
    parsed = []
    with tf.gfile.GFile(input_file, "r") as reader:
        for raw_line in reader:
            converted = tokenization.convert_to_unicode(raw_line)
            # Stop reading as soon as a converted line comes back empty.
            if not converted:
                break
            stripped = converted.strip()
            pair = pair_pattern.match(stripped)
            if pair is not None:
                first, second = pair.groups()
            else:
                first, second = stripped, None
            # The list length before appending equals the running example id.
            parsed.append(
                InputExample(
                    unique_id=len(parsed), text_a=first, text_b=second))
    return parsed