Example #1
0
def load_test_data(f, optsplit=None):
    """Load break-test cases from the test file named by ``f``.

    ``f`` is first handed to ``unicode.fetch`` (presumably so the file is
    available locally -- confirm against that helper).  The file is then
    opened by its base name only, i.e. relative to the current directory.

    Only lines starting with the ``÷`` character are treated as tests.  A
    test line that does not match the expected layout is reported on stdout
    and skipped; a test for which ``process_split_string`` returns a falsy
    value is skipped silently.

    Args:
        f: Name or path of the test file.
        optsplit: Passed through unchanged to ``process_split_info``.
            Defaults to a fresh empty list on every call.

    Returns:
        A list of ``(chars, info)`` tuples as returned by
        ``process_split_info``, one per accepted test line.

    Raises:
        AssertionError: if an accepted test does not carry exactly
            ``len(chars) - 1`` break entries.
    """
    # A literal list default would be one object shared by every call.
    if optsplit is None:
        optsplit = []

    # group(1): the text between the leading "÷" and the "÷ # ÷ [0.2]" marker
    # group(2): the break description, from the first ÷/× after "[0.2]" up to
    #           the closing "÷ [0.3]"
    test_re = re.compile(
        r"^÷\s+([^\s].*[^\s])\s+÷\s+#\s+÷\s+\[0.2\].*?([÷×].*)\s+÷\s+\[0.3\]\s*$"
    )

    unicode.fetch(f)
    cases = []
    for line in fileinput.input(os.path.basename(f)):
        # lines that include a test start with the ÷ character
        if len(line) < 2 or not line.startswith('÷'):
            continue

        match = test_re.match(line)
        if not match:
            print("error: no match on line where test was expected: %s" % line)
            continue

        # process the characters in this test case; a falsy result means the
        # case contains invalid characters (viz., surrogates) and is skipped
        chars = process_split_string(match.group(1))
        if not chars:
            continue

        # now process the break information for those characters
        chars, info = process_split_info(match.group(2), chars, optsplit)

        # make sure that we have break info for each break!
        assert len(chars) - 1 == len(info)

        cases.append((chars, info))

    return cases
def load_test_data(f, optsplit=None):
    """Read break-test cases out of the test file named by ``f``.

    ``unicode.fetch(f)`` is called first (presumably to obtain the file --
    confirm against that helper); the file is then read by its base name,
    relative to the current directory.

    Lines that do not start with ``÷`` are ignored.  A ``÷`` line that does
    not match the expected layout is reported on stdout and skipped, and a
    test for which ``process_split_string`` returns a falsy value is dropped
    silently.

    Args:
        f: Name or path of the test file.
        optsplit: Forwarded as-is to ``process_split_info``.  A new empty
            list is used when omitted.

    Returns:
        A list of ``(chars, info)`` tuples as returned by
        ``process_split_info``.

    Raises:
        AssertionError: if ``len(info)`` is not ``len(chars) - 1`` for an
            accepted test.
    """
    # Avoid a mutable default that would be shared between calls.
    if optsplit is None:
        optsplit = []

    outls = []
    # Raw string: same pattern as before, but without relying on Python
    # passing unknown escapes such as "\s" and "\[" through unchanged.
    testRe1 = re.compile(r"^÷\s+([^\s].*[^\s])\s+÷\s+#\s+÷\s+\[0.2\].*?([÷×].*)\s+÷\s+\[0.3\]\s*$")

    unicode.fetch(f)
    for line in fileinput.input(os.path.basename(f)):
        # lines that include a test start with the ÷ character.
        # startswith() is correct for both byte strings (where ÷ is two
        # bytes) and text strings (where it is one character); comparing a
        # fixed two-element slice only works for the former.
        if len(line) < 2 or not line.startswith('÷'):
            continue

        m = testRe1.match(line)
        if not m:
            # single parenthesised argument: valid on Python 2 and Python 3
            print("error: no match on line where test was expected: %s" % line)
            continue

        # process the characters in this test case
        chars = process_split_string(m.group(1))
        # skip test case if it contains invalid characters (viz., surrogates)
        if not chars:
            continue

        # now process test cases
        (chars, info) = process_split_info(m.group(2), chars, optsplit)

        # make sure that we have break info for each break!
        assert len(chars) - 1 == len(info)

        outls.append((chars, info))

    return outls
def load_test_data(f):
    """Load test records made of five ``;``-separated code point fields.

    ``unicode.fetch(f)`` is called first (presumably to obtain the file --
    confirm against that helper); the file is then read by its base name,
    relative to the current directory.

    Empty lines and lines starting with ``#`` or ``@`` are ignored.  Every
    other line must consist of five ``;``-terminated fields followed by
    whitespace and a ``#`` comment; lines that do not are reported on stdout
    and skipped.  Each field is a whitespace-separated list of hexadecimal
    code points.

    Args:
        f: Name or path of the test file.

    Returns:
        A list with one entry per accepted line.  Each entry is a list of
        five lists of integer code points, in field order.  Lines in which
        ``unicode.is_surrogate`` flags any code point are left out.

    Raises:
        ValueError: if a field contains a token that is not valid hexadecimal.
    """
    outls = []
    # Raw string: identical pattern, without depending on "\s" surviving as
    # an unrecognised escape in a normal string literal.
    testRe = re.compile(r"^(.*?);(.*?);(.*?);(.*?);(.*?);\s+#.*$")

    unicode.fetch(f)
    for line in fileinput.input(os.path.basename(f)):
        # comment and header lines start with # and @ respectively
        if not line or line.startswith(("#", "@")):
            continue

        m = testRe.match(line)
        if not m:
            # single parenthesised argument: valid on Python 2 and Python 3
            print("error: no match on line where test was expected: %s" % line)
            continue

        groups = []
        for field in m.groups():
            group = []
            for ch in field.split():
                intch = int(ch, 16)
                if unicode.is_surrogate(intch):
                    break
                group.append(intch)
            else:
                # whole field parsed without meeting a surrogate
                groups.append(group)
                continue
            # a surrogate was found: discard the entire line and stop parsing
            # it (later tokens are deliberately not converted)
            groups = None
            break

        if groups is not None:
            outls.append(groups)

    return outls
def load_test_data(f):
    """Parse a test file whose records hold five fields of hex code points.

    The file is obtained through ``unicode.fetch(f)`` (presumably a download
    or cache step -- confirm against that helper) and then read by its base
    name, relative to the current directory.

    Skipped without comment: empty lines and lines beginning with ``#`` or
    ``@``.  Any other line is expected to hold five ``;``-terminated fields
    followed by whitespace and a ``#`` comment; a line that does not match
    is reported on stdout and skipped.

    Args:
        f: Name or path of the test file.

    Returns:
        A list of records; each record is a list of five lists of integers
        (the code points of each field, parsed as base 16).  A record is
        omitted entirely if ``unicode.is_surrogate`` is true for any of its
        code points.

    Raises:
        ValueError: if a token in a field is not valid hexadecimal.
    """
    def parse_field(field):
        # Return the field's code points, or None as soon as a surrogate is
        # met (tokens after the surrogate are not converted).
        codepoints = []
        for ch in field.split():
            intch = int(ch, 16)
            if unicode.is_surrogate(intch):
                return None
            codepoints.append(intch)
        return codepoints

    # Raw string keeps the pattern identical while avoiding invalid string
    # escapes such as "\s".
    testRe = re.compile(r"^(.*?);(.*?);(.*?);(.*?);(.*?);\s+#.*$")

    unicode.fetch(f)
    outls = []
    for line in fileinput.input(os.path.basename(f)):
        # comment and header lines start with # and @ respectively
        if not line or line.startswith(("#", "@")):
            continue

        m = testRe.match(line)
        if not m:
            # single parenthesised argument: valid on Python 2 and Python 3
            print("error: no match on line where test was expected: %s" % line)
            continue

        groups = []
        for field in m.groups():
            group = parse_field(field)
            if group is None:
                # surrogate present: drop the whole record
                break
            groups.append(group)
        else:
            outls.append(groups)

    return outls