Esempio n. 1
0
def read_lff(level, fp=None):
    """Parse a classification listing and yield one languoid per language line.

    The input is read line by line. Lines starting with ``#`` and blank lines
    are skipped. Every other line must be one of:

    * a language line: leading whitespace, a name-and-id (as described by
      ``NAME_AND_ID_REGEX``), then a bracketed, optional three-letter
      lowercase code;
    * an isolate header: any text ending in ``[-isolate-]``;
    * a classification line: one or more comma-separated name-and-id items.

    Header lines set the current path; each language line is turned into a
    languoid via ``Languoid.from_lff(path, stripped_line, level)``, where
    ``path`` is ``None`` for languages listed under an isolate header.

    :param level: Passed through to ``Languoid.from_lff``. When ``fp`` is not \
    given, ``level[0]`` is used as the prefix of the ``<prefix>ff.txt`` file \
    name handed to ``build_path``.
    :param fp: Optional object to read lines from instead of the default \
    file. It is used as the context manager of the ``with`` statement, so it \
    is exited when the generator finishes (for ordinary file objects this \
    presumably closes them - callers should not rely on reusing it).
    :return: Generator of the objects returned by ``Languoid.from_lff``.
    :raises ValueError: If a line matches none of the three formats, or if a \
    language line appears before any classification/isolate header.
    """
    # Raw strings keep the regex escapes intact without relying on Python
    # passing unknown string escapes through (a SyntaxWarning on 3.12+).
    lang_line = re.compile(r'\s+' + NAME_AND_ID_REGEX + r'(\[([a-z]{3})?\])$')
    class_line = re.compile(
        NAME_AND_ID_REGEX + r'(,\s*' + NAME_AND_ID_REGEX + r')*$')
    isolate_line = re.compile(r'([^\[]+)(\[-isolate-\])$')

    path = None
    with fp or build_path('%sff.txt' % level[0]).open(encoding='utf8') as fp:
        for line in fp:
            line = line.rstrip()
            if not line or line.startswith('#'):
                # ignore comments or empty lines
                continue

            if lang_line.match(line):
                if path is None:
                    # An explicit error instead of ``assert``: assertions are
                    # stripped under ``python -O``, which would silently turn
                    # an orphan language line into an isolate (path=None).
                    raise ValueError(
                        'language line without preceding classification '
                        'line: %r' % line)
                yield Languoid.from_lff(
                    None if path == 'isolate' else path, line.strip(), level)
            elif isolate_line.match(line):
                path = 'isolate'
            elif class_line.match(line):
                path = line.strip()
            else:
                # Neither a language, isolate nor classification line.
                raise ValueError(line)
Esempio n. 2
0
def read_lff(level, fp=None, dry_run=False):
    """Parse a classification listing and yield one languoid per language line.

    The input is read line by line. Lines starting with ``#`` and blank lines
    are skipped. Every other line must be one of:

    * a language line: leading whitespace, a name-and-id (as described by
      ``NAME_AND_ID_REGEX``), then a bracketed, optional code that is either
      three lowercase letters or starts with ``NOCODE_``;
    * an isolate header: any text ending in ``[-isolate-]``;
    * a classification line: one or more comma-separated name-and-id items.

    Header lines set the current path; each language line is turned into a
    languoid via ``Languoid.from_lff``, with ``path`` set to ``None`` for
    languages listed under an isolate header.

    :param level: A ``Level`` instance; passed through to \
    ``Languoid.from_lff``. When ``fp`` is not given, the first character of \
    ``level.name`` is the prefix of the ``<prefix>ff.txt`` file name handed \
    to ``build_path``.
    :param fp: Optional object to read lines from instead of the default \
    file. It is used as the context manager of the ``with`` statement, so it \
    is exited when the generator finishes (for ordinary file objects this \
    presumably closes them - callers should not rely on reusing it).
    :param dry_run: Forwarded unchanged to ``Languoid.from_lff``.
    :return: Generator of the objects returned by ``Languoid.from_lff``.
    :raises ValueError: If a line matches none of the three formats, or if a \
    language line appears before any classification/isolate header.
    """
    assert isinstance(level, Level)
    # Raw strings keep the regex escapes intact without relying on Python
    # passing unknown string escapes through (a SyntaxWarning on 3.12+).
    lang_line = re.compile(
        r"\s+" + NAME_AND_ID_REGEX + r"(\[([a-z]{3}|NOCODE\_[^\]]+)?\])$")
    class_line = re.compile(
        NAME_AND_ID_REGEX + r"(,\s*" + NAME_AND_ID_REGEX + r")*$")
    isolate_line = re.compile(r"([^\[]+)(\[-isolate-\])$")

    path = None
    with fp or build_path("%sff.txt" % level.name[0]).open(encoding="utf8") as fp:
        for line in fp:
            line = line.rstrip()
            if not line or line.startswith("#"):
                # ignore comments or empty lines
                continue

            if lang_line.match(line):
                if path is None:
                    # An explicit error instead of ``assert``: assertions are
                    # stripped under ``python -O``, which would silently turn
                    # an orphan language line into an isolate (path=None).
                    raise ValueError(
                        "language line without preceding classification "
                        "line: %r" % line)
                yield Languoid.from_lff(
                    None if path == "isolate" else path,
                    line.strip(),
                    level,
                    dry_run=dry_run)
            elif isolate_line.match(line):
                path = "isolate"
            elif class_line.match(line):
                path = line.strip()
            else:
                # Neither a language, isolate nor classification line.
                raise ValueError(line)
Esempio n. 3
0
def read_lff(level, fp=None, dry_run=False):
    """Yield languoids parsed from the language lines of a classification file.

    Processing is line based. Comment lines (starting with ``#``) and blank
    lines are ignored. Each remaining line is classified as, in this order:

    1. language line - indented name-and-id (see ``NAME_AND_ID_REGEX``)
       followed by ``[]`` containing nothing, three lowercase letters, or a
       code starting with ``NOCODE_``;
    2. isolate header - text ending in ``[-isolate-]``;
    3. classification line - comma-separated name-and-id items.

    The most recent header determines the path handed to
    ``Languoid.from_lff`` for the language lines that follow it; under an
    isolate header the path is ``None``.

    :param level: A ``Level`` instance, forwarded to ``Languoid.from_lff``. \
    Its ``name[0]`` selects the default ``<prefix>ff.txt`` file (via \
    ``build_path``) when ``fp`` is omitted.
    :param fp: Optional line source replacing the default file. It becomes \
    the ``with`` statement's context manager and is therefore exited once the \
    generator is done (presumably closing it, for ordinary file objects).
    :param dry_run: Forwarded unchanged to ``Languoid.from_lff``.
    :return: Generator of the objects returned by ``Languoid.from_lff``.
    :raises ValueError: For a line matching none of the formats above, or a \
    language line that is not preceded by a header line.
    """
    assert isinstance(level, Level)
    # Raw string literals: the patterns are unchanged, but no longer depend
    # on invalid string escapes (which emit a SyntaxWarning on Python 3.12+).
    lang_line = re.compile(r'\s+' + NAME_AND_ID_REGEX +
                           r'(\[([a-z]{3}|NOCODE\_[^\]]+)?\])$')
    class_line = re.compile(NAME_AND_ID_REGEX + r'(,\s*' + NAME_AND_ID_REGEX +
                            r')*$')
    isolate_line = re.compile(r'([^\[]+)(\[-isolate-\])$')

    path = None
    with fp or build_path(
            '%sff.txt' % level.name[0]).open(encoding='utf8') as fp:
        for line in fp:
            line = line.rstrip()
            if not line or line.startswith('#'):
                # ignore comments or empty lines
                continue

            if lang_line.match(line):
                if path is None:
                    # Raise instead of ``assert``: with ``python -O`` the
                    # assertion vanishes and the language would silently be
                    # handled like an isolate (path=None).
                    raise ValueError(
                        'language line without preceding classification '
                        'line: %r' % line)
                yield Languoid.from_lff(None if path == 'isolate' else path,
                                        line.strip(),
                                        level,
                                        dry_run=dry_run)
            elif isolate_line.match(line):
                path = 'isolate'
            elif class_line.match(line):
                path = line.strip()
            else:
                # Neither a language, isolate nor classification line.
                raise ValueError(line)