def read_lff(level, fp=None):
    """Parse a classification listing and yield one languoid per language line.

    Three kinds of content lines are recognised:

    * a *classification line*: one or more comma-separated items matching
      ``NAME_AND_ID_REGEX``; it becomes the current path;
    * an *isolate header*: text without ``[`` followed by ``[-isolate-]``;
      it switches the current path to the isolate marker;
    * a *language line*: leading whitespace, an item matching
      ``NAME_AND_ID_REGEX`` and a trailing ``[xxx]`` / ``[]`` code bracket.

    Blank lines and lines starting with ``#`` in the first column are skipped.

    :param level: indexable value; its first element prefixes the default \
    file name (``'%sff.txt'``) and the value itself is forwarded to \
    ``Languoid.from_lff``.
    :param fp: optional already-open, iterable source of lines. When falsy, \
    the file located via ``build_path`` is opened as UTF-8. Either way the \
    object is used as a context manager and is exited when parsing ends.
    :return: generator of the objects returned by ``Languoid.from_lff``.
    :raises ValueError: if a line matches none of the three line kinds.
    :raises AssertionError: if a language line appears before any \
    classification line or isolate header.
    """
    # Raw strings keep the pattern text identical while avoiding the invalid
    # escape sequences (``\s``, ``\[``) that newer Python versions warn about.
    lang_line = re.compile(r'\s+' + NAME_AND_ID_REGEX + r'(\[([a-z]{3})?\])$')
    class_line = re.compile(
        NAME_AND_ID_REGEX + r'(,\s*' + NAME_AND_ID_REGEX + r')*$')
    isolate_line = re.compile(r'([^\[]+)(\[-isolate-\])$')

    path = None
    with fp or build_path('%sff.txt' % level[0]).open(encoding='utf8') as fp:
        for line in fp:
            line = line.rstrip()
            if not line or line.startswith('#'):
                # ignore comments or empty lines
                continue

            # Order matters: a line is tested as language line first, then
            # as isolate header, and only then as classification line.
            if lang_line.match(line):
                if not path:
                    # Raised explicitly (instead of ``assert``) so the check
                    # survives ``python -O``; otherwise an orphaned language
                    # would be passed on with ``None`` like an isolate.
                    raise AssertionError(
                        'language line without preceding classification: %r'
                        % line)
                yield Languoid.from_lff(
                    None if path == 'isolate' else path, line.strip(), level)
            elif isolate_line.match(line):
                path = 'isolate'
            elif class_line.match(line):
                path = line.strip()
            else:
                raise ValueError(line)
def read_lff(level, fp=None, dry_run=False):
    """Parse a classification listing and yield one languoid per language line.

    Three kinds of content lines are recognised:

    * a *classification line*: one or more comma-separated items matching
      ``NAME_AND_ID_REGEX``; it becomes the current path;
    * an *isolate header*: text without ``[`` followed by ``[-isolate-]``;
      it switches the current path to the isolate marker;
    * a *language line*: leading whitespace, an item matching
      ``NAME_AND_ID_REGEX`` and a trailing bracket that is empty, holds three
      lowercase letters, or holds a ``NOCODE_...`` identifier.

    Blank lines and lines starting with ``#`` in the first column are skipped.

    :param level: a ``Level`` instance; the first character of its ``name`` \
    prefixes the default file name (``"%sff.txt"``) and the instance is \
    forwarded to ``Languoid.from_lff``.
    :param fp: optional already-open, iterable source of lines. When falsy, \
    the file located via ``build_path`` is opened as UTF-8. Either way the \
    object is used as a context manager and is exited when parsing ends.
    :param dry_run: passed through unchanged to ``Languoid.from_lff``.
    :return: generator of the objects returned by ``Languoid.from_lff``.
    :raises ValueError: if a line matches none of the three line kinds.
    :raises AssertionError: if ``level`` is not a ``Level``, or if a language \
    line appears before any classification line or isolate header.
    """
    assert isinstance(level, Level)

    # Raw strings keep the pattern text identical while avoiding the invalid
    # escape sequences (``\s``, ``\[``, ``\_``) that newer Python versions
    # warn about.
    lang_line = re.compile(
        r"\s+" + NAME_AND_ID_REGEX + r"(\[([a-z]{3}|NOCODE\_[^\]]+)?\])$")
    class_line = re.compile(
        NAME_AND_ID_REGEX + r"(,\s*" + NAME_AND_ID_REGEX + r")*$")
    isolate_line = re.compile(r"([^\[]+)(\[-isolate-\])$")

    path = None
    with fp or build_path("%sff.txt" % level.name[0]).open(encoding="utf8") as fp:
        for line in fp:
            line = line.rstrip()
            if not line or line.startswith("#"):
                # ignore comments or empty lines
                continue

            # Order matters: a line is tested as language line first, then
            # as isolate header, and only then as classification line.
            if lang_line.match(line):
                if not path:
                    # Raised explicitly (instead of ``assert``) so the check
                    # survives ``python -O``; otherwise an orphaned language
                    # would be passed on with ``None`` like an isolate.
                    raise AssertionError(
                        "language line without preceding classification: %r"
                        % line)
                yield Languoid.from_lff(
                    None if path == "isolate" else path,
                    line.strip(),
                    level,
                    dry_run=dry_run)
            elif isolate_line.match(line):
                path = "isolate"
            elif class_line.match(line):
                path = line.strip()
            else:
                raise ValueError(line)
def read_lff(level, fp=None, dry_run=False):
    """Read languoids from a classification listing, one per language line.

    The listing is processed line by line while tracking a "current path":

    * a line made of comma-separated ``NAME_AND_ID_REGEX`` items sets the
      current path to that (stripped) line;
    * a line ending in ``[-isolate-]`` sets the current path to the isolate
      marker, which is reported to ``Languoid.from_lff`` as ``None``;
    * an indented line consisting of a ``NAME_AND_ID_REGEX`` item and a
      trailing bracket (empty, three lowercase letters, or ``NOCODE_...``)
      produces a languoid under the current path.

    Empty lines and lines whose first character is ``#`` are ignored.

    :param level: a ``Level`` instance; ``level.name[0]`` prefixes the \
    default file name (``'%sff.txt'``) and the instance is forwarded to \
    ``Languoid.from_lff``.
    :param fp: optional already-open, iterable source of lines. When falsy, \
    the file located via ``build_path`` is opened as UTF-8. Either way the \
    object is used as a context manager and is exited when parsing ends.
    :param dry_run: passed through unchanged to ``Languoid.from_lff``.
    :return: generator of the objects returned by ``Languoid.from_lff``.
    :raises ValueError: if a line matches none of the three line kinds.
    :raises AssertionError: if ``level`` is not a ``Level``, or if a language \
    line appears before any classification line or isolate header.
    """
    assert isinstance(level, Level)

    # Raw strings: identical pattern text, but without the invalid escape
    # sequences (``\s``, ``\[``, ``\_``) newer Python versions warn about.
    lang_line = re.compile(
        r'\s+' + NAME_AND_ID_REGEX + r'(\[([a-z]{3}|NOCODE\_[^\]]+)?\])$')
    class_line = re.compile(
        NAME_AND_ID_REGEX + r'(,\s*' + NAME_AND_ID_REGEX + r')*$')
    isolate_line = re.compile(r'([^\[]+)(\[-isolate-\])$')

    path = None
    with fp or build_path(
            '%sff.txt' % level.name[0]).open(encoding='utf8') as fp:
        for line in fp:
            line = line.rstrip()
            if not line or line.startswith('#'):
                # ignore comments or empty lines
                continue

            # Language lines are checked first, isolate headers second and
            # classification lines last; anything else is rejected.
            if lang_line.match(line):
                if not path:
                    # Explicit raise rather than ``assert`` so the check is
                    # not stripped by ``python -O``, which would let an
                    # orphaned language through with ``None`` as its path.
                    raise AssertionError(
                        'language line without preceding classification: %r'
                        % line)
                yield Languoid.from_lff(
                    None if path == 'isolate' else path,
                    line.strip(),
                    level,
                    dry_run=dry_run)
            elif isolate_line.match(line):
                path = 'isolate'
            elif class_line.match(line):
                path = line.strip()
            else:
                raise ValueError(line)