def from_file(cls, fname):
    """
    Create an instance from an orthography profile stored in `fname`.

    An orthography profile is expected to be

    - a tab-separated CSV file,
    - encoded in UTF-8,
    - with a header row that contains a column "Grapheme".

    The lines are read with `normalize='NFD'` and handed to `reader` with
    `dicts=True`, a tab delimiter and `quotechar=None`; every item produced
    by `reader` is passed to `cls` as one positional argument.
    """
    lines = readlines(fname, normalize='NFD')
    rows = reader(lines, dicts=True, delimiter='\t', quotechar=None)
    return cls(*list(rows))
def test_readlines(tmpdir):
    """Check `readlines` against a file on disk and against in-memory lines."""
    from clldutils.path import readlines

    # Files are read in universal newline mode, so "\n", "\r\n" and "\r"
    # each terminate a line: four lines are expected from the bytes below.
    sample = tmpdir / 'test.txt'
    sample.write_binary(b'a\nb\r\nc\rd')
    assert len(readlines(str(sample))) == 4

    # A list of lines is accepted in place of a file name.
    commented = ['\t#ä ']
    assert readlines(commented) == commented
    assert readlines(commented, normalize='NFD') != commented
    assert readlines(commented, strip=True)[0] == commented[0].strip()
    # Comment lines are dropped, or reported as None when line numbers
    # are requested.
    assert readlines(commented, comment='#') == []
    assert readlines(commented, comment='#', linenumbers=True) == [(1, None)]

    # An empty line is kept by default but dropped once comment handling
    # or stripping is switched on.
    blank = ['']
    assert readlines(blank) == ['']
    assert readlines(blank, comment='#') == []
    assert readlines(blank, strip=True, normalize='NFC') == []
def read_lff(api, log, new, level, fname=None):
    """
    Lazily parse a classification listing and yield one languoid per spec line.

    The input consists of two kinds of significant lines:

    - classification lines (no leading whitespace): split on `LINEAGE_SEP`,
      each component parsed with `parse_languoid`; the result becomes the
      current lineage path;
    - language/dialect lines (leading whitespace): parsed with
      `parse_languoid` and turned into a languoid via `languoid`, attached
      to the most recently seen lineage path.

    Lines starting with "#" and blank lines are ignored.

    This is a generator: nothing (including the assertion and the log
    message) runs before the first item is requested.

    :param api: object providing `build_path`; also passed on to `languoid`.
    :param log: logger; receives an info message and is passed on to \
    `parse_languoid` and `languoid`.
    :param new: passed through unchanged to `languoid`.
    :param level: `Level.language` or `Level.dialect`.
    :param fname: either a list of lines, or a path to read with `readlines`. \
    If falsy, `api.build_path('<x>ff.txt')` is used, where `<x>` is the first \
    letter of `level.name`.
    :raises ValueError: if a language/dialect line appears before any \
    classification line.
    """
    assert level in [Level.language, Level.dialect]
    log.info('reading {0}s from {1}'.format(level.name, fname))

    if isinstance(fname, list):
        # Already a sequence of lines, no file access needed.
        lines = fname
    else:
        lines = readlines(fname or api.build_path('%sff.txt' % level.name[0]))

    path = None
    for line in lines:
        # Only trailing whitespace is removed: leading whitespace is what
        # distinguishes a language/dialect line from a classification line.
        line = line.rstrip()
        if line.startswith('#') or not line.strip():
            # ignore comments or empty lines
            continue

        # Raw string: '\s' in a plain literal is an invalid escape sequence.
        if re.match(r'\s', line):
            # leading whitespace => a language/dialect spec.
            if path is None:
                raise ValueError('language line without classification line')
            name, id_, hid = parse_languoid(line.strip(), log)
            yield languoid(api, log, new, path, name, id_, hid, level)
        else:
            path = [
                parse_languoid(s.strip(), log)
                for s in line.split(LINEAGE_SEP)
            ]
def editors(self):
    """
    Collect the editors listed in CONTRIBUTORS.md.

    The file is scanned (with `strip=True`) for a heading line ending in
    "# Editors". Within that section, once a line starting with "--- " has
    been seen, every line containing "|" is taken as a table row of the form
    ``<start>-<end> | <name>``. The section ends at the next line starting
    with "#".

    :return: list of `Editor(name, start, end)` named tuples in file order; \
    `end` is `None` when the period has no end part.
    """
    Editor = namedtuple('Editor', ['name', 'start', 'end'])
    found = []
    inside_section = False
    inside_table = False

    for row in readlines(self.path('CONTRIBUTORS.md'), strip=True):
        if inside_section and row.startswith('#'):
            # Next heading reached: the editors section is over.
            inside_section = inside_table = False
        elif row.endswith('# Editors'):
            inside_section = True
        elif inside_section and row.startswith('--- '):
            # Separator line below the table header: data rows follow.
            inside_table = True
        elif inside_table and '|' in row:
            span, _, who = row.partition('|')
            first, _, last = span.strip().partition('-')
            found.append(
                Editor(who.strip(), first.strip(), last.strip() or None))

    return found
def from_file(cls, fname):
    """
    Create an instance from the rows found in `fname`.

    The lines are read with "#" as comment marker and `normalize='NFD'`,
    parsed with `reader` using its default settings, and every resulting
    row is passed to `cls` as one positional argument.
    """
    lines = readlines(fname, comment='#', normalize='NFD')
    rows = list(reader(lines))
    return cls(*rows)
def from_textfile(cls, fname, mapping='mapping'):
    """
    Create an instance from the text contained in `fname`.

    The lines returned by `readlines` are joined with single spaces and the
    resulting string is delegated to `cls.from_text`, forwarding `mapping`.
    """
    build = cls.from_text
    lines = readlines(fname)
    return build(' '.join(lines), mapping=mapping)