def _resolveCompounds(compoundRows, COMPONENTS):
    """Build every Compound from *compoundRows*, resolving children by name.

    A child may be a Component or a Compound that is itself defined by
    another row, so rows are processed in repeated passes: a row whose
    children are not both available yet is deferred to the next pass.
    Rows are inserted in the same order a FIFO re-queue would produce.

    Raises:
        ValueError: if a full pass resolves nothing, i.e. the remaining rows
            reference a name that does not exist or reference each other
            circularly (the previous implementation looped forever here).
    """
    COMPOUNDS = {}
    pending = list(compoundRows)
    while pending:
        deferred = []
        for row in pending:
            (name, inGB, pinyinString, operator,
             firstChildName, secondChildName, mix) = row
            pinyinList = [] if pinyinString is None else pinyinString.split(',')
            # Components take precedence over compounds of the same name.
            firstChild = COMPONENTS.get(
                firstChildName, COMPOUNDS.get(firstChildName))
            secondChild = COMPONENTS.get(
                secondChildName, COMPOUNDS.get(secondChildName))
            # Test for presence, not truthiness: an existing child object
            # that happens to evaluate falsy must still count as resolved.
            if firstChild is not None and secondChild is not None:
                COMPOUNDS[name] = Compound(
                    name, operator, firstChild, secondChild, mix,
                    inGB=inGB, pinyinList=pinyinList)
            else:
                deferred.append(row)
        if len(deferred) == len(pending):
            unresolved = ', '.join(str(row[0]) for row in deferred)
            raise ValueError(
                'Cannot resolve children of compound(s): ' + unresolved)
        pending = deferred
    return COMPOUNDS


def loadData(
    withTopology=False, withCorner=False
) -> Tuple[Dict[str, Component], Dict[str, Compound]]:
    """Load all components and compounds from the ``../data/main`` database.

    Rows of table ``main`` whose ``operator`` is NULL become ``Component``
    objects (built from their stroke features and SVG data); all other rows
    become ``Compound`` objects that combine two previously known children.

    Args:
        withTopology: if true, set ``topologyMatrix`` on every component from
            the ``../data/topology`` cache, building the cache first when the
            file does not exist.
        withCorner: if true, set ``corner`` on every component from the
            ``../data/corner`` cache, building the cache first when the file
            does not exist.

    Returns:
        A ``(COMPONENTS, COMPOUNDS)`` pair of dicts keyed by name.

    Raises:
        ValueError: if some compound's children can never be resolved.
    """
    databasePath = _path('../data/main')
    database = connect(databasePath)
    try:
        cursor = database.cursor()
        # Zero-width split point between a digit and the following 'M', so
        # the SVG string is cut into one chunk per 'M'-initiated segment.
        strokeDataPattern = RE(r'(?<=\d)(?=M)')
        COMPONENTS = {}
        for row in cursor.execute(
            'SELECT name, gb, pinyin, feature, svg FROM main WHERE operator IS NULL;'
        ):
            name, inGB, pinyinString, featureString, svgString = row
            pinyinList = [] if pinyinString is None else pinyinString.split(',')
            featureList = featureString.split(',')
            svgList = strokeDataPattern.split(svgString)
            # Features and SVG chunks are paired positionally.
            strokeList = [
                Stroke(feature, svg)
                for feature, svg in zip(featureList, svgList)
            ]
            COMPONENTS[name] = Component(
                name, strokeList, None, inGB=inGB, pinyinList=pinyinList)

        # Fetch everything now so the connection can be closed before the
        # (potentially slow) cache building and compound resolution.
        compoundRows = cursor.execute(
            'SELECT name, gb, pinyin, operator, first, second, mix FROM main WHERE operator IS NOT NULL;'
        ).fetchall()
    finally:
        # The original never closed the connection, leaking the handle.
        database.close()

    if withTopology:
        topologyPath = _path('../data/topology')
        if not exists(topologyPath):
            buildTopology(COMPONENTS, topologyPath)
        # NOTE(review): `load` is presumably pickle.load; that is only safe
        # because this cache is produced locally by buildTopology - confirm.
        with open(topologyPath, 'rb') as f:
            TOPOLOGIES = load(f)
        for name, component in COMPONENTS.items():
            component.topologyMatrix = TOPOLOGIES[name]

    if withCorner:
        cornerPath = _path('../data/corner')
        if not exists(cornerPath):
            buildCorner(COMPONENTS, cornerPath)
        # NOTE(review): same trust assumption as the topology cache above.
        with open(cornerPath, 'rb') as f:
            CORNERS = load(f)
        for name, component in COMPONENTS.items():
            component.corner = CORNERS[name]

    COMPOUNDS = _resolveCompounds(compoundRows, COMPONENTS)
    return COMPONENTS, COMPOUNDS
assert txt_file.endswith('.txt') new_name = txt_file + '.html' import sys, html from re import compile as Re import generate_utils try: from generate_utils import OutFileGreen as OutFile except ImportError: OutFile = open with open(txt_file) as file: string = file.read() R = Re('\d?\d:\d+\d+') A = R.split(string) B = R.findall(string) if not (A and A[0].strip() == ''): raise ValueError("Must begin with a time") assert len(A) == 1 + len( B), "the programmer did not understand re.split and re.findall" bits = [] for i in range(len(B)): b, a = A[i + 1], B[i] x, y = a.split(':') x, y = int(x), int(y) time = x * 60 + y title = html.escape(b)