Ejemplo n.º 1
0
def loadData(
        withTopology=False,
        withCorner=False) -> Tuple[Dict[str, Component], Dict[str, Compound]]:
    '''
    Load every component and compound stored in the ``main`` table.

    Rows whose ``operator`` column is NULL become :class:`Component` objects
    built from their stroke data. All other rows become :class:`Compound`
    objects that reference two already-loaded children (components or
    compounds).

    :param withTopology: when true, set ``topologyMatrix`` on every component
        from the ``../data/topology`` cache file, building that file first if
        it does not exist
    :param withCorner: when true, set ``corner`` on every component from the
        ``../data/corner`` cache file, building that file first if it does
        not exist
    :returns: ``(COMPONENTS, COMPOUNDS)``, two dictionaries keyed by name
    :raises ValueError: if some compounds reference children that can never
        be resolved (previously this made the function loop forever)
    '''
    databasePath = _path('../data/main')
    database = connect(databasePath)
    try:
        cursor = database.cursor()
        # Zero-width split point between the end of one stroke (a digit) and
        # the 'M' that opens the next one.
        strokeDataPattern = RE(r'(?<=\d)(?=M)')
        COMPONENTS = {}
        for row in cursor.execute(
                'SELECT name, gb, pinyin, feature, svg FROM main WHERE operator IS NULL;'
        ):
            name, inGB, pinyinString, featureString, svgString = row
            pinyinList = [] if pinyinString is None else pinyinString.split(',')
            featureList = featureString.split(',')
            svgList = strokeDataPattern.split(svgString)
            strokeList = [
                Stroke(feature, svg)
                for feature, svg in zip(featureList, svgList)
            ]
            COMPONENTS[name] = Component(name,
                                         strokeList,
                                         None,
                                         inGB=inGB,
                                         pinyinList=pinyinList)
        # NOTE(review): `load` is presumably pickle.load; the cache files are
        # produced locally by buildTopology/buildCorner and must never come
        # from an untrusted location.
        if withTopology:
            topologyPath = _path('../data/topology')
            if not exists(topologyPath):
                buildTopology(COMPONENTS, topologyPath)
            with open(topologyPath, 'rb') as f:
                TOPOLOGIES = load(f)
            for name, component in COMPONENTS.items():
                component.topologyMatrix = TOPOLOGIES[name]
        if withCorner:
            cornerPath = _path('../data/corner')
            if not exists(cornerPath):
                buildCorner(COMPONENTS, cornerPath)
            with open(cornerPath, 'rb') as f:
                CORNERS = load(f)
            for name, component in COMPONENTS.items():
                component.corner = CORNERS[name]
        COMPOUNDS = {}
        pending = cursor.execute(
            'SELECT name, gb, pinyin, operator, first, second, mix FROM main WHERE operator IS NOT NULL;'
        ).fetchall()
        # A compound can only be built once both children exist, so resolve
        # in rounds: rows that cannot be built yet are retried in the next
        # round, after everything else in the current round has been tried.
        while pending:
            unresolved = []
            for row in pending:
                name, inGB, pinyinString, operator, firstChildName, secondChildName, mix = row
                pinyinList = [] if pinyinString is None else pinyinString.split(',')
                firstChild = COMPONENTS.get(firstChildName,
                                            COMPOUNDS.get(firstChildName))
                secondChild = COMPONENTS.get(secondChildName,
                                             COMPOUNDS.get(secondChildName))
                if firstChild and secondChild:
                    COMPOUNDS[name] = Compound(name,
                                               operator,
                                               firstChild,
                                               secondChild,
                                               mix,
                                               inGB=inGB,
                                               pinyinList=pinyinList)
                else:
                    unresolved.append(row)
            if len(unresolved) == len(pending):
                # A full round without progress means the remaining rows
                # reference missing or mutually dependent children.
                names = ', '.join(str(row[0]) for row in unresolved)
                raise ValueError(
                    f'cannot resolve the children of compound(s): {names}')
            pending = unresolved
        return COMPONENTS, COMPOUNDS
    finally:
        database.close()
Ejemplo n.º 2
0
class FlexibleZeroPinyin(Pinyin):
    '''
    Non-fixed (flexible) zero initial, as used by e.g. the Ziranma and Xiaohe
    double-pinyin schemes.
    '''
    # Regex -> replacement rules; the replacements use backreferences, so
    # they are presumably applied with re.sub by the Pinyin base class.
    regularize = {
        # Syllable starting with a/o/e: prefix 零 and double the first vowel.
        '^([aoe])': r'零\1\1',
        # Syllable that is only m, n or ng: prefix 零o.
        '^(m|n|ng)$': r'零o\1',
        # Keyboard spelling v stands for ü.
        'v': r'ü',
    }
    # Zero-width split point: either between an initial consonant letter and
    # the start of the final, or right after 零 followed by a/o/e.
    splitter = RE(r'(?<=[bpmfdtnlgkhjqxzcsryw])(?=[aeiouünm])|(?<=零[aoe])')
Ejemplo n.º 3
0
class FixedZeroPinyin(Pinyin):
    '''
    Fixed zero initial, as used by e.g. the Microsoft double-pinyin scheme.
    '''
    # Regex -> replacement rules; the replacements use backreferences, so
    # they are presumably applied with re.sub by the Pinyin base class.
    regularize = {
        # Syllable starting with a/o/e: prefix the zero-initial marker 零.
        '^([aoe])': r'零\1',
        # Syllable that is only m, n or ng: prefix 零 as well.
        '^(m|n|ng)$': r'零\1',
        # Keyboard spelling v stands for ü.
        'v': r'ü',
    }
    # Zero-width split point between the initial (a consonant letter or 零)
    # and the first letter of the final.
    splitter = RE(r'(?<=[bpmfdtnlgkhjqxzcsryw零])(?=[aeiouünm])')
Ejemplo n.º 4
0
class Stroke:
    '''
    A stroke is a geometric figure made of one or more curves joined end to
    end, usually denoted :math:`s`.

    :param feature: shape of the stroke, e.g. 「横」「竖」「横折」「竖折提」
    :param svg: svg path string describing the whole stroke; it starts with
        an absolute ``M x y`` move followed by relative ``h``/``v``/``l``/``c``
        commands with integer, space-separated parameters

    '''
    # Zero-width split point between the last digit of one command and the
    # letter of the next one.
    commandSplitter = RE(r'(?<=\d)(?=[hvlc])')

    def __init__(self, feature: str, svg: str):
        #: Shape of the stroke, e.g. 横 or 竖
        self.feature = feature
        #: All curves that make up the stroke, in path order
        self.curveList: List[Curve] = []
        commandList = self.commandSplitter.split(svg)
        # The first chunk is the "M x y" move; drop the letter, keep x and y.
        position: Point = array(
            [int(x) for x in commandList.pop(0)[1:].split(' ')])
        #: Starting point of the stroke; ``__str__`` reads it, and it was
        #: never assigned before, which made ``str(stroke)`` raise
        #: AttributeError
        self.start = position
        for curveString in commandList:
            curve, position = self.factory(position, curveString)
            self.curveList.append(curve)

    def factory(self, position, curveString):
        '''
        Build one curve from a single relative svg command.

        :param position: current point, i.e. where the curve starts
        :param curveString: command letter followed by its integer,
            space-separated parameters
        :returns: ``(curve, endPoint)`` where ``endPoint`` is the start of
            the next curve
        '''
        command = curveString[0]
        parameterList = [int(x) for x in curveString[1:].split(' ')]
        p0 = position
        if command == 'h':
            # horizontal segment: only dx is given
            p1 = p0 + array(parameterList + [0])
        elif command == 'v':
            # vertical segment: only dy is given
            p1 = p0 + array([0] + parameterList)
        elif command == 'l':
            p1 = p0 + array(parameterList)
        else:
            # Any other command is treated as a cubic curve whose three
            # control/end points are all relative to the start point.
            p1 = p0 + array(parameterList[:2])
            p2 = p0 + array(parameterList[2:4])
            p3 = p0 + array(parameterList[4:])
            return Cubic(p0, p1, p2, p3), p3
        return Linear(p0, p1), p1

    @cached_property
    def linearizeLength(self):
        '''
        :returns: sum of the linearized lengths of all curves in the stroke
        '''
        return sum(curve.linearizeLength() for curve in self.curveList)

    def __str__(self):
        return f'{self.feature}: {self.start} -> {self.curveList}'
Ejemplo n.º 5
0
class StandardPinyin(Pinyin):
    '''
    See https://zh.m.wikisource.org/zh-hans/%E6%B1%89%E8%AF%AD%E6%8B%BC%E9%9F%B3%E6%96%B9%E6%A1%88.
    '''
    # Regex -> replacement rules; the replacements use backreferences, so
    # they are presumably applied in this order with re.sub by the Pinyin
    # base class.
    # NOTE(review): `ri` is not covered by the `(h)i` rule; confirm whether
    # it should also map to i2.
    regularize = {
        '([zcs])i': r'\1i1',  # apical vowel ɿ
        '(h)i': r'\1i2',  # apical vowel ʅ
        '^([aoe])': r'零开\1',  # "open mouth" (kaikou) finals
        '^(m|n|ng)$': r'零开\1',  # nasal-only syllables count as kaikou
        'w(u)': r'零合\1',  # "closed mouth" (hekou): wu
        'w([aoe])': r'零合u\1',  # hekou, other cases
        'y(i)': r'零齐\1',  # "even teeth" (qichi): yi yin ying
        'y([aoe])': r'零齐i\1',  # qichi: ya yan yang ye you yong
        'y(u)': r'零撮ü',  # "round mouth" (cuokou)
        # Only one of the two groups takes part in a match and the other
        # expands to the empty string, so both must be referenced: with
        # r'\1ü' alone, "nv"/"lv" lost their initial and became just "ü".
        '([jqx])u|([nl])v': r'\1\2ü',
        'iu': 'iou',
        'ui': 'uei',
        'un': 'uen'
    }
    # Zero-width split point between the initial (or the 零x marker) and the
    # first letter of the final.
    splitter = RE(r'(?<=[bpmfdtnlgkhjqxzcsrh开合齐撮])(?=[aeiouünm])')
Ejemplo n.º 6
0
Archivo: q3config.py Proyecto: braph/oa
    FileNotFoundError = FileNotFoundError
except:
    FileNotFoundError = IOError  # Python2

from re import compile as RE, VERBOSE as __RE_VERBOSE
from itertools import islice

# Tokenizer grammar: one alternative per token kind (comment, word, quoted
# word, horizontal whitespace, newline, semicolon). Compiled in VERBOSE mode,
# so layout whitespace in the pattern is ignored and the (?# ...) groups are
# plain regex comments.
GRAMMAR = RE(
    r'''
(?# === Most-likely first === )

 (?# A Comment)
    (?: //[^\n]* )
|(?# A Word)
    (?: [^\s";]+ (?=//)) |
    (?: [^\s";]+)
|(?# Quoted Word - Ending Quote can be omitted)
    (?: " [^"\n]* "? )
|(?# Whitespace)
     [ \t\f\v]+
|(?# Newline)
     \n
|(?# Semicolon)
     ;
''', __RE_VERBOSE)


class Token:
    '''
    Base token class which provides functionality such as appending, dropping
    and iterating over tokens.
    '''
Ejemplo n.º 7
0
# -*- coding: utf-8 -*-

# This module also acts as a source itself: it exposes LABEL and VARIABLES,
# the same attributes find_best_names() reads from a source.
LABEL = 'Openarena Configurator'

import sys
from re import compile as RE
from libmk import *
from values import *
import net.joz3d        as joz3d
import net.quake3tweaks as quake3tweaks
import net.stupidctf    as stupidctf
import oa.cvardump      as cvardump
import oa.source_vars   as source_vars

def count_upper(s, f=RE('[A-Z]').findall):
    '''Return how many ASCII uppercase letters (A-Z) occur in *s*.'''
    return len(f(s))


# Match object only for strings that are entirely <digits>.<digits>
is_float = RE(r'\d+\.\d+').fullmatch
# Match object when the string contains at least one non-digit character
is_str   = RE(r'\D').search
VARIABLES = {}

# =============================================================================
# NAMES
# =============================================================================
VARNAMES = {}
def find_best_names(source):
    to_drop = []
    for lower, var in source.VARIABLES.items():
        name = var['name']
        if not name or len(name) < 3:
            warn('Dropping strange variable %r coming from %r' % (name, source.LABEL))
            to_drop.append(lower)
Ejemplo n.º 8
0
from collections import namedtuple
from re import compile as RE

# A snippet reference: a sha plus an offset and a length.
Snippet = namedtuple('Snippet', 'sha offset length')

# Textual form "{<hex sha> <offset> <length>}" with flexible whitespace.
# Raw strings keep the regex escapes (\s, \d) out of Python's own string
# escape processing, where they are invalid escape sequences that trigger a
# warning; the resulting pattern text is unchanged.
pat = (r'{'
       r'\s*'
       r'(?P<sha>[a-f0-9]+)'
       r'\s+'
       r'(?P<offset>\d+)'
       r'\s+'
       r'(?P<length>\d+)'
       r'\s*'
       r'}')

_PAT = RE(pat)


def to_string(snip):
    '''Render *snip*, a ``(sha, offset, length)`` triple, as ``{sha offset length}``.'''
    return _ts(*snip)


def _ts(sha, offset, length):
    '''Format the three fields; *sha* is a bytes value decoded as ASCII.'''
    digest = sha.decode("ascii")
    return f'{{{digest} {offset} {length}}}'


def from_string(text):
    '''
    Parse the ``{sha offset length}`` form found at the start of *text*.

    The captured ``sha``, ``offset`` and ``length`` groups are passed, as
    strings, to ``_fs``, whose result is returned.

    Raises ValueError when *text* does not start with that form.
    '''
    m = _PAT.match(text)
    if m:
        return _fs(**m.groupdict())
    raise ValueError