def generate_stroke():
    """Recreate the stroke-order table and repopulate it.

    Calls ``recreate_stroke_order()`` on the ``abcNLP.db`` handle, lets
    ``abcNLPChar.getStrokeOrderDict`` write through that handle, then
    commits and closes it.
    """
    char_lookup = abcNLPChar('C')
    store = abcSql("abcNLP.db")

    store.recreate_stroke_order()
    char_lookup.getStrokeOrderDict(db=store)

    store.commit()
    store.close()
def generate_stroke():
    """Recreate the stroke-order table, fill it, then commit and close.

    The filling is delegated to ``abcNLPChar.getStrokeOrderDict``, which
    receives the ``abcNLP.db`` handle through its ``db`` keyword.
    """
    lookup = abcNLPChar("C")
    database = abcSql("abcNLP.db")

    # Recreate the table first, then let the lookup object write into it.
    database.recreate_stroke_order()
    lookup.getStrokeOrderDict(db=database)

    database.commit()
    database.close()
def generate_variant():
    """Recreate the character-variant table from the stored stroke orders.

    Fetches the stroke orders via ``db.fetch_stroke_orders()`` and hands
    them to ``abcNLPChar.getStrokeOrderSimilar`` together with the database
    handle, then commits and closes the handle.
    """
    abc = abcNLPChar("C")
    db = abcSql("abcNLP.db")
    db.recreate_char_variant()

    dic = db.fetch_stroke_orders()
    # The return value is not needed here; the call is made with db=db so it
    # can write through the handle.
    abc.getStrokeOrderSimilar(dic, db=db)

    db.commit()
    # close must be *called*; a bare ``db.close`` only names the method.
    db.close()
def generate_variant():
    """Recreate the character-variant table from the stored stroke orders.

    Fetches the stroke orders via ``db.fetch_stroke_orders()`` and hands
    them to ``abcNLPChar.getStrokeOrderSimilar`` together with the database
    handle, then commits and closes the handle.
    """
    abc = abcNLPChar('C')
    db = abcSql("abcNLP.db")
    db.recreate_char_variant()

    dic = db.fetch_stroke_orders()
    # The return value is not needed here; the call is made with db=db so it
    # can write through the handle.
    abc.getStrokeOrderSimilar(dic, db=db)

    db.commit()
    # close must be *called*; a bare ``db.close`` only names the method.
    db.close()
def generate_tongyin_variant(maxnum=500000):
    """Recreate the tongyin (same-Pinyin) variant table and fill it.

    For each character from ``abc.getDomainCharacterIterator()``:

    * its first Pinyin reading is inserted as a variant with score 0;
    * up to three other characters sharing that reading are inserted, the
      ones with the lowest stroke counts, each scored by its stroke count.

    Characters for which the stroke count, the reading, or the homophone
    lookup raises are skipped.

    Args:
        maxnum: Stop once this many source characters have been handled.
    """
    abc = abcNLPChar('C')
    db = abcSql("abcNLP.db")
    db.recreate_tongyin_variant()

    # Placeholder score marking a candidate slot that was never filled.
    unused = 500

    count = 0
    for src in abc.getDomainCharacterIterator():
        if count >= maxnum:
            # Parenthesised form prints the same text on Python 2 and 3.
            print("Max number %d reached!" % count)
            break
        try:
            # Result unused; the call is kept because a failure here skips
            # the character, exactly as before.
            abc.getStrokeCount(src)
            py = abc.getReadingForCharacter(src, 'Pinyin')
            # Only the first Pinyin reading of each character is processed.
            tongyins = abc.getCharactersForReading(py[0], 'Pinyin')
        except Exception:
            # Best effort: skip this character, but do not swallow
            # KeyboardInterrupt / SystemExit like a bare ``except:`` would.
            continue

        # By default the Pinyin itself is a variant (score 0).
        db.insert_tongyin_char_variant(src, py[0], 0)

        candidates = [(unused, ""), (unused, ""), (unused, "")]
        for ty in tongyins:
            if ty == src:
                continue
            try:
                # Rank by raw stroke count: the simpler character is better.
                strokes = abc.getStrokeCount(ty)
            except Exception:
                continue
            # Insert first, sort, then drop the worst entry. Popping before
            # inserting let a worse newcomer evict a better third candidate.
            # Assumes insertionSort sorts in place, ascending -- the final
            # loop below relies on the same ordering.
            candidates.append((strokes, ty))
            insertionSort(candidates)
            candidates.pop()

        for score, variant in candidates:
            if score == unused:
                break
            db.insert_tongyin_char_variant(src, variant, score)

        # NOTE(review): the indentation of this statement was lost in the
        # collapsed source; counting once per source character is assumed.
        count = count + 1

    db.commit()
    db.close()
def generate_default_variant(maxnum=500000):
    """Recreate the default variant table and fill it.

    For each character from ``abc.getDomainCharacterIterator()`` every entry
    returned by ``get_abc_extend_char(abc, src)`` that differs from the
    character itself is inserted with score 0. Characters for which that
    lookup raises are skipped.

    Args:
        maxnum: Stop once this many source characters have been handled.
    """
    abc = abcNLPChar('C')
    db = abcSql("abcNLP.db")
    db.recreate_default_variant()

    count = 0
    for src in abc.getDomainCharacterIterator():
        if count >= maxnum:
            # Parenthesised form prints the same text on Python 2 and 3.
            print("Max number %d reached!" % count)
            break
        try:
            exts = get_abc_extend_char(abc, src)
        except Exception:
            # Best effort: skip this character, but do not swallow
            # KeyboardInterrupt / SystemExit like a bare ``except:`` would.
            continue

        for ext in exts:
            if ext == src:
                continue
            db.insert_default_char_variant(src, ext, 0)

        # NOTE(review): the indentation of this statement was lost in the
        # collapsed source; counting once per source character is assumed.
        count = count + 1

    db.commit()
    db.close()
def clear_db():
    """Recreate the stroke-order and character-variant tables, then close.

    NOTE(review): unlike the generate_* functions, no ``db.commit()`` is
    issued here -- confirm the recreate_* methods persist on their own.
    """
    db = abcSql("abcNLP.db")
    db.recreate_stroke_order()
    db.recreate_char_variant()
    # close must be *called*; a bare ``db.close`` only names the method.
    db.close()
def remove_other_variant():
    """Call ``remove_old_tables()`` on the ``abcNLP.db`` handle and close it."""
    # Not used afterwards; constructed only because the function has always
    # done so.
    char_lookup = abcNLPChar('C')

    store = abcSql("abcNLP.db")
    store.remove_old_tables()
    store.close()
def generate_bigone_variant():
    """Recreate the all-in-one variant table and merge into it.

    Runs ``recreate_allinone_variant()`` followed by
    ``merge_to_one_variant()`` on the ``abcNLP.db`` handle, then closes it.
    """
    # Not used afterwards; constructed only because the function has always
    # done so.
    char_lookup = abcNLPChar('C')

    store = abcSql("abcNLP.db")
    store.recreate_allinone_variant()
    store.merge_to_one_variant()
    store.close()
def _extend_components(abc, originals, level):
    """Return a copy of *originals* with components replaced by extended forms.

    Components are visited left to right. While ``level`` is still greater
    than 1, a component is replaced by ``get_abc_extend_char(abc, c).pop()``;
    each replacement that actually changes the component uses up one level.
    """
    extended = list(originals)
    for i, comp in enumerate(originals):
        if level > 1:
            extended[i] = get_abc_extend_char(abc, comp).pop()
            if extended[i] != comp:
                level = level - 1

    # The level budget may run out before a repeated component is reached,
    # so give later duplicates the same replacement as their first
    # occurrence.
    for j in range(1, len(originals)):
        for i in range(j):
            if originals[i] == originals[j] and originals[i] != extended[i]:
                extended[j] = extended[i]
    return extended


def generate_decomp_variant(maxnum=500000, depth=1):
    """Recreate a decomposition variant table and fill it.

    For each character from ``abc.getDomainCharacterIterator()`` the first
    decomposition entry is inspected. Only left-to-right decompositions are
    used: ``u'⿰'`` (two components) and ``u'⿲'`` (three components), and
    only when every operand is a tuple and none of the components is
    ``u'?'``. The concatenated components are inserted as a variant with
    score 0.

    Args:
        maxnum: Stop once this many variants have been inserted.
        depth: ``0`` writes to the decomp table, a positive value writes to
            the decompext table. Components are swapped for their extended
            form only while the remaining depth exceeds 1, so values of 1
            or less store the components unchanged.

    Raises:
        exception.NoInformationError: if the first element of a
            decomposition is neither a binary nor a trinary IDS operator.
    """
    abc = abcNLPChar('C')
    db = abcSql("abcNLP.db")
    if depth == 0:
        db.recreate_decomp_variant()
    elif depth > 0:
        db.recreate_decompext_variant()

    count = 0
    for src in abc.getDomainCharacterIterator():
        if count >= maxnum:
            # Parenthesised form prints the same text on Python 2 and 3.
            print("Max number %d reached!" % count)
            break
        try:
            decomps = abc.getDecompositionEntries(src)
        except Exception:
            # Best effort: skip this character, but do not swallow
            # KeyboardInterrupt / SystemExit like a bare ``except:`` would.
            continue
        if not decomps:
            continue

        # Only the first decomposition entry is considered.
        decomp = decomps[0]
        if not decomp:
            continue

        idc = decomp[0]
        if not abc.isBinaryIDSOperator(idc) and not abc.isTrinaryIDSOperator(idc):
            # Report the character being processed (``src``); the message
            # used to reference an undefined name.
            raise exception.NoInformationError(
                "IDC of char %s is error: %s" % (src, idc))

        if idc == u'⿰':
            arity = 2
        elif idc == u'⿲':
            arity = 3
        else:
            continue

        operands = decomp[1:arity + 1]
        if len(operands) != arity:
            continue
        if not all(isinstance(operand, tuple) for operand in operands):
            continue

        components = [operand[0] for operand in operands]
        if u'?' in components:
            continue

        variant = "".join(_extend_components(abc, components, depth))
        if depth == 0:
            db.insert_decomp_char_variant(src, variant, 0)
        elif depth > 0:
            db.insert_decompext_char_variant(src, variant, 0)
        count = count + 1

    db.commit()
    db.close()