Exemple #1
0
def generate_stroke():
    """Regenerate the stroke-order data stored in ``abcNLP.db``.

    The stroke-order table is recreated first, then
    ``getStrokeOrderDict`` is run with the open database handle, and
    finally the changes are committed and the database is closed.
    """
    char_lookup = abcNLPChar('C')
    database = abcSql("abcNLP.db")

    # Start from an empty table before the lookup object is handed the
    # database (presumably it writes the entries itself -- confirm in
    # abcNLPChar.getStrokeOrderDict).
    database.recreate_stroke_order()
    char_lookup.getStrokeOrderDict(db=database)

    database.commit()
    database.close()
Exemple #2
0
def generate_stroke():
    """Rebuild the stroke-order table of ``abcNLP.db`` from scratch.

    Steps: recreate the table, call ``getStrokeOrderDict`` with the
    database handle, commit, close.
    """
    lookup = abcNLPChar("C")
    store = abcSql("abcNLP.db")

    store.recreate_stroke_order()
    # The database handle is passed by keyword; what is written through
    # it is decided inside getStrokeOrderDict (not visible here).
    lookup.getStrokeOrderDict(db=store)

    store.commit()
    store.close()
Exemple #3
0
def generate_variant():
    """Regenerate the character-variant table in ``abcNLP.db``.

    The variant table is recreated, the stored stroke orders are fetched
    and handed to ``getStrokeOrderSimilar`` together with the database
    handle; afterwards the changes are committed and the database closed.
    """
    abc = abcNLPChar("C")
    db = abcSql("abcNLP.db")

    db.recreate_char_variant()
    dic = db.fetch_stroke_orders()
    # The return value is not needed here; the call is made for the work
    # it does through the ``db`` handle.
    abc.getStrokeOrderSimilar(dic, db=db)
    db.commit()
    # Actually call close(): the bare attribute reference ``db.close``
    # was a no-op and left the database open.
    db.close()
Exemple #4
0
def generate_variant():
    """Regenerate the character-variant table in ``abcNLP.db``.

    The variant table is recreated, the stored stroke orders are fetched
    and handed to ``getStrokeOrderSimilar`` together with the database
    handle; afterwards the changes are committed and the database closed.
    """
    abc = abcNLPChar('C')
    db = abcSql("abcNLP.db")

    db.recreate_char_variant()
    dic = db.fetch_stroke_orders()
    # The return value is not needed here; the call is made for the work
    # it does through the ``db`` handle.
    abc.getStrokeOrderSimilar(dic, db=db)
    db.commit()
    # Actually call close(): the bare attribute reference ``db.close``
    # was a no-op and left the database open.
    db.close()
Exemple #5
0
def generate_tongyin_variant(maxnum=500000):
    """Fill the tongyin (same Pinyin reading) variant table in ``abcNLP.db``.

    For every character yielded by ``getDomainCharacterIterator`` the first
    Pinyin reading is stored as a variant with score 0.  Then up to three
    other characters sharing that reading are stored, preferring the ones
    with the lowest stroke count; their stroke count is used as the score.

    Characters whose stroke count, reading or same-reading characters
    cannot be looked up are skipped and do not count towards ``maxnum``.

    :param maxnum: stop once this many source characters were processed.
    """
    # Placeholder stroke count marking an unused slot in the "best" list.
    unused = 500

    abc = abcNLPChar('C')
    db = abcSql("abcNLP.db")
    try:
        db.recreate_tongyin_variant()

        count = 0
        for src in abc.getDomainCharacterIterator():
            if count >= maxnum:
                print("Max number %d reached!" % count)
                break
            try:
                # The result is unused, but a failing lookup makes us skip
                # the character, exactly as before.
                abc.getStrokeCount(src)
                py = abc.getReadingForCharacter(src, 'Pinyin')
                # Only the first Pinyin reading of each character is used.
                tongyins = abc.getCharactersForReading(py[0], 'Pinyin')
            except Exception:
                # ``Exception`` instead of a bare except, so Ctrl-C and
                # SystemExit still stop the run.
                continue

            # By default the Pinyin itself is a variant (score 0).
            db.insert_tongyin_char_variant(src, py[0], 0)

            # Three best candidates, kept in ascending stroke-count order
            # (assumes insertionSort sorts the list in place, ascending --
            # the original pop()/break logic relied on the same thing).
            best = [(unused, ""), (unused, ""), (unused, "")]
            for ty in tongyins:
                if ty == src:
                    continue
                try:
                    # Simpler characters (fewer strokes) are preferred.
                    strokes = abc.getStrokeCount(ty)
                except Exception:
                    continue
                # Only replace the current worst entry when the candidate
                # is really better; the old code replaced it
                # unconditionally, so the third slot ended up holding an
                # arbitrary late candidate.
                if strokes >= best[-1][0]:
                    continue
                best[-1] = (strokes, ty)
                insertionSort(best)

            for strokes, ty in best:
                if strokes == unused:
                    # Remaining slots were never filled.
                    break
                db.insert_tongyin_char_variant(src, ty, strokes)
            count = count + 1

        db.commit()
    finally:
        # Close the database even when a lookup or insert fails.
        db.close()
Exemple #6
0
def generate_default_variant(maxnum=500000):
    """Fill the default variant table in ``abcNLP.db``.

    For every character yielded by ``getDomainCharacterIterator`` the
    characters returned by ``get_abc_extend_char`` (other than the source
    character itself) are stored as variants with score 0.  Characters for
    which ``get_abc_extend_char`` fails are skipped.

    :param maxnum: limit on the number of inserted variants.  The limit is
        checked once per source character, so the final count may exceed
        it by the variants of the last processed character.
    """
    abc = abcNLPChar('C')
    db = abcSql("abcNLP.db")
    try:
        db.recreate_default_variant()

        count = 0
        for src in abc.getDomainCharacterIterator():
            if count >= maxnum:
                print("Max number %d reached!" % count)
                break
            try:
                exts = get_abc_extend_char(abc, src)
            except Exception:
                # ``Exception`` instead of a bare except, so Ctrl-C and
                # SystemExit still stop the run.
                continue
            for ext in exts:
                if ext == src:
                    continue
                db.insert_default_char_variant(src, ext, 0)
                count = count + 1

        db.commit()
    finally:
        # Close the database even when an insert fails.
        db.close()
Exemple #7
0
def clear_db():
    """Recreate the stroke-order and char-variant tables of ``abcNLP.db``.

    NOTE(review): unlike the generate_* functions no ``commit()`` is issued
    here; confirm whether the recreate_* methods commit on their own.
    """
    db = abcSql("abcNLP.db")
    db.recreate_stroke_order()
    db.recreate_char_variant()
    # Actually call close(): the bare attribute reference ``db.close``
    # was a no-op and left the database open.
    db.close()
Exemple #8
0
def clear_db():
    """Recreate the stroke-order and char-variant tables of ``abcNLP.db``.

    NOTE(review): unlike the generate_* functions no ``commit()`` is issued
    here; confirm whether the recreate_* methods commit on their own.
    """
    db = abcSql("abcNLP.db")
    db.recreate_stroke_order()
    db.recreate_char_variant()
    # Actually call close(): the bare attribute reference ``db.close``
    # was a no-op and left the database open.
    db.close()
Exemple #9
0
def remove_other_variant():
    """Call ``remove_old_tables`` on ``abcNLP.db`` and close the database."""
    # The lookup object is not used afterwards; it is still constructed so
    # that any effect of creating it is kept.
    char_lookup = abcNLPChar('C')

    database = abcSql("abcNLP.db")
    database.remove_old_tables()
    database.close()
Exemple #10
0
def generate_bigone_variant():
    """Recreate the all-in-one variant table and merge the variants into it.

    Runs ``recreate_allinone_variant`` followed by ``merge_to_one_variant``
    on ``abcNLP.db`` and then closes the database.
    """
    # The lookup object is not used afterwards; it is still constructed so
    # that any effect of creating it is kept.
    char_lookup = abcNLPChar('C')

    database = abcSql("abcNLP.db")
    database.recreate_allinone_variant()
    database.merge_to_one_variant()
    database.close()
Exemple #11
0
def _extend_decomp_parts(abc, parts, level):
    """Return ``parts`` with components swapped for their extended form.

    A component is looked up through ``get_abc_extend_char`` only while
    ``level`` is greater than 1; every component that actually changes
    uses up one level.  Afterwards, a component that was not extended but
    is identical to an earlier component that was extended takes over
    that earlier replacement, so equal inputs give equal outputs.
    """
    originals = list(parts)
    extended = list(parts)
    for index, part in enumerate(originals):
        if level > 1:
            extended[index] = get_abc_extend_char(abc, part).pop()
            if extended[index] != part:
                level = level - 1
    # Because of the level budget, later components may not have been
    # extended; copy the replacement from an identical earlier component.
    for later in range(1, len(originals)):
        for earlier in range(later):
            if (originals[earlier] == originals[later]
                    and originals[earlier] != extended[earlier]):
                extended[later] = extended[earlier]
    return extended


def generate_decomp_variant(maxnum=500000, depth=1):
    """Fill the decomposition variant table in ``abcNLP.db``.

    For every character yielded by ``getDomainCharacterIterator`` the first
    decomposition entry is inspected.  Left-to-right decompositions with
    two (``u'\u2ff0'``) or three (``u'\u2ff2'``) character components are
    stored as a variant made of the concatenated components, with score 0.
    Decompositions containing an unknown component (``u'?'``) are skipped.

    :param maxnum: stop once this many variants were produced.
    :param depth: ``0`` writes the plain components to the decomp table;
        a positive value writes to the decompext table, and components are
        additionally replaced through ``get_abc_extend_char`` while the
        remaining level is greater than 1 (so only for ``depth >= 2``).
    :raises exception.NoInformationError: if the first element of a
        decomposition is neither a binary nor a trinary IDS operator.
    """
    # Number of components expected for each supported IDS operator.
    arity_by_operator = {u'⿰': 2, u'⿲': 3}

    abc = abcNLPChar('C')
    db = abcSql("abcNLP.db")
    try:
        if depth == 0:
            db.recreate_decomp_variant()
        elif depth > 0:
            db.recreate_decompext_variant()

        count = 0
        for src in abc.getDomainCharacterIterator():
            if count >= maxnum:
                print("Max number %d reached!" % count)
                break

            try:
                decomps = abc.getDecompositionEntries(src)
            except Exception:
                # ``Exception`` instead of a bare except, so Ctrl-C and
                # SystemExit still stop the run.
                continue

            # Only the first decomposition of a character is used.
            if not decomps or not decomps[0]:
                continue
            decomp = decomps[0]

            idc = decomp[0]
            if (not abc.isBinaryIDSOperator(idc)
                    and not abc.isTrinaryIDSOperator(idc)):
                # Report the offending source character (the old message
                # referenced an undefined name and died with NameError).
                raise exception.NoInformationError(
                    "IDC of char %s is error: %s" % (src, idc))

            arity = arity_by_operator.get(idc)
            if arity is None:
                # Other layouts are not turned into variants.
                continue
            operands = decomp[1:arity + 1]
            # Every operand must be a tuple whose first item is the
            # component character; anything else is ignored.
            if len(operands) != arity or not all(
                    isinstance(operand, tuple) for operand in operands):
                continue

            parts = [operand[0] for operand in operands]
            if u'?' in parts:
                continue

            parts = _extend_decomp_parts(abc, parts, depth)
            variant = "".join(parts)
            if depth == 0:
                db.insert_decomp_char_variant(src, variant, 0)
            elif depth > 0:
                db.insert_decompext_char_variant(src, variant, 0)
            count = count + 1

        db.commit()
    finally:
        # Close the database even when NoInformationError (or any other
        # error) aborts the run.
        db.close()