def handle(school):

    name = school[1]
    match = []

    if name.replace(' ', '').isalpha():
        top = 2900110
    else:
        top = 34050

    school_id = [k for k, v in SCHOOL_UNIVERSITY.iteritems() if v == name]

    if not school_id:
        match = [(i, find_lcs_len(v.encode('utf-8'), name.encode('utf-8')))
                 for i, v in SCHOOL_UNIVERSITY.iteritems() if i <= top]
        match = sorted(match, key=lambda x: x[1], reverse=True)[:10]

        print '\n--------%s--------\n' % name
        get = getIndex('\n'.join([
            '选择:\t' + str(match.index(i)) + ' ' + SCHOOL_UNIVERSITY[i[0]]
            for i in match
        ]))
        if get < 10:
            school_id = match[get][0]
        else:
            school_id = 0
        if school_id:
            print '\n\n++++++%s++++++++' % SCHOOL_UNIVERSITY[school_id]

    else:
        school_id = school_id[0]

    name = school[2]
    match = []
    depDict = defaultdict(str)
    dep_id = 0
    if name.replace(' ', ''):
        if school_id and type(
                school_id
        ) is int and school_id in SCHOOL_UNIVERSITY_DEPARTMENT_ID:
            for id in SCHOOL_UNIVERSITY_DEPARTMENT_ID[school_id]:
                depDict[id] = SCHOOL_UNIVERSITY_DEPARTMENT_ID2NAME[id]
        else:
            depDict = SCHOOL_UNIVERSITY_DEPARTMENT_ID2NAME

        dep_id = []
        for k, v in depDict.iteritems():
            mlen = find_lcs_len(name.encode('utf-8'), v.encode('utf-8'))
            if mlen > 3:
                dep_id.append((k, mlen))

        dep_id.sort(key=lambda x: x[1])

        if not dep_id:
            dep_id = 0
        else:
            dep_id = dep_id[0][0]
            print name, SCHOOL_UNIVERSITY_DEPARTMENT_ID2NAME[dep_id]

    return [school[0], school_id, dep_id]
Example #2
0
def handleSchool(school):
    """Resolve a record by exact school and department name.

    ``school[1]`` is compared against the values of ``SCHOOL_UNIVERSITY`` and
    ``school[2]`` against the values of
    ``SCHOOL_UNIVERSITY_DEPARTMENT_ID2NAME``.

    Returns ``(school[0], school_id, dep_id, school[3], school[4])`` when both
    names are found.  Otherwise the record is appended to ``MANUAL_LIST`` and
    ``None`` is returned.
    """
    school_hits = [
        key for key, title in SCHOOL_UNIVERSITY.iteritems()
        if title == school[1]
    ]
    dep_hits = [
        key for key, title in SCHOOL_UNIVERSITY_DEPARTMENT_ID2NAME.iteritems()
        if title == school[2]
    ]

    # Anything that cannot be resolved automatically is queued for manual
    # handling instead of being returned.
    if not school_hits or not dep_hits:
        MANUAL_LIST.append(school)
        return

    return (school[0], school_hits[0], dep_hits[0], school[3], school[4])
def handle(school):

    name = school[1]
    match = []

    if name.replace(' ', '').isalpha():
        top = 2900110
    else:
        top = 34050

    school_id = [k for k, v in SCHOOL_UNIVERSITY.iteritems() if v == name]

    if not school_id:
        match = [(i, find_lcs_len(v.encode('utf-8'), name.encode('utf-8'))) for i, v in SCHOOL_UNIVERSITY.iteritems() if i <= top]
        match = sorted(match, key=lambda x:x[1], reverse=True)[:10]

        print '\n--------%s--------\n'% name
        get = getIndex('\n'.join(['选择:\t'+str(match.index(i))+' '+SCHOOL_UNIVERSITY[i[0]] for i in match]))
        if get < 10:
            school_id = match[get][0]
        else:
            school_id = 0
        if school_id:
            print '\n\n++++++%s++++++++'%SCHOOL_UNIVERSITY[school_id]

    else:
        school_id = school_id[0]

    name = school[2]
    match = []
    depDict = defaultdict(str)
    dep_id = 0
    if name.replace(' ', ''):
        if school_id and type(school_id) is int and school_id in SCHOOL_UNIVERSITY_DEPARTMENT_ID:
            for id in SCHOOL_UNIVERSITY_DEPARTMENT_ID[school_id]:
                depDict[id] = SCHOOL_UNIVERSITY_DEPARTMENT_ID2NAME[id]
        else:
            depDict = SCHOOL_UNIVERSITY_DEPARTMENT_ID2NAME

        dep_id = []
        for k,v in depDict.iteritems():
            mlen = find_lcs_len(name.encode('utf-8'),v.encode('utf-8'))
            if mlen > 3:
                dep_id.append((k,mlen))

        dep_id.sort(key=lambda x:x[1])

        if not dep_id:
            dep_id = 0
        else:
            dep_id = dep_id[0][0]
            print name,SCHOOL_UNIVERSITY_DEPARTMENT_ID2NAME[dep_id]

    return [school[0], school_id, dep_id]
Example #4
0
    name = name.replace('大学', '大').replace('科学技术', '科').replace(
        '中国',
        '中').replace('师范', '师').replace('科技', '科').replace('交通', '交').replace(
            '财经', '财').replace('工业', '工').replace('北京', '北').replace(
                '科学', '科').replace('农业', '农').decode('utf-8')
    if name.endswith(u"大") and len(name) > 2:
        name = name[:-1]
    return name


# Read the whole 'to_be_verified' file and parse it with `loads`.
# NOTE(review): `loads` is imported outside this excerpt (presumably
# json.loads) -- confirm.
with open('to_be_verified', 'r') as to_be_verifyed:
    data = loads(to_be_verifyed.read())
    #shuffle(data)

# Reverse lookup table: shortened school name (see replace_name) -> key of
# SCHOOL_UNIVERSITY.  Names that shorten to the same string overwrite each
# other, so only one key is kept per shortened name.
_SCHOOL_UNIVERSITY = dict(
    (replace_name(v), k) for k, v in SCHOOL_UNIVERSITY.iteritems())

# Both files are opened for writing (existing content is truncated) and are
# not closed in this part of the script.
f = open('out2.txt', 'w')
err = open('logging2', 'w')
fcount = 0
for pos, i in enumerate(data):

    _name = i[1]
    name = replace_name(_name)
    print name
    if not name: continue
    c = []
    maxlen = 0

    for j, id in _SCHOOL_UNIVERSITY.iteritems():
        if len(set(name) & set(j)) >= 2:
Example #5
0
from random import shuffle

def replace_name(name):
    """Return a shortened unicode form of a school name.

    ``name`` is decoded as UTF-8 and passed through ``ftoj``
    (NOTE(review): presumably a traditional-to-simplified Chinese
    conversion -- confirm).  A fixed list of common words is then replaced by
    one-character abbreviations, and a trailing u"大" is dropped when the
    result is longer than two characters.
    """
    converted = ftoj(name.decode('utf-8'))
    if type(converted) is unicode:
        converted = converted.encode('utf-8')

    # Applied strictly in this order: '科学技术' has to be collapsed before
    # the shorter '科技' / '科学' rules get a chance to split it.
    abbreviations = (
        ('大学', '大'),
        ('科学技术', '科'),
        ('中国', '中'),
        ('师范', '师'),
        ('科技', '科'),
        ('交通', '交'),
        ('财经', '财'),
        ('工业', '工'),
        ('北京', '北'),
        ('科学', '科'),
        ('农业', '农'),
    )
    for full, short in abbreviations:
        converted = converted.replace(full, short)

    shortened = converted.decode('utf-8')
    if len(shortened) > 2 and shortened.endswith(u"大"):
        shortened = shortened[:-1]
    return shortened

# Read the whole 'to_be_verified' file and parse it with `loads`.
# NOTE(review): `loads` is imported outside this excerpt (presumably
# json.loads) -- confirm.
with open('to_be_verified', 'r') as to_be_verifyed:
    data = loads(to_be_verifyed.read())
    #shuffle(data)

# Reverse lookup table: shortened school name (see replace_name) -> key of
# SCHOOL_UNIVERSITY.  Names that shorten to the same string overwrite each
# other, so only one key is kept per shortened name.
_SCHOOL_UNIVERSITY = dict((replace_name(v), k) for k, v in SCHOOL_UNIVERSITY.iteritems())


# Both files are opened for writing (existing content is truncated) and are
# not closed in this part of the script.
f = open('out2.txt', 'w')
err = open('logging2','w')
fcount = 0
for pos, i in enumerate(data):

    _name = i[1]
    name = replace_name(_name)
    print name
    if not name: continue
    c = []
    maxlen = 0

    for j, id in _SCHOOL_UNIVERSITY.iteritems():