def handle(school):
    """Resolve one raw record to a known school id and department id.

    ``school`` is indexed positionally: ``school[1]`` is the school name,
    ``school[2]`` the department name, and ``school[0]`` is passed through
    to the result untouched.

    The school name is first looked up verbatim among the values of
    ``SCHOOL_UNIVERSITY``.  If that fails, the (at most) ten ids whose names
    share the longest common subsequence with it are rendered as a numbered
    menu and handed to ``getIndex``; the returned index selects a candidate,
    and any index outside the menu means "no school" (id ``0``).

    The department name is then matched by LCS length against the
    departments registered for the resolved school, or against every known
    department when the school is unresolved or has no department list.
    Only matches with an LCS length above 3 are considered and the longest
    one wins.

    Returns:
        ``[school[0], school_id, dep_id]``; either id is ``0`` when nothing
        was matched.
    """
    # ---- school ----------------------------------------------------------
    name = school[1]
    # Upper bound on the ids considered for fuzzy matching.  What id ranges
    # the two bounds delimit is not visible from this function.
    if name.replace(' ', '').isalpha():
        top = 2900110
    else:
        top = 34050

    exact = [k for k, v in SCHOOL_UNIVERSITY.iteritems() if v == name]
    if exact:
        school_id = exact[0]
    else:
        encoded = name.encode('utf-8')
        scored = [(i, find_lcs_len(v.encode('utf-8'), encoded))
                  for i, v in SCHOOL_UNIVERSITY.iteritems() if i <= top]
        # Best ten candidates, longest common subsequence first.
        candidates = sorted(scored, key=lambda x: x[1], reverse=True)[:10]
        print('\n--------%s--------\n' % name)
        # getIndex receives the menu text and returns the chosen position
        # (presumably by prompting the operator -- confirm against getIndex).
        get = getIndex('\n'.join([
            '选择:\t' + str(pos) + ' ' + SCHOOL_UNIVERSITY[cand[0]]
            for pos, cand in enumerate(candidates)
        ]))
        # Bound by the real menu length: there may be fewer than ten
        # candidates, and an out-of-menu answer means "none of these".
        if 0 <= get < len(candidates):
            school_id = candidates[get][0]
        else:
            school_id = 0
        if school_id:
            print('\n\n++++++%s++++++++' % SCHOOL_UNIVERSITY[school_id])

    # ---- department ------------------------------------------------------
    name = school[2]
    dep_id = 0
    if name.replace(' ', ''):
        if (school_id and isinstance(school_id, int)
                and school_id in SCHOOL_UNIVERSITY_DEPARTMENT_ID):
            # Restrict the search to the departments of the resolved school.
            dep_names = dict(
                (dep, SCHOOL_UNIVERSITY_DEPARTMENT_ID2NAME[dep])
                for dep in SCHOOL_UNIVERSITY_DEPARTMENT_ID[school_id])
        else:
            dep_names = SCHOOL_UNIVERSITY_DEPARTMENT_ID2NAME

        encoded = name.encode('utf-8')
        scored = []
        for k, v in dep_names.iteritems():
            mlen = find_lcs_len(encoded, v.encode('utf-8'))
            if mlen > 3:
                scored.append((k, mlen))
        if scored:
            # Take the LONGEST match; an ascending sort followed by [0]
            # would select the weakest candidate above the threshold.
            dep_id = max(scored, key=lambda x: x[1])[0]
            print('%s %s' % (name, SCHOOL_UNIVERSITY_DEPARTMENT_ID2NAME[dep_id]))
    return [school[0], school_id, dep_id]
def handle(school):
    """Resolve one raw record to a known school id and department id.

    ``school`` is indexed positionally: ``school[1]`` is the school name,
    ``school[2]`` the department name, and ``school[0]`` is passed through
    to the result untouched.

    The school name is first looked up verbatim among the values of
    ``SCHOOL_UNIVERSITY``.  If that fails, the (at most) ten ids whose names
    share the longest common subsequence with it are rendered as a numbered
    menu and handed to ``getIndex``; the returned index selects a candidate,
    and any index outside the menu means "no school" (id ``0``).

    The department name is then matched by LCS length against the
    departments registered for the resolved school, or against every known
    department when the school is unresolved or has no department list.
    Only matches with an LCS length above 3 are considered and the longest
    one wins.

    Returns:
        ``[school[0], school_id, dep_id]``; either id is ``0`` when nothing
        was matched.
    """
    # ---- school ----------------------------------------------------------
    name = school[1]
    # Upper bound on the ids considered for fuzzy matching.  What id ranges
    # the two bounds delimit is not visible from this function.
    if name.replace(' ', '').isalpha():
        top = 2900110
    else:
        top = 34050

    exact = [k for k, v in SCHOOL_UNIVERSITY.iteritems() if v == name]
    if exact:
        school_id = exact[0]
    else:
        encoded = name.encode('utf-8')
        scored = [(i, find_lcs_len(v.encode('utf-8'), encoded))
                  for i, v in SCHOOL_UNIVERSITY.iteritems() if i <= top]
        # Best ten candidates, longest common subsequence first.
        candidates = sorted(scored, key=lambda x: x[1], reverse=True)[:10]
        print('\n--------%s--------\n' % name)
        # getIndex receives the menu text and returns the chosen position
        # (presumably by prompting the operator -- confirm against getIndex).
        get = getIndex('\n'.join([
            '选择:\t' + str(pos) + ' ' + SCHOOL_UNIVERSITY[cand[0]]
            for pos, cand in enumerate(candidates)
        ]))
        # Bound by the real menu length: there may be fewer than ten
        # candidates, and an out-of-menu answer means "none of these".
        if 0 <= get < len(candidates):
            school_id = candidates[get][0]
        else:
            school_id = 0
        if school_id:
            print('\n\n++++++%s++++++++' % SCHOOL_UNIVERSITY[school_id])

    # ---- department ------------------------------------------------------
    name = school[2]
    dep_id = 0
    if name.replace(' ', ''):
        if (school_id and isinstance(school_id, int)
                and school_id in SCHOOL_UNIVERSITY_DEPARTMENT_ID):
            # Restrict the search to the departments of the resolved school.
            dep_names = dict(
                (dep, SCHOOL_UNIVERSITY_DEPARTMENT_ID2NAME[dep])
                for dep in SCHOOL_UNIVERSITY_DEPARTMENT_ID[school_id])
        else:
            dep_names = SCHOOL_UNIVERSITY_DEPARTMENT_ID2NAME

        encoded = name.encode('utf-8')
        scored = []
        for k, v in dep_names.iteritems():
            mlen = find_lcs_len(encoded, v.encode('utf-8'))
            if mlen > 3:
                scored.append((k, mlen))
        if scored:
            # Take the LONGEST match; an ascending sort followed by [0]
            # would select the weakest candidate above the threshold.
            dep_id = max(scored, key=lambda x: x[1])[0]
            print('%s %s' % (name, SCHOOL_UNIVERSITY_DEPARTMENT_ID2NAME[dep_id]))
    return [school[0], school_id, dep_id]
def calculate_similarity(file_name_1, file_name_2):
    """Return the similarity score of two files.

    Each file name is converted to a sequence with
    ``fetch_machine_instruction_sequence``; the length of the longest common
    subsequence of the two sequences is computed with ``lcs.find_lcs_len``
    and, together with both sequences, handed to
    ``calculate_jaccard_coefficient``, whose result is returned unchanged.
    """
    # Fetch in argument order: first file, then second.
    first, second = [
        fetch_machine_instruction_sequence(path)
        for path in (file_name_1, file_name_2)
    ]
    shared_len = lcs.find_lcs_len(first, second)
    return calculate_jaccard_coefficient(shared_len, first, second)
# NOTE(review): layout reconstructed from a whitespace-collapsed line; confirm
# the nesting against the original script.
# Output file and error log; nothing in the visible part writes to them.
f = open('out2.txt', 'w')
err = open('logging2', 'w')
fcount = 0
for pos, i in enumerate(data):
    _name = i[1]
    name = replace_name(_name)
    print name
    # Skip records whose name is falsy after replace_name().
    if not name:
        continue
    # c collects the _SCHOOL_UNIVERSITY keys tied for the longest LCS with
    # name; maxlen is that LCS length.
    c = []
    maxlen = 0
    for j, id in _SCHOOL_UNIVERSITY.iteritems():
        # Pre-filter: only run the LCS when name and the key share at least
        # two distinct characters.
        if len(set(name) & set(j)) >= 2:
            llen = find_lcs_len(name, j)
            if llen > maxlen:
                c = [j]
                maxlen = llen
            elif llen == maxlen:
                c.append(j)
    ok = False
    if c:
        # Shortest candidate first, so c[0] is the shortest tied key.
        c.sort(key=len)
        # print " ".join(c)
        # Accept only when the LCS covers more than 60% of both the name
        # and the shortest candidate.
        if (maxlen / float(len(name))) > 0.6 and maxlen / float(len(c[0])) > 0.6:
            ok = True
    if ok:
        name = i[2].encode('utf-8')
# NOTE(review): layout reconstructed from a whitespace-collapsed line; confirm
# the nesting against the original script.
# Output file and error log; nothing in the visible part writes to them.
f = open('out2.txt', 'w')
err = open('logging2','w')
fcount = 0
for pos, i in enumerate(data):
    _name = i[1]
    name = replace_name(_name)
    print name
    # Skip records whose name is falsy after replace_name().
    if not name:
        continue
    # c collects the _SCHOOL_UNIVERSITY keys tied for the longest LCS with
    # name; maxlen is that LCS length.
    c = []
    maxlen = 0
    for j, id in _SCHOOL_UNIVERSITY.iteritems():
        # Pre-filter: only run the LCS when name and the key share at least
        # two distinct characters.
        if len(set(name)&set(j)) >= 2:
            llen = find_lcs_len(name, j)
            if llen > maxlen:
                c = [j]
                maxlen = llen
            elif llen == maxlen:
                c.append(j)
    ok = False
    if c:
        # Shortest candidate first, so c[0] is the shortest tied key.
        c.sort(key=len)
        # print " ".join(c)
        # Accept only when the LCS covers more than 60% of both the name
        # and the shortest candidate.
        if (maxlen / float(len(name)) ) > 0.6 and maxlen/float(len(c[0])) > 0.6:
            ok = True
    if ok:
        name = i[2].encode('utf-8')
        # NOTE(review): assumed to belong to the `if ok:` branch -- the
        # collapsed source does not show its indentation; confirm.
        p = []