예제 #1
0
		item.append(tmp['title']) #3
		item.append(tmp['institution']) #4
		item.append(tmp['venue']) #5
		item.append(tmp['address']) #6
		item.append(tmp['publisher']) #7
		item.append(tmp['year']) #8
		item.append(tmp['pages']) #9
		item.append(tmp['editor']) #10
		item.append(tmp['note']) #11
		item.append(tmp['month']) #12
		rlist.append(item)

		for j in xrange(1, 13):
			if (item[j] != ''):
				s[j] += 1

	print 'id, auth, vol, ttl, ins, ven, addr, pub, year, pag, edi, nt, mon'
	print s

	dim = len(rlist[0])
	# f-swoosh: record dimension, record list, feature list, match func list, merge func list
	fsw = fswoosh(dim, rlist, flist, matchFuncList, mergeFuncList)
	result = fsw.compute()

	# evaluation: num after merging, num of records, result file, correct answer file
	eva = evaluate(len(result), len(coraObj), 'clusters.txt', 'cora-clusters.txt')
	eva.do()

	t2 = time.time()
	print 't2-t1: ' + str(t2-t1)
예제 #2
0
			tmp = coraObj[str(buc[i])]
			item = []
			item.append(str(buc[i])) #0
			item.append(tmp['author'])  #1
			item.append(tmp['volume']) #2
			item.append(tmp['title']) #3
			item.append(tmp['institution']) #4
			item.append(tmp['venue']) #5
			item.append(tmp['address']) #6
			item.append(tmp['publisher']) #7
			item.append(tmp['year']) #8
			item.append(tmp['pages']) #9
			item.append(tmp['editor']) #10
			item.append(tmp['note']) #11
			item.append(tmp['month']) #12
			rlist.append(item)
		dim = len(rlist[0])
		fsw = fswoosh(dim, rlist, flist, matchFuncList, mergeFuncList)
		res = fsw.compute()
		rpool += res
	dim = len(rpool[0])
	print len(rpool)
	matchFuncList[2] = levDistPool
	fswpool = fswoosh(dim, rpool, flist, matchFuncList, mergeFuncList)
	result = fswpool.compute()
	print len(result)
	eva = evaluate(len(result), len(coraObj), 'clusters.txt', 'cora-clusters.txt')
	eva.do()

	t2 = time.time()
	print 't2-t1: ' + str(t2-t1)