Ejemplo n.º 1
0
import sys
import re
import string
from nltk import bigrams
from fingerprint import FingerPrint

f = FingerPrint()

# Stream filter: read tab-separated records from stdin and, for each one,
# print "<fingerprint of column 2>\t<column 0>\t<column 2>".
for line in sys.stdin:
    cols = line.split("\t")
    try:
        # Only the record construction is guarded: it fails with IndexError
        # when the line has fewer than three columns, and the fingerprint
        # call may raise on input it cannot handle.
        record = "%s\t%s\t%s" % (f.bigram_fingerprint_num(cols[2]), cols[0], cols[2])
    except Exception as exc:
        # Best-effort processing: a bad record is skipped, not fatal.
        # Unlike a bare "except:", this lets KeyboardInterrupt/SystemExit
        # through and leaves a trace of what was dropped on stderr.
        sys.stderr.write("skipping line %r: %s\n" % (line, exc))
        continue
    # Printing happens outside the try block so that output errors
    # (e.g. a closed pipe) are not silently swallowed.
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(record)
	


	
Ejemplo n.º 2
0
import sys
import json
from fingerprint import FingerPrint


# 1-based index of the current input line; emitted as the middle column.
blkid = 0
f = FingerPrint()

# Stream filter: each stdin line is parsed as a JSON object. For every
# element of every value in that object, print
# "<fingerprint of element>\t<line index>\t<element>".
for line in sys.stdin:
    blkid += 1
    data = json.loads(line)
    # The keys are never used, so iterate over the values only.
    # .values() works on both Python 2 and Python 3 (unlike .iteritems()).
    for titles in data.values():
        for title in titles:
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print("\t".join([f.bigram_fingerprint_num(title), str(blkid), title]))