コード例 #1
0
ファイル: trainer.py プロジェクト: yysherlock/snippet
def extract_feature(file,func):
	"""Extract a context for every word of every valid line and pass it to ``func``.

	Each line of ``file`` is expected to be tab-separated in the form
	``i j k l node_1 node_2 ...``: four integer word indices followed by one
	field per word. Words ``i..j`` are labelled ``'C'``, words ``k..l`` are
	labelled ``'E'`` and every other word is labelled ``'N'``. Lines whose
	spans are inconsistent (``k <= j``, ``i > j`` or ``k > l``) are skipped.

	Args:
		file: iterable of text lines. (The name shadows the Python 2 builtin
			but is kept so existing callers keep working.)
		func: callable invoked as ``func(context, label)`` once per word,
			where ``label`` is ``'C'``, ``'E'`` or ``'N'``.

	Raises:
		ValueError: if a line does not start with four integer fields.
	"""
	for line in file:
		fields = line.strip().split('\t')
		c_lo, c_hi, e_lo, e_hi = map(int, fields[0:4])

		# Invalid sentence: the E span must start after the C span ends and
		# neither span may be reversed.
		if e_lo <= c_hi or c_lo > c_hi or e_lo > e_hi:
			continue

		# A placeholder node occupies index 0 so that real words are 1-based.
		nodes = ['0/null/null/null/0']
		nodes.extend(fields[4:])

		tree = Generator.build_tree(nodes)

		# One label per node; the placeholder at index 0 is always 'N'.
		labels = ['N']
		for w in range(1, len(nodes)):
			if e_lo <= w <= e_hi:
				labels.append('E')
			elif c_lo <= w <= c_hi:
				labels.append('C')
			else:
				labels.append('N')

		for w in range(1, len(nodes)):
			# Context is extracted from the dependency subtree that w belongs to.
			context = get_context(w, tree, labels)
			# Single-argument print: same output as the old multi-item print
			# statement, but valid on both Python 2 and Python 3.
			print('context of ` %s ` %s' % (str(tree.sentence[w]), context))
			# Let the caller accumulate the feature for this word.
			func(context, labels[w])
コード例 #2
0
ファイル: predictor.py プロジェクト: yysherlock/snippet
def predict_file(filein, fileout):
	"""Write one line of ``word/prediction`` tokens to ``fileout`` per input line.

	Each line of ``filein`` is split on tabs; the fields from index 4 onward
	are used as the sentence's nodes (the first four fields are not used
	here). For every sentence, each word from index 1 on is written as
	``'<word>/<prediction> '`` and the line is terminated with a newline.

	Args:
		filein: iterable of tab-separated text lines.
		fileout: object with a ``write`` method that receives the output.
	"""
	for line in filein:
		contents = line.strip().split('\t')
		# A placeholder node occupies index 0 so that real words are 1-based.
		nodes = ['0/null/null/null/0']
		nodes.extend(contents[4:])
		tree = Generator.build_tree(nodes)

		if len(tree.sentence) == 0:
			continue
		# NOTE(review): the meaning of the second argument (3) is defined by
		# predict_sentence, which is not visible here -- confirm.
		results = predict_sentence(tree, 3)

		# Results are indexed in parallel with tree.sentence below.
		assert len(tree.sentence) == len(results)

		for i in range(1, len(tree.sentence)):
			w = tree.sentence[i].word
			fileout.write('%s/%s ' % (w, results[i]))
		# Same newline the old `print >> fileout` emitted, but portable.
		fileout.write('\n')