def extract_feature(file, func):
    """Extract a context for every word of every usable line and feed it to ``func``.

    Each line of ``file`` is expected to be tab-separated: four integer
    fields ``i j k l`` followed by the sentence nodes.  A root placeholder
    node is put in front of the nodes, so real words start at index 1.
    Words whose index lies in ``[i, j]`` are labelled ``'C'``, words in
    ``[k, l]`` are labelled ``'E'`` (``'E'`` wins if both match) and
    everything else, including the root placeholder, is labelled ``'N'``.

    Lines are skipped without calling ``func`` when they are blank or when
    the spans fail the check ``i <= j < k <= l``.

    Args:
        file: iterable of text lines (for example an open file object).
        func: callable invoked as ``func(context, label)`` once per word,
            where ``context`` comes from ``get_context`` and ``label`` is
            the word's ``'C'``/``'E'``/``'N'`` label.

    Raises:
        ValueError: if a non-blank line has fewer than four leading fields
            or those fields are not integers.
    """
    for line in file:
        stripped = line.strip()
        if not stripped:
            # A blank line has no span fields at all; skip it instead of
            # letting the int() conversion below abort the whole run.
            continue

        # format: i j k l nodes, separated by '\t'
        contents = stripped.split('\t')
        c_lo, c_hi, e_lo, e_hi = map(int, contents[0:4])
        if e_lo <= c_hi or c_lo > c_hi or e_lo > e_hi:
            continue  # invalid sentence

        nodes = ['0/null/null/null/0']
        nodes.extend(contents[4:])
        tree = Generator.build_tree(nodes)

        # One label per node; index 0 (the root placeholder) always stays 'N'.
        cat = ['N'] * len(nodes)
        for w in range(1, len(nodes)):
            if e_lo <= w <= e_hi:
                cat[w] = 'E'
            elif c_lo <= w <= c_hi:
                cat[w] = 'C'

        for w in range(1, len(nodes)):
            # For every word index w, extract the context from the
            # dependency subtree that w belongs to.
            context = get_context(w, tree, cat)
            # Single-argument print: emits the same text under the
            # Python 2 print statement and the Python 3 print function.
            print('context of ` %s ` %s' % (str(tree.sentence[w]), context))
            # Let the caller accumulate the feature for this word.
            func(context, cat[w])
def predict_file(filein, fileout):
    """Predict a result for every word of every input line and write it out.

    For each line of ``filein`` the tab-separated fields from the fifth one
    onward are taken as sentence nodes, a root placeholder node is put in
    front of them and a tree is built.  One output line is written per
    processed sentence: a ``word/result `` token (note the trailing space)
    for every index from 1 upward, followed by a newline.  A tree whose
    ``sentence`` is empty produces no output line at all.

    Args:
        filein: iterable of text lines.
        fileout: object with a ``write`` method that receives the output.

    Raises:
        AssertionError: if ``predict_sentence`` returns a result whose
            length differs from the length of the sentence.
    """
    root_node = '0/null/null/null/0'
    for raw_line in filein:
        fields = raw_line.strip().split('\t')
        tree = Generator.build_tree([root_node] + fields[4:])
        if len(tree.sentence) == 0:
            continue

        # NOTE(review): the meaning of the literal 3 is not visible from
        # this function; confirm against predict_sentence.
        results = predict_sentence(tree, 3)
        assert (len(tree.sentence) == len(results))

        # Index 0 is the root placeholder, so output starts at index 1.
        for idx in range(1, len(tree.sentence)):
            word = tree.sentence[idx].word
            fileout.write('%s/%s ' % (word, results[idx]))
        # Terminate the sentence's output line.
        fileout.write('\n')