def main(trained_model, test_file, viterbi, output_tags="output.tag",
         output_predictions="output.pred"):
    """Tag every sample of ``test_file`` and write the result to ``output_tags``.

    ``load_data(test_file)`` yields ``(test_data, identifier)``.  The output
    file alternates one identifier line with one line of space-separated
    ``word_tag`` tokens, where the words are ``test_data[k][0]``.

    Args:
        trained_model: Either something containing ``"crf"`` (then
            ``crf.predict`` is used), a list
            ``[clf, previous_n, next_n, word_vocab, other_features]``, or a
            path to a pickle file holding those five values.
        test_file: Passed unchanged to ``load_data``.
        viterbi: Forwarded to ``predict_tags_n`` (unused on the CRF path).
        output_tags: Path of the tag file to write.
        output_predictions: Path handed to ``predict_mode_batch``; only used
            when ``evaluate`` is false.

    Returns:
        None.
    """
    # Fixed: the body used the undefined name ``testing_file``.
    test_data, identifier = load_data(test_file)
    # Hard-coded, so the predict_mode_batch branch below is never taken.
    evaluate = True

    # The model type does not change while tagging; decide once.
    use_crf = "crf" in trained_model
    if not use_crf:
        if not isinstance(trained_model, list):
            # NOTE(review): pickle.load can execute arbitrary code; only
            # load model files that come from a trusted source.
            with open(trained_model, 'rb') as frb:
                clf, previous_n, next_n, word_vocab, other_features = \
                    pickle.load(frb)
        else:
            clf, previous_n, next_n, word_vocab, other_features = trained_model

    # time.clock() was removed in Python 3.8; fall back to it only on
    # interpreters that do not provide perf_counter.
    timer = getattr(time, "perf_counter", None) or time.clock
    tic = timer()

    confidences = []
    with open(output_tags, 'w') as fw:
        for i in range(len(test_data) + len(identifier)):
            # Floor division keeps the index an int on Python 2 and 3.
            pair = i // 2
            if i % 2 == 0:
                # Even steps emit the identifier line of the pair.
                fw.write(identifier[pair])
                fw.write("\n")
                continue

            # Odd steps emit the tagged sentence of the pair.
            words = test_data[pair][0]
            if use_crf:
                # CRF tags are concatenated as-is.
                y, tmp_conf = crf.predict(words, trained_model)
                tagged = [word + "_" + y[j] for j, word in enumerate(words)]
            else:
                # Other models return numeric tags mapped through int2tags.
                y, tmp_conf = predict_tags_n(viterbi, previous_n, next_n, clf,
                                             words, word_vocab, other_features)
                tagged = [word + "_" + int2tags[int(y[j])]
                          for j, word in enumerate(words)]
            fw.write(" ".join(tagged))
            assert len(y) == len(tmp_conf)
            confidences.append(tmp_conf)
            fw.write("\n")

    print(timer() - tic)

    # The tag file is closed (and flushed) before it is handed on.
    if evaluate:
        eval_mode_batch(output_tags, confidences, helper.cities)
    else:
        # Fixed typo: was ``helper.cityies``.
        predict_mode_batch(output_tags, output_predictions, helper.cities)
def main(trained_model, testing_file, viterbi, output_tags="output.tag",
         output_predictions="output.pred"):
    """Tag every sample of ``testing_file`` and write the result to ``output_tags``.

    ``load_data(testing_file)`` yields ``(test_data, identifier)``.  The
    output file alternates one identifier line with one line of
    space-separated ``word_tag`` tokens, where the words are
    ``test_data[k][0]``.

    Args:
        trained_model: Either something containing ``"crf"`` (then
            ``crf.predict`` is used), a list
            ``[clf, previous_n, next_n, word_vocab, other_features]``, or a
            path to a pickle file holding those five values.
        testing_file: Passed unchanged to ``load_data``.
        viterbi: Forwarded to ``predict_tags_n`` (unused on the CRF path).
        output_tags: Path of the tag file to write.
        output_predictions: Path handed to ``predict_mode_batch``; only used
            when ``evaluate`` is false.

    Returns:
        None.
    """
    test_data, identifier = load_data(testing_file)
    # Hard-coded, so the predict_mode_batch branch below is never taken.
    evaluate = True

    # The model type does not change while tagging; decide once.
    use_crf = "crf" in trained_model
    if not use_crf:
        if not isinstance(trained_model, list):
            # NOTE(review): pickle.load can execute arbitrary code; only
            # load model files that come from a trusted source.
            with open(trained_model, "rb") as model_file:
                clf, previous_n, next_n, word_vocab, other_features = \
                    pickle.load(model_file)
        else:
            clf, previous_n, next_n, word_vocab, other_features = trained_model

    confidences = []
    # The context manager closes the file even if prediction raises.
    with open(output_tags, 'w') as f:
        for i in range(len(test_data) + len(identifier)):
            # Floor division keeps the index an int on Python 2 and 3.
            pair = i // 2
            if i % 2 == 0:
                # Even steps emit the identifier line of the pair.
                f.write(identifier[pair])
                f.write("\n")
                continue

            # Odd steps emit the tagged sentence of the pair.
            words = test_data[pair][0]
            if use_crf:
                # CRF tags are concatenated as-is.
                y, tmp_conf = crf.predict(words, trained_model)
                tagged = [word + "_" + y[j] for j, word in enumerate(words)]
            else:
                # Other models return numeric tags mapped through int2tags.
                y, tmp_conf = predict_tags_n(viterbi, previous_n, next_n, clf,
                                             words, word_vocab, other_features)
                tagged = [word + "_" + int2tags[int(y[j])]
                          for j, word in enumerate(words)]
            f.write(" ".join(tagged))
            assert len(y) == len(tmp_conf)
            confidences.append(tmp_conf)
            f.write("\n")

    # The tag file is closed (and flushed) before it is handed on.
    if evaluate:
        eval_mode_batch(output_tags, confidences, helper.cities)
    else:
        predict_mode_batch(output_tags, output_predictions, helper.cities)
def predictCRF(trained_model, words, cities):
    """Run the CRF tagger on ``words`` and post-process via ``predict_mode``.

    Args:
        trained_model: Model argument forwarded to ``crf.predict``.
        words: Token sequence handed to both ``crf.predict`` and
            ``predict_mode``.
        cities: Forwarded unchanged to ``predict_mode``.

    Returns:
        The 3-tuple ``(pred, conf_scores, conf_cnts)`` unpacked from
        ``predict_mode``.
    """
    crf_tags, crf_confidences = crf.predict(words, trained_model)
    # The trailing True is a positional flag of predict_mode; its meaning is
    # not visible from this function.
    outcome = predict_mode(words, crf_tags, crf_confidences, cities, True)
    # Unpack explicitly so the result is always exactly three values.
    prediction, scores, counts = outcome
    return prediction, scores, counts