def small_fLoc_test3():
    """Run NER on a short fixed sample and print what it found.

    Prints the list returned by the ``NER`` instance, then one line per
    entry holding its first three fields and the slice of the sample that
    fields 1 and 2 select.  Finally builds a ``DistanceMatrix`` from
    ``fLevenshteinDistance``, the sample and the tagger's
    ``_processed_text``, and prints ``toString`` of it, passing the result
    of ``fMinPath()(matrix)``.
    """
    sample = ", ?Van Daal ,"
    tagger = NER()
    entities = tagger(sample)
    print(entities)

    for entity in entities:
        first, last = entity[1], entity[2]
        # Field 2 is treated as an inclusive end offset, hence the + 1.
        print(
            "%s %s %s %s %s"
            % (entity[0], first, last, " : ", sample[first:last + 1])
        )

    matrix = DistanceMatrix(
        fLevenshteinDistance, sample, tagger._processed_text
    )
    path = fMinPath()(matrix)
    print(matrix.toString(path))
# Ad-hoc driver: fill a DistanceMatrix through fLevDistance for a fixed pair
# of strings, print it, and report how long that took.

# The timer starts here, so the elapsed figure also covers the import below.
t = time.time()

# Pipeline stages, currently disabled; uncomment the ones to run.
#prepare_data()
#step1_fetch_text(os.path.join(data_dir, "xml"))
#step2_run_tnt(os.path.join(data_dir, "txt"))
#step3_calc_lev_distance()
#step4_calc_minpath()
#step5_calc_minpath_dt()
#step6_cleantnt_txt()
#step5_calc_minpath_dt("cleaned-tnt", "txt", size_limit=1000)

# Only referenced by the disabled diagonal variant further down.
from gravity.tae.match.lev_distance import fLevDistanceDiag

# Alternative sample pair, disabled:
#s1 = "Song about Alice dream"
#s2 = "Song al?ce dream"
s1 = " abc d dfg rer klm"
s2 = "abc dfg klm"

m = DistanceMatrix(111)
# Diagonal variant, disabled:
#fLevDistanceDiag(1).fill_matrix(s2, s1, m)
fLevDistance().fill_matrix(s2, s1, m)

print(m.toString(s2, s1, fMinPath()(m)))
print("%s %s" % ("Elapsed time: ", time.time() - t))
# Intended sequence (the statements are run together on one physical line):
# call step1_fetch_text on <data_dir>/xml, step2_run_tnt on <data_dir>/txt and
# step6_cleantnt_txt(); start a timer; fill a DistanceMatrix through
# fLevDistance for a fixed pair of strings; print it together with the
# fMinPath result; print the elapsed time.
# NOTE(review): the three leading step calls are the same ones that appear,
# disabled, right after "#prepare_data()" below. They may be the tail of a
# definition whose header is not visible here -- confirm their scope before
# re-flowing this line.
# NOTE(review): as written, everything after the first "#" is a comment and the
# line does not parse; the code is left unchanged.
step1_fetch_text(os.path.join(data_dir, "xml")) step2_run_tnt(os.path.join(data_dir, "txt")) step6_cleantnt_txt() t = time.time() #prepare_data() #step1_fetch_text(os.path.join(data_dir, "xml")) #step2_run_tnt(os.path.join(data_dir, "txt")) #step3_calc_lev_distance() #step4_calc_minpath() #step5_calc_minpath_dt() #step6_cleantnt_txt() #step5_calc_minpath_dt("cleaned-tnt", "txt", size_limit=1000) from gravity.tae.match.lev_distance import fLevDistanceDiag #s1 = "Song about Alice dream" #s2 = "Song al?ce dream" s1 = " abc d dfg rer klm" s2 = "abc dfg klm" m = DistanceMatrix(111) #fLevDistanceDiag(1).fill_matrix(s2, s1, m) fLevDistance().fill_matrix(s2, s1, m) print m.toString(s2, s1, fMinPath()(m)) print "Elapsed time: ", (time.time() - t)
# Test entry points, currently disabled; uncomment one to run it.
# small_fLoc_test3()
# general_test()
# lev_test()
# levdiag_test0()
# levdiag_test2()
# levdiag_test1()
# levdiag_test3()
# levdiag_test4()
# levdiag_test5()

# Print the matrix for the same word pair under each distance function.
# Every group re-binds t1/t2 so it can be run or disabled on its own.
t1, t2 = "asdsdsa", "ssasasas"
m = DistanceMatrix(fLevenshteinDistance2, t1, t2)
print(m.toString(t1, t2))

t1, t2 = "asdsdsa", "ssasasas"
m = DistanceMatrix(fLevenshteinDistance, t1, t2)
print(m.toString(t1, t2))

t1, t2 = "asdsdsa", "ssasasas"
m = DistanceMatrix(fClassicalLevenshteinDistance, t1, t2)
print(m.toString(t1, t2))

# This variant passes one extra positional argument (111) to DistanceMatrix.
t1, t2 = "asdsdsa", "ssasasas"
m = DistanceMatrix(fDiagDistance(6), t1, t2, 111)
print(m.toString(t1, t2))

# Read the file once; s, s1 and s2 all refer to the same result.
s = read_file(data_dir + "/73632.agg.txt")
s1 = s
s2 = s