def test_010_bench_pattern_time_Eclipse(self): return import time with open( "tests/documentation/Heat_Map/References/Eclipse_SWT/Eclipse.cxml", encoding='utf-8') as df: d = df.read() # p = "@exception SWTException" # opt: ? -> 1255 # p = "@exception SWTException <ul> <li>ERROR_WIDGET_DISPOSED - if the receiver has been" # opt: 10685 -> 1094 # p = "<li>ERROR_THREAD_INVALID_ACCESS - if not called from the thread that created the receiver" # opt: 13056 -> 1012 p = dedent(""" @exception SWTException <ul> <li>ERROR_WIDGET_DISPOSED - if the receiver has been disposed</li> <li>ERROR_THREAD_INVALID_ACCESS - if not called from the thread that created the receiver</li> </ul> """).strip() # k; t; |R| # 0.55;3412;1229 --- 2809 =) l = [] for kp in range(55, 101, 5): k = kp / 100.0 t1 = time.time() fnds = pnds.search(d, p, k) t2 = time.time() r = "%0.2f;%d;%d " % (k, t2 - t1, len(fnds)) l.append(r) print(r) print("k;t;|R|") for e in l: print(e)
def test_a_smoke_search_spl(self): return d = "w1 w2 w3 w4 w5 \n A B C D E1 F G H I w6 w7 \n w8 w9 A B C \n D E2 F G H I \n w10 w11 w12 w13" p = "A B C D E1 F G H I" fnds = pnds.search(d, p, self.sim) for fb, fe in fnds: print("<<<" + d[fb:fe] + ">>>")
def test_005_pattern_NDA_001(self): return with open("tests/gzp/gzp.pxml", encoding='utf-8') as df: d = df.read() # NDA, not shipped p = "в появившемся окне" fnds = pnds.search(d, p, 0.877) for f in fnds: print(f, d[f[0]:f[1]])
def test_1_eclipse_max(self): return with open( "tests/documentation/Heat_Map/References/Eclipse_SWT/Eclipse.cxml", encoding='utf-8') as df: d = df.read() # p = "@exception SWTException" # opt: ? -> 1255 # p = "@exception SWTException <ul> <li>ERROR_WIDGET_DISPOSED - if the receiver has been" # opt: 10685 -> 1094 p = "<li>ERROR_THREAD_INVALID_ACCESS - if not called from the thread that created the receiver" # opt: 13056 -> 1012 fnds = pnds.search(d, p, 0.87) print(len(fnds))
def test_009_bench_pattern_time_LKD(self): return import time with open( "tests/documentation/Heat_Map/4_first/Linux_Kernel/Linux_Kernel_Documentation.cxml", encoding='utf-8') as df: d = df.read() p = dedent(""" This documentation is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this documentation; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA For more details see the file COPYING in the source distribution of Linux. """).strip() """ k;t;|R| 0.55;450;25 0.60;98;25 0.65;74;24 0.70;55;24 0.75;37;24 0.80;25;24 0.85;17;24 0.90;10;22 0.95;4;22 1.00;1;2 """ l = [] for kp in range(55, 101, 5): k = kp / 100.0 t1 = time.time() fnds = pnds.search(d, p, k) t2 = time.time() r = "%0.2f;%d;%d " % (k, t2 - t1, len(fnds)) l.append(r) print(r) print("k;t;|R|") for e in l: print(e)
def test_008_bench_pattern_time_Blender(self): return import time with open( "tests/documentation/Heat_Map/References/Blender_manual/blender_manual.pxml", encoding='utf-8') as df: d = df.read() p = "Faces Reference Mode: Edit Mode Menu: Mesh -- & gt; Clean up -- & gt;" l = [] for kp in range(55, 101, 5): k = kp / 100.0 t1 = time.time() fnds = pnds.search(d, p, k) t2 = time.time() r = "%0.2f;%d;%d " % (k, t2 - t1, len(fnds)) l.append(r) print(r) print("k;t;|R|") for e in l: print(e)
def test_1_psql_fitting(self):
    """Search a PostgreSQL reference paragraph and print every match in context.

    Reads the PostgreSQL 9.6.1 SQL reference ``.cxml`` file, calls
    ``pnds.search`` with 0.77 as the third argument and
    ``unify_whitespaces=True``, prints the number of results, and then
    prints each ``(begin, end)`` span numbered from 1, surrounded by up to
    25 characters of text on either side.

    The test makes no assertions; it only fails if reading the file or the
    search itself raises.
    """
    # Uncomment the next line to disable this test.
    # return
    context = 25  # characters of surrounding text shown on each side

    p = dedent(""" To alter the owner, you must also be a direct or indirect member of the new owning role, and that role must have CREATE privilege on the table's schema. (These restrictions enforce that altering the owner doesn't do anything you couldn't do by dropping and recreating the table. However, a superuser can alter ownership of any table anyway.) """).strip()

    with open(
            "tests/documentation/Heat_Map/References/PostgreSQL_9.6.1_SQL_Reference/PostgreSQL_9.6.1_SQL_Reference.cxml",
            encoding='utf-8') as df:
        d = df.read()

    fnds = pnds.search(d, p, 0.77, unify_whitespaces=True)
    print(len(fnds))

    for n, (fb, fe) in enumerate(fnds, 1):
        # Clamp the left edge: a negative start index would be interpreted
        # relative to the end of the text and yield empty or wrong context
        # for matches that begin within the first `context` characters.
        before = d[max(0, fb - context):fb]
        # Slicing past the end of the text is already safe.
        after = d[fe:fe + context]
        print(
            str(n) + " <<<" + before + "[[[\n" +
            d[fb:fe] +
            "\n]]]" + after + ">>>")
def test_020_bench_pattern_time_PostgreSQL(self): return import time with open( "tests/documentation/Heat_Map/References/PostgreSQL_9.6.1_SQL_Reference/PostgreSQL_9.6.1_SQL_Reference.cxml", encoding='utf-8') as df: d = df.read() p = "you must also be a direct or indirect member of the new owning role, and that role must have CREATE privilege on the table's schema." # k=0.57, t=31, |R|=17 # k=0.62, t=27, |R|=17 # k=0.67, t=27, |R|=17 # k=0.72, t=27, |R|=17 # k=0.77, t=27, |R|=17 # k=0.82, t=27, |R|=17 # k=0.87, t=28, |R|=17 # k=0.92, t=27, |R|=17 # k=0.97, t=27, |R|=17 # p = "you must also be a direct or indirect member of the new owning role" # k=0.57, t=20, |R|=19 # k=0.62, t=20, |R|=19 # k=0.67, t=20, |R|=19 # k=0.72, t=20, |R|=19 # k=0.77, t=20, |R|=19 # k=0.82, t=20, |R|=19 # k=0.87, t=20, |R|=19 # k=0.92, t=20, |R|=19 # k=0.97, t=20, |R|=19 l = [] for kp in range(57, 101, 5): k = kp / 100.0 t1 = time.time() fnds = pnds.search(d, p, k, unify_whitespaces=True) t2 = time.time() l.append("k=%0.2f, t=%d, |R|=%d " % (k, t2 - t1, len(fnds))) for e in l: print(e)
def find_like_pattern(inputfile, pattern, ms):
    """Find fragments of ``inputfile.text`` that resemble ``pattern``.

    The search backend is ``faster_pattern_near_duplicate_search`` when it
    can be imported; otherwise a notice is written to stderr and
    ``pattern_near_duplicate_search`` is used instead.

    Intervals reported by ``sourcemarkers.find_marked_intervals`` are passed
    to ``smear_text`` before searching, so the search runs on the smeared
    copy while the returned fragments are sliced from the untouched
    ``inputfile.text``.

    :param inputfile: object exposing the document as ``inputfile.text``
    :param pattern: text to look for
    :param ms: passed through as the third argument of ``pnds.search``
        (presumably the minimal similarity -- confirm against the backend)
    :return: list of tuples ``(begin, end - 1, ratio, fragment, tokens)``
        where ``begin``/``end`` come from the backend, ``ratio`` is
        ``util.diratio(fragment, pattern)`` and ``tokens`` is
        ``util.tokenst(fragment)``
    """
    try:
        import faster_pattern_near_duplicate_search as pnds
    except (ImportError, ModuleNotFoundError):
        import sys

        print("Falling back to slower pattern search", file=sys.stderr)
        import pattern_near_duplicate_search as pnds

    # Support ignored/accepted ranges -- not yet implemented
    source_text = inputfile.text
    marked_spans = [
        (begin, end)
        for begin, end, _marker in sourcemarkers.find_marked_intervals(source_text)
    ]
    searchable = smear_text(source_text, marked_spans)

    found = []
    for begin, end in pnds.search(searchable, pattern, ms):
        fragment = inputfile.text[begin:end]
        tokens = util.tokenst(fragment)
        similarity = util.diratio(fragment, pattern)
        # The second element is end - 1, not the slice bound itself.
        found.append((begin, end - 1, similarity, fragment, tokens))
    return found
def test_smoke_search_p(self): return fnds = pnds.search(self.d, self.p, self.sim) for fb, fe in fnds: print("<<<" + self.d[fb:fe] + ">>>")
# Smoke test around pnds.search(self.d, self.p, self.sim, optimize_size=False).
# NOTE(review): as written, this line returns the result of pnds.search().
# Sibling tests use a bare `return` as their first statement to disable
# themselves, so this may have been meant as `return` followed by an
# unreachable call -- confirm the intended form before relying on it.
def test_smoke_non_opt_search(self): return pnds.search(self.d, self.p, self.sim, optimize_size=False)