def test_builder_noindex(self):
    """Build an index with ``sql_index=False`` and time lookups against it.

    Deletes every file matching ``mdx/Vocabulary*.db``, constructs an
    ``IndexBuilder`` for ``self._mdx_file`` and prints how long the build,
    ``self._repeat`` exact lookups of ``'dedicate'`` and ``self._repeat``
    wildcard key queries for ``"dedi*"`` each take.

    Only the exact lookup is asserted (its result must be truthy); the
    wildcard query results are not checked.
    """
    # Presumably these are index databases left by earlier runs; removing
    # them means the constructor below cannot reuse one -- TODO confirm.
    for f in glob.glob("mdx/Vocabulary*.db"):
        os.remove(f)
    print("***without sql index***\n")

    start = time.time()
    bd = IndexBuilder(self._mdx_file, sql_index=False, check=True)
    print("takes {0} seconds to build without sql index\n".format(
        time.time() - start))

    start = time.time()
    word = 'dedicate'
    for _ in range(self._repeat):
        self.assertTrue(bd.mdx_lookup(word))
    print("takes {0} second to lookup {1} {2} times\n".format(
        time.time() - start, word, self._repeat))

    # Restart the clock so the wildcard figure does not also include the
    # time spent in the exact-lookup loop above.
    start = time.time()
    for _ in range(self._repeat):
        bd.get_mdx_keys("dedi*")
    print("takes {0} second to lookup {1} {2} times\n".format(
        time.time() - start, "dedi*", self._repeat))
temp[i] = re.sub('\r\n(\s)+', '', contents[i]) eng = temp[0] + ' ' + temp[1] + ' ' + temp[2] elif len(contents) == 2: print(222) print(span) temp = ['', ''] for i in range(2): if hasattr(contents[i], 'text'): if contents[i].findChild(): continue temp[i] = contents[i].text else: temp[i] = re.sub('\r\n(\s)+', '', contents[i]) eng = temp[0] + ' ' + temp[1] if not eng.endswith("\n"): eng += "\n" u.append(eng + chn + "\r\n") return u keys = list(OrderedDict.fromkeys(builder.get_mdx_keys())) """ for key in keys: write (key,"".join(getSentList(key))) """ for key in keys[1100:1120]: print(key) print("".join(getSentList(key)))
# Command-line query helper: looks up entries in a fixed MDX dictionary and
# prints the collected results as a JSON object on stdout.
#
# sys.argv[2] is the query type ("key", "wildcard" or "wildcardcount") and
# sys.argv[3] is the query word; sys.argv[1] is not read by this script.
if len(sys.argv) < 4:
    # Fail with a short message instead of an IndexError traceback below.
    sys.stderr.write(
        "usage: %s <unused> <key|wildcard|wildcardcount> <word>\n" % sys.argv[0])
    sys.exit(1)

# Maps each looked-up key to whatever mdx_lookup returned for it.
# (Named `results` so the builtin `dict` is not shadowed.)
results = {}
query_type = sys.argv[2]
query_word = sys.argv[3].strip()
builder = IndexBuilder('/Users/david/Desktop/G/ciku/Longman Dictionary of Contemporary English.mdx')

if query_type == "key":
    results[query_word] = builder.mdx_lookup(query_word, True)
elif query_type == "wildcard":
    keys = builder.get_mdx_keys(query_word)
    # The limit is tested after storing, so up to 11 keys are looked up.
    for count, key in enumerate(keys, 1):
        results[key] = builder.mdx_lookup(key)
        if count > 10:
            break
elif query_type == "wildcardcount":
    # Print only the number of matching keys and stop; no JSON is emitted.
    keys = builder.get_mdx_keys(query_word)
    print(len(keys))
    sys.exit(0)

# An unrecognised query type falls through and prints an empty object.
print(json.dumps(results))
from mdict_query import IndexBuilder

# Scratch script: open the OED dictionary, run one wildcard key query and
# one exact lookup. Nothing is printed; the values are only bound to names.
bd = IndexBuilder("mdx\\oed.mdx")

# Keys matching the pattern "ded*".
keys = bd.get_mdx_keys("ded*")

# Lookup result for the single headword "a".
result = bd.mdx_lookup("a")