def optimize_index(request):
    """Optimize the search index for the ``term`` request parameter.

    The request is answered with "no" and status 403 when the current
    user is not an admin, or when the HTTP method is neither POST nor
    GET.  Otherwise the term is read from the matching parameter
    dictionary, passed to search.optimize_index() if it is non-empty,
    and the response is "ok".
    """
    # The admin check is evaluated first; the method is only inspected
    # for admins.
    if (not users.is_current_user_admin()
            or request.method not in ("POST", "GET")):
        return http.HttpResponse("no", status=403)

    # Read the term from whichever parameter set matches the method.
    params = request.POST if request.method == "POST" else request.GET
    term = params.get("term")

    # A missing or empty term is not an error; there is just nothing to do.
    if term:
        search.optimize_index(term)
    return http.HttpResponse("ok")
def test_index_optimization(self):
    """Check that optimize_index() merges per-field matches and drops
    the SearchMatches stored for a stop word."""
    # Twelve dummy keys to distribute over the objects under test.
    dummy_keys = []
    for n in range(12):
        dummy_keys.append(db.Key.from_path("kind_dummy", "key%02d" % n))

    # Store four SearchMatches for a single term, alternating between
    # two fields, each one holding every fourth key.
    for offset in range(4):
        stored = models.SearchMatches(
            generation=search._GENERATION,
            entity_kind="kind_dummy",
            field="field%d" % (offset % 2),
            term="foo")
        stored.matches.extend(dummy_keys[offset::4])
        stored.save()

    # Four objects collapse into two, so 4-2=2 deletions are reported.
    self.assertEqual(2, search.optimize_index("foo"))

    # There should now be just one SearchMatches per field.
    foo_query = models.SearchMatches.all().filter("term =", "foo")
    remaining = sorted(foo_query.fetch(999), key=lambda match: match.field)
    self.assertEqual(2, len(remaining))
    for merged in remaining:
        self.assertEqual(search._GENERATION, merged.generation)
        self.assertEqual("kind_dummy", merged.entity_kind)
        self.assertEqual("foo", merged.term)

    # Each surviving object must hold all the keys that were originally
    # spread over its field: even-indexed keys for field0, odd for field1.
    expected = [
        ("field0", dummy_keys[0::2]),
        ("field1", dummy_keys[1::2]),
    ]
    for merged, (field_name, field_keys) in zip(remaining, expected):
        self.assertEqual(field_name, merged.field)
        self.assertEqual(set(field_keys), set(merged.matches))

    # A SearchMatches for a stop word should simply be deleted by
    # optimization.
    stop_word = models.SearchMatches(
        generation=search._GENERATION,
        entity_kind="kind_dummy",
        field="field",
        term="the")
    stop_word.matches.extend(dummy_keys)
    stop_word.save()
    self.assertEqual(1, search.optimize_index("the"))
    the_query = models.SearchMatches.all().filter("term =", "the")
    self.assertEqual(0, the_query.count())
def test_index_optimization(self):
    """Exercise search.optimize_index() on a regular term and on a
    stop word, checking both the returned deletion count and the
    SearchMatches objects left in the datastore."""

    def store_matches(field, term, keys):
        # Persist one SearchMatches of the dummy kind holding `keys`.
        entry = models.SearchMatches(generation=search._GENERATION,
                                     entity_kind="kind_dummy",
                                     field=field,
                                     term=term)
        entry.matches.extend(keys)
        entry.save()

    # A pool of test keys.
    all_keys = [db.Key.from_path("kind_dummy", "key%02d" % num)
                for num in range(12)]

    # Four SearchMatches share the term "foo" across two different
    # fields; the keys are split evenly between the four of them.
    for part in range(4):
        store_matches("field%d" % (part % 2), "foo", all_keys[part::4])

    # Optimizing takes us from 4 objects down to 2, i.e. 2 deletions.
    deleted_count = search.optimize_index("foo")
    self.assertEqual(2, deleted_count)

    # Only one SearchMatches per field should be left.
    survivors = models.SearchMatches.all().filter("term =", "foo").fetch(999)
    survivors = sorted(survivors, key=lambda item: item.field)
    self.assertEqual(2, len(survivors))
    for survivor in survivors:
        self.assertEqual(search._GENERATION, survivor.generation)
        self.assertEqual("kind_dummy", survivor.entity_kind)
        self.assertEqual("foo", survivor.term)

    first, second = survivors
    self.assertEqual("field0", first.field)
    self.assertEqual(set(all_keys[0::2]), set(first.matches))
    self.assertEqual("field1", second.field)
    self.assertEqual(set(all_keys[1::2]), set(second.matches))

    # Optimizing a stop word should delete its SearchMatches outright.
    store_matches("field", "the", all_keys)
    deleted_count = search.optimize_index("the")
    self.assertEqual(1, deleted_count)
    self.assertEqual(
        0, models.SearchMatches.all().filter("term =", "the").count())
# Script: read terms from "index.data" and run search.optimize_index() on
# each of them, retrying on timeouts and printing a running total of the
# number of deleted objects.
chirpradio.connect()

# Collect the distinct terms to optimize.  Each line of index.data is
# split on commas; the first field is used as the term, and lines whose
# third field is "1" are skipped.
# NOTE(review): presumably a "1" in the third column marks a term that
# needs no optimization -- confirm against whatever writes index.data.
terms_to_opt = set()
index_file = codecs.open("index.data", "r", "utf-8")
# try/finally (rather than leaving the handle open) guarantees the file
# is closed even if a malformed line raises while parsing.
try:
    for line in index_file:
        F = [f.strip() for f in line.split(",")]
        if F[2] == "1":
            continue
        terms_to_opt.add(F[0])
finally:
    index_file.close()

deleted = 0
# Entries are skipped until the first non-empty term is seen; from then
# on every term is processed.
# NOTE(review): this looks like a leftover hook for resuming a partial
# run from a specific term -- confirm before relying on it.
skipping = True
for term in terms_to_opt:
    if skipping and term:
        skipping = False
    if skipping:
        continue
    # Retry indefinitely on datastore timeouts and URL errors, pausing
    # two seconds between attempts.
    attempt = 1
    while True:
        try:
            n = search.optimize_index(term)
            break
        except (datastore_errors.Timeout, urllib2.URLError):
            # Report the attempt that actually failed, then move on to
            # the next attempt number.
            print("Timeout on attempt %d for %s!"
                  % (attempt, term.encode("utf-8")))
            attempt += 1
            time.sleep(2)
    deleted += n
    # Progress line: term, deletions for this term, running total.
    print("%s %s %s" % (term.encode("utf-8"), n, deleted))