Example #1
0
def optimize_index(request):
    """Admin-only HTTP endpoint that optimizes the search index for a term.

    The term is read from the POST body or the GET query string; any
    other HTTP method is rejected.  Returns "ok" on success and a 403
    "no" response for non-admins or unsupported methods.
    """
    # Only site administrators may trigger index optimization.
    if not users.is_current_user_admin():
        return http.HttpResponse("no", status=403)
    # Accept the term from either a POST body or a GET query string;
    # reject every other method outright.
    if request.method not in ("POST", "GET"):
        return http.HttpResponse("no", status=403)
    params = request.POST if request.method == "POST" else request.GET
    term = params.get("term")
    if term:
        search.optimize_index(term)
    return http.HttpResponse("ok")
def optimize_index(request):
    """Trigger search-index optimization for a single term (admins only).

    Accepts the term via POST or GET; responds 403 "no" to non-admin
    callers and to any other HTTP method, "ok" otherwise.
    """
    if not users.is_current_user_admin():
        return http.HttpResponse("no", status=403)
    # Dispatch table keyed by HTTP method; anything else is forbidden.
    params_by_method = {"POST": request.POST, "GET": request.GET}
    params = params_by_method.get(request.method)
    if params is None:
        return http.HttpResponse("no", status=403)
    term = params.get("term")
    if term:
        search.optimize_index(term)
    return http.HttpResponse("ok")
Example #3
0
    def test_index_optimization(self):
        """Exercise search.optimize_index() over duplicated and stop-word terms."""
        # A dozen datastore keys to distribute among the test objects.
        test_keys = [
            db.Key.from_path("kind_dummy", "key%02d" % num)
            for num in range(12)
        ]
        # Create four SearchMatches for the single term "foo", spread
        # over two fields, splitting the key sets between them.
        for idx in range(4):
            match = models.SearchMatches(generation=search._GENERATION,
                                         entity_kind="kind_dummy",
                                         field="field%d" % (idx % 2),
                                         term="foo")
            match.matches.extend(test_keys[idx::4])
            match.save()
        # Optimizing "foo" collapses 4 objects to 2, so 4-2=2 deletions.
        num_deleted = search.optimize_index("foo")
        self.assertEqual(2, num_deleted)
        # Afterwards there should be exactly one SearchMatches per field.
        sm = None
        query = models.SearchMatches.all().filter("term =", "foo")
        all_sms = sorted(query.fetch(999), key=lambda item: item.field)
        self.assertEqual(2, len(all_sms))
        for sm in all_sms:
            self.assertEqual(search._GENERATION, sm.generation)
            self.assertEqual("kind_dummy", sm.entity_kind)
            self.assertEqual("foo", sm.term)
        # Each surviving object now owns all of its field's keys.
        self.assertEqual("field0", all_sms[0].field)
        self.assertEqual(set(test_keys[0::2]), set(all_sms[0].matches))
        self.assertEqual("field1", all_sms[1].field)
        self.assertEqual(set(test_keys[1::2]), set(all_sms[1].matches))

        # A SearchMatches for a stop word should be removed entirely
        # by optimization.
        sm = models.SearchMatches(generation=search._GENERATION,
                                  entity_kind="kind_dummy",
                                  field="field",
                                  term="the")
        sm.matches.extend(test_keys)
        sm.save()
        num_deleted = search.optimize_index("the")
        self.assertEqual(1, num_deleted)
        query = models.SearchMatches.all().filter("term =", "the")
        self.assertEqual(0, query.count())
    def test_index_optimization(self):
        """optimize_index() should merge per-field duplicates and purge stop words."""
        keys = [db.Key.from_path("kind_dummy", "key%02d" % j)
                for j in range(12)]
        # Seed four SearchMatches for the term "foo", alternating between
        # two fields, each holding every fourth key.
        for j in range(4):
            obj = models.SearchMatches(generation=search._GENERATION,
                                       entity_kind="kind_dummy",
                                       field="field%d" % (j % 2),
                                       term="foo")
            obj.matches.extend(keys[j::4])
            obj.save()
        # Merging four objects down to two reports two deletions.
        self.assertEqual(2, search.optimize_index("foo"))
        # Only one merged SearchMatches should remain for each field.
        sm = None
        foo_query = models.SearchMatches.all().filter("term =", "foo")
        remaining = sorted(foo_query.fetch(999), key=lambda m: m.field)
        self.assertEqual(2, len(remaining))
        for sm in remaining:
            self.assertEqual(search._GENERATION, sm.generation)
            self.assertEqual("kind_dummy", sm.entity_kind)
            self.assertEqual("foo", sm.term)
        # The survivors hold the union of their field's original keys.
        self.assertEqual("field0", remaining[0].field)
        self.assertEqual(set(keys[0::2]), set(remaining[0].matches))
        self.assertEqual("field1", remaining[1].field)
        self.assertEqual(set(keys[1::2]), set(remaining[1].matches))

        # Optimizing a stop-word term deletes its SearchMatches outright.
        stop = models.SearchMatches(generation=search._GENERATION,
                                    entity_kind="kind_dummy",
                                    field="field",
                                    term="the")
        stop.matches.extend(keys)
        stop.save()
        self.assertEqual(1, search.optimize_index("the"))
        the_query = models.SearchMatches.all().filter("term =", "the")
        self.assertEqual(0, the_query.count())
# Connect to the chirpradio App Engine datastore before issuing any
# search/index operations.
chirpradio.connect()

# Gather the set of terms whose indexes still need optimizing.
# index.data is a comma-separated dump; column 0 is the term and
# column 2 is a flag — "1" rows are skipped, presumably meaning the
# term is already optimized (TODO confirm against the file's writer).
terms_to_opt = set()
for line in codecs.open("index.data", "r", "utf-8"):
    F = [f.strip() for f in line.split(",")]
    if F[2] == "1":
        continue
    terms_to_opt.add(F[0])

# Optimize each term, retrying indefinitely on transient failures and
# accumulating the total number of deleted index objects.
deleted = 0
# NOTE(review): "skipping" looks like a leftover manual-resume hack.  It
# clears on the first truthy term, and set iteration order is arbitrary,
# so in practice it only ever skips empty-string terms — verify intent.
skipping = True
for term in terms_to_opt:
    if skipping and term:
        skipping = False
    if skipping:
        continue
    attempt = 1
    while True:
        try:
            # optimize_index returns the number of deleted objects
            # (see the assertions in test_index_optimization above).
            n = search.optimize_index(term)
            break
        except (datastore_errors.Timeout, urllib2.URLError):
            # Transient datastore/network failure: report, back off
            # briefly, and retry without bound.
            attempt += 1
            print "Timeout on attempt %d for %s!" % (attempt,
                                                     term.encode("utf-8"))
            time.sleep(2)
    deleted += n
    print term.encode("utf-8"), n, deleted