Example #1
0
    def search(self, query_norms, verbose=False):
        """Evaluate a flat boolean query and return the matching ids.

        ``query_norms`` is read left to right. The tokens ``"AND"``, ``"OR"``
        and ``"NOT"`` select the operator applied to the terms that follow;
        every other token is looked up in ``self.word_index``, its encoded id
        list is read from the file ``self.bin_name`` and decoded with
        ``self.archiver.decode``.

        Args:
            query_norms: iterable of query tokens (terms and operators).
            verbose: if true, terms that cannot be looked up are reported
                through ``utils.print_utf``.

        Returns:
            list: the resulting ids, in no particular order.
        """
        operators = ("AND", "OR", "NOT")
        oper = ""
        query_index = set()
        # Tracks whether a first operand has been loaded. Testing the result
        # set for emptiness instead would let an empty intermediate result
        # (e.g. a disjoint AND) be overwritten by the next term.
        has_operand = False

        with open(self.bin_name, "rb") as f_backward:
            for norm in query_norms:
                if norm in operators:
                    oper = norm
                    continue

                try:
                    # TODO (from the original code, unspecified): an
                    # alternative lookup via ``self.w_offsets[norm]`` was left
                    # commented out here.
                    offset, size = self.word_index[norm]["ids"]
                except (KeyError, TypeError, ValueError):
                    # Unknown or malformed index entry: skip the term rather
                    # than abort the whole query.
                    if verbose:
                        utils.print_utf("--- " + norm)
                    continue

                f_backward.seek(offset)
                decoded = self.archiver.decode(f_backward.read(size))

                # The decoded values are successive differences; a running
                # sum turns them back into absolute ids.
                term_ids = set()
                running = 0
                for delta in decoded:
                    running += delta
                    term_ids.add(running)

                if not has_operand:
                    query_index = term_ids
                    has_operand = True
                elif oper == "AND":
                    query_index &= term_ids
                elif oper == "OR":
                    query_index |= term_ids
                elif oper == "NOT":
                    query_index -= term_ids
                else:
                    # Two terms with no operator seen so far: stop evaluating.
                    break

        return list(query_index)
Example #2
0
    def extract(self, query_norms, up=None, verbose=False):
        """Collect the decoded index data for every query word.

        For each word in ``query_norms``:

        * if it is in ``self.cache``, the cached ``"index"`` value is used and
          the entry's ``"time"`` field is set to the current time;
        * if it is not in ``self.word_index``, it is skipped (and reported via
          ``utils.print_utf`` when ``verbose`` is true);
        * otherwise its data is obtained from
          ``self.read_and_decode_word_index`` using the open file
          ``self.bin_name``, then handed to ``self.cache_insert``.

        Args:
            query_norms: iterable of query words.
            up: names of the sections to decode, forwarded unchanged to
                ``read_and_decode_word_index``. Defaults to
                ``["ids", "lens", "posits", "hashes"]``; a fresh list is built
                on every call so the default cannot be altered between calls.
            verbose: if true, report words missing from ``self.word_index``.

        Returns:
            dict: maps each word that was found to its index data.
        """
        if up is None:
            up = ["ids", "lens", "posits", "hashes"]

        query_index = {}
        with open(self.bin_name, "rb") as f_backward:
            for norm in query_norms:
                if norm in self.cache:
                    entry = self.cache[norm]
                    query_index[norm] = entry["index"]
                    # Record when the cached entry was last used.
                    entry["time"] = time.time()
                    continue

                if norm not in self.word_index:
                    if verbose:
                        utils.print_utf("--- " + norm)
                    continue

                decoded = self.read_and_decode_word_index(
                    f_backward, norm, self.word_index[norm], up=up
                )
                query_index[norm] = decoded
                self.cache_insert(norm, decoded)

        return query_index