Example #1
0
    def token_count_unique(self, token, estimate=False):
        """Count occurrences of ``token``, skipping those inside a window.

        The searcher's occurrences are sorted and then visited in order.
        An occurrence is counted only if its offset is not below the
        current skip boundary; after counting, the boundary is moved
        forward.  Presumably this yields the number of distinct streams
        that contain ``token`` -- TODO confirm against ``offset_to_index``.

        Args:
            token: value handed to ``pysary.saryer_search2``.
            estimate: if true, the boundary is advanced by
                ``int(self.length / self.numstreams)`` past the counted
                offset (marked as a hack by the original author) instead
                of being looked up with ``self.offset_to_index``.

        Returns:
            The number of counted occurrences, or 0 if the search fails.
        """
        # NOTE: an earlier variant also passed len(token) to saryer_search2.
        found = pysary.saryer_search2(self.sary, token)
        if not found:
            return 0

        pysary.saryer_sort_occurrences(self.sary)
        total = pysary.saryer_count_occurrences(self.sary)

        boundary = 0
        distinct = 0
        for _ in xrange(total):
            # Each call advances the searcher to its next occurrence.
            offset = pysary.saryer_get_next_offset(self.sary)
            assert offset >= 0
            if offset < boundary:
                # Still inside the window of an occurrence already counted.
                continue

            distinct += 1
            if estimate:
                boundary = offset + int(self.length / self.numstreams)  # hack
            else:
                _, boundary = self.offset_to_index(offset)

            # A falsy boundary ends the scan early.
            if not boundary:
                break

        return distinct
Example #2
0
    def token_count_unique(self, token, estimate=False):
        """Return how many sorted occurrences of ``token`` fall outside
        the skip window opened by the previously counted occurrence.

        Returns 0 when ``pysary.saryer_search2`` reports no match.  When
        ``estimate`` is true, the window end is approximated as
        ``offset + int(self.length / self.numstreams)``; otherwise it is
        the second element returned by ``self.offset_to_index(offset)``.
        NOTE(review): this looks like a "streams containing token" count;
        verify against the definition of ``offset_to_index``.
        """
        if not pysary.saryer_search2(self.sary, token):
            return 0

        pysary.saryer_sort_occurrences(self.sary)
        occurrences = pysary.saryer_count_occurrences(self.sary)

        skip_until = 0
        counted = 0
        for _ in xrange(occurrences):
            # Pulls the next occurrence offset from the searcher.
            pos = pysary.saryer_get_next_offset(self.sary)
            assert pos >= 0

            if pos < skip_until:
                # Falls within the window of the last counted occurrence.
                continue
            counted += 1

            if not estimate:
                _stream, skip_until = self.offset_to_index(pos)
            else:
                # hack: assume a fixed window of length / numstreams
                skip_until = pos + int(self.length / self.numstreams)

            if not skip_until:
                # Falsy window end: stop scanning.
                break

        return counted
Example #3
0
    def token_count(self, token):
        """Return the number of occurrences found for ``token``.

        Runs ``pysary.saryer_search2`` on ``self.sary``; if it reports a
        match, the result of ``pysary.saryer_count_occurrences`` is
        returned, otherwise 0.
        """
        # NOTE: an earlier variant also passed len(token) to saryer_search2.
        if pysary.saryer_search2(self.sary, token):
            return pysary.saryer_count_occurrences(self.sary)
        return 0
Example #4
0
 def token_count(self, token):
     """Search ``self.sary`` for ``token`` and return the occurrence count.

     Yields 0 when ``pysary.saryer_search2`` returns a falsy result;
     otherwise returns ``pysary.saryer_count_occurrences(self.sary)``.
     """
     matched = pysary.saryer_search2(self.sary, token)
     return pysary.saryer_count_occurrences(self.sary) if matched else 0