Code example #1
File: reading.py  Project: ljarufe/mp100
    def _merge_iters(self, iterlist):
        # Merge-sorts terms coming from a list of
        # term iterators (IndexReader.__iter__() or
        # IndexReader.iter_from()).

        # Fill in the list with the head term from each iterator.

        current = []
        for it in iterlist:
            fnum, text, docfreq, termcount = it.next()
            current.append((fnum, text, docfreq, termcount, it))
        heapify(current)

        # Number of active iterators
        active = len(current)
        while active > 0:
            # Peek at the first term in the sorted list
            fnum, text = current[0][:2]
            docfreq = 0
            termcount = 0

            # Add together all terms matching the first term in the list.
            while current and current[0][0] == fnum and current[0][1] == text:
                docfreq += current[0][2]
                termcount += current[0][3]
                it = current[0][4]
                try:
                    fn, t, df, tc = it.next()
                    heapreplace(current, (fn, t, df, tc, it))
                except StopIteration:
                    heappop(current)
                    active -= 1

            # Yield the term with the summed doc frequency and term count.
            yield (fnum, text, docfreq, termcount)
Code example #2
File: ir_baseline.py  Project: ahiroto/ParlAI
 def add(self, item, priority=None):
     if priority is None:
         priority = item
     if len(self.lst) < self.capacity:
         heapq.heappush(self.lst, (priority, item))
     elif priority > self.lst[0][0]:
         heapq.heapreplace(self.lst, (priority, item))
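
The add() above keeps only the `capacity` best items: push while there is room, otherwise evict the current minimum with heapreplace. A minimal standalone sketch of the same pattern (the class and data below are illustrative, not from the ParlAI source):

import heapq

class TopN:
    """Keep the `capacity` highest-priority items seen so far."""
    def __init__(self, capacity):
        self.capacity = capacity
        self.lst = []  # min-heap; lst[0] is the weakest kept item

    def add(self, item, priority=None):
        if priority is None:
            priority = item
        if len(self.lst) < self.capacity:
            heapq.heappush(self.lst, (priority, item))
        elif priority > self.lst[0][0]:
            # Evict the weakest entry and insert the stronger one.
            heapq.heapreplace(self.lst, (priority, item))

top3 = TopN(3)
for word, score in [("a", 0.1), ("b", 0.9), ("c", 0.4), ("d", 0.7), ("e", 0.2)]:
    top3.add(word, score)
print(sorted(top3.lst, reverse=True))  # [(0.9, 'b'), (0.7, 'd'), (0.4, 'c')]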
Code example #3
File: fieldcache.py  Project: intabeta/inta
    def scored_groups(self, scores_and_docnums, limit=None):
        """Takes a sequence of (score, docnum) pairs and returns a dictionary
        mapping key values to sorted lists of (score, docnum) pairs.
        
        If you specify the ``limit`` keyword, the sorted lists will contain
        only the ``limit`` highest-scoring items.
        """

        groups = defaultdict(list)
        key_for = self.key_for

        for score, docnum in scores_and_docnums:
            key = key_for(docnum)
            ritem = (0 - score, docnum)
            ls = groups[key]
            if limit:
                if len(ls) < limit:
                    heappush(ls, ritem)
                elif ritem[0] > ls[0][0]:
                    heapreplace(ls, ritem)
            else:
                ls.append(ritem)

        for v in groups.values():
            v.sort()

        return groups
Code example #4
File: vpt.py  Project: stevenxxiu/websearch_unimelb
 def _search(self, nearest, node, q):
     if not node:
         return
     tau = -nearest[0][0]
     x = self.distance(q, self.X[node.p])
     self.n_traversed += 1
     if x < tau:
         tau = x
         heapq.heapreplace(nearest, (-x, node.p))
     if node.left and node.right:
         middle = (node.left.upper_bnd + node.right.lower_bnd)/2
         if x < middle:
             if node.left.lower_bnd - tau < x < node.left.upper_bnd + tau:
                 self._search(nearest, node.left, q)
             if node.right.lower_bnd - tau < x < node.right.upper_bnd + tau:
                 self._search(nearest, node.right, q)
         else:
             if node.right.lower_bnd - tau < x < node.right.upper_bnd + tau:
                 self._search(nearest, node.right, q)
             if node.left.lower_bnd - tau < x < node.left.upper_bnd + tau:
                 self._search(nearest, node.left, q)
     elif node.left and not node.right:
         if node.left.lower_bnd - tau < x < node.left.upper_bnd + tau:
             self._search(nearest, node.left, q)
     elif node.right and not node.left:
         if node.right.lower_bnd - tau < x < node.right.upper_bnd + tau:
             self._search(nearest, node.right, q)
Code example #5
File: svd.py  Project: joelimome/axiak-github-contest
def get_suggestions(user, user_vector, svd, already_mapped, top_n=5):
    heap = []
    start = time.time()

    #debug(user_vector.shape)    
    #debug(svd.shape)
    repos = svd * user_vector #.v_dotproducts_with(user_vector)

    #debug("dot products took %0.4fs" % (time.time() - start), 4)

    i = 0
    for repo, value in repos.iteritems():
        i += 1
        if not value:
            continue

        if (user, repo) in already_mapped:
            continue

        newval = (value, repo)
        if not heap:
            heap.append(newval)
        elif len(heap) == top_n:
            if newval > heap[0]:
                heapreplace(heap, newval)
        else:
            heappush(heap, newval)
        #if i % 10000 == 0:
        #    debug("Finished with repo %s" % i)
    heap.sort(reverse=True)
    return "%s:%s" % (user, ','.join("%s;%0.4f" % (x[1][0], x[0]) for x in heap))
Code example #6
File: Heap.py  Project: Superbeet/LeetCode
	def push(self, elem):
		elem = -elem
		if len(self.data)<self.k:
			heapq.heappush(self.data, elem)
		else:
			if elem>self.data[0]:
				heapq.heapreplace(self.data, elem)
Code example #7
File: midiutil.py  Project: nosuchtim/MMTT1
		def _merge(*subsequences):
			# Python Cookbook Recipe 19.14
			
			# prepare a priority queue whose items are pairs of the form
			# (current-value, iterator), one each per (non-empty) subsequence
			heap = [  ]
			for subseq in subsequences:
				iterator = iter(subseq)
				for current_value in iterator:
					# subseq is not empty, therefore add this subseq's pair
					# (current-value, iterator) to the list
					heap.append((current_value, iterator))
					break
			# make the priority queue into a heap
			heapq.heapify(heap)
			while heap:
				# get and yield lowest current value (and corresponding iterator)
				current_value, iterator = heap[0]
				yield current_value
				for current_value in iterator:
					# subseq is not finished, therefore add this subseq's pair
					# (current-value, iterator) back into the priority queue
					heapq.heapreplace(heap, (current_value, iterator))
					break
				else:
					# subseq has been exhausted, therefore remove it from
					# the queue
					heapq.heappop(heap)
Code example #8
File: heapq.py  Project: numba/numba
    def hq_nlargest_impl(n, iterable):

        if n == 0:
            return [iterable[0] for _ in range(0)]
        elif n == 1:
            out = max(iterable)
            return [out]

        size = len(iterable)
        if n >= size:
            return sorted(iterable)[::-1][:n]

        it = iter(iterable)
        result = [(elem, i) for i, elem in zip(range(0, -n, -1), it)]

        hq.heapify(result)
        top = result[0][0]
        order = -n

        for elem in it:
            if top < elem:
                hq.heapreplace(result, (elem, order))
                top, _order = result[0]
                order -= 1
        result.sort(reverse=True)
        return [elem for (elem, order) in result]
Code example #9
File: timewarp.py  Project: braz/timewarp
def aes(strm, k=1):
    """ Weighted reservoir sampling without replacement implementation.

        See [Efraimidis et al.][1].

        k = reservoir size
        rsv = reservoir
        strm = stream
        wts = associated weights for the stream

        [1]: http://arxiv.org/pdf/1012.0256.pdf
    """
    rsv = []
    heapq.heapify(rsv)
    # generate a key and fill the reservoir to k elements with associated keys
    for n, (el, wi) in enumerate(strm):
        ki = random()**(1. / wi)
        if n < k:
            heapq.heappush(rsv, (ki, el))

        # if the reservoir is full, find the minimum threshold key, t.
        # if ki is larger than t, pop t and push ki onto the heap.
        else:
            if len(rsv) > 1:
                t, _ = heapq.nsmallest(1, rsv)[0]
                if ki > t:
                    heapq.heapreplace(rsv, (ki, el))

    # yield k elements with the largest keys, this is the reservoir sample.
    for elem in heapq.nlargest(k, rsv):

        yield elem[1]
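
The docstring above describes the A-ES scheme: each element gets the key u**(1/w) and the k largest keys form the weighted sample. A compact, self-contained sketch of the same idea (the function name and data below are made up for illustration):

import heapq
import random

def weighted_sample_without_replacement(pairs, k):
    """pairs: iterable of (element, weight) with weight > 0."""
    heap = []  # min-heap of (key, element); heap[0] is the current threshold
    for elem, weight in pairs:
        key = random.random() ** (1.0 / weight)
        if len(heap) < k:
            heapq.heappush(heap, (key, elem))
        elif key > heap[0][0]:
            # A larger key displaces the current threshold element.
            heapq.heapreplace(heap, (key, elem))
    return [elem for _, elem in heap]

data = [("a", 1.0), ("b", 5.0), ("c", 0.5), ("d", 2.0)]
print(weighted_sample_without_replacement(data, 2))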
Code example #10
File: spelling.py  Project: Apophus/microblog
    def suggest(self, text, limit=5, maxdist=2, prefix=0):
        """
        :param text: the text to check. This word will **not** be added to the
            suggestions, even if it appears in the word graph.
        :param limit: only return up to this many suggestions. If there are not
            enough terms in the field within ``maxdist`` of the given word, the
            returned list will be shorter than this number.
        :param maxdist: the largest edit distance from the given word to look
            at. Values higher than 2 are not very effective or efficient.
        :param prefix: require suggestions to share a prefix of this length
            with the given word. This is often justifiable since most
            misspellings do not involve the first letter of the word. Using a
            prefix dramatically decreases the time it takes to generate the
            list of words.
        """

        _suggestions = self._suggestions

        heap = []
        for item in _suggestions(text, maxdist, prefix):
            # Note that the *higher* scores (item[0]) are better!
            if len(heap) < limit:
                heappush(heap, item)
            elif item > heap[0]:
                heapreplace(heap, item)

        sugs = sorted(heap, key=lambda x: (0 - x[0], x[1]))
        return [sug for _, sug in sugs]
Code example #11
File: clique.py  Project: sirrice/dbwipes_src
    def top_k(self, rules):
      n = 0
      best = self.best and max(self.best, key=lambda ro: ro.inf) or None
      for ro in rules:
          if len(self.best) >= self.max_bests:
              bound = best.inf - self.best[0].inf
              thresh = self.best[0].inf + bound * 0.02
              if ro.inf <= thresh:
                  continue
          if ro in self.best:
              continue
          if math.isnan(ro.inf):
              continue

          if not best or ro.inf > best.inf:
              n += 1            
              _logger.debug(str(ro))

          if len(self.best) < self.max_bests:
              heapq.heappush(self.best, ro)
          else:
              heapq.heapreplace(self.best, ro)
          
          best = best and max(best, ro) or ro

      return n
Code example #12
File: hbaseClient.py  Project: Manchester412/socorro
  def merge_scan_with_prefix(self,table,prefix,columns):
    #TODO: Need assertion that columns is array containing at least one string
    """
    A generator based iterator that yields totally ordered rows starting with a given prefix.
    The implementation opens up 16 scanners (one for each leading hex character of the salt)
    simultaneously and then yields the next row in order from the pool on each iteration.
    """

    iterators = []
    next_items_queue = []
    for salt in '0123456789abcdef':
      salted_prefix = "%s%s" % (salt,prefix)
      scanner = self.client.scannerOpenWithPrefix(table, salted_prefix, columns)
      iterators.append(salted_scanner_iterable(self.logger,self.client,self._make_row_nice,salted_prefix,scanner))
    # The i below is so we can advance whichever scanner delivers us the polled item.
    for i,it in enumerate(iterators):
      try:
        next = it.next
        next_items_queue.append([next(),i,next])
      except StopIteration:
        pass
    heapq.heapify(next_items_queue)

    while 1:
      try:
        while 1:
          row_tuple,iter_index,next = s = next_items_queue[0]
          #tuple[1] is the actual nice row.
          yield row_tuple[1]
          s[0] = next()
          heapq.heapreplace(next_items_queue, s)
      except StopIteration:
        heapq.heappop(next_items_queue)
      except IndexError:
        return
Code example #13
    def median(self, nums):
        length = len(nums)
        if length == 0:
            return []

        min_heap = [nums[0]]  # min root heap
        max_heap = []  # max root heap
        result = [nums[0]]

        for i in range(1, length):
            if i % 2 == 0:
                if nums[i] > min_heap[0]:  # nums[i]
                    heappush(max_heap, -min_heap[0])
                    heapreplace(min_heap, nums[i])
                else:
                    heappush(max_heap, -nums[i])
            elif i % 2 != 0:
                if nums[i] > min_heap[0]:  # nums[i]
                    heappush(min_heap, nums[i])
                else:
                    max_heap_root = heappushpop(max_heap, -nums[i])
                    heappush(min_heap, -max_heap_root)
            result.append(min_heap[0])

        return result
Code example #14
File: pgcluster.py  Project: DmitryZagr/nginx-proxy
 def get_bloat(self):
     """ get max database bloat of all databases of cluster """
     q = pgq.BLOAT
     pages = 0
     otta = 0
     bloatest = [] # list of bloatest tables
     ret = self.query_eachdb(q, exclude=['template0'])
     for db in ret.keys():
         # loop through all databases
         for r in ret[db]:
             # and its tables
             pages += r[4]
             otta += r[5]
             if pages > 1000:
                 # add to list of bloatest tables
                 bloat = 100 - 100*(pages-otta) / pages
                 item = (bloat, "%s.%s.%s->%s" % (db, r[0], r[1], r[2]))
                 if len(bloatest) < 5:
                     heapq.heappush(bloatest, item)
                 else:
                     heapq.heapreplace(bloatest, item)
     self.logger.debug("pages: %i, otta: %i" % (pages, otta))
     while len(bloatest):
         b = heapq.heappop(bloatest)
         self.logger.debug("bloatest: %s: %.2f%%" % (b[1], 100-b[0]))
     if pages < 5000: # cluster < then 40 Mb is no serious
         return 0
     else:
         return 100*(pages - otta) / pages
Code example #15
File: match.py  Project: ANKIT-KS/fjord
    def matches(self, text):
        """Returns a list of possible matches for given source text.

        :type text: String
        :param text: The text that will be searched for in the translation memory
        :rtype: list
        :return: a list of units with the source and target strings from the
                 translation memory. If :attr:`self.addpercentage` is
                 *True* (default) the match quality is given as a
                 percentage in the notes.
        """
        bestcandidates = [(0.0, None)] * self.MAX_CANDIDATES
        #We use self.MIN_SIMILARITY, but if we already know we have max_candidates
        #that are better, we can adjust min_similarity upwards for speedup
        min_similarity = self.MIN_SIMILARITY

        # We want to limit our search in self.candidates, so we want to ignore
        # all units with a source string that is too short or too long. We use
        # a binary search to find the shortest string, from where we start our
        # search in the candidates.

        # minimum source string length to be considered
        startlength = self.getstartlength(min_similarity, text)
        startindex = 0
        endindex = len(self.candidates.units)
        while startindex < endindex:
            mid = (startindex + endindex) // 2
            if sourcelen(self.candidates.units[mid]) < startlength:
                startindex = mid + 1
            else:
                endindex = mid

        # maximum source string length to be considered
        stoplength = self.getstoplength(min_similarity, text)
        lowestscore = 0

        for candidate in self.candidates.units[startindex:]:
            cmpstring = candidate.source
            if len(cmpstring) > stoplength:
                break
            similarity = self.comparer.similarity(text, cmpstring, min_similarity)
            if similarity < min_similarity:
                continue
            if similarity > lowestscore:
                heapq.heapreplace(bestcandidates, (similarity, candidate))
                lowestscore = bestcandidates[0][0]
                if lowestscore >= 100:
                    break
                if min_similarity < lowestscore:
                    min_similarity = lowestscore
                    stoplength = self.getstoplength(min_similarity, text)

        #Remove the empty ones:
        def notzero(item):
            score = item[0]
            return score != 0
        bestcandidates = filter(notzero, bestcandidates)
        #Sort for use as a general list, and reverse so the best one is at index 0
        bestcandidates.sort(reverse=True)
        return self.buildunits(bestcandidates)
Code example #16
File: _sample.py  Project: rootart/logtools
def logsample_weighted(options, args, fh):
    """Implemented Weighted Reservoir Sampling, assuming integer weights.
    See Weighted random sampling with a reservoir, Efraimidis et al."""
    
    N = options.num_samples
    delimiter = options.delimiter
    # NOTE: Convert to 0-based indexing since we expose as 1-based
    field = options.field-1
    
    R = []
    min_val = float("inf")
    i = 0
    
    for line in fh:
        w = int(line.split(delimiter)[field])
        if w < 1: 
            continue
        
        r = random()
        k = r ** (1./w)            
        
        if i < N:
            heappush(R, (k, line))
            if k < min_val:
                min_val = k
        else:
            if k > min_val:
                # Replace smallest item in record list
                heapreplace(R, (k, line))
        i+=1
                
    # Emit output
    for key, record in R:
        yield key, record.strip()
Code example #17
File: kdt.py  Project: stevenxxiu/websearch_unimelb
 def _search(self, nearest, node, q, depth):
     if not node:
         return
     axis = depth % self.X.shape[1]
     # compare with pivot
     d = -nearest[0][0]
     x = np.sqrt(np.sum(np.power((q - self.X[node.p]).data, 2)))
     self.n_traversed += 1
     if x < d:
         d = x
         heapq.heapreplace(nearest, (-d, node.p))
     # check if left/right nodes need to be visited, visit the one with lower minimum axis distance first
     mu = self.X[node.p,axis]
     left_dist = q[0,axis] - mu + node.left_lower_bnd if node.left else None
     right_dist = mu - q[0,axis] + node.left_lower_bnd if node.right else None
     if node.left and node.right:
         if left_dist < right_dist:
             if left_dist <= d:
                 self._search(nearest, node.left, q, depth+1)
             if right_dist <= d:
                 self._search(nearest, node.right, q, depth+1)
         else:
             if right_dist <= d:
                 self._search(nearest, node.right, q, depth+1)
             if left_dist <= d:
                 self._search(nearest, node.left, q, depth+1)
     elif node.left and not node.right:
         if left_dist <= d:
             self._search(nearest, node.left, q, depth+1)
     elif node.right and not node.left:
         if right_dist <= d:
             self._search(nearest, node.right, q, depth+1)
Code example #18
def magic_loop(models_to_run, clfs, grid, X, y):
    '''
    Takes a list of models to use, two dictionaries of classifiers and parameters, and array of X
    Set to find ten models with best precision at 5 percent recall
    '''
    table = {}
    top = []
    for i in range(10):
        top.append((0, " "))
    heapq.heapify(top)
    k = 0.05
    for n in range(1, 2):
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)
        for index, clf in enumerate([clfs[x] for x in models_to_run]):
            for p in ParameterGrid(grid[models_to_run[index]]):
                try:
                    clf.set_params(**p)
                    print (clf)
                    y_pred_probs = clf.fit(X_train, y_train).predict_proba(X_test)[:,1]
                    plot_precision_recall_n(y_test, y_pred_probs, clf)
                    l = scoring(k, y_test, y_pred_probs)
                    m, s = top[0]
                    p = l['precision']
                    if p > m:
                        heapq.heapreplace(top, (p, clf))
                        table[str(clf)] = l
                except: 
                    print ('Error:')
                    continue
    print (top)
    return top, table
Code example #19
def run(filename):
    '''create statistics  on the json alias file'''
    with open(filename, 'rU') as f:
        aliases = json.load(f)

    count = {}
    vals = aliases.values()

    for val in vals:
        if val in count:
            count[val] = count[val] +1
        else:
            count[val] = 1

    h = [(0, " ")] * K
    heapq.heapify(h)
    for key, val in count.items():
        replacement = (val, key) 
        if replacement > min(h):
            heapq.heapreplace(h,replacement)
    print "There are", len(count.keys()),  "unique businesses represented in the json \n"
    print "There are", len(aliases),  "aliases in the json \n"
    print "The top 10 companies with the most aliases are: \n"
    h = list(h)
    h.sort(reverse=True)
    for i in h:
        print i[1], "had " , i[0], " aliases \n"
    print "Here are some the aliases \n"
    att = find_aliases(aliases,'AT&T')
    coke = find_aliases(aliases,'Coca-Cola Enterprises, Inc.')
    sachs = find_aliases(aliases,'The Goldman Sachs Group, Inc.')
    print(att, "\n")
    print(coke, "\n")
    print(sachs, "\n")
Code example #20
File: merge.py  Project: ifwe/tds
def imerge(*iterlist, **key):
    """Merge a sequence of sorted iterables.

    Returns pairs [value, index] where each value comes from
    iterlist[index], and the pairs are sorted if each of the iterators
    is sorted.
    Hint: use groupby(imerge(...), operator.itemgetter(0)) to get the
    items one by one.
    """
    # thanks
    # http://mail.python.org/pipermail/python-bugs-list/2005-August/029983.html
    if key.keys() not in ([], ["key"]):
        raise TypeError("Excess keyword arguments for imerge")

    key = key.get("key", lambda x: x)

    # initialize the heap containing this tuple:
    #   (inited, value, index, currentItem, iterator)
    # this automatically makes sure all iterators are initialized, then run,
    # and finally emptied
    heap = [(False, None, index, None, iter(iterator))
            for index, iterator in enumerate(iterlist)]

    while heap:
        inited, item, index, value, iterator = heap[0]
        if inited:
            yield value, index
        try:
            item = iterator.next()
        except StopIteration:
            heappop(heap)
        else:
            heapreplace(heap, (True, key(item), index, item, iterator))
Code example #21
File: reading.py  Project: intabeta/inta
    def _merge_terms(self, iterlist):
        # Merge-sorts terms coming from a list of term iterators.

        # Create a map so we can look up each iterator by its id() value
        itermap = {}
        for it in iterlist:
            itermap[id(it)] = it

        # Fill in the list with the head term from each iterator.

        current = []
        for it in iterlist:
            term = next(it)
            current.append((term, id(it)))
        heapify(current)

        # Number of active iterators
        active = len(current)
        while active:
            # Peek at the first term in the sorted list
            term = current[0][0]

            # Re-iterate on all items in the list that have that term
            while active and current[0][0] == term:
                it = itermap[current[0][1]]
                try:
                    nextterm = next(it)
                    heapreplace(current, (nextterm, id(it)))
                except StopIteration:
                    heappop(current)
                    active -= 1

            # Yield the term
            yield term
Code example #22
File: ladybower.py  Project: peakrisk/everest
    def isample_without_replacement(self, k):
        """ Return a sample of size k, without replacement

        k <= n

        O(n)

        Use a heap to keep track of selection.
        """
        if k > len(self.weights):
            raise ValueError("Sample size should be <= %d" % len(self.weights))
    
        heap = []

        random = self.random.random_sample
        weights = random(len(self.weights)) ** (1.0/self.weights)

        for ix, weight in enumerate(weights):
            if ix < k:
                heapq.heappush(heap, (weight, ix))
            else:
                if heap[0][0] < weight:
                    heapq.heapreplace(heap, (weight, ix))

        # now sort the heap -- this is to make things repeatable
        heap.sort()

        # return permuted indices
        return(self.random.permutation([x[1] for x in heap]))
Code example #23
File: _acrostic_iter.py  Project: PhilHarnish/forge
  def _iter_phrases(self, phrases):
    """Uses a heap to yield from lists of `phrases` in sorted order.

    Sadly, this is 5% faster than the built-in heapq.merge solution:
      yield from heapq.merge(
          *[best_items.items() for pos, best_items in phrases.items()],
          key=lambda x: x[1], reverse=True)
    """
    if not phrases:
      return
    best_phrases = []
    cache = []
    for pos, l in phrases.items():
      best_items = iter(l.items())
      next_best_tuple = next(best_items, _EMPTY)
      if next_best_tuple is not _EMPTY:
        item, weight = next_best_tuple
        # Cache the real data for this queue entry somewhere else.
        # cache[id] contains a tuple of (next_best_tuple, best_items)
        heapq.heappush(best_phrases, (-weight, len(cache)))
        cache.append((next_best_tuple, best_items))
    while best_phrases:
      _, cache_id = best_phrases[0]
      next_best_tuple, best_items = cache[cache_id]
      yield next_best_tuple
      next_best_tuple = next(best_items, _EMPTY)
      if next_best_tuple is _EMPTY:
        heapq.heappop(best_phrases)
      else:
        item, weight = next_best_tuple
        cache[cache_id] = (next_best_tuple, best_items)
        heapq.heapreplace(best_phrases, (-weight, cache_id))
Code example #24
File: shortestsnipplet.py  Project: amitarya/junk
def findShortestSniplet(m, word, alphabets):
	size = len(m)
	if len(m) < len(alphabets):
		return (-1, -1)
	minh = []
	maxh = []
	for w in m:
		i = iter(m[w])
		val = next(i, sys.maxint)
		heapq.heappush(minh, (val, i))
		heapq.heappush(maxh, -val)
	mind = sys.maxint
	start = 0
	end = 0
	minval = maxval = 0
	while minval != sys.maxint and maxval != sys.maxint:
		(minval, it1) = minh[0]
		maxval = -maxh[0]
		print minval, maxval
		if minval != sys.maxint and maxval != sys.maxint and mind > maxval - minval:
			mind = maxval - minval
			start = minval
			end = maxval
		val = next(it1, sys.maxint)
		heapq.heapreplace(minh, (val, it1))
		heapq.heappush(maxh, -val)
	print start, end
	return (start, end)
Code example #25
File: postings.py  Project: archatas/whoosh
 def next(self):
     if self.id is None:
         raise ReadTooFar
     
     state = self.state
     
     if len(state) < self.minmatch:
         # Can't match the minimum if there aren't enough readers left
         self.id = None
     elif len(state) == 1:
         # Short circuit if there's only one reader
         r = state[0]
         r.next()
         self.id = r.id
     else:
         # Advance all the readers that match the current id
         lowid = state[0].id
         while state and state[0].id == lowid:
             r = state[0]
             r.next()
             if r.id is None:
                 heappop(state)
             else:
                 heapreplace(state, r)
         
         if state:
             self.id = state[0].id
         else:
             self.id = None
Code example #26
 def addNum(self, num):
     """
     Adds a num into the data structure.
     :type num: int
     :rtype: void
     """
     if (not self.small_heap) and (self.pivort == None):
         self.pivort = num
         return
     if self.pivort == None:
         max_of_smalls = - self.small_heap[0]
         min_of_bigs = self.big_heap[0]
         if max_of_smalls <= num <= min_of_bigs:
             self.pivort = num
         elif num < max_of_smalls:
             # swap with max_of_smalls
             self.pivort = - heapq.heapreplace(self.small_heap, - num)
         else:
             self.pivort = heapq.heapreplace(self.big_heap, num)
         return
     # when pivort already exists
     if num <= self.pivort:
         small, big = num, self.pivort
     else:
         small, big = self.pivort, num
     self.pivort = None
     heapq.heappush(self.small_heap, - small)
     heapq.heappush(self.big_heap, big)
Code example #27
File: heap.py  Project: anchikam/cs2013
def get_topk(l, k):
    """Finds the top K elements in a list of integers.

    Parameters:
    - l: An iterable object with integers
    - k: an integer
    """
    counts = {}

    for i in l:
        n = int(i)
        counts[n] = counts.setdefault(n, 0) + 1

    h = [(count,n) for (n,count) in counts.items()[:k]]
    heapq.heapify(h)

    for n, count in counts.items()[k:]:
        min_count, min_n = h[0]

        if count > min_count:
            heapq.heapreplace(h, (count, n))

    h.sort(reverse=True)

    return h
Code example #28
File: packer.py  Project: ZhanruiLiang/pysheetmusic
 def fill(gw):
     if gw < maxSingleW: 
         return
     nonlocal maxRate, bestSize, bestPos
     rowRests = []
     heapq.heappush(rowRests, (-gw, 0))
     rowYs = [0]
     rowHeight = sizes[0][0][1]
     for (w, h), id in sizes:
         maxW = -rowRests[0][0]
         if maxW >= w:
             # Add to this row
             i = rowRests[0][1]
             heapq.heapreplace(rowRests, (- (maxW - w), i))
             pos[id] = (gw - maxW, rowYs[i])
         else:
             # Create new row
             heapq.heappush(rowRests, (-(gw - w), len(rowRests)))
             rowYs.append(rowYs[-1] + rowHeight)
             rowHeight = h
             pos[id] = (0, rowYs[-1])
     size = (gw, rowYs[-1] + rowHeight)
     rate = area / (size[0] * size[1])
     if rate > maxRate:
         maxRate = rate
         bestSize = size
         bestPos = pos[:]
Code example #29
File: helpers.py  Project: PeterHancock/bup
def merge_iter(iters, pfreq, pfunc, pfinal, key=None):
    if key:
        samekey = lambda e, pe: getattr(e, key) == getattr(pe, key, None)
    else:
        samekey = operator.eq
    count = 0
    total = sum(len(it) for it in iters)
    iters = (iter(it) for it in iters)
    heap = ((next(it),it) for it in iters)
    heap = [(e,it) for e,it in heap if e]

    heapq.heapify(heap)
    pe = None
    while heap:
        if not count % pfreq:
            pfunc(count, total)
        e, it = heap[0]
        if not samekey(e, pe):
            pe = e
            yield e
        count += 1
        try:
            e = it.next() # Don't use next() function, it's too expensive
        except StopIteration:
            heapq.heappop(heap) # remove current
        else:
            heapq.heapreplace(heap, (e, it)) # shift current to new location
    pfinal(count, total)
Code example #30
    def __iter__(self):
        """Iterate over records in input files.

    self._offsets is always correctly updated so that stopping iterations
    doesn't skip records and doesn't read the same record twice.

    Raises:
      Exception: when Files list and offsets do not match.

    Yields:
      The result.
    """
        ctx = context.get()
        mapper_spec = ctx.mapreduce_spec.mapper
        shard_number = ctx._shard_state.shard_number
        filenames = mapper_spec.params[self.FILES_PARAM][shard_number]

        if len(filenames) != len(self._offsets):
            raise Exception("Files list and offsets do not match.")

        # Heap with (Key, Value, Index, reader) pairs.
        readers = []

        # Initialize heap
        for (i, filename) in enumerate(filenames):
            offset = self._offsets[i]
            # TODO(user): Shrinking the buffer size is a workaround until
            # a tiered/segmented merge is implemented.
            reader = records.RecordsReader(
                cloudstorage.open(filename,
                                  read_buffer_size=self.GCS_BUFFER_SIZE))
            reader.seek(offset)
            readers.append((None, None, i, reader))

        # Read records from heap and merge values with the same key.

        # current_result is yielded and consumed by _merge_map.
        # current_result = (key, value, is_partial)
        current_result = None
        current_count = 0
        current_size = 0
        while readers:
            (key, value, index, reader) = readers[0]

            if key is not None:
                current_count += 1
                current_size += len(value)

                should_yield = False
                if current_result:
                    if key != current_result[0]:
                        # New key encountered
                        should_yield = True
                    elif (self._max_values_count != -1
                          and current_count >= self._max_values_count):
                        # Maximum number of values encountered.
                        current_result[2] = True
                        should_yield = True
                    elif (self._max_values_size != -1
                          and current_size >= self._max_values_size):
                        # Maximum size of values encountered
                        current_result[2] = True
                        should_yield = True

                if should_yield:
                    # New key encountered or maximum count hit. Yield current key.
                    yield current_result
                if not current_result or should_yield:
                    current_result = [key, [], False]
                    current_count = 0
                    current_size = 0
                current_result[1].append(value)

            # Read next key/value from reader.
            try:
                self._offsets[index] = reader.tell()
                start_time = time.time()
                binary_record = reader.read()
                # update counters
                if context.get():
                    operation.counters.Increment(
                        input_readers.COUNTER_IO_READ_BYTES,
                        len(binary_record))(context.get())
                    operation.counters.Increment(
                        input_readers.COUNTER_IO_READ_MSEC,
                        int((time.time() - start_time) * 1000))(context.get())
                proto = kv_pb.KeyValue()
                proto.ParseFromString(binary_record)
                # Put read data back into heap.
                heapq.heapreplace(readers,
                                  (proto.key(), proto.value(), index, reader))
            except EOFError:
                heapq.heappop(readers)

        # Yield leftovers.
        if current_result:
            yield current_result
Code example #31
li2 = [5, 7, 9, 4, 3]

# using heapify() to convert list into heap
heapq.heapify(li1)
heapq.heapify(li2)

# using heappushpop() to push and pop items simultaneously
# pops 2
print "\nThe popped item using heappushpop() is : ",
print (heapq.heappushpop(li1, 2))

# using heapreplace() to push and pop items simultaneously; pops 3
# the element is first popped (which is 3), then the new element is pushed.

print "\nThe popped item using heapreplace() is : ",
print (heapq.heapreplace(li2, 2))

##################################################################

# 6. nlargest(k, iterable, key = fun) :- This function is used to return the k
# largest elements from the iterable specified and satisfying the key if
# mentioned.

# 7. nsmallest(k, iterable, key = fun) :- This function is used to return the
#  k smallest elements from the iterable specified and satisfying the key if
# mentioned.

# Python code to demonstrate working of
# nlargest() and nsmallest()

print "\n------------------------------------------------"
Code example #32
def uBkNN_sd(r, k=26):
    """
        Compute the rating predictions for missing values of r, using a user-based kNN model
        It also takes into account the standard deviation of the users to predicted more accurately
        :param r: the rating matrix
        :param k: the number of neighbors. Default = 26, found empirically
        :return: the rating prediction matrix
        """
    n_row, n_col = r.shape
    # Compute vertical representation of R

    # Vertical may contain empty values
    vertical = []
    for j in range(n_col):
        vertical.append(np.nonzero(r[:, j]))

    # Compute mean for each user
    means = np.true_divide(r.sum(1), (r != 0).sum(1))

    means = [0.0 if math.isnan(i) else i for i in means]

    # Compute standard deviation
    standard_deviation = [
        np.nanstd(np.where(np.isclose(a, 0), np.nan, a)) for a in r
    ]

    # Compute sim matrix
    sim_matrix = np.zeros((n_row, n_row))
    for i in range(n_row):
        for j in range(i + 1, n_row):
            a = np.dot(r[i, :], r[j, :])
            if a != 0.0:
                a = a / (np.linalg.norm(r[i, :]) * np.linalg.norm(r[j, :]))
                sim_matrix[i, j] = a
                sim_matrix[j, i] = a

    # Threshold to speed-up the computation time
    # if, for a movie j, there is less than 'threshold' users that rated this movie,
    # perform a classic search among all users that have rated this movie
    # otherwise: there are a lot of people that rated this movie
    # it is then faster, for user i, to check his neighbors in decreasing order of similarity score
    # and retrieve the user if it has rated this movie (we have a high chance of that, since the number of people
    # that rated this movie is high)
    threshold = k * 3

    r_hat = r.copy().astype(float)

    for i in tqdm(range(n_row)):
        # Sort the users according to similarity score
        # Used later if high number of users that rated a movie
        a = [(sim_matrix[i, j], j) for j in range(n_row)]
        a.sort(key=lambda iii: iii[0], reverse=True)
        for j in range(n_col):
            if r[i, j] != 0:  # skip if a rating already exists
                continue
            if len(vertical[j][0]) == 0:  # In case no one purchased this item
                continue  # Useless to try to compute it

            if len(vertical[j][0]) <= k:  # every element of vertical[j] will be used
                kNN = [(sim_matrix[i, zz], zz) for zz in vertical[j][0]]
            elif len(vertical[j][0]) < threshold:  # few users rated this item: search within that set
                kNN = []
                # Simple heap search
                for client in vertical[j][0]:
                    sim = sim_matrix[i, client]
                    if len(kNN) < k:
                        heapq.heappush(kNN, (sim, client))
                    elif len(kNN
                             ) >= k and kNN[0][0] < sim:  # Full kNN and update
                        heapq.heapreplace(kNN, (sim, client))
            else:  # Search first in the most similar users, those who rated this item
                kNN = []
                for (sim, other) in a:
                    if r[other, j] > 0.0:
                        kNN.append((sim, other))
                    if len(kNN) == k:
                        break

            # We have here the kNN of user i (if at least k)
            pred = 0.0
            den = 0.0
            for sim, client in kNN:
                pred += sim * (r[client, j] -
                               means[client]) / standard_deviation[client]
                den += abs(sim)
            if den != 0:  # 0 similarity: could happen
                pred /= den

                r_hat[i, j] = standard_deviation[i] * pred + means[i]

    return r_hat
Code example #33
print(nums)
nums = [2, 3, 5, 1, 54, 23, 132]
heapq.heapify(nums)  # convert the list into a heap in place (nums is modified)
print(nums)
print([heapq.heappop(nums) for _ in range(len(nums))])  # heappop in a loop yields the elements in sorted order
print()

print('Merge multiple sorted sequences into one sorted sequence; returns an iterator of values: nums=heapq.merge(nums1,nums2)')
nums1 = [2, 3, 5, 1, 54, 23, 132]
nums1 = sorted(nums1)
print('nums1', nums1)
nums2 = [22, 23, 25, 21, 254, 223, 2132]
nums2 = sorted(nums2)
print('nums2', nums2)
nums = heapq.merge(nums1, nums2)  # an iterator over the values
print('nums', list(nums))
print()

print('Remove the smallest element from the heap and push the new element 23: heapq.heapreplace()')
nums = [1, 2, 4, 5, 3]
print('nums', nums)
heapq.heapreplace(nums, 23)
print([heapq.heappop(nums) for _ in range(len(nums))])
print()

print('The k largest/smallest values: heapq.nlargest(3, nums)/heapq.nsmallest(3, nums)')
nums = [1, 3, 4, 5, 2, 9]
print(nums)
print(heapq.nlargest(3, nums))
print(heapq.nsmallest(3, nums))
Code example #34
File: utils.py  Project: victorvg17/rl-uni-project
 def push(self, item, priority=0):
     # The monotonically increasing counter breaks ties between equal
     # priorities, so the items themselves are never compared.
     count = next(self._counter)
     if len(self._q) < self._max_size:
         heappush(self._q, [priority, count, item])
     else:
         # The bounded queue is full: replace the entry at the top of the heap.
         heapreplace(self._q, [priority, count, item])
Code example #35
def get_top_words(query, collection):
    #get_stream(QUERY, collection)
    client = py.MongoClient('localhost', 27017)

    # NOTE: probably will need to change these for your names
    db = client.NGRAM

    if collection == 'American':
        col = db.American_1gram  # american english
    elif collection == 'English':
        col = db.English_1gram  # british english
    print(col)
    print("Collection: ", collection)

    # to check connection to db
    print('Query: ', query)
    item = col.find_one({"ngram": {'$regex': query}})
    print("Checking Connection: ", item)

    if item == None:
        print("Connection unsuccessful, aborting")
        return
    print('*' * 50)

    filehandler = open(
        "output/COUNTMIN_MODEL_" + collection + "_" + query + ".pkl", 'rb')
    counters = pickle.load(filehandler)
    filehandler = open(
        "output/COUNTMIN_SALT_" + collection + "_" + query + ".pkl", 'rb')
    salt = pickle.load(filehandler)
    cms = CountMinSketch(10, 2000, counters, salt)
    print('getting top frequency words')
    # query all documents to find the top frequency words
    seen = set()
    top_freq_heap = []
    for doc in col.find():
        if not isinstance(doc['ngram'], str):
            continue
        word = doc['ngram'].lower()
        if word in seen:
            continue
        seen.add(word)
        freq = cms.query(word)
        if len(top_freq_heap) < n_top_words:
            heapq.heappush(top_freq_heap, [freq, word])
        elif freq > top_freq_heap[0][0]:
            heapq.heapreplace(top_freq_heap, [freq, word])
    top_freq_heap.sort()
    top_freq_heap = top_freq_heap[::-1]
    top_freq_df = pd.DataFrame(top_freq_heap, columns=['count', 'label'])
    print(top_freq_df)

    if not os.path.exists("output"):  # if output dir doesnt exists, creates it
        os.makedirs("output")

    # TODO probably need to update the replacing
    with open("output/COUNTMIN_TOP_FREQ_" + collection + "_" + query + ".pkl",
              "wb") as fp:  #Pickling
        obj = {
            'L': top_freq_df['label'].to_list(),
            'C': top_freq_df['count'].to_list()
        }
        pickle.dump(obj, fp)
Code example #36
            heappop(self.nums)
        return self.nums[0]  # return the minimum




from heapq import *
class KthLargest:

    def __init__(self, k: int, nums: List[int]):
        self.nums = nums
        heapify(self.nums)
        self.k = k
        while len(self.nums) > self.k:
            heappop(self.nums)
        
        

    def add(self, val: int) -> int:
        if len(self.nums) < self.k:
            heappush(self.nums, val)
        else:
            heappushpop(self.nums, val)
        return self.nums[0]
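
An illustrative use of the KthLargest class above with k = 3 (the stream of values is made up):

kth = KthLargest(3, [4, 5, 8, 2])
print(kth.add(3))   # 4
print(kth.add(5))   # 5
print(kth.add(10))  # 5
print(kth.add(9))   # 8
print(kth.add(4))   # 8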

heapq.heapreplace(heap, item)
Pop and return the smallest element from the heap, then push the new item. The size of the heap does not change. If the heap is empty, an IndexError is raised.

This one-step operation is more efficient than a heappop() followed by a heappush(), and it is more appropriate when the heap must keep a fixed size: the pop/push combination always returns one element from the heap and replaces it with item.

The value returned may be larger than the item that was added. If that is not what you want, consider using heappushpop() instead, which returns the smaller of the heap's minimum and item, leaving the larger one on the heap.
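
A small illustration of the difference described above (the heap contents are arbitrary):

import heapq

h1 = [5, 7, 9]
print(heapq.heapreplace(h1, 2))   # 5 -- pops first, so the result can be larger than the pushed item
print(h1)                         # [2, 7, 9]

h2 = [5, 7, 9]
print(heapq.heappushpop(h2, 2))   # 2 -- pushes first, so it returns the smaller of item and the old minimum
print(h2)                         # [5, 7, 9]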
Code example #37
#Heaps in Python:
# A heap is a tree data structure in which each parent node is less than or equal to its child nodes. This is called a min heap.
# If each parent node is greater than or equal to its child nodes, it is called a max heap.

# Creating a heap: heaps are created with Python's built-in "heapq" module.
import heapq
H = [1, 14, 21, 22, 0, 54]

#To arrange elements we use heapify
heapq.heapify(H)
print(H)

#Inserting into heap
heapq.heappush(H, 9)
print(H)

#Removing from heap
#It always removes the smallest element, which is at index 0
heapq.heappop(H)
print(H)

#Replace an element
#It removes the smallest element of the heap and inserts the new element, restoring the heap order
heapq.heapreplace(H, 8)
print(H)
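
heapq only implements a min heap; the max-heap behaviour mentioned in the comments above is usually obtained by storing negated values, as several examples on this page also do. A small sketch using the same numbers:

import heapq

H = [1, 14, 21, 22, 0, 54]
max_heap = [-x for x in H]
heapq.heapify(max_heap)
print(-max_heap[0])               # 54, the current maximum
print(-heapq.heappop(max_heap))   # 54
print(-heapq.heappop(max_heap))   # 22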
Code example #38
File: data_learn.py  Project: menhuan/notes
print(counter)

# 堆操作
import heapq

data = [1, 13, 45, 21, 89, 31, 28, 44, 19, 99]
heapq.heapify(data)
print(data)
data = [1, 13, 45, 21, 89, 31, 28, 44, 19, 99]
heap = []
for item in data:
    heapq.heappush(heap, item)
print(heap)

print(heapq.heappop(heap))
print(heapq.heappushpop(heap, 56))
print(heap)
print(heapq.heapreplace(heap, 78))
print(heap)

data = [1, 13, 45, 21, 89, 31, 28, 44, 19, 99]
heapq.heapify(data)
print(data)
print(heapq.nlargest(1, heap))
print(heapq.nlargest(3, heap))
print(heapq.nsmallest(1, heap))
print(heapq.nsmallest(3, heap))

print(heapq.merge([1, 2, 3, 10], [78, 23, 99, 10], [5, 3, 0]))
print(list(heapq.merge([1, 2, 3, 10], [78, 23, 99, 10], [5, 3, 0])))
Code example #39
 def add(self, num):
     if self.min_heap[0] < num:
         heapq.heapreplace(self.min_heap, num)
     return self.min_heap[0]
Code example #40
# using heappop() to pop smallest element
hq.heappop(mylist)
print(mylist)  # [3, 5, 4, 7, 9]

hq.heappop(mylist)
print(mylist)  # [4, 5, 9, 7]

# heappushpop(heap, element): This function combines the functioning of both push and pop operations in one statement,
# increasing efficiency. Heap order is maintained after this operation.
hq.heappushpop(mylist, 8)
print(mylist)  # [5, 7, 9, 8]

# heapreplace(heap, ele) :- This function also inserts and pops an element in one statement, but it differs from the
# function above: here the element is first popped, then the new element is pushed, i.e. a value larger than the
# pushed value can be returned
hq.heapreplace(mylist, 11)
print(mylist)  # [7, 8, 9, 11]

#  nlargest(k, iterable, key = fun) :- This function is used to return the k largest elements from the iterable
# specified and satisfying the key if mentioned.

##################################################################################################################
# initializing list
mylist = [6, 7, 9, 4, 3, 5, 8, 10, 1]

# using heapify() to convert list into heap
hq.heapify(mylist)

# using nlargest to print 3 largest numbers
# prints 10, 9 and 8
print("The 3 largest numbers in list are : ", end="")
Code example #41
def _minmaxhash_add_ngrams(heap: list, heapmap: dict, maxsize: int, nsize: int,
                           subs, nsubs: int, hashbuffer, heaptop, extracthash,
                           make_elt, update_elt, replace, anynew,
                           minmax_op) -> int:
    """
    Process/add elements to the sketch (See warning below).

    This function is where most of time is spent when building a minhash or
    a maxhash.

    .. warning::

       If calling this method directly, updating the attribute
       `nvisited` is under your responsibility.

    :param heap: a heap (in a :class:`list`)
    :param heapmap: a :class:dict: with the content of the heap (for O(1)
        lookup of the content of the sketch)
    :param maxsize: maximum size of the heap
    :param nsize: size of ngrams
    :param subs: (sub-)sequence
    :param nsubs: number of hash values in the hashbuffer
    :param hashbuffer: buffer with hash values
    :param heaptop: hash value that is at the top of the heap
    :param extracthash: function extract the hash from an element
    :param make_elt: factory to make a new element
    :param update_elt: in-place update of an element
    :param replace: callback if replacing
    :param anynew: callback if new entry
    :param minmax_op: a pair that is expected to be either (1, `<`) if
        minhash or (-1, `>`) if maxhash.

    :return: new hash value for heaptop.
    """

    lheap = len(heap)
    sign, comparator = minmax_op

    for j in range(nsubs):
        h = hashbuffer[j]
        if h not in heapmap:
            if lheap < maxsize:
                elt = make_elt(sign * h, subs, j, nsize)
                # Add element to set and heap.
                heapmap[h] = elt
                heappush(heap, elt)

                heaptop = extracthash(heap[0])
                lheap += 1
                if anynew is not None:
                    anynew(h)
            elif comparator(h, heaptop):
                elt = make_elt(sign * h, subs, j, nsize)
                # Replace the maximum value in the heap.
                heapmap[h] = elt
                out = heapreplace(heap, elt)
                del (heapmap[sign * out[0]])
                # The negative of the hash is needed for MinHash.
                heaptop = sign * heap[0][0]
                if anynew is not None:
                    anynew(h)
        else:
            if update_elt is not None:
                elt = heapmap[h]
                update_elt(elt)
    return heaptop
Code example #42
def totalTime(input_array, number_of_items):
    # Greedily assign each item to the currently least-loaded of `number_of_items`
    # slots: heapreplace pops the smallest running total and pushes it back
    # increased by the item. The answer is the largest total at the end.
    result_array = [0] * number_of_items  # a list of zeros is already a valid heap
    for item in input_array:
        heapq.heapreplace(result_array, result_array[0] + item)
    return heapq.nlargest(1, result_array)[0]
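
For example (illustrative call, assuming heapq is imported): spreading the items [2, 3, 7, 1, 2] over 2 slots yields slot totals 6 and 9, so the function returns 9.

print(totalTime([2, 3, 7, 1, 2], 2))  # 9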
Code example #43
 def cb(w, acc):
     nonlocal hp
     if len(hp) == n and acc > hp[0][0]:
         heapq.heapreplace(hp, (acc, w))
     elif len(hp) < n:
         heapq.heappush(hp, (acc, w))
Code example #44
    def __iter__(self):
        """Iterate over records in input files.

    self._offsets is always correctly updated so that stopping iterations
    doesn't skip records and doesn't read the same record twice.
    """
        ctx = context.get()
        mapper_spec = ctx.mapreduce_spec.mapper
        shard_number = ctx.shard_state.shard_number
        filenames = mapper_spec.params[self.FILES_PARAM][shard_number]

        if len(filenames) != len(self._offsets):
            raise Exception("Files list and offsets do not match.")

        # Heap with (Key, Value, Index, reader) pairs.
        readers = []

        # Initialize heap
        for (i, filename) in enumerate(filenames):
            offset = self._offsets[i]
            reader = records.RecordsReader(files.BufferedFile(filename))
            reader.seek(offset)
            readers.append((None, None, i, reader))

        # Read records from heap and merge values with the same key.
        current_result = None
        while readers:
            (key, value, index, reader) = readers[0]

            if key is not None:
                if current_result and key != current_result[0]:
                    # New key encountered. Yield current key.
                    yield current_result
                if not current_result or key != current_result[0]:
                    current_result = (key, [])
                current_result[1].append(value)

            # Read next key/value from reader.
            try:
                self._offsets[index] = reader.tell()
                start_time = time.time()
                binary_record = reader.read()
                # update counters
                if context.get():
                    operation.counters.Increment(
                        input_readers.COUNTER_IO_READ_BYTES,
                        len(binary_record))(context.get())
                    operation.counters.Increment(
                        input_readers.COUNTER_IO_READ_MSEC,
                        int((time.time() - start_time) * 1000))(context.get())
                proto = file_service_pb.KeyValue()
                proto.ParseFromString(binary_record)
                # Put read data back into heap.
                heapq.heapreplace(readers,
                                  (proto.key(), proto.value(), index, reader))
            except EOFError:
                heapq.heappop(readers)

        # Yield leftovers.
        if current_result:
            yield current_result
Code example #45
 def add(self, val):
     if len(self.pool) < self.k:
         heapq.heappush(self.pool, val)
     elif val > self.pool[0]:
         heapq.heapreplace(self.pool, val)  # heapreplace pops the smallest first, then pushes the new element (the push re-heapifies)
     return self.pool[0]  # the heap top is the kth largest element, because nums' length >= k-1, so the first add brings the pool to exactly k elements
Code example #46
'''
Write a Python program to delete the smallest element from the given Heap and then inserts a new item.
'''
import heapq
l = [4, 3, 6, 2, 1, 6, 7, 4, 10, 93, 21, 34]
heapq.heapify(l)
heapq.heapreplace(l, 0)
print(l)
Code example #47
def get_predicted_sentence(args,
                           input_sentence,
                           vocab,
                           rev_vocab,
                           model,
                           sess,
                           debug=False,
                           return_raw=False):
    def model_step(enc_inp, dec_inp, dptr, target_weights, bucket_id):
        _, _, logits = model.step(sess,
                                  enc_inp,
                                  dec_inp,
                                  target_weights,
                                  bucket_id,
                                  forward_only=True)
        prob = softmax(logits[dptr][0])
        # print("model_step @ %s" % (datetime.now()))
        return prob

    def greedy_dec(output_logits, rev_vocab):
        selected_token_ids = [
            int(np.argmax(logit, axis=1)) for logit in output_logits
        ]
        if data_utils.EOS_ID in selected_token_ids:
            eos = selected_token_ids.index(data_utils.EOS_ID)
            selected_token_ids = selected_token_ids[:eos]
        output_sentence = ' '.join(
            [dict_lookup(rev_vocab, t) for t in selected_token_ids])
        return output_sentence

    input_token_ids = data_utils.sentence_to_token_ids(input_sentence, vocab)

    # Which bucket does it belong to?
    bucket_id = min([
        b for b in range(len(args.buckets))
        if args.buckets[b][0] > len(input_token_ids)
    ])
    outputs = []
    feed_data = {bucket_id: [(input_token_ids, outputs)]}

    # Get a 1-element batch to feed the sentence to the model.
    encoder_inputs, decoder_inputs, target_weights = model.get_batch(
        feed_data, bucket_id)
    if debug:
        print("\n[get_batch]\n", encoder_inputs, decoder_inputs,
              target_weights)

    ### Original greedy decoding
    if args.beam_size == 1:
        _, _, output_logits = model.step(sess,
                                         encoder_inputs,
                                         decoder_inputs,
                                         target_weights,
                                         bucket_id,
                                         forward_only=True)
        return [{"dec_inp": greedy_dec(output_logits, rev_vocab), 'prob': 1}]

    # Get output logits for the sentence.
    beams, new_beams, results = [(1, 0, {
        'eos': 0,
        'dec_inp': decoder_inputs,
        'prob': 1,
        'prob_ts': 1,
        'prob_t': 1
    })], [], []  # initialize beams as (log_prob, empty_string, eos)
    dummy_encoder_inputs = [
        np.array([data_utils.PAD_ID]) for _ in range(len(encoder_inputs))
    ]

    for dptr in range(len(decoder_inputs) - 1):
        if dptr > 0:
            target_weights[dptr] = [1.]
            beams, new_beams = new_beams[:args.beam_size], []
        if debug: print("=====[beams]=====", beams)
        heapq.heapify(beams)  # since we will remove something
        for prob, _, cand in beams:
            if cand['eos']:
                results += [(prob, 0, cand)]
                continue

            # normal seq2seq
            if debug:
                print(
                    cand['prob'], " ".join(
                        [dict_lookup(rev_vocab, w) for w in cand['dec_inp']]))

            all_prob_ts = model_step(encoder_inputs, cand['dec_inp'], dptr,
                                     target_weights, bucket_id)
            if args.antilm:
                # anti-lm
                all_prob_t = model_step(dummy_encoder_inputs, cand['dec_inp'],
                                        dptr, target_weights, bucket_id)
                # adjusted probability
                all_prob = all_prob_ts - args.antilm * all_prob_t  #+ args.n_bonus * dptr + random() * 1e-50
            else:
                all_prob_t = [0] * len(all_prob_ts)
                all_prob = all_prob_ts

            # suppress copy-cat (respond the same as input)
            if dptr < len(input_token_ids):
                all_prob[input_token_ids[dptr]] = all_prob[
                    input_token_ids[dptr]] * 0.01

            # for debug use
            if return_raw: return all_prob, all_prob_ts, all_prob_t

            # beam search
            for c in np.argsort(all_prob)[::-1][:args.beam_size]:
                new_cand = {
                    'eos': (c == data_utils.EOS_ID),
                    'dec_inp': [(np.array([c]) if i == (dptr + 1) else k)
                                for i, k in enumerate(cand['dec_inp'])],
                    'prob_ts':
                    cand['prob_ts'] * all_prob_ts[c],
                    'prob_t':
                    cand['prob_t'] * all_prob_t[c],
                    'prob':
                    cand['prob'] * all_prob[c],
                }
                # random tie-breaker so heapq never has to compare the candidate dicts
                new_cand = (new_cand['prob'], random(), new_cand)

                try:
                    if (len(new_beams) < args.beam_size):
                        heapq.heappush(new_beams, new_cand)
                    elif (new_cand[0] > new_beams[0][0]):
                        heapq.heapreplace(new_beams, new_cand)
                except Exception as e:
                    print("[Error]", e)
                    print("-----[new_beams]-----\n", new_beams)
                    print("-----[new_cand]-----\n", new_cand)

    results += new_beams  # flush last cands

    # post-process results
    res_cands = []
    for prob, _, cand in sorted(results, reverse=True):
        cand['dec_inp'] = " ".join(
            [dict_lookup(rev_vocab, w) for w in cand['dec_inp']])
        res_cands.append(cand)
    return res_cands[:args.beam_size]
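The pruning step above (heappush while the beam is not yet full, then heapreplace whenever a higher-probability candidate arrives) is the generic bounded top-k pattern on a min-heap. Below is a minimal standalone sketch of just that pattern, with illustrative scores and payloads (prune_beam and its arguments are not part of the original code):

import heapq
from random import random

def prune_beam(candidates, beam_size):
    """Keep only the beam_size highest-scoring (score, payload) pairs."""
    beam = []
    for score, payload in candidates:
        # random tie-breaker, as above, so heapq never compares payloads
        entry = (score, random(), payload)
        if len(beam) < beam_size:
            heapq.heappush(beam, entry)
        elif entry[0] > beam[0][0]:
            heapq.heapreplace(beam, entry)  # evict the current worst
    return [(s, p) for s, _, p in sorted(beam, reverse=True)]

print(prune_beam([(0.2, "a"), (0.9, "b"), (0.5, "c"), (0.7, "d")], beam_size=2))
# -> [(0.9, 'b'), (0.7, 'd')]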
Code example #48
0
 def add(self, val: int) -> int:
     if len(self.pool) < self.k:
         heapq.heappush(self.pool, val)
     elif val > self.pool[0]:
         heapq.heapreplace(self.pool, val)
     return self.pool[0]
Code example #49
0
File: 4196433.py Project: qifanyyy/CLCDSA
import heapq

# Track values waiting for a duplicate in s; once a value is seen twice,
# push it into q, a 2-element min-heap holding the largest paired values.
N = int(input())
s = set()
q = [0, 0]
for a in map(int, input().split()):
    if a > q[0]:
        try:
            s.remove(a)              # a was already waiting: it now forms a pair
            heapq.heapreplace(q, a)  # replace the smaller of the two kept values
        except KeyError:
            s.add(a)                 # remember a until its duplicate appears
print(q[0] * q[1])  # product of the two largest paired values
Code example #50
0
def CELF_improved(k, seedset):
    '''
    An improved version of the traditional CELF algorithm;
    it runs faster than traditional CELF.
    :param k: number of seeds to select
    :param seedset: candidate seed set produced by the heuristic
    :return: selected seed set
    '''
    global p, q_in, q_out, final_seed
    Rs = {1000: 10000}
    nodeHeap = []
    preSpread = 0
    for node in seedset:
        for qin in q_in:
            qin.put(False)
            qin.put(1000 / 7)
            qin.put({node})
            qin.put(preSpread)
        result = []
        for qout in q_out:
            result.append(qout.get(True))
        high = sum(result) / len(result)
        nodeHeap.append((-high, high, node, -1, 100))
    heapq.heapify(nodeHeap)

    for i1 in range(k):

        while nodeHeap[0][3] != i1 or nodeHeap[0][4] != 10000:
            maxOne = nodeHeap[0]
            newSeed = final_seed.copy()
            newSeed.add(maxOne[2])
            if maxOne[3] == i1:
                thisR = Rs[maxOne[4]]
            else:
                thisR = 1000

            if thisR == 10000:
                for qin in q_in:
                    qin.put(True)
                    qin.put(10000 / 7)
                    qin.put(newSeed)
                    qin.put(preSpread)
                result = []
                for qout in q_out:
                    result.append(qout.get(True))
                delta = sum(result) / len(result)
                heapq.heapreplace(nodeHeap,
                                  (-delta, delta, maxOne[2], i1, thisR))
            else:
                for qin in q_in:
                    qin.put(False)
                    qin.put(thisR / 7)
                    qin.put(newSeed)
                    qin.put(preSpread)
                result = []
                for qout in q_out:
                    result.append(qout.get(True))
                high = sum(result) / len(result)
                heapq.heapreplace(nodeHeap,
                                  (-high, high, maxOne[2], i1, thisR))

        winner = heapq.heappop(nodeHeap)
        preSpread = winner[1] + preSpread
        final_seed.add(winner[2])
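The while-loop above is CELF's lazy-forward re-evaluation: a cached marginal gain is an upper bound on the current gain (by submodularity), so only the node at the top of the heap ever needs to be re-estimated. Below is a minimal single-process sketch of that idea, where spread() stands in for the Monte-Carlo estimate that the code above performs through the q_in/q_out worker queues (spread, celf_lazy_select and their arguments are illustrative, not the project's API):

import heapq

def celf_lazy_select(nodes, spread, k):
    """Greedy seed selection with CELF's lazy re-evaluation."""
    seeds, base = set(), 0.0
    # heap entries: (-gain, node, round in which the gain was computed)
    heap = [(-spread({v}), v, 1) for v in nodes]
    heapq.heapify(heap)
    for it in range(1, k + 1):
        # re-evaluate only the top entry until its cached gain is current
        while heap[0][2] != it:
            _, v, _ = heap[0]
            gain = spread(seeds | {v}) - base
            heapq.heapreplace(heap, (-gain, v, it))
        neg_gain, v, _ = heapq.heappop(heap)
        seeds.add(v)
        base -= neg_gain
    return seeds

# illustrative toy spread function with diminishing returns
print(celf_lazy_select(range(5), lambda s: len(s) ** 0.5, 2))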
Code example #51
0
 def replace(self, item):
     self._validate_push(item)
     return heapq.heapreplace(self.heap, item)
Code example #52
0
"""

Replacing in a Heap
The heapreplace function always removes the smallest element of the heap and inserts
 the new incoming element at some place not fixed by any order.



"""

import heapq

H = [21, 1, 45, 78, 3, 5]

# Create the heap
heapq.heapify(H)
print(H)

# Replace an element
heapq.heapreplace(H, 6)
print(H)
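One detail worth noting: because heapreplace pops before it pushes, it can return a value larger than the item being inserted, whereas heappushpop pushes first and never does. A small contrast with illustrative values:

import heapq

A = [5, 9, 12]
B = [5, 9, 12]

# heapreplace: pop the current minimum, then push the new item
print(heapq.heapreplace(A, 3))   # -> 5 (larger than the 3 we inserted)
print(A)                         # -> [3, 9, 12]

# heappushpop: push first, then pop, so the result is never larger than the new item
print(heapq.heappushpop(B, 3))   # -> 3 (B is left unchanged)
print(B)                         # -> [5, 9, 12]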
Code example #53
0
 def add(self, val):
     if len(self.nums) < self.k:
         heappush(self.nums, val)
     elif val > self.nums[0]:
         heapreplace(self.nums, val)
     return self.nums[0]
Code example #54
0
import heapq

# sample data so the snippet is runnable on its own
lst = [5, 7, 9, 1, 3]
heapq.heapify(lst)

# insert an element into the heap
heapq.heappush(lst, 10)
print('Heap Push: ', lst)

# pop the min element from the heap and return it
# if the heap is empty, an IndexError is raised
print('Heap pop:', heapq.heappop(lst))
print('Heap after pop: ', lst)

# heappushpop inserts an element, then pops and returns the min element
# this runs faster than a separate push followed by a pop
print("Pop and push: ", heapq.heappushpop(lst, 12))
print("Heap: ", lst)

# heapreplace pops the min element first, then pushes the new one
# raises IndexError if the heap is empty
print("Heap replace: ", heapq.heapreplace(lst, 0))
print("Heap: ", lst)

# n largest elements, in sorted order
# if n is larger than the heap size, no error is raised
print("3 largest Heap element: ", heapq.nlargest(3, lst))

# n smallest elements, in sorted order
# if n is larger than the heap size, no error is raised
print("3 smallest elements: ", heapq.nsmallest(3, lst))

# number of elements in the heap
print(len(lst))
print(lst)
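nlargest and nsmallest also take an optional key function, which is useful when the heap holds records instead of plain numbers; a short illustrative example (the record fields below are made up):

import heapq

records = [
    {"name": "a", "score": 7},
    {"name": "b", "score": 3},
    {"name": "c", "score": 9},
]

# select by a field rather than by the natural ordering of the items
top2 = heapq.nlargest(2, records, key=lambda r: r["score"])
print([r["name"] for r in top2])  # -> ['c', 'a']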
Code example #55
0
      )  # -> Value10    //     The ChainMap will point to updated dict

### HeapQueue
###
# Creating a HeapQueue
import heapq

list1 = [5, 7, 9, 1, 3]
heapq.heapify(list1)  #Convert list1 into a heap, in place
heapq.heappush(list1, 4)  #Push the element 4 onto the heap
print(
    list(list1)
)  # -> [1, 3, 4, 7, 5, 9]             // A heap is just a list, so it can be printed directly
heapq.heappop(list1)  #Return and remove the smallest element
heapq.heappushpop(list1, 2)  #Push and pop in a single call
heapq.heapreplace(list1, 2)  #Pop the smallest first, then push 2
heapq.nlargest(3, list1)  #Return the 3 largest elements
heapq.nsmallest(3, list1)  #Return the 3 smallest elements

### UserDict
###
# Creating a UserDict
from collections import UserDict

dict1 = {'Key1': 'Value1', 'Key2': 'Value2', 'Key3': 5}
user_dict = UserDict(dict1)
user_dict.data  #Access the dict1 content


## Creating a dict class with modified behavior -> Deletion not allowed
class MyDict(UserDict):
    # minimal completion of the truncated snippet: deleting keys is not allowed
    def __delitem__(self, key):
        raise RuntimeError("Deletion not allowed in MyDict")
Code example #56
0
    def find_maximums(self, model, num, exclusive):
        tic = time.time()
        temp, n_iter, early_stop, log_interval = (
            self.temp,
            self.n_iter,
            self.early_stop,
            self.log_interval,
        )

        if self.persistent and self.points is not None:
            points = self.points
        else:
            points = np.array(sample_ints(0, len(self.task.config_space), self.parallel_size))

        scores = model.predict(points)

        # build heap and insert initial points
        heap_items = [(float("-inf"), -1 - i) for i in range(num)]
        heapq.heapify(heap_items)
        in_heap = set(exclusive)
        in_heap.update([x[1] for x in heap_items])

        for s, p in zip(scores, points):
            if s > heap_items[0][0] and p not in in_heap:
                pop = heapq.heapreplace(heap_items, (s, p))
                in_heap.remove(pop[1])
                in_heap.add(p)

        k = 0
        k_last_modify = 0

        if isinstance(temp, (tuple, list, np.ndarray)):
            t = temp[0]
            cool = 1.0 * (temp[0] - temp[1]) / (n_iter + 1)
        else:
            t = temp
            cool = 0

        while k < n_iter and k < k_last_modify + early_stop:
            new_points = np.empty_like(points)
            for i, p in enumerate(points):
                new_points[i] = random_walk(p, self.dims)

            new_scores = model.predict(new_points)

            ac_prob = np.exp(np.minimum((new_scores - scores) / (t + 1e-5), 1))
            ac_index = np.random.random(len(ac_prob)) < ac_prob

            points[ac_index] = new_points[ac_index]
            scores[ac_index] = new_scores[ac_index]

            for s, p in zip(new_scores, new_points):
                if s > heap_items[0][0] and p not in in_heap:
                    pop = heapq.heapreplace(heap_items, (s, p))
                    in_heap.remove(pop[1])
                    in_heap.add(p)
                    k_last_modify = k

            k += 1
            t -= cool

            if log_interval and k % log_interval == 0:
                t_str = "%.2f" % t
                logger.debug(
                    "SA iter: %d\tlast_update: %d\tmax-0: %.2f\tmax-1: %.2f\ttemp: %s\t"
                    "elapsed: %.2f",
                    k,
                    k_last_modify,
                    heap_items[0][0],
                    np.max([v for v, _ in heap_items]),
                    t_str,
                    time.time() - tic,
                )

        heap_items.sort(key=lambda item: -item[0])
        heap_items = [x for x in heap_items if x[0] >= 0]
        logger.debug(
            "SA iter: %d\tlast_update: %d\telapsed: %.2f", k, k_last_modify, time.time() - tic
        )
        logger.debug("SA Maximums: %s", heap_items)

        if self.persistent:
            self.points = points

        return [x[1] for x in heap_items]
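The acceptance step in the loop above is the usual simulated-annealing (Metropolis) rule for a maximization problem: better points are always kept, worse points are kept with probability exp((new - old) / t), which shrinks as the temperature cools. A scalar sketch of just that rule (the scores and temperatures are illustrative):

import numpy as np

def accept(new_score, old_score, t):
    """Metropolis acceptance for maximization, matching the vectorized rule above."""
    prob = np.exp(np.minimum((new_score - old_score) / (t + 1e-5), 1))
    return np.random.random() < prob

print(accept(0.5, 0.4, t=1.0))    # always True: improving moves are accepted
print(accept(0.4, 0.5, t=1.0))    # True with probability exp(-0.1) ~= 0.90
print(accept(0.4, 0.5, t=0.01))   # almost always False once the temperature is low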
Code example #57
0
 def append(self, log_probability, new_state):
     if len(self.beam) < self.beam_size:
         heapq.heappush(self.beam, (log_probability, new_state))
     else:
         # the beam is full: heapreplace evicts the current lowest-probability entry
         heapq.heapreplace(self.beam, (log_probability, new_state))
Code example #58
0
File: genetic.py Project: flynn162/gavm
def population_pop_push(heap, individual):
    """Put gene in heap, pushing out the one with the lowest fitness"""
    if individual[0] > heap[0][0]:
        heapq.heapreplace(heap, individual)
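A quick usage note: population_pop_push assumes the heap is already at its fixed size, with the lowest-fitness individual at index 0, so a weaker newcomer is simply discarded. An illustrative call sequence (the (fitness, genome) tuples are made up):

import heapq

population = [(0.2, "gene-a"), (0.5, "gene-b"), (0.9, "gene-c")]
heapq.heapify(population)  # lowest fitness at population[0]

population_pop_push(population, (0.7, "gene-d"))  # evicts (0.2, "gene-a")
population_pop_push(population, (0.1, "gene-e"))  # weaker than the current worst: ignored
print(sorted(population, reverse=True))  # -> [(0.9, 'gene-c'), (0.7, 'gene-d'), (0.5, 'gene-b')]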
Code example #59
0
import heapq as h

arr = [23, 45, 43, 56, 13, 69, 52, 64, 31, 45]

h.heapify(arr)
print(arr)

h.heappush(arr, 44)
print(arr)

h.heappop(arr)
print(arr)

h.heapreplace(arr, 104)
print(arr)
Code example #60
-1
File: identify.py Project: huxiaoqian/project
 def Push(self, elem):
     if len(self.data) < self.k:
         heapq.heappush(self.data, elem)
     else:
         topk_small = self.data[0][0]
         if elem[0] > topk_small:
             heapq.heapreplace(self.data, elem)