def _merge_iters(self, iterlist):
    """Merge-sort the terms coming from several term iterators.

    Each iterator in ``iterlist`` (e.g. ``IndexReader.__iter__()`` or
    ``IndexReader.iter_from()``) must yield ``(fnum, text, docfreq,
    termcount)`` tuples in ascending ``(fnum, text)`` order.  Entries that
    share the same ``(fnum, text)`` across iterators are combined by summing
    their document frequencies and term counts.

    :param iterlist: a sequence of term iterators.
    :returns: a generator of ``(fnum, text, docfreq, termcount)`` tuples in
        ascending ``(fnum, text)`` order.
    """
    # Seed the heap with the head term of every non-empty iterator.  The
    # iterator's position is stored ahead of the iterator itself so that two
    # otherwise identical entries never fall through to comparing iterator
    # objects (which raises TypeError on Python 3).
    current = []
    for idx, it in enumerate(iterlist):
        try:
            fnum, text, docfreq, termcount = next(it)
        except StopIteration:
            # An already exhausted iterator contributes nothing.
            continue
        current.append((fnum, text, docfreq, termcount, idx, it))
    heapify(current)

    while current:
        # Peek at the smallest term; every entry equal to it is folded in.
        fnum, text = current[0][:2]
        docfreq = 0
        termcount = 0
        while current and current[0][0] == fnum and current[0][1] == text:
            _, _, df, tc, idx, it = current[0]
            docfreq += df
            termcount += tc
            try:
                fn, t, df, tc = next(it)
            except StopIteration:
                # This iterator is finished: drop its slot from the heap.
                heappop(current)
            else:
                heapreplace(current, (fn, t, df, tc, idx, it))

        # Yield the term with the summed doc frequency and term count.
        yield (fnum, text, docfreq, termcount)
def add(self, item, priority=None):
    """Offer ``item`` to the bounded min-heap stored in ``self.lst``.

    Entries are ``(priority, item)`` tuples; when ``priority`` is omitted the
    item is used as its own priority.  While the heap holds fewer than
    ``self.capacity`` entries the item is always stored; once full, it only
    displaces the current minimum when its priority is strictly greater.
    """
    key = item if priority is None else priority
    heap = self.lst
    if len(heap) >= self.capacity:
        # Full: the root is the weakest entry kept so far.
        if key > heap[0][0]:
            heapq.heapreplace(heap, (key, item))
        return
    heapq.heappush(heap, (key, item))
def scored_groups(self, scores_and_docnums, limit=None):
    """Group (score, docnum) pairs by key, best scores first.

    Every docnum is mapped to a group key with ``self.key_for(docnum)``.

    :param scores_and_docnums: an iterable of ``(score, docnum)`` pairs.
    :param limit: if given (and non-zero), each group keeps only the
        ``limit`` highest-scoring items.
    :returns: a ``defaultdict`` mapping each key to a list of
        ``(0 - score, docnum)`` pairs sorted ascending, i.e. the
        highest-scoring document comes first.
    """
    groups = defaultdict(list)
    key_for = self.key_for

    for score, docnum in scores_and_docnums:
        ls = groups[key_for(docnum)]
        item = (score, docnum)
        if not limit:
            ls.append(item)
        elif len(ls) < limit:
            heappush(ls, item)
        elif item > ls[0]:
            # The heap is ordered on the *real* score, so its root is the
            # weakest item kept; heaping the negated score (as before) would
            # evict the best item instead of the worst.
            heapreplace(ls, item)

    # Convert to the negated-score form so a plain ascending sort puts the
    # best score first.  The lists are rewritten in place.
    for ls in groups.values():
        ls[:] = sorted((0 - score, docnum) for score, docnum in ls)
    return groups
def _search(self, nearest, node, q):
    """Recursively visit ``node`` and its children, updating ``nearest``.

    ``nearest`` is a heap of ``(-distance, index)`` pairs whose root holds the
    largest distance currently kept.  The distance from ``q`` to the node's
    pivot ``self.X[node.p]`` replaces the root when it is smaller.  A child is
    visited only when that distance lies strictly inside the child's
    ``[lower_bnd, upper_bnd]`` interval widened by the current radius.  When
    both children exist, the one on the query's side of the midpoint between
    the two intervals is visited first.
    """
    if not node:
        return

    radius = -nearest[0][0]
    dist = self.distance(q, self.X[node.p])
    self.n_traversed += 1
    if dist < radius:
        radius = dist
        heapq.heapreplace(nearest, (-dist, node.p))

    first, second = node.left, node.right
    if first and second:
        split = (first.upper_bnd + second.lower_bnd)/2
        if not dist < split:
            first, second = second, first
        order = (first, second)
    elif first:
        order = (first,)
    elif second:
        order = (second,)
    else:
        order = ()

    # The radius is deliberately not refreshed between siblings: the bounds
    # test for the second child uses the value computed above.
    for child in order:
        if child.lower_bnd - radius < dist < child.upper_bnd + radius:
            self._search(nearest, child, q)
def get_suggestions(user, user_vector, svd, already_mapped, top_n=5):
    """Return the ``top_n`` best-scoring repos for ``user`` as one text line.

    Scores come from ``svd * user_vector``, which must produce a mapping-like
    object of ``repo -> value``.  Repos with a falsy value, and repos for
    which ``(user, repo)`` is in ``already_mapped``, are skipped.

    :param user: user identifier, emitted verbatim at the start of the line.
    :param user_vector: right-hand operand of the ``svd * user_vector`` product.
    :param svd: left-hand operand of that product.
    :param already_mapped: container of ``(user, repo)`` pairs to exclude.
    :param top_n: maximum number of suggestions; ``<= 0`` yields none.
    :returns: ``"<user>:<repo>;<score>,..."`` with the best score first.  The
        first element of each repo key (``repo[0]``) is what gets printed.
    """
    repos = svd * user_vector
    # Python 2 mappings expose iteritems(); newer objects only have items().
    iter_pairs = getattr(repos, "iteritems", None) or repos.items

    heap = []
    for repo, value in iter_pairs():
        if not value:
            continue
        if (user, repo) in already_mapped:
            continue
        candidate = (value, repo)
        if len(heap) < top_n:
            heappush(heap, candidate)
        elif heap and candidate > heap[0]:
            # Heap is full: the root is the weakest suggestion kept so far.
            heapreplace(heap, candidate)

    heap.sort(reverse=True)
    return "%s:%s" % (user, ','.join("%s;%0.4f" % (x[1][0], x[0]) for x in heap))
def push(self, elem):
    """Offer ``elem`` to the bounded heap in ``self.data``.

    Values are stored negated, so the min-heap root is the negation of the
    largest original value kept.  At most ``self.k`` values are retained; a
    new value only displaces the root when its negation is strictly greater,
    which means the ``k`` smallest original values survive.
    """
    negated = -elem
    heap = self.data
    if len(heap) >= self.k:
        if negated > heap[0]:
            heapq.heapreplace(heap, negated)
        return
    heapq.heappush(heap, negated)
def _merge(*subsequences):
    """Lazily merge already-sorted subsequences into one sorted stream.

    Based on Python Cookbook recipe 19.14.  A heap holds one
    ``(current_value, order, iterator)`` triple per non-empty subsequence.
    ``order`` is the subsequence's position in the argument list; it breaks
    ties between equal values so iterator objects are never compared, and it
    makes equal values come out in argument order.

    :param subsequences: any number of iterables, each sorted ascending.
    :returns: a generator over all values in ascending order.
    """
    heap = []
    for order, subseq in enumerate(subsequences):
        iterator = iter(subseq)
        try:
            first = next(iterator)
        except StopIteration:
            # Empty subsequence: nothing to contribute.
            continue
        heap.append((first, order, iterator))
    heapq.heapify(heap)

    while heap:
        # Yield the lowest current value, then advance the iterator it came from.
        current_value, order, iterator = heap[0]
        yield current_value
        try:
            following = next(iterator)
        except StopIteration:
            # Subsequence exhausted: remove it from the queue.
            heapq.heappop(heap)
        else:
            heapq.heapreplace(heap, (following, order, iterator))
def hq_nlargest_impl(n, iterable):
    """Return the ``n`` largest elements of ``iterable``, largest first.

    ``iterable`` must support ``len()`` and indexing.  Small cases are
    short-circuited (``n == 0``, ``n == 1``, ``n >= len(iterable)``);
    otherwise a heap of ``(element, tag)`` pairs of size ``n`` is maintained,
    where the decreasing ``tag`` breaks ties between equal elements.
    """
    if n == 0:
        # A zero-iteration comprehension: produces an empty list without ever
        # evaluating iterable[0].  Presumably written this way so a compiler
        # can infer the element type - confirm before simplifying.
        return [iterable[0] for _ in range(0)]
    if n == 1:
        return [max(iterable)]
    if n >= len(iterable):
        return sorted(iterable)[::-1][:n]

    stream = iter(iterable)
    # Seed the heap with the first n elements, tagged 0, -1, -2, ...
    heap = [(value, tag) for tag, value in zip(range(0, -n, -1), stream)]
    hq.heapify(heap)

    smallest = heap[0][0]
    tag = -n
    for value in stream:
        if smallest < value:
            hq.heapreplace(heap, (value, tag))
            smallest = heap[0][0]
            tag -= 1

    heap.sort(reverse=True)
    return [value for (value, _tag) in heap]
def aes(strm, k=1):
    """Weighted reservoir sampling without replacement.

    See [Efraimidis et. al][1].  Every stream element gets the random key
    ``random() ** (1 / weight)`` and the ``k`` elements with the largest keys
    form the sample.

    :param strm: iterable of ``(element, weight)`` pairs; weights must be
        non-zero (a zero weight raises ZeroDivisionError).
    :param k: reservoir size.
    :returns: a generator over the sampled elements, largest key first.

    [1]: http://arxiv.org/pdf/1012.0256.pdf
    """
    rsv = []
    for n, (el, wi) in enumerate(strm):
        ki = random() ** (1. / wi)
        # The stream position breaks key ties so elements are never compared.
        entry = (ki, n, el)
        if len(rsv) < k:
            # Still filling the reservoir.
            heapq.heappush(rsv, entry)
        elif rsv and ki > rsv[0][0]:
            # The heap root is the smallest key (the threshold).  The previous
            # `len(rsv) > 1` guard meant a reservoir of size 1 was never
            # updated, so k=1 always returned the first element.
            heapq.heapreplace(rsv, entry)

    # Yield the k elements with the largest keys: this is the sample.
    for _key, _pos, el in heapq.nlargest(k, rsv):
        yield el
def suggest(self, text, limit=5, maxdist=2, prefix=0):
    """Return up to ``limit`` suggested words for ``text``, best first.

    :param text: the text to check. This word will **not** be added to the
        suggestions, even if it appears in the word graph.
    :param limit: only return up to this many suggestions. If there are not
        enough terms in the field within ``maxdist`` of the given word, the
        returned list will be shorter than this number.
    :param maxdist: the largest edit distance from the given word to look
        at. Values higher than 2 are not very effective or efficient.
    :param prefix: require suggestions to share a prefix of this length with
        the given word. This is often justifiable since most misspellings do
        not involve the first letter of the word. Using a prefix dramatically
        decreases the time it takes to generate the list of words.
    """
    best = []
    for candidate in self._suggestions(text, maxdist, prefix):
        # Higher scores (candidate[0]) are better, so the min-heap root is
        # always the weakest suggestion kept so far.
        if len(best) >= limit:
            if candidate > best[0]:
                heapreplace(best, candidate)
        else:
            heappush(best, candidate)

    # Best score first; ties are resolved by the word itself, ascending.
    ranked = sorted(best, key=lambda pair: (0 - pair[0], pair[1]))
    return [word for _, word in ranked]
def top_k(self, rules):
    """Fold ``rules`` into the bounded heap ``self.best``.

    Each rule object must expose a numeric ``inf`` attribute and be orderable
    (the heap and ``max`` compare rule objects directly).  Rules are skipped
    when they are already in ``self.best``, when ``inf`` is NaN, or - once
    the heap holds ``self.max_bests`` entries - when ``inf`` does not exceed
    the heap root's ``inf`` by more than 2% of the gap to the best ``inf``.

    :returns: how many rules set a new best ``inf`` while being processed.
    """
    improved = 0
    best = (max(self.best, key=lambda ro: ro.inf) or None) if self.best else None

    for ro in rules:
        if len(self.best) >= self.max_bests:
            floor = self.best[0].inf
            if ro.inf <= floor + (best.inf - floor) * 0.02:
                continue
        if ro in self.best:
            continue
        if math.isnan(ro.inf):
            continue

        if not best or ro.inf > best.inf:
            improved += 1
            _logger.debug(str(ro))

        if len(self.best) >= self.max_bests:
            heapq.heapreplace(self.best, ro)
        else:
            heapq.heappush(self.best, ro)

        # Track the overall leader; a falsy result falls back to `ro`.
        leader = max(best, ro) if best else None
        best = leader or ro

    return improved
def merge_scan_with_prefix(self, table, prefix, columns):
    """Yield totally ordered rows that start with ``prefix``.

    A generator based iterator. One scanner is opened per leading hex
    character of the salt (16 in total), and on each iteration the next row
    in order is taken from the pool of scanners.
    """
    # TODO: Need assertion that columns is array containing at least one string
    scanners = []
    for salt in '0123456789abcdef':
        salted_prefix = "%s%s" % (salt, prefix)
        scanner = self.client.scannerOpenWithPrefix(table, salted_prefix, columns)
        scanners.append(salted_scanner_iterable(self.logger, self.client, self._make_row_nice, salted_prefix, scanner))

    # Each queue entry is [row_tuple, scanner index, bound next()]. The index
    # keeps entries distinguishable; the bound method advances whichever
    # scanner delivered the polled row.
    pending = []
    for slot, scan in enumerate(scanners):
        try:
            advance = scan.next
            pending.append([advance(), slot, advance])
        except StopIteration:
            pass
    heapq.heapify(pending)

    while True:
        try:
            while True:
                entry = pending[0]
                row_tuple, _slot, advance = entry
                # row_tuple[1] is the actual nice row.
                yield row_tuple[1]
                entry[0] = advance()
                heapq.heapreplace(pending, entry)
        except StopIteration:
            # The scanner at the root ran dry: drop it and keep merging.
            heapq.heappop(pending)
        except IndexError:
            # pending[0] on an empty queue: every scanner is exhausted.
            return
def median(self, nums):
    """Return the running median after each prefix of ``nums``.

    For an even number of elements the lower of the two middle values is
    reported.  ``upper`` is a min-heap whose root is the current median;
    ``lower`` is a max-heap (stored as negated values) of everything below it.
    """
    count = len(nums)
    if count == 0:
        return []

    upper = [nums[0]]
    lower = []
    medians = [nums[0]]
    for i in range(1, count):
        value = nums[i]
        goes_up = value > upper[0]
        if i % 2 == 0:
            # Odd element count after this step: `lower` must grow by one.
            if goes_up:
                heappush(lower, -upper[0])
                heapreplace(upper, value)
            else:
                heappush(lower, -value)
        elif goes_up:
            # Even element count after this step: `upper` must grow by one.
            heappush(upper, value)
        else:
            heappush(upper, -heappushpop(lower, -value))
        medians.append(upper[0])
    return medians
def get_bloat(self):
    """Return the bloat percentage over all databases of the cluster.

    Runs ``pgq.BLOAT`` on every database except ``template0``, accumulates
    columns 4 (pages) and 5 (otta) of every row, logs the collected entries
    at debug level, and returns ``100*(pages - otta) / pages``, or 0 when
    fewer than 5000 pages were seen in total.
    """
    pages = 0
    otta = 0
    worst = []  # heap of (score, "db.col0.col1->col2") entries, at most 5
    per_db = self.query_eachdb(pgq.BLOAT, exclude=['template0'])

    for db in per_db.keys():  # loop through all databases
        for row in per_db[db]:  # and its tables
            pages += row[4]
            otta += row[5]
            if pages > 1000:
                # NOTE(review): `pages`/`otta` are running totals here, not
                # per-table values - confirm this is the intended score.
                score = 100 - 100*(pages-otta) / pages
                entry = (score, "%s.%s.%s->%s" % (db, row[0], row[1], row[2]))
                if len(worst) >= 5:
                    # NOTE(review): heapreplace evicts the smallest score
                    # unconditionally, whatever the new entry's score is.
                    heapq.heapreplace(worst, entry)
                else:
                    heapq.heappush(worst, entry)

    self.logger.debug("pages: %i, otta: %i" % (pages, otta))
    while worst:
        b = heapq.heappop(worst)
        self.logger.debug("bloatest: %s: %.2f%%" % (b[1], 100-b[0]))

    if pages < 5000:
        # Small clusters (the original comment says < 40 MB) are not serious.
        return 0
    return 100*(pages - otta) / pages
def matches(self, text):
    """Returns a list of possible matches for given source text.

    :type text: String
    :param text: The text that will be search for in the translation memory
    :rtype: list
    :return: a list of units with the source and target strings from the
             translation memory. If :attr:`self.addpercentage` is
             *True* (default) the match quality is given as a
             percentage in the notes.
    """
    # A fixed-size heap of (similarity, candidate) pairs; the placeholder
    # entries score 0.0 and are stripped again before returning.
    bestcandidates = [(0.0, None)] * self.MAX_CANDIDATES
    # We use self.MIN_SIMILARITY, but if we already know we have
    # max_candidates that are better, we can adjust min_similarity upwards
    # for speedup.
    min_similarity = self.MIN_SIMILARITY

    # We want to limit our search in self.candidates, so we want to ignore
    # all units with a source string that is too short or too long. We use
    # a binary search to find the shortest string, from where we start our
    # search in the candidates.

    # minimum source string length to be considered
    startlength = self.getstartlength(min_similarity, text)
    startindex = 0
    endindex = len(self.candidates.units)
    while startindex < endindex:
        mid = (startindex + endindex) // 2
        if sourcelen(self.candidates.units[mid]) < startlength:
            startindex = mid + 1
        else:
            endindex = mid

    # maximum source string length to be considered
    stoplength = self.getstoplength(min_similarity, text)
    lowestscore = 0

    for candidate in self.candidates.units[startindex:]:
        cmpstring = candidate.source
        if len(cmpstring) > stoplength:
            break
        similarity = self.comparer.similarity(text, cmpstring, min_similarity)
        if similarity < min_similarity:
            continue
        if similarity > lowestscore:
            heapq.heapreplace(bestcandidates, (similarity, candidate))
            lowestscore = bestcandidates[0][0]
            if lowestscore >= 100:
                break
            if min_similarity < lowestscore:
                min_similarity = lowestscore
                stoplength = self.getstoplength(min_similarity, text)

    # Remove the empty placeholders.  This must produce a real list:
    # filter() returns a lazy iterator on Python 3, which has no sort().
    bestcandidates = [item for item in bestcandidates if item[0] != 0]
    # Sort for use as a general list, and reverse so the best one is at
    # index 0.
    bestcandidates.sort(reverse=True)
    return self.buildunits(bestcandidates)
def logsample_weighted(options, args, fh):
    """Weighted reservoir sampling over the lines of ``fh``.

    Implements weighted random sampling with a reservoir (Efraimidis et
    al.), assuming integer weights.  Each line gets the random key
    ``random() ** (1 / w)``, where ``w`` is the integer in the 1-based column
    ``options.field`` (columns split on ``options.delimiter``).  Lines with
    ``w < 1`` are ignored.  The ``options.num_samples`` lines with the
    largest keys are kept.

    :param options: object with ``num_samples``, ``delimiter`` and ``field``.
    :param args: unused; kept for interface compatibility.
    :param fh: iterable of text lines.
    :returns: a generator of ``(key, stripped_line)`` pairs, in heap order.
    """
    N = options.num_samples
    delimiter = options.delimiter
    # NOTE: Convert to 0-based indexing since we expose as 1-based
    field = options.field - 1

    R = []
    for line in fh:
        w = int(line.split(delimiter)[field])
        if w < 1:
            continue
        k = random() ** (1. / w)
        if len(R) < N:
            heappush(R, (k, line))
        elif R and k > R[0][0]:
            # Compare against the *current* smallest key (the heap root).
            # The previous code compared against a minimum recorded while
            # filling and never refreshed it, so later lines could evict
            # records with larger keys than their own.
            heapreplace(R, (k, line))

    # Emit output
    for key, record in R:
        yield key, record.strip()
def _search(self, nearest, node, q, depth):
    """Recursively visit ``node`` and its children, updating ``nearest``.

    ``nearest`` is a heap of ``(-distance, index)`` pairs whose root holds the
    largest distance currently kept.  The Euclidean distance between ``q`` and
    the pivot row ``self.X[node.p]`` replaces the root when it is smaller.
    The splitting axis cycles with ``depth``.  A child is visited only when
    its lower-bound estimate does not exceed the current worst distance; when
    both children exist the one with the smaller estimate goes first.
    """
    if not node:
        return
    axis = depth % self.X.shape[1]

    # compare with pivot
    worst = -nearest[0][0]
    dist = np.sqrt(np.sum(np.power((q - self.X[node.p]).data, 2)))
    self.n_traversed += 1
    if dist < worst:
        worst = dist
        heapq.heapreplace(nearest, (-worst, node.p))

    # Estimate the minimum axis distance to each existing child.
    mu = self.X[node.p, axis]
    left_dist = q[0, axis] - mu + node.left_lower_bnd if node.left else None
    # NOTE(review): the right-hand estimate also reads `left_lower_bnd`;
    # this mirrors the original code - confirm whether that is intended.
    right_dist = mu - q[0, axis] + node.left_lower_bnd if node.right else None

    go_left = (left_dist, node.left)
    go_right = (right_dist, node.right)
    if node.left and node.right:
        plan = (go_left, go_right) if left_dist < right_dist else (go_right, go_left)
    elif node.left:
        plan = (go_left,)
    elif node.right:
        plan = (go_right,)
    else:
        plan = ()

    # `worst` is not refreshed between siblings, exactly as before.
    for bound, child in plan:
        if bound <= worst:
            self._search(nearest, child, q, depth+1)
def magic_loop(models_to_run, clfs, grid, X, y):
    '''
    Fit every requested model over its parameter grid and keep the ten best.

    Takes a list of models to use, two dictionaries of classifiers and
    parameters, and the data ``X``/``y``.  The data is split once (80/20,
    ``random_state=0``); every parameter combination is fitted, plotted and
    scored at the top 5 percent, and the ten highest ``precision`` values
    are kept.

    Returns ``(top, table)``: ``top`` is a heap-ordered list of ten
    ``(precision, clf)`` tuples (unused slots stay ``(0, " ")``) and
    ``table`` maps ``str(clf)`` to the score dictionary of every run that
    entered ``top``.
    '''
    table = {}
    top = [(0, " ") for _ in range(10)]
    heapq.heapify(top)
    k = 0.05

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=0)
    for index, clf in enumerate([clfs[x] for x in models_to_run]):
        for params in ParameterGrid(grid[models_to_run[index]]):
            try:
                clf.set_params(**params)
                print (clf)
                y_pred_probs = clf.fit(X_train, y_train).predict_proba(X_test)[:, 1]
                plot_precision_recall_n(y_test, y_pred_probs, clf)
                scores = scoring(k, y_test, y_pred_probs)
                lowest, _ = top[0]
                precision = scores['precision']
                if precision > lowest:
                    # NOTE(review): the same `clf` object is re-parameterised
                    # on every pass, so entries for one model share state; and
                    # equal precisions make the heap compare the second tuple
                    # element.  Both behaviours are inherited.
                    heapq.heapreplace(top, (precision, clf))
                    table[str(clf)] = scores
            except Exception as exc:
                # Best effort: one failing configuration must not abort the
                # sweep, but KeyboardInterrupt/SystemExit still propagate and
                # the cause is now reported.
                print ('Error:', exc)
                continue
    print (top)
    return top, table
def run(filename): '''create statistics on the json alias file''' with open(filename, 'rU') as f: aliases = json.load(f) count = {} vals = aliases.values() for val in vals: if val in count: count[val] = count[val] +1 else: count[val] = 1 h = [(0, " ")] * K heapq.heapify(h) for key, val in count.items(): replacement = (val, key) if replacement > min(h): heapq.heapreplace(h,replacement) print "There are", len(count.keys()), "unique businesses represented in the json \n" print "There are", len(aliases), "aliases in the json \n" print "The top 10 companies with the most aliases are: \n" h = list(h) h.sort(reverse=True) for i in h: print i[1], "had " , i[0], " aliases \n" print "Here are some the aliases \n" att = find_aliases(aliases,'AT&T') coke = find_aliases(aliases,'Coca-Cola Enterprises, Inc.') sachs = find_aliases(aliases,'The Goldman Sachs Group, Inc.') print(att, "\n") print(coke, "\n") print(sachs, "\n")
def imerge(*iterlist, **key):
    """Merge a sequence of sorted iterables.

    Returns pairs ``(value, index)`` where each value comes from
    ``iterlist[index]``, and the pairs are sorted if each of the iterators is
    sorted.  Equal values come out in argument order.

    Hint: use ``groupby(imerge(...), operator.itemgetter(0))`` to get the
    items one by one.

    :param iterlist: the sorted iterables to merge.
    :param key: the only accepted keyword argument; a one-argument function
        computing the sort key of each item (identity by default).
    :raises TypeError: if any other keyword argument is supplied (raised when
        iteration starts, since this is a generator).
    """
    # thanks
    # http://mail.python.org/pipermail/python-bugs-list/2005-August/029983.html
    # Comparing key.keys() to a list only works on Python 2; on Python 3 the
    # old check rejected every call.  A set difference works on both.
    if set(key) - {"key"}:
        raise TypeError("Excess keyword arguments for imerge")
    keyfunc = key.get("key", lambda x: x)

    # initialize the heap containing this tuple:
    # (inited, value, index, currentItem, iterator)
    # this automatically makes sure all iterators are initialized, then run,
    # and finally emptied
    heap = [(False, None, index, None, iter(iterator))
            for index, iterator in enumerate(iterlist)]

    while heap:
        inited, _sortkey, index, value, iterator = heap[0]
        if inited:
            yield value, index
        try:
            item = next(iterator)
        except StopIteration:
            heappop(heap)
        else:
            heapreplace(heap, (True, keyfunc(item), index, item, iterator))
def _merge_terms(self, iterlist):
    """Merge-sort terms coming from a list of term iterators.

    Each iterator must yield terms in ascending order.  A term that appears
    in several iterators is yielded only once.

    :param iterlist: a sequence of term iterators; empty ones are skipped.
    :returns: a generator over the distinct terms in ascending order.
    """
    iterators = list(iterlist)

    # Fill the heap with the head term of each non-empty iterator.  The
    # iterator's position doubles as a lookup key and as a tie-breaker, so
    # iterator objects themselves are never compared.
    current = []
    for pos, it in enumerate(iterators):
        try:
            term = next(it)
        except StopIteration:
            # An empty iterator used to abort the whole merge here.
            continue
        current.append((term, pos))
    heapify(current)

    while current:
        # Peek at the first term in the sorted list.
        term = current[0][0]

        # Advance every iterator currently positioned on that term.
        while current and current[0][0] == term:
            pos = current[0][1]
            try:
                nextterm = next(iterators[pos])
            except StopIteration:
                heappop(current)
            else:
                heapreplace(current, (nextterm, pos))

        # Yield the term
        yield term
def isample_without_replacement(self, k):
    """Return a weighted sample of ``k`` indices, without replacement.

    Requires ``k <= len(self.weights)``.  Every index gets the random key
    ``u ** (1 / weight)`` with ``u`` drawn from ``self.random.random_sample``;
    a heap keeps the ``k`` indices with the largest keys, which are then
    returned in an order shuffled by ``self.random.permutation``.

    :raises ValueError: if ``k`` exceeds the number of weights.
    """
    population = len(self.weights)
    if k > population:
        raise ValueError("Sample size should be <= %d" % population)

    draw = self.random.random_sample
    keys = draw(population) ** (1.0/self.weights)

    kept = []
    for ix, key in enumerate(keys):
        if ix >= k:
            # Heap is full: replace the smallest key only if this one beats it.
            if kept[0][0] < key:
                heapq.heapreplace(kept, (key, ix))
        else:
            heapq.heappush(kept, (key, ix))

    # Sort first so the permutation below is repeatable for a given RNG state.
    kept.sort()
    # return permuted indices
    return(self.random.permutation([ix for _, ix in kept]))
def _iter_phrases(self, phrases):
    """Uses a heap to yield ``(item, weight)`` pairs from `phrases`, heaviest first.

    ``phrases`` maps a position to a mapping of ``item -> weight`` whose
    entries are already ordered best-first.

    Sadly, this is 5% faster than the built-in heapq.merge solution:
        yield from heapq.merge(
            *[best_items.items() for pos, best_items in phrases.items()],
            key=lambda x: x[1], reverse=True)
    """
    if not phrases:
        return

    heap = []
    # The heap only stores (-weight, slot); the real data for each entry,
    # (head_pair, remaining_iterator), lives in slots[slot].
    slots = []
    for best in phrases.values():
        remaining = iter(best.items())
        head = next(remaining, _EMPTY)
        if head is _EMPTY:
            continue
        _item, weight = head
        heapq.heappush(heap, (-weight, len(slots)))
        slots.append((head, remaining))

    while heap:
        slot = heap[0][1]
        head, remaining = slots[slot]
        yield head
        head = next(remaining, _EMPTY)
        if head is not _EMPTY:
            _item, weight = head
            slots[slot] = (head, remaining)
            heapq.heapreplace(heap, (-weight, slot))
        else:
            heapq.heappop(heap)
def findShortestSniplet(m, word, alphabets): size = len(m) if len(m) < len(alphabets): return (-1, -1) minh = [] maxh = [] for w in m: i = iter(m[w]) val = next(i, sys.maxint) heapq.heappush(minh, (val, i)) heapq.heappush(maxh, -val) mind = sys.maxint start = 0 end = 0 minval = maxval = 0 while minval != sys.maxint and maxval != sys.maxint: (minval, it1) = minh[0] maxval = -maxh[0] print minval, maxval if minval != sys.maxint and maxval != sys.maxint and mind > maxval - minval: mind = maxval - minval start = minval end = maxval val = next(it1, sys.maxint) heapq.heapreplace(minh, (val, it1)) heapq.heappush(maxh, -val) print start, end return (start, end)
def next(self):
    """Advance to the next id available from the readers in ``self.state``.

    ``self.state`` is a heap of readers ordered by their ``id``.  Every
    reader positioned on the current lowest id is advanced; readers that run
    out (``id`` becomes None) are removed.  ``self.id`` ends up as the new
    lowest id, or None when nothing is left or fewer than ``self.minmatch``
    readers remain.

    :raises ReadTooFar: if called after ``self.id`` has become None.
    """
    if self.id is None:
        raise ReadTooFar

    readers = self.state
    if len(readers) < self.minmatch:
        # Can't match the minimum if there aren't enough readers left
        self.id = None
        return

    if len(readers) == 1:
        # Short circuit if there's only one reader
        only = readers[0]
        only.next()
        self.id = only.id
        return

    # Advance all the readers that match the current id
    lowid = readers[0].id
    while readers and readers[0].id == lowid:
        head = readers[0]
        head.next()
        if head.id is None:
            heappop(readers)
        else:
            heapreplace(readers, head)

    self.id = readers[0].id if readers else None
def addNum(self, num):
    """
    Adds a num into the data structure.

    ``small_heap`` holds the lower half as negated values (a max-heap),
    ``big_heap`` holds the upper half, and ``pivort`` holds the middle
    element whenever the total count is odd (None otherwise).

    :type num: int
    :rtype: void
    """
    if self.pivort is None:
        if not self.small_heap:
            # Very first number: it is the middle element by itself.
            self.pivort = num
            return

        # Even count so far: pick the new middle element.
        max_of_smalls = - self.small_heap[0]
        min_of_bigs = self.big_heap[0]
        if num < max_of_smalls:
            # num belongs to the lower half; its old maximum becomes the pivot
            self.pivort = - heapq.heapreplace(self.small_heap, - num)
        elif num > min_of_bigs:
            # num belongs to the upper half; its old minimum becomes the pivot
            self.pivort = heapq.heapreplace(self.big_heap, num)
        else:
            self.pivort = num
        return

    # A pivot exists: distribute it and num over the two halves.
    if num <= self.pivort:
        small, big = num, self.pivort
    else:
        small, big = self.pivort, num
    self.pivort = None
    heapq.heappush(self.small_heap, - small)
    heapq.heappush(self.big_heap, big)
def get_topk(l, k):
    """Finds the K most frequent integers in an iterable.

    Parameters:
    - l: An iterable object with integers (anything ``int()`` accepts)
    - k: an integer; ``k <= 0`` yields an empty result

    Returns a list of ``(count, value)`` tuples sorted in descending order,
    at most ``k`` long.  A value only displaces a kept one when its count is
    strictly higher.
    """
    counts = {}
    for i in l:
        n = int(i)
        counts[n] = counts.get(n, 0) + 1

    if k <= 0:
        return []

    # dict views cannot be sliced on Python 3, so materialise the pairs once.
    pairs = list(counts.items())
    h = [(count, n) for (n, count) in pairs[:k]]
    heapq.heapify(h)
    for n, count in pairs[k:]:
        if count > h[0][0]:
            heapq.heapreplace(h, (count, n))
    h.sort(reverse=True)
    return h
def fill(gw): if gw < maxSingleW: return nonlocal maxRate, bestSize, bestPos rowRests = [] heapq.heappush(rowRests, (-gw, 0)) rowYs = [0] rowHeight = sizes[0][0][1] for (w, h), id in sizes: maxW = -rowRests[0][0] if maxW >= w: # Add to this row i = rowRests[0][1] heapq.heapreplace(rowRests, (- (maxW - w), i)) pos[id] = (gw - maxW, rowYs[i]) else: # Create new row heapq.heappush(rowRests, (-(gw - w), len(rowRests))) rowYs.append(rowYs[-1] + rowHeight) rowHeight = h pos[id] = (0, rowYs[-1]) size = (gw, rowYs[-1] + rowHeight) rate = area / (size[0] * size[1]) if rate > maxRate: maxRate = rate bestSize = size bestPos = pos[:]
def merge_iter(iters, pfreq, pfunc, pfinal, key=None):
    """Merge sorted sequences into one sorted, de-duplicated stream.

    :param iters: a re-iterable collection of sized, sorted sequences.
    :param pfreq: progress frequency; ``pfunc`` is called whenever the number
        of consumed elements is a multiple of it.
    :param pfunc: progress callback, called as ``pfunc(count, total)``.
    :param pfinal: completion callback, called as ``pfinal(count, total)``
        once the merge is exhausted.
    :param key: optional attribute name; when given, two elements are
        duplicates if that attribute compares equal, otherwise elements are
        compared with ``==``.
    :returns: a generator of elements in ascending order; an element equal to
        the previously yielded one is skipped.

    A sequence whose first element is falsy (or that is empty) is left out of
    the merge entirely; this mirrors the original filtering.
    """
    if key:
        samekey = lambda e, pe: getattr(e, key) == getattr(pe, key, None)
    else:
        samekey = operator.eq
    count = 0
    total = sum(len(it) for it in iters)

    # Heap entries are (element, order, iterator).  `order` breaks ties
    # between equal elements - which are the normal case here - so iterator
    # objects are never compared.
    heap = []
    for order, it in enumerate(iters):
        it = iter(it)
        e = next(it, None)
        if e:
            heap.append((e, order, it))
    heapq.heapify(heap)

    pe = None
    while heap:
        if not count % pfreq:
            pfunc(count, total)
        e, order, it = heap[0]
        if not samekey(e, pe):
            pe = e
            yield e
        count += 1
        try:
            e = next(it)
        except StopIteration:
            heapq.heappop(heap)  # remove current
        else:
            heapq.heapreplace(heap, (e, order, it))  # shift current to new location
    pfinal(count, total)
def __iter__(self):
    """Iterate over records in input files.

    Merges the key/value records of this shard's input files by key and
    yields them grouped.  One reader per file is kept in a heap; the record
    at the heap root is folded into the current result and replaced by the
    next record from the same reader.

    self._offsets is always correctly updated so that stopping iterations
    doesn't skip records and doesn't read the same record twice.

    Raises:
      Exception: when Files list and offsets do not match.

    Yields:
      The result: a list ``[key, values, is_partial]``.  ``is_partial`` is
      set to True on a result that is cut short because
      ``self._max_values_count`` or ``self._max_values_size`` was reached
      (either limit is disabled when set to -1).
    """
    ctx = context.get()
    mapper_spec = ctx.mapreduce_spec.mapper
    shard_number = ctx._shard_state.shard_number
    filenames = mapper_spec.params[self.FILES_PARAM][shard_number]

    if len(filenames) != len(self._offsets):
        raise Exception("Files list and offsets do not match.")

    # Heap with (Key, Value, Index, reader) pairs.
    readers = []

    # Initialize heap
    # Every reader starts with a (None, None, index, reader) placeholder, so
    # the first pass over each entry only reads its first record.
    # NOTE(review): placeholders and real entries are ordered by tuple
    # comparison; comparing a None key with a real key presumably relies on
    # Python 2 ordering semantics - confirm.
    for (i, filename) in enumerate(filenames):
        offset = self._offsets[i]
        # TODO(user): Shrinking the buffer size is a workaround until
        # a tiered/segmented merge is implemented.
        reader = records.RecordsReader(
            cloudstorage.open(filename, read_buffer_size=self.GCS_BUFFER_SIZE))
        reader.seek(offset)
        readers.append((None, None, i, reader))

    # Read records from heap and merge values with the same key.

    # current_result is yielded and consumed by _merge_map.
    # current_result = (key, value, is_partial)
    current_result = None
    current_count = 0
    current_size = 0
    while readers:
        (key, value, index, reader) = readers[0]

        # A None key marks a placeholder that carries no record yet.
        if key is not None:
            current_count += 1
            current_size += len(value)

            should_yield = False
            if current_result:
                if key != current_result[0]:
                    # New key encountered
                    should_yield = True
                elif (self._max_values_count != -1 and
                      current_count >= self._max_values_count):
                    # Maximum number of values encountered.
                    current_result[2] = True
                    should_yield = True
                elif (self._max_values_size != -1 and
                      current_size >= self._max_values_size):
                    # Maximum size of values encountered
                    current_result[2] = True
                    should_yield = True

            if should_yield:
                # New key encountered or maximum count hit. Yield current key.
                yield current_result
            if not current_result or should_yield:
                # Start a fresh result for this key and reset the limits.
                current_result = [key, [], False]
                current_count = 0
                current_size = 0
            current_result[1].append(value)

        # Read next key/value from reader.
        try:
            # Record the position *before* reading, so a restart from the
            # stored offset re-reads the record that is about to be fetched.
            self._offsets[index] = reader.tell()
            start_time = time.time()
            binary_record = reader.read()
            # update counters
            if context.get():
                operation.counters.Increment(
                    input_readers.COUNTER_IO_READ_BYTES,
                    len(binary_record))(context.get())
                operation.counters.Increment(
                    input_readers.COUNTER_IO_READ_MSEC,
                    int((time.time() - start_time) * 1000))(context.get())
            proto = kv_pb.KeyValue()
            proto.ParseFromString(binary_record)
            # Put read data back into heap.
            heapq.heapreplace(readers,
                              (proto.key(), proto.value(), index, reader))
        except EOFError:
            # This reader is exhausted: remove its entry from the heap.
            heapq.heappop(readers)

    # Yield leftovers.
    if current_result:
        yield current_result
li2 = [5, 7, 9, 4, 3] # using heapify() to convert list into heap heapq.heapify(li1) heapq.heapify(li2) # using heappushpop() to push and pop items simultaneously # pops 2 print "\nThe popped item using heappushpop() is : ", print (heapq.heappushpop(li1, 2)) # using heapreplace() to push and pop items simultaneously pops 3 # element is first popped- which is 3, then element is pushed. print "\nThe popped item using heapreplace() is : ", print (heapq.heapreplace(li2, 2)) ################################################################## # 6. nlargest(k, iterable, key = fun) :- This function is used to return the k # largest elements from the iterable specified and satisfying the key if # mentioned. # 7. nsmallest(k, iterable, key = fun) :- This function is used to return the # k smallest elements from the iterable specified and satisfying the key if # mentioned. # Python code to demonstrate working of # nlargest() and nsmallest() print "\n------------------------------------------------"
def uBkNN_sd(r, k=26):
    """
    Compute the rating predictions for missing values of r, using a user-based kNN model
    It also takes into account the standard deviation of the users to predicted more accurately

    A rating of 0 is treated as "missing": only cells where ``r[i, j] == 0``
    are predicted, and only for columns that have at least one rating.

    :param r: the rating matrix
    :param k: the number of neighbors. Default = 26, found empirically
    :return: the rating prediction matrix
    """
    n_row, n_col = r.shape

    # Compute vertical representation of R
    # Vertical may contain empty values
    # vertical[j][0] holds the row indices with a non-zero entry in column j.
    vertical = []
    for j in range(n_col):
        vertical.append(np.nonzero(r[:, j]))

    # Compute mean for each user
    # Rows without any rating give NaN (0 / 0), which is replaced by 0.0.
    means = np.true_divide(r.sum(1), (r != 0).sum(1))
    means = [0.0 if math.isnan(i) else i for i in means]

    # Compute standard deviation
    # Zero entries are masked as NaN so only actual ratings contribute.
    standard_deviation = [
        np.nanstd(np.where(np.isclose(a, 0), np.nan, a)) for a in r
    ]

    # Compute sim matrix
    # Cosine similarity between rows, filled symmetrically; the diagonal
    # stays 0.
    sim_matrix = np.zeros((n_row, n_row))
    for i in range(n_row):
        for j in range(i + 1, n_row):
            a = np.dot(r[i, :], r[j, :])
            if a != 0.0:
                a = a / (np.linalg.norm(r[i, :]) * np.linalg.norm(r[j, :]))
            sim_matrix[i, j] = a
            sim_matrix[j, i] = a

    # Threshold to speed-up the computation time
    # if, for a movie j, there is less than 'threshold' users that rated this movie,
    # perform a classic search among all users that have rated this movie
    # otherwise: there are a lot of people that rated this movie
    # it is then faster, for user i, to check his neighbors in decreasing order of similarity score
    # and retrieve the user if it has rated this movie (we have a high chance of that, since the number of people
    # that rated this movie is high)
    threshold = k * 3

    r_hat = r.copy().astype(float)
    for i in tqdm(range(n_row)):
        # Sort the users according to similarity score
        # Used later if high number of users that rated a movie
        a = [(sim_matrix[i, j], j) for j in range(n_row)]
        a.sort(key=lambda iii: iii[0], reverse=True)

        for j in range(n_col):
            if r[i, j] != 0:  # Not compute if already exist
                continue

            if len(vertical[j][0]) == 0:  # In case no one purchased this item
                continue  # Useless to try to compute it

            if len(vertical[j]
                   [0]) <= k:  # Every elements of vertical[j] will be used
                kNN = [(sim_matrix[i, zz], zz) for zz in vertical[j][0]]

            elif len(
                    vertical[j][0]
            ) < threshold:  # If not much rating users: search in that set
                kNN = []
                # Simple heap search
                # Min-heap of (similarity, user): the root is the weakest
                # neighbour kept so far.
                for client in vertical[j][0]:
                    sim = sim_matrix[i, client]
                    if len(kNN) < k:
                        heapq.heappush(kNN, (sim, client))
                    elif len(kNN
                             ) >= k and kNN[0][0] < sim:  # Full kNN and update
                        heapq.heapreplace(kNN, (sim, client))

            else:  # Search first in the most similar users, those who rated this item
                kNN = []
                for (sim, other) in a:
                    if r[other, j] > 0.0:
                        kNN.append((sim, other))
                        if len(kNN) == k:
                            break

            # We have here the kNN of user i (if at least k)
            # Prediction: similarity-weighted average of the neighbours'
            # standardised ratings, mapped back to user i's own scale.
            pred = 0.0
            den = 0.0
            for sim, client in kNN:
                # NOTE(review): a neighbour whose standard deviation is 0
                # makes this division yield inf/NaN - confirm how callers
                # want that handled.
                pred += sim * (r[client, j] -
                               means[client]) / standard_deviation[client]
                den += abs(sim)
            if den != 0:  # 0 similarity: could happen
                pred /= den
            r_hat[i, j] = standard_deviation[i] * pred + means[i]

    return r_hat
# Demonstration of heapify / merge / heapreplace / nlargest / nsmallest.
# The first print shows `nums` as left by code outside this part of the file.
print(nums)
nums = [2, 3, 5, 1, 54, 23, 132]
heapq.heapify(nums)  # convert into a heap in place; nums is modified
print(nums)
print([heapq.heappop(nums) for _ in range(len(nums))])  # it must be a heap for the pops to come out in order
print()

# Merge several sorted sequences into one sorted sequence (returns an iterator).
print('合并多个排序序列成一个排序序列,返回值的迭代器。nums=heapq.merge(nums1,nums2)')
nums1 = [2, 3, 5, 1, 54, 23, 132]
nums1 = sorted(nums1)
print('nums1', nums1)
nums2 = [22, 23, 25, 21, 254, 223, 2132]
nums2 = sorted(nums2)
print('nums2', nums2)
nums = heapq.merge(nums1, nums2)  # an iterator over the values
print('nums', list(nums))
print()

# Remove the smallest element of the heap and add the element 23.
print('删除堆中最小元素并加入一个元素23。heapq.heaprepalce()')
# This literal list already satisfies the heap invariant, so no heapify()
# call is needed before heapreplace().
nums = [1, 2, 4, 5, 3]
print('nums', nums)
heapq.heapreplace(nums, 23)
print([heapq.heappop(nums) for _ in range(len(nums))])
print()

# The k largest / smallest values.
print('最大/小的k个值。heapq.nlargest(3, nums)/heapq.nsmallest(3, nums)')
nums = [1, 3, 4, 5, 2, 9]
print(nums)
print(heapq.nlargest(3, nums))
print(heapq.nsmallest(3, nums))
def push(self, item, priority=0):
    """Add ``item`` to the size-bounded heap ``self._q``.

    Entries are ``[priority, count, item]`` lists, where ``count`` comes from
    ``self._counter`` and keeps entries with equal priority in insertion
    order.  Once ``self._max_size`` entries are stored, every push evicts the
    current smallest entry.
    """
    entry = [priority, next(self._counter), item]
    if len(self._q) >= self._max_size:
        # NOTE(review): the eviction is unconditional - the root is dropped
        # even when the new entry is smaller than it.  Confirm this is wanted.
        heapreplace(self._q, entry)
    else:
        heappush(self._q, entry)
def get_top_words(query, collection):
    """Find the most frequent 1-grams according to a stored count-min sketch.

    The count-min sketch counters and salt previously pickled under
    ``output/`` for this ``collection``/``query`` pair are loaded, every
    distinct (lower-cased) ``ngram`` of the MongoDB collection is queried
    against the sketch, and the ``n_top_words`` highest estimated frequencies
    are kept. The result is printed as a DataFrame and pickled to
    ``output/COUNTMIN_TOP_FREQ_<collection>_<query>.pkl`` as a dict with the
    keys ``'L'`` (labels) and ``'C'`` (counts), highest count first.

    ``n_top_words`` is read from module scope; it is not defined in this
    function.

    :param query: regular expression used for the connection check; also part
        of the input/output file names
    :param collection: ``'American'`` or ``'English'``
    :return: None (also when the connection check finds no document)
    :raises ValueError: if ``collection`` is not one of the supported names
    """
    #get_stream(QUERY, collection)
    client = py.MongoClient('localhost', 27017)
    try:
        # NOTE: probably will need to change these for your names
        db = client.NGRAM
        if collection == 'American':
            col = db.American_1gram  # american english
        elif collection == 'English':
            col = db.English_1gram  # british english
        else:
            # Previously this fell through and failed later on an unbound `col`.
            raise ValueError(
                "Unknown collection %r, expected 'American' or 'English'"
                % (collection,))
        print(col)
        print("Collection: ", collection)
        # to check connection to db
        print('Query: ', query)
        item = col.find_one({"ngram": {'$regex': query}})
        print("Checking Connection: ", item)
        if item is None:
            print("Connection unsuccessful, aborting")
            return
        print('*' * 50)

        # SECURITY NOTE: pickle.load can execute arbitrary code; these files
        # must only ever be ones written by this project itself.
        with open(
                "output/COUNTMIN_MODEL_" + collection + "_" + query + ".pkl",
                'rb') as filehandler:
            counters = pickle.load(filehandler)
        with open(
                "output/COUNTMIN_SALT_" + collection + "_" + query + ".pkl",
                'rb') as filehandler:
            salt = pickle.load(filehandler)
        cms = CountMinSketch(10, 2000, counters, salt)

        print('getting top frequency words')
        # query all documents to find the top frequency words
        seen = set()
        # Min-heap of [freq, word]: the root is the weakest entry kept so far.
        top_freq_heap = []
        for doc in col.find():
            if not isinstance(doc['ngram'], str):
                continue
            word = doc['ngram'].lower()
            if word in seen:
                continue
            seen.add(word)
            freq = cms.query(word)
            if len(top_freq_heap) < n_top_words:
                heapq.heappush(top_freq_heap, [freq, word])
            elif freq > top_freq_heap[0][0]:
                heapq.heapreplace(top_freq_heap, [freq, word])
    finally:
        # Release the connection on every exit path.
        client.close()

    # Words are unique, so there are no equal entries and a descending sort is
    # the same as sorting ascending and reversing.
    top_freq_heap.sort(reverse=True)

    top_freq_df = pd.DataFrame(top_freq_heap, columns=['count', 'label'])
    print(top_freq_df)

    # if output dir doesnt exists, creates it
    os.makedirs("output", exist_ok=True)

    # TODO probably need to update the replacing
    with open("output/COUNTMIN_TOP_FREQ_" + collection + "_" + query + ".pkl",
              "wb") as fp:  #Pickling
        obj = {
            'L': top_freq_df['label'].to_list(),
            'C': top_freq_df['count'].to_list()
        }
        pickle.dump(obj, fp)
heappop(self.nums)
return self.nums[0]  # Return the smallest value
# NOTE(review): the two statements above belong to a definition that starts
# outside this snippet; they are kept unchanged.
from heapq import *


class KthLargest:
    """Track the k-th largest value of a stream with a min-heap of size <= k."""

    def __init__(self, k: int, nums: List[int]):
        """Heapify ``nums`` and trim it down to its ``k`` largest values.

        The list passed in is used as the heap storage itself, so the
        caller's list is modified in place.
        """
        self.nums = nums
        heapify(self.nums)
        self.k = k
        # Drop the smallest values until at most k remain.
        while len(self.nums) > self.k:
            heappop(self.nums)

    def add(self, val: int) -> int:
        """Add ``val`` and return the heap root (the smallest value kept)."""
        if len(self.nums) < self.k:
            heappush(self.nums, val)
        else:
            # Push val, then pop the smallest: the heap size stays unchanged.
            heappushpop(self.nums, val)
        return self.nums[0]


heapq.heapreplace(heap, item)
從 heap 取出並回傳最小的元素,接著將新的 item 放進heap。heap 的大小不會改變。如果 heap 是空的會產生 IndexError 錯誤。
這個一次完成的操作會比呼叫 heappop() 之後呼叫 heappush() 更有效率,並在維護 heap 的大小不變時更為適當,取出/放入的組合函式一定會從 heap 回傳一個元素並用 item 取代他。
函式的回傳值可能會大於被加入的 item 。如果這不是你期望發生的,可以考慮使用 heappushpop() 替代,他會回傳 heap 的最小值和 item 兩個當中比較小的那個,並將大的留在 heap 內。
#Heaps in Python: # Heap is a tree Data structure where each parent node is less than or equal to its child node. This is called Min Heap. # If each parent node is greater than or equal to child node then it is called Max Heap. # Creating a Heap: It is created by inbuild functon in python "heapq". import heapq H = [1, 14, 21, 22, 0, 54] #To arrange elements we use heapify heapq.heapify(H) print(H) #Inserting into heap heapq.heappush(H, 9) print(H) #Removing from heap #It always remove the function at index 1 heapq.heappop(H) print(H) #Replace an element #It removes the smallest element of heap and insert new element at some place heapq.heapreplace(H, 8) print(H)
print(counter) # 堆操作 import heapq data = [1, 13, 45, 21, 89, 31, 28, 44, 19, 99] heapq.heapify(data) print(data) data = [1, 13, 45, 21, 89, 31, 28, 44, 19, 99] heap = [] for item in data: heapq.heappush(heap, item) print(heap) print(heapq.heappop(heap)) print(heapq.heappushpop(heap, 56)) print(heap) print(heapq.heapreplace(heap, 78)) print(heap) data = [1, 13, 45, 21, 89, 31, 28, 44, 19, 99] heapq.heapify(data) print(data) print(heapq.nlargest(1, heap)) print(heapq.nlargest(3, heap)) print(heapq.nsmallest(1, heap)) print(heapq.nsmallest(3, heap)) print(heapq.merge([1, 2, 3, 10], [78, 23, 99, 10], [5, 3, 0])) print(list(heapq.merge([1, 2, 3, 10], [78, 23, 99, 10], [5, 3, 0])))
def add(self, num):
    """Offer ``num`` to ``self.min_heap`` and return the heap's root.

    ``num`` replaces the current root only when the root is strictly smaller
    than ``num``; the heap never grows. The heap must be non-empty, otherwise
    reading the root raises ``IndexError``.
    """
    heap = self.min_heap
    if not heap[0] < num:
        # num does not beat the current root: nothing changes.
        return heap[0]
    heapq.heapreplace(heap, num)
    return heap[0]
# using heappop() to pop smallest element hq.heappop(mylist) print(mylist) # [3, 5, 4, 7, 9] hq.heappop(mylist) print(mylist) # [4, 5, 9, 7] # heappushpop(heap, element) his function combines the functioning of both push and pop operations in one statement, # increasing efficiency. Heap order is maintained after this operation. hq.heappushpop(mylist, 8) print(mylist) # [5, 7, 9, 8] # heapreplace(heap, ele) :- This function also inserts and pops element in one statement, but it is different from # above function. In this, element is first popped, then element is pushed.i.e, the value larger than the pushed # value can be return hq.heapreplace(mylist, 11) print(mylist) # [7, 8, 9, 11] # nlargest(k, iterable, key = fun) :- This function is used to return the k largest elements from the iterable # specified and satisfying the key if mentioned. ################################################################################################################## # initializing list mylist = [6, 7, 9, 4, 3, 5, 8, 10, 1] # using heapify() to convert list into heap hq.heapify(mylist) # using nlargest to print 3 largest numbers # prints 10, 9 and 8 print("The 3 largest numbers in list are : ", end="")
def _minmaxhash_add_ngrams(heap: list, heapmap: dict, maxsize: int, nsize: int, subs, nsubs: int, hashbuffer, heaptop, extracthash, make_elt, update_elt, replace, anynew, minmax_op) -> int: """ Process/add elements to the sketch (See warning below). This function is where most of time is spent when building a minhash or a maxhash. .. warning:: If calling this method directly, updating the attribute `nvisited` is under your responsibility. :param heap: a heap (in a :class:`list`) :param heapmap: a :class:dict: with the content of the heap (for O(1) lookup of the content of the sketch) :param maxsize: maximum size of the heap :param nsize: size of ngrams :param subs: (sub-)sequence :param nsubs: number of hash values in the hashbuffer :param hashbuffer: buffer with hash values :param heaptop: hash value that is at the top of the heap :param extracthash: function extract the hash from an element :param make_elt: factory to make a new element :param update_elt: in-place update of an element :param replace: callback if replacing :param anynew: callback if new entry :param minmax_op: a pair that is expected to be either (1, `<`) if minhash or (-1, `>`) if maxhash. :return: new hash value for heaptop. """ lheap = len(heap) sign, comparator = minmax_op for j in range(nsubs): h = hashbuffer[j] if h not in heapmap: if lheap < maxsize: elt = make_elt(sign * h, subs, j, nsize) # Add element to set and heap. heapmap[h] = elt heappush(heap, elt) heaptop = extracthash(heap[0]) lheap += 1 if anynew is not None: anynew(h) elif comparator(h, heaptop): elt = make_elt(sign * h, subs, j, nsize) # Replace the maximum value in the heap. heapmap[h] = elt out = heapreplace(heap, elt) del (heapmap[sign * out[0]]) # The negative of the hash is needed for MinHash. heaptop = sign * heap[0][0] if anynew is not None: anynew(h) else: if update_elt is not None: elt = heapmap[h] update_elt(elt) return heaptop
def totalTime(input_array, number_of_items):
    """Return the largest running total after distributing the items.

    ``number_of_items`` running totals start at 0. Each value of
    ``input_array`` is added, in order, to whichever total is currently the
    smallest. The largest total is returned.
    """
    totals = [0 for _ in range(number_of_items)]
    for amount in input_array:
        # The heap root is always the smallest running total.
        smallest = totals[0]
        heapq.heapreplace(totals, smallest + amount)
    largest_first = heapq.nlargest(1, totals)
    return largest_first[0]
def cb(w, acc): nonlocal hp if len(hp) == n and acc > hp[0][0]: heapq.heapreplace(hp, (acc, w)) elif len(hp) < n: heapq.heappush(hp, (acc, w))
def __iter__(self):
    """Iterate over records in input files.

    The files of this shard are merged through a heap keyed on the record
    key, and consecutive records with the same key are grouped: the iterator
    yields ``(key, [value, ...])`` tuples.

    self._offsets is always correctly updated so that stopping iterations
    doesn't skip records and doesn't read the same record twice.

    Raises:
      Exception: if the number of files differs from the number of offsets.
    """
    ctx = context.get()
    mapper_spec = ctx.mapreduce_spec.mapper
    shard_number = ctx.shard_state.shard_number
    filenames = mapper_spec.params[self.FILES_PARAM][shard_number]

    if len(filenames) != len(self._offsets):
        raise Exception("Files list and offsets do not match.")

    # Heap with (Key, Value, Index, reader) pairs.
    readers = []

    # Initialize heap
    # Every reader starts with a (None, None) placeholder record: nothing has
    # been read from it yet. A list of such entries is already a valid heap
    # ordered by file index.
    for (i, filename) in enumerate(filenames):
        offset = self._offsets[i]
        reader = records.RecordsReader(files.BufferedFile(filename))
        reader.seek(offset)
        readers.append((None, None, i, reader))

    # Read records from heap and merge values with the same key.
    current_result = None
    while readers:
        (key, value, index, reader) = readers[0]

        # key is None only for the initial placeholder entries.
        if key is not None:
            if current_result and key != current_result[0]:
                # New key encountered. Yield current key.
                yield current_result
            if not current_result or key != current_result[0]:
                current_result = (key, [])
            current_result[1].append(value)

        # Read next key/value from reader.
        try:
            # Record the position before reading, so that the saved offset
            # points at the record that is about to be read.
            self._offsets[index] = reader.tell()
            start_time = time.time()
            binary_record = reader.read()
            # update counters
            if context.get():
                operation.counters.Increment(
                    input_readers.COUNTER_IO_READ_BYTES,
                    len(binary_record))(context.get())
                operation.counters.Increment(
                    input_readers.COUNTER_IO_READ_MSEC,
                    int((time.time() - start_time) * 1000))(context.get())
            proto = file_service_pb.KeyValue()
            proto.ParseFromString(binary_record)
            # Put read data back into heap.
            heapq.heapreplace(readers,
                              (proto.key(), proto.value(), index, reader))
        except EOFError:
            # This file is exhausted: drop its reader from the heap.
            heapq.heappop(readers)

    # Yield leftovers.
    if current_result:
        yield current_result
def add(self, val):
    """Offer ``val`` to the pool and return the pool's smallest value.

    ``self.pool`` is a min-heap holding at most ``self.k`` values. Once it
    holds ``k`` values its root is the k-th largest value seen so far.
    """
    pool = self.pool
    if len(pool) >= self.k:
        if val > pool[0]:
            # heapreplace pops the smallest value first and then pushes the
            # new one; the heap order is restored as part of the push.
            heapq.heapreplace(pool, val)
    else:
        heapq.heappush(pool, val)
    # The root is the k-th largest element. Presumably the caller guarantees
    # at least k - 1 initial values, so the first add fills the pool to k.
    return pool[0]
'''
Write a Python program to delete the smallest element from the given Heap and then inserts a new item.
'''
import heapq

l = [4, 3, 6, 2, 1, 6, 7, 4, 10, 93, 21, 34]
heapq.heapify(l)  # Rearrange the list in place into a min-heap
heapq.heapreplace(l, 0)  # Pop the smallest element (1) and push 0 in one step
print(l)
def get_predicted_sentence(args,
                           input_sentence,
                           vocab,
                           rev_vocab,
                           model,
                           sess,
                           debug=False,
                           return_raw=False):
    """Decode a response for ``input_sentence`` greedily or with beam search.

    With ``args.beam_size == 1`` the model is run once and the arg-max token
    of every output position is taken (cut at the first EOS token). Otherwise
    a beam search of width ``args.beam_size`` is run over the decoder
    positions; when ``args.antilm`` is truthy, the probabilities obtained
    from a padding-only encoder input, weighted by ``args.antilm``, are
    subtracted from the regular probabilities.

    Returns:
      A list of candidate dicts. Greedy decoding returns a single dict with
      the keys 'dec_inp' (the sentence) and 'prob'. Beam search returns at
      most ``args.beam_size`` dicts, best first, in which 'dec_inp' has been
      replaced by the decoded sentence. If ``return_raw`` is true, the tuple
      ``(all_prob, all_prob_ts, all_prob_t)`` of the first expanded candidate
      is returned instead.

    Raises:
      ValueError: from ``min()`` when no bucket is larger than the input.
    """

    def model_step(enc_inp, dec_inp, dptr, target_weights, bucket_id):
        # Run the model forward only and return the softmax over the logits
        # at decoder position dptr.
        _, _, logits = model.step(sess,
                                  enc_inp,
                                  dec_inp,
                                  target_weights,
                                  bucket_id,
                                  forward_only=True)
        prob = softmax(logits[dptr][0])
        # print("model_step @ %s" % (datetime.now()))
        return prob

    def greedy_dec(output_logits, rev_vocab):
        # Take the most likely token at every position and stop before EOS.
        selected_token_ids = [
            int(np.argmax(logit, axis=1)) for logit in output_logits
        ]
        if data_utils.EOS_ID in selected_token_ids:
            eos = selected_token_ids.index(data_utils.EOS_ID)
            selected_token_ids = selected_token_ids[:eos]
        output_sentence = ' '.join(
            [dict_lookup(rev_vocab, t) for t in selected_token_ids])
        return output_sentence

    input_token_ids = data_utils.sentence_to_token_ids(input_sentence, vocab)

    # Which bucket does it belong to?
    # The smallest bucket index whose input size exceeds the token count.
    bucket_id = min([
        b for b in range(len(args.buckets))
        if args.buckets[b][0] > len(input_token_ids)
    ])
    outputs = []
    feed_data = {bucket_id: [(input_token_ids, outputs)]}

    # Get a 1-element batch to feed the sentence to the model.
    encoder_inputs, decoder_inputs, target_weights = model.get_batch(
        feed_data, bucket_id)
    if debug:
        print("\n[get_batch]\n", encoder_inputs, decoder_inputs,
              target_weights)

    ### Original greedy decoding
    if args.beam_size == 1:
        _, _, output_logits = model.step(sess,
                                         encoder_inputs,
                                         decoder_inputs,
                                         target_weights,
                                         bucket_id,
                                         forward_only=True)
        return [{"dec_inp": greedy_dec(output_logits, rev_vocab), 'prob': 1}]

    # Get output logits for the sentence.
    # Each beam entry is a (prob, tie-breaker, candidate dict) tuple; the
    # search starts from a single candidate with probability 1.
    beams, new_beams, results = [(1, 0, {
        'eos': 0,
        'dec_inp': decoder_inputs,
        'prob': 1,
        'prob_ts': 1,
        'prob_t': 1
    })], [], []
    # Encoder input made only of padding tokens, used for the anti-LM term.
    dummy_encoder_inputs = [
        np.array([data_utils.PAD_ID]) for _ in range(len(encoder_inputs))
    ]

    for dptr in range(len(decoder_inputs) - 1):
        if dptr > 0:
            target_weights[dptr] = [1.]
            # The candidates collected in the previous step become the beams.
            beams, new_beams = new_beams[:args.beam_size], []
        if debug:
            print("=====[beams]=====", beams)
        heapq.heapify(beams)  # since we will remove something
        for prob, _, cand in beams:
            # Finished candidates go straight to the results.
            if cand['eos']:
                results += [(prob, 0, cand)]
                continue

            # normal seq2seq
            if debug:
                print(
                    cand['prob'], " ".join(
                        [dict_lookup(rev_vocab, w) for w in cand['dec_inp']]))

            all_prob_ts = model_step(encoder_inputs, cand['dec_inp'], dptr,
                                     target_weights, bucket_id)
            if args.antilm:
                # anti-lm
                all_prob_t = model_step(dummy_encoder_inputs, cand['dec_inp'],
                                        dptr, target_weights, bucket_id)
                # adjusted probability
                all_prob = all_prob_ts - args.antilm * all_prob_t  #+ args.n_bonus * dptr + random() * 1e-50
            else:
                all_prob_t = [0] * len(all_prob_ts)
                # Note: all_prob is the same object as all_prob_ts here, so
                # the in-place scaling below affects both names.
                all_prob = all_prob_ts

            # suppress copy-cat (respond the same as input)
            if dptr < len(input_token_ids):
                all_prob[input_token_ids[dptr]] = all_prob[
                    input_token_ids[dptr]] * 0.01

            # for debug use
            if return_raw:
                return all_prob, all_prob_ts, all_prob_t

            # beam search
            # Expand the candidate with its beam_size most probable tokens.
            for c in np.argsort(all_prob)[::-1][:args.beam_size]:
                new_cand = {
                    'eos': (c == data_utils.EOS_ID),
                    'dec_inp': [(np.array([c]) if i == (dptr + 1) else k)
                                for i, k in enumerate(cand['dec_inp'])],
                    'prob_ts': cand['prob_ts'] * all_prob_ts[c],
                    'prob_t': cand['prob_t'] * all_prob_t[c],
                    'prob': cand['prob'] * all_prob[c],
                }
                new_cand = (new_cand['prob'], random(), new_cand
                            )  # stuff a random to prevent comparing new_cand

                # new_beams is a min-heap holding at most beam_size entries:
                # the root is the weakest candidate kept so far.
                try:
                    if (len(new_beams) < args.beam_size):
                        heapq.heappush(new_beams, new_cand)
                    elif (new_cand[0] > new_beams[0][0]):
                        heapq.heapreplace(new_beams, new_cand)
                except Exception as e:
                    # NOTE(review): heap errors are only printed, not raised.
                    print("[Error]", e)
                    print("-----[new_beams]-----\n", new_beams)
                    print("-----[new_cand]-----\n", new_cand)

    results += new_beams  # flush last cands

    # post-process results
    # NOTE(review): entries collected from finished beams share the
    # tie-breaker 0, so two of them with equal prob would make sorted()
    # compare the candidate dicts -- confirm that this cannot happen.
    res_cands = []
    for prob, _, cand in sorted(results, reverse=True):
        cand['dec_inp'] = " ".join(
            [dict_lookup(rev_vocab, w) for w in cand['dec_inp']])
        res_cands.append(cand)
    return res_cands[:args.beam_size]
def add(self, val: int) -> int:
    """Insert ``val`` if it belongs to the ``self.k`` largest values seen.

    ``self.pool`` is a min-heap of at most ``self.k`` values.

    Returns:
        The root of ``self.pool`` after the update, i.e. its smallest value.
    """
    heap = self.pool
    has_room = len(heap) < self.k
    if has_room:
        heapq.heappush(heap, val)
    elif val > heap[0]:
        # Full heap: val displaces the current smallest value.
        heapq.heapreplace(heap, val)
    smallest = heap[0]
    return smallest
import heapq

# First input line: the number of values. It is read to consume the line,
# but the value itself is not used below.
N = int(input())
s = set()  # values seen so far that are still waiting for an equal partner
q = [0, 0]  # min-heap with the two largest values for which a pair was found
for a in map(int, input().split()):
    # Values not larger than the smaller entry of q can never improve it.
    if a > q[0]:
        if a in s:
            # Second occurrence: the pair is complete, so it replaces the
            # smaller of the two values kept.
            s.remove(a)
            heapq.heapreplace(q, a)
        else:
            s.add(a)
# The product is 0 unless two pairs were found.
print(q[0] * q[1])
def CELF_improved(k, seedset):
    '''
    Select k seeds with an improved version of the traditional CELF algorithm,
    which is faster than the traditional one.

    Candidates are first evaluated with a small budget (1000) and are only
    re-evaluated with the large budget (10000) once they reach the top of the
    heap. The evaluations are delegated to workers through the global queues
    q_in / q_out, and the results of all workers are averaged.

    The chosen seeds are added to the global set final_seed; the function has
    no explicit return statement.

    :param k: num of seed
    :param seedset: candidate seeds from the heuristic
    '''
    global p, q_in, q_out, final_seed
    # Maps the budget of the last evaluation to the budget of the next one.
    Rs = {1000: 10000}
    nodeHeap = []
    preSpread = 0
    # Initial evaluation of every candidate on its own.
    for node in seedset:
        # Each worker gets four values: a flag, a budget, the seed set to
        # evaluate and the spread reached so far.
        # NOTE(review): the meaning of the flag and of the division by 7
        # (presumably the number of workers) is not visible here -- confirm.
        for qin in q_in:
            qin.put(False)
            qin.put(1000 / 7)
            qin.put({node})
            qin.put(preSpread)
        result = []
        for qout in q_out:
            result.append(qout.get(True))
        high = sum(result) / len(result)
        # Entry: (-value for min-heap ordering, value, node,
        #         round of the last evaluation, budget tag)
        nodeHeap.append((-high, high, node, -1, 100))
    heapq.heapify(nodeHeap)
    for i1 in range(k):
        # Re-evaluate the top candidate until the top of the heap has been
        # evaluated in this round with the large budget.
        while nodeHeap[0][3] != i1 or nodeHeap[0][4] != 10000:
            maxOne = nodeHeap[0]
            newSeed = final_seed.copy()
            newSeed.add(maxOne[2])
            if maxOne[3] == i1:
                # Already evaluated in this round: move on to the next budget.
                thisR = Rs[maxOne[4]]
            else:
                thisR = 1000
            if thisR == 10000:
                for qin in q_in:
                    qin.put(True)
                    qin.put(10000 / 7)
                    qin.put(newSeed)
                    qin.put(preSpread)
                result = []
                for qout in q_out:
                    result.append(qout.get(True))
                delta = sum(result) / len(result)
                heapq.heapreplace(nodeHeap,
                                  (-delta, delta, maxOne[2], i1, thisR))
            else:
                for qin in q_in:
                    qin.put(False)
                    qin.put(thisR / 7)
                    qin.put(newSeed)
                    qin.put(preSpread)
                result = []
                for qout in q_out:
                    result.append(qout.get(True))
                high = sum(result) / len(result)
                heapq.heapreplace(nodeHeap,
                                  (-high, high, maxOne[2], i1, thisR))
        # The top of the heap is now the winner of this round.
        winner = heapq.heappop(nodeHeap)
        preSpread = winner[1] + preSpread
        final_seed.add(winner[2])
def replace(self, item):
    """Pop the smallest item of ``self.heap`` and push ``item`` in one step.

    ``item`` is first passed to ``self._validate_push``. The popped item is
    returned. ``IndexError`` is raised by ``heapq`` if the heap is empty.
    """
    self._validate_push(item)
    heap = self.heap
    popped = heapq.heapreplace(heap, item)
    return popped
"""
Replacing in a Heap
The heapreplace function always removes the smallest element of the heap and inserts the new incoming element at some place not fixed by any order.
"""
# Clarification: the position of the new element is not arbitrary. It is
# placed wherever the heap invariant requires (every parent <= its children).
import heapq

H = [21, 1, 45, 78, 3, 5]
# Create the heap
heapq.heapify(H)
print(H)

# Replace an element: the smallest value (1) is removed and 6 is inserted
heapq.heapreplace(H, 6)
print(H)
def add(self, val):
    """Offer ``val`` to the heap ``self.nums`` and return the heap's root.

    The heap keeps at most ``self.k`` values. While it is not full, ``val``
    is pushed; afterwards ``val`` only enters the heap by replacing a
    strictly smaller root.
    """
    heap = self.nums
    if len(heap) < self.k:
        heappush(heap, val)
        return heap[0]
    if val > heap[0]:
        heapreplace(heap, val)
    return heap[0]
# insert an element into heap heapq.heappush(lst, 10) print('Heap Push: ', lst) # pop min element from heap and return it # if heap is empty then it return an Index Error print('Heap pop:', heapq.heappop(lst)) print('Heap after pop: ', lst) # heap pushpop insert an element and return min ele from heap # this run fater than a comb of push and then pop print("Pop and push: ", heapq.heappushpop(lst, 12)) print("Heap: ", lst) # pop then push element to heap # raise index error if heap is empty print("Heap replace: ", heapq.heapreplace(lst, 0)) print("Heap: ", lst) # n largest element, in sorted order # if n is larger than size of heap no index error is shown print("3 largest Heap element: ", heapq.nlargest(3, lst)) # n minimum element, in sorted order # if n is larger than size of heap no index error is shown print("3 smallest elemet: ", heapq.nsmallest(3, lst)) # get no of elements in heap print(len(lst)) print(lst)
)  # -> Value10 // The ChainMap reflects updates made to the underlying dict
# NOTE(review): the closing parenthesis above ends a statement that starts
# outside this snippet; it is kept unchanged.

### HeapQueue ###
# Creating a HeapQueue
import heapq

list1 = [5, 7, 9, 1, 3]
heapq.heapify(list1)  # Convert list1 into a heap (in place)
heapq.heappush(list1, 4)  # Push the value 4 onto the heap
print(
    list(list1)
)  # -> [1, 3, 4, 7, 5, 9] // The heap is a plain list; list() only makes a copy
heapq.heappop(list1)  # Return and remove the smallest element
heapq.heappushpop(list1, 2)  # Push 2, then pop and return the smallest element
heapq.heapreplace(list1, 2)  # Pop the smallest element first, then push 2
heapq.nlargest(3, list1)  # Return the 3 largest elements
heapq.nsmallest(3, list1)  # Return the 3 smallest elements

### UserDict ###
# Creating a UserDict
from collections import UserDict

dict1 = {'Key1': 'Value1', 'Key2': 'Value2', 'Key3': 5}
user_dict = UserDict(dict1)
user_dict.data  # Access the underlying dict holding the content taken from dict1

## Creating a dict class with modified behavior -> Deletion not allowed
class MyDict(UserDict):
def find_maximums(self, model, num, exclusive):
    """Search for high-scoring points with simulated annealing.

    A set of points performs random walks. A move is accepted with a
    probability that depends on the score difference and the current
    temperature. The best ``num`` distinct points seen are tracked in a
    min-heap.

    Parameters
    ----------
    model
        Object whose ``predict(points)`` returns one score per point.
    num
        Maximum number of points to return.
    exclusive
        Points that must never be returned.

    Returns
    -------
    list
        The tracked points whose score is >= 0, best score first. It may
        contain fewer than ``num`` points.
    """
    tic = time.time()
    temp, n_iter, early_stop, log_interval = (
        self.temp,
        self.n_iter,
        self.early_stop,
        self.log_interval,
    )

    # Resume from the points of the previous call when persistence is on.
    if self.persistent and self.points is not None:
        points = self.points
    else:
        points = np.array(sample_ints(0, len(self.task.config_space), self.parallel_size))

    scores = model.predict(points)

    # build heap and insert initial points
    # The heap starts with num placeholders of score -inf and unique negative
    # ids, so that it always holds exactly num items.
    heap_items = [(float("-inf"), -1 - i) for i in range(num)]
    heapq.heapify(heap_items)
    # Points that may not be inserted: the excluded ones plus everything that
    # is currently in the heap.
    in_heap = set(exclusive)
    in_heap.update([x[1] for x in heap_items])

    for s, p in zip(scores, points):
        if s > heap_items[0][0] and p not in in_heap:
            pop = heapq.heapreplace(heap_items, (s, p))
            in_heap.remove(pop[1])
            in_heap.add(p)

    k = 0
    k_last_modify = 0

    # temp is either a (start, end) pair that is cooled linearly over the
    # iterations, or a single constant temperature.
    if isinstance(temp, (tuple, list, np.ndarray)):
        t = temp[0]
        cool = 1.0 * (temp[0] - temp[1]) / (n_iter + 1)
    else:
        t = temp
        cool = 0

    # Stop after n_iter iterations, or when the heap has not changed for
    # early_stop iterations.
    while k < n_iter and k < k_last_modify + early_stop:
        new_points = np.empty_like(points)
        for i, p in enumerate(points):
            new_points[i] = random_walk(p, self.dims)

        new_scores = model.predict(new_points)

        # Acceptance probability exp(min(delta / t, 1)): improvements give a
        # value >= 1 and are therefore always accepted.
        ac_prob = np.exp(np.minimum((new_scores - scores) / (t + 1e-5), 1))
        ac_index = np.random.random(len(ac_prob)) < ac_prob

        points[ac_index] = new_points[ac_index]
        scores[ac_index] = new_scores[ac_index]

        # All proposals are offered to the heap, including rejected moves.
        for s, p in zip(new_scores, new_points):
            if s > heap_items[0][0] and p not in in_heap:
                pop = heapq.heapreplace(heap_items, (s, p))
                in_heap.remove(pop[1])
                in_heap.add(p)
                k_last_modify = k

        k += 1
        t -= cool

        if log_interval and k % log_interval == 0:
            t_str = "%.2f" % t
            logger.debug(
                "SA iter: %d\tlast_update: %d\tmax-0: %.2f\tmax-1: %.2f\ttemp: %s\t"
                "elapsed: %.2f",
                k,
                k_last_modify,
                heap_items[0][0],
                np.max([v for v, _ in heap_items]),
                t_str,
                time.time() - tic,
            )

    # Best score first; this also drops the -inf placeholders and any point
    # with a negative score.
    heap_items.sort(key=lambda item: -item[0])
    heap_items = [x for x in heap_items if x[0] >= 0]
    logger.debug(
        "SA iter: %d\tlast_update: %d\telapsed: %.2f", k, k_last_modify, time.time() - tic
    )
    logger.debug("SA Maximums: %s", heap_items)

    if self.persistent:
        self.points = points

    return [x[1] for x in heap_items]
def append(self, log_probability, new_state):
    """Offer ``(log_probability, new_state)`` to the beam.

    ``self.beam`` is a min-heap holding at most ``self.beam_size`` entries,
    so its root is the least probable entry kept. While the beam is not full
    the entry is simply pushed. Once it is full, the entry only enters the
    beam if it is better than the current worst entry, which is then dropped.
    An entry that is worse than everything in a full beam is discarded
    instead of evicting a better one.

    With a ``beam_size`` of 0 the beam stays empty and no error is raised.
    """
    entry = (log_probability, new_state)
    if len(self.beam) < self.beam_size:
        heapq.heappush(self.beam, entry)
    else:
        # heappushpop pushes first and then pops the smallest, so the popped
        # entry is the new one itself whenever it is the worst candidate.
        heapq.heappushpop(self.beam, entry)
def population_pop_push(heap, individual):
    """Put gene in heap, pushing out the one with the lowest fitness.

    ``individual`` only enters the heap if its first item is strictly greater
    than the first item of the heap root; otherwise the heap is left
    unchanged. The heap must be non-empty.
    """
    lowest_fitness = heap[0][0]
    if not individual[0] > lowest_fitness:
        return
    heapq.heapreplace(heap, individual)
import heapq as h

arr = [23, 45, 43, 56, 13, 69, 52, 64, 31, 45]
h.heapify(arr)  # Rearrange arr in place into a min-heap
print(arr)
h.heappush(arr, 44)  # Insert 44, keeping the heap invariant
print(arr)
h.heappop(arr)  # Remove the smallest element
print(arr)
h.heapreplace(arr, 104)  # Remove the smallest element, then insert 104
print(arr)
def Push(self, elem):
    """Offer ``elem`` to the top-k min-heap ``self.data``.

    Elements are compared by their first item. While fewer than ``self.k``
    elements are stored, ``elem`` is pushed. Afterwards it replaces the heap
    root only if its first item is strictly greater than the root's.
    """
    heap = self.data
    if len(heap) < self.k:
        heapq.heappush(heap, elem)
        return
    # The root holds the smallest key among the elements kept.
    smallest_key = heap[0][0]
    if elem[0] > smallest_key:
        heapq.heapreplace(heap, elem)