class Beam(object):
    """Fixed-capacity collection of hypotheses kept sorted by ``hyp['score']``.

    The best-ranked hypothesis is stored first; whenever the beam grows past
    ``size`` the last (worst-ranked) hypothesis is dropped.
    """

    def __init__(self, size, lower_better=True):
        """Create an empty beam.

        :param size: maximum number of hypotheses to retain.
        :param lower_better: when true, lower scores rank first; otherwise
            the score is negated so that higher scores rank first.
        """
        if lower_better:
            self.hypotheses = SortedListWithKey(key=lambda x: x['score'])
        else:
            self.hypotheses = SortedListWithKey(key=lambda x: -x['score'])
        self.size = size

    def add(self, hyp, beam_constraints=()):
        """Insert ``hyp`` if it satisfies every constraint, then trim the beam.

        :param hyp: mapping that provides a ``'score'`` entry.
        :param beam_constraints: iterable of predicates called with ``hyp``;
            the hypothesis is only added when all of them return a truthy
            value.  The default is an immutable empty tuple (no constraints)
            so no mutable object is shared between calls.
        """
        if all(check(hyp) for check in beam_constraints):
            self.hypotheses.add(hyp)
        if len(self.hypotheses) > self.size:
            # One insertion can overflow the beam by at most one element.
            assert len(self.hypotheses) == self.size + 1
            del self.hypotheses[-1]

    def __len__(self):
        """Return the number of hypotheses currently held."""
        return len(self.hypotheses)

    def __iter__(self):
        """Yield the hypotheses in ranking order (best first)."""
        for hyp in self.hypotheses:
            yield hyp
class PQ_Frequency(Sampling_Frequency):
    """Frequency policy whose nodes are indexed in a list sorted by ``objective_f``."""

    def __init__(self):
        super(PQ_Frequency, self).__init__(sampling=0)
        self.nodes = SortedListWithKey(key=self.objective_f)
        self.Nodes = DummyList()

    def add_node(self, item, cost):
        """Create a node through the parent class and index it in ``nodes``."""
        created = super(PQ_Frequency, self).add_node(item, cost)
        self.nodes.add(created)
        return created

    def touch(self, node):
        """Delegate to the parent's ``touch`` while keeping ``nodes`` consistent.

        The node is removed from the sorted list before the parent call and
        re-inserted afterwards, presumably because the parent call changes the
        value ``objective_f`` sorts on.  ``is_restoring`` is read separately
        before and after the call; nodes flagged as restoring are not touched
        in the sorted list.
        """
        indexed_before = not node.is_restoring
        if indexed_before:
            self.nodes.remove(node)
        super(PQ_Frequency, self).touch(node)
        indexed_after = not node.is_restoring
        if indexed_after:
            self.nodes.add(node)

    def evict_node(self):
        """Remove and return the first node of the sorted list.

        The evicted node is recorded in ``historical_nodes`` when
        ``saving_counts`` is set, and its objective value is stored in
        ``last_obj_f``.
        """
        victim = self.nodes.pop(0)
        if self.saving_counts:
            self.historical_nodes[victim.value] = victim
        self.last_obj_f = self.objective_f(victim)
        return victim
def test_copy_copy():
    """``copy.copy`` must give an independent list: growing the source leaves the copy alone."""
    import copy

    source = SortedListWithKey(range(100), load=7, key=modulo)
    duplicate = copy.copy(source)

    source.add(100)

    assert len(source) == 101
    assert len(duplicate) == 100
class ChanThread:
    """Posts of a single thread ordered by timestamp, plus an index of replies."""

    def __init__(self, chan, thread_id):
        self.chan = chan
        self.thread_id = thread_id
        # Largest post timestamp seen by add_post() so far.
        self.timestamp = 0
        # post_id -> set(post_ids of the posts that reply to it)
        self.replies_by_post_id = {}
        self.posts = SortedListWithKey(key=lambda post: post.timestamp)

    def get_posts(self):
        """Return the timestamp-sorted post list itself (not a copy)."""
        return self.posts

    def delete_post(self, post):
        """Remove ``post`` from the thread; any failure is logged, not raised."""
        try:
            self.posts.remove(post)
        except Exception as e:
            logger.exception("Exception removing post: {}".format(e))

    def add_post(self, post):
        """Store ``post``, index its reply links and advance the thread timestamp."""
        self.posts.add(post)
        self.update_post_links(post)
        self.timestamp = max(self.timestamp, post.timestamp)

    def update_post_links(self, post):
        """Register ``post`` as a reply to every id listed in ``post.target_posts``."""
        for target_id in post.target_posts:
            self.replies_by_post_id.setdefault(target_id, set()).add(post.post_id)

    def get_post_replies(self, post_id):
        """Return the set of post ids replying to ``post_id`` (empty if none are known)."""
        return self.replies_by_post_id.get(post_id, set())
class QueueTimer(TimerService):
    """Timer service that keeps scheduled callbacks ordered by due timestamp."""

    TimerEvent = NamedTuple('TimerEvent', [('timestamp', float),
                                           ('callback', Callable)])

    def __init__(self, get_current_time=time.perf_counter):
        """:param get_current_time: zero-argument callable returning the current time."""
        self._get_current_time = get_current_time
        self._events = SortedListWithKey(key=lambda v: v.timestamp)

    def queue_size(self):
        """Return the number of events still waiting to fire."""
        return len(self._events)

    def service(self):
        """Fire, earliest first, every event whose timestamp has been reached.

        The clock is re-read before each event, so callbacks that take time
        (or schedule new events) are taken into account.
        """
        while len(self._events):
            earliest = self._events[0]
            if not (earliest.timestamp <= self._get_current_time()):
                break
            self._events.pop(0).callback()

    def get_current_time(self) -> float:
        """Return the current time as reported by the injected clock."""
        return self._get_current_time()

    def schedule(self, delay: float, callback: Callable):
        """Queue ``callback`` to fire ``delay`` time units from now."""
        due = self._get_current_time() + delay
        event = self.TimerEvent(timestamp=due, callback=callback)
        self._events.add(event)

    def cancel(self, callback: Callable):
        """Drop every queued event whose callback equals ``callback``."""
        positions = [
            pos for pos, event in enumerate(self._events)
            if event.callback == callback
        ]
        # Delete from the back so the remaining positions stay valid.
        while positions:
            del self._events[positions.pop()]
def test_copy():
    """``SortedListWithKey.copy`` must give a list that is independent of its source."""
    source = SortedListWithKey(range(100), key=negate)
    source._reset(7)

    clone = source.copy()
    source.add(100)

    assert len(source) == 101
    assert len(clone) == 100
def __attrs_post_init__(self):
    """Replace the raw ``types`` and ``registers`` mappings with parsed objects.

    ``self.types`` keys have the form ``"<kind> <name>"`` (only kind ``r32``
    is accepted) and become a ``name -> Type`` dict.  ``self.registers`` keys
    have the form ``"<kind> <location>"``; each value is either a type name
    or a mapping with a ``'name'`` entry (which is popped from that mapping).
    The registers end up in a list sorted by ``location``.

    Any failure is re-raised after a note naming the offending entry has been
    written to stderr.
    """
    parsed_types = {}
    for spec, options in self.types.items():
        try:
            kind, name = spec.split()
            assert kind in ['r32']
            parsed_types[name] = Type(name, kind, **options)
        except:  # noqa: E722 - only annotates the error, then re-raises
            sys.stderr.write("Note: in type '{}':\n".format(spec))
            raise
    self.types = parsed_types

    by_location = SortedListWithKey(key=lambda x: x.location)
    for spec, value in self.registers.items():
        try:
            kind, location = spec.split(' ', 1)
            if isinstance(value, str):
                name, config = value, {}
            else:
                name, config = value.pop('name'), value
            assert name in self.types.keys()
            by_location.add(
                Register(name, kind, location, config.get('type')))
        except:  # noqa: E722 - only annotates the error, then re-raises
            sys.stderr.write("Note: in register '{}':\n".format(spec))
            raise
    self.registers = by_location
class ListEventStream(SimEventStream):
    """Event stream backed by a list that is kept sorted by event timestamp."""

    def __init__(self, events: Iterable[SimEvent] = ()):
        self._events = SortedListWithKey(iterable=events,
                                         key=lambda ev: ev.timestamp)

    def add(self, event):
        """Insert a single event at its sorted position."""
        self._events.add(event)

    def extend(self, events):
        """Insert every event from ``events``."""
        self._events.update(events)

    def remove_all(self, predicate: Callable):
        """Delete every event for which ``predicate(event)`` is truthy."""
        matches = [pos for pos, event in enumerate(self._events)
                   if predicate(event)]
        # Delete from the back so the remaining positions stay valid.
        while matches:
            del self._events[matches.pop()]

    @property
    def events(self):
        """A sliced copy of the pending events, in timestamp order."""
        return self._events[:]

    def advance(self, _):
        """Discard the earliest event; the argument is ignored."""
        self._events.pop(0)

    def peek(self) -> Optional[SimEvent]:
        """Return the earliest event without removing it, or ``None`` if empty."""
        return self._events[0] if len(self._events) > 0 else None

    def sort(self):
        """No-op: the underlying list is always sorted."""
        pass
def _characterize_signal(beg, end):
    """
    Characterizes the available signal in a specific time interval.

    Parameters
    ----------
    beg:
        Starting time point of the interval.
    end:
        Last time point of the interval.

    Returns
    -------
    out:
        Sorted list with one ``LeadInfo`` entry per available lead, ordered by
        decreasing quality. Each entry holds the lead, the signal samples,
        the relevant points that represent the samples, the baseline level
        estimation for the fragment, and the quality of the fragment in that
        lead. ``None`` is returned as soon as one lead has no samples in the
        interval.
    """
    by_quality = SortedListWithKey(key=lambda v: -v.quality)
    for lead in sig_buf.get_available_leads():
        baseline, quality = characterize_baseline(lead, beg, end)
        fragment = sig_buf.get_signal_fragment(beg, end, lead=lead)[0]
        if len(fragment) == 0:
            return None
        # Simplified representation of the fragment, bounded by
        # C.RDP_MIN_DIST and C.RDP_NPOINTS (original note: at most 9 points,
        # minimum relevant deviation of 50 uV).
        simplified = RDP.arrayRDP(fragment, C.RDP_MIN_DIST, C.RDP_NPOINTS)
        by_quality.add(LeadInfo(lead, fragment, simplified, baseline, quality))
    return by_quality
def rangecover(whole: Range, covered: Iterable[Range]) -> Iterable[Range]:
    """Greedily yield ranges from ``covered`` that fill the gaps left in ``whole``.

    On every round the candidate overlapping the still-uncovered gaps the
    most is yielded and removed from the pool; the gaps are then recomputed
    with ``gaps(selected, whole)``.  The generator stops when nothing is left
    uncovered, no candidate remains, or no remaining candidate overlaps a gap.
    """
    uncovered = [whole]
    pool = set(covered)
    chosen = SortedListWithKey(key=lambda x: x[0])

    while uncovered and pool:
        top_gain, winner = 0.0, None
        for option in pool:
            gain = 0.0
            for hole in uncovered:
                overlap = intersect(hole, option)
                if overlap:
                    gain += overlap[1] - overlap[0]
            # Strict comparison: candidates adding nothing are never picked.
            if gain > top_gain:
                top_gain, winner = gain, option

        if not winner:
            return

        yield winner
        chosen.add(winner)
        pool.remove(winner)
        uncovered = list(gaps(chosen, whole))
class PositionColumn(NumericColumn):
    """Numeric column that also tracks its row ids ordered by position value."""

    def __init__(self, table, col_id, col_info):
        super(PositionColumn, self).__init__(table, col_id, col_info)
        # Row ids ordered by their position (the value raw_get returns).
        self._sorted_rows = SortedListWithKey(key=self.raw_get)

    def set(self, row_id, value):
        """Store ``value`` for ``row_id`` and keep the sorted row index in sync.

        The row is dropped from the index while its old value is still in
        place, and re-added afterwards unless the new value is the default.
        """
        self._sorted_rows.discard(row_id)
        super(PositionColumn, self).set(row_id, value)
        has_position = value != self.getdefault()
        if has_position:
            self._sorted_rows.add(row_id)

    def copy_from_column(self, other_column):
        """Copy the data of ``other_column`` and rebuild the sorted row index from it."""
        super(PositionColumn, self).copy_from_column(other_column)
        copied_rows = other_column._sorted_rows[:]
        self._sorted_rows = SortedListWithKey(copied_rows, key=self.raw_get)

    def prepare_new_values(self, values, ignore_data=False, action_summary=None):
        """Compute positions for ``values`` plus the relabeling of existing rows.

        This adjusts positions and relabels existing rows with new positions
        (without changing sort order) to make space for the new positions.
        It is also used to update the position of an existing row: a new
        value is found for it, and when that value is later set the old
        position is removed and the new one added.

        Returns ``(new_values, adjustments)`` where each adjustment is a
        ``(row_id, position)`` pair.
        """
        rows = (SortedListWithKey([], key=self.raw_get) if ignore_data
                else self._sorted_rows)
        adjustments, new_values = relabeling.prepare_inserts(rows, values)
        relabeled = [(self._sorted_rows[index], pos)
                     for (index, pos) in adjustments]
        return new_values, relabeled
def CreateString(n, k):
    """Best-first search for a string of length ``n`` that ``Strategy(k)`` scores 0.

    The search starts from ``'A' * n``.  The candidate with the lowest
    ``evaluate_node`` score is expanded on each round; children with a
    negative score are discarded.  Returns the first candidate scoring 0
    joined into a string, or ``''`` when the candidates run out or after
    ``k + 1`` rounds.
    """
    strategy = Strategy(k)
    frontier = SortedListWithKey(key=strategy.evaluate_node)
    frontier.add(list('A' * n))

    altura = 0
    while altura < k + 1:
        try:
            candidate = frontier.pop(0)
            if strategy.evaluate_node(candidate) == 0:
                return ''.join(candidate)
            for child in expand(candidate):
                if strategy.evaluate_node(child) >= 0:
                    frontier.add(child)
            # NOTE(review): the counter is assumed to advance once per
            # expanded candidate -- confirm against the intended search depth.
            altura += 1
        except IndexError:
            # Raised by pop(0) when no candidate is left.
            return ''
    return ''
def extract_collocations(self, metric_class):
    """Score every bigram of the language model with ``metric_class``.

    Bigrams are skipped according to the ``exclude_punctuation``,
    ``exclude_conj`` and ``exclude_props`` switches.  The result is a sorted
    list of ``(metric_val, freq_first, freq_last, freq_bigram, first, last)``
    tuples ordered by decreasing metric value.
    """
    assert issubclass(metric_class, Metric)
    metric = metric_class()
    ranked = SortedListWithKey(key=lambda x: -x[0])
    unigram_counts = self.language_model.get_unigrams()
    bigram_counts = self.language_model.get_bigrams()

    for (first, last), freq_bigram in bigram_counts.items():
        if self.exclude_punctuation and (
                first in self.PUNCT or last in self.PUNCT
                or self.INITIALS.match(first) or self.INITIALS.match(last)):
            continue
        if self.exclude_conj and (
                first in self.CONJ_RU or last in self.CONJ_RU):
            continue
        if self.exclude_props and (
                first in self.PROPOSITIONS_RU or last in self.PROPOSITIONS_RU):
            continue

        freq_first = unigram_counts[first]
        freq_last = unigram_counts[last]
        metric_val = metric.evaluate(
            freq_first, freq_last, freq_bigram,
            self.language_model.get_vocab_size())
        entry = (metric_val, freq_first, freq_last, freq_bigram, first, last)
        ranked.add(entry)
    return ranked
def search(self, word: str, distance=0) -> SortedListWithKey:
    """
    Returns the candidate words that become equal to the given word after
    modifying it within the Levenshtein (DL) distance.

    The trie is walked depth-first with an explicit stack; a branch is
    abandoned as soon as the minimum of its distance row exceeds ``distance``.

    :param word Misspelled word
    :param distance Maximum distance allowed for a candidate
    :return sorted list of ``(candidate, distance)`` tuples, ordered by
        distance first and then by candidate
    """
    candidates = SortedListWithKey(key=lambda x: x[::-1])

    stack = []
    for letter, children in self.root.items():
        first_row = list(range(self.__get_row_len(word)))
        stack.append((children, [letter], None, first_row))

    while stack:
        node, prefix, pre_prev_row, prev_row = stack.pop()
        curr_row, min_dist = self.__calculate_distance(
            word, prefix, pre_prev_row, prev_row)
        if min_dist > distance:
            continue

        cost = curr_row[-1]
        if cost <= distance and self.__END in node:
            candidates.add((''.join(prefix), cost))

        for letter, children in node.items():
            if letter == self.__END:
                continue
            # The row before the previous one is only passed along when the
            # Damerau modification is enabled.
            older_row = prev_row if self.use_damerau_modification else None
            stack.append((children, prefix + [letter], older_row, curr_row))

    return candidates
def test_getitem_slice():
    """Slices of a ``negate``-keyed sorted list must match a reverse-sorted plain list."""
    random.seed(0)
    slt = SortedListWithKey(key=negate)
    slt._reset(17)

    expected = []
    for _ in range(100):
        sample = random.random()
        slt.add(sample)
        expected.append(sample)
    expected.sort(reverse=True)

    bounds = [-75, -25, 0, 25, 75]
    steps = [-5, -1, 1, 5]

    for start in bounds:
        assert slt[start:] == expected[start:]

    for stop in bounds:
        assert slt[:stop] == expected[:stop]

    for step in steps:
        assert slt[::step] == expected[::step]

    for start in bounds:
        for stop in bounds:
            assert slt[start:stop] == expected[start:stop]

    for stop in bounds:
        for step in steps:
            assert slt[:stop:step] == expected[:stop:step]

    for start in bounds:
        for step in steps:
            assert slt[start::step] == expected[start::step]

    for start in bounds:
        for stop in bounds:
            for step in steps:
                assert slt[start:stop:step] == expected[start:stop:step]
class Map:
    """Collection of routing rules, indexed both globally and per endpoint."""

    default_converters = {
        'any': AnyConverter,
        'default': StringConverter,
        'float': FloatConverter,
        'int': IntegerConverter,
        'path': PathConverter,
        'string': StringConverter,
        'uuid': UUIDConverter,
    }

    def __init__(self, host_matching: bool = False) -> None:
        def by_match_key(rule):
            return rule.match_key

        def new_endpoint_rules():
            # Rules of one endpoint are ordered by their build_key.
            return SortedListWithKey(key=lambda rule: rule.build_key)

        self.host_matching = host_matching
        # Per-instance copy so converters can be changed without touching
        # the class-level defaults.
        self.converters = dict(self.default_converters)
        self.rules = SortedListWithKey(key=by_match_key)
        self.endpoints: Dict[str, SortedListWithKey] = defaultdict(
            new_endpoint_rules)

    def add(self, rule: 'Rule') -> None:
        """Bind ``rule`` to this map and register it in both indexes."""
        rule.bind(self)
        endpoint_rules = self.endpoints[rule.endpoint]
        endpoint_rules.add(rule)
        self.rules.add(rule)

    def bind_to_request(
            self,
            scheme: str,
            server_name: str,
            method: str,
            path: str,
    ) -> 'MapAdapter':
        """Return an adapter for this map bound to the given request details."""
        adapter = MapAdapter(self, scheme, server_name, method, path)
        return adapter

    def bind(self, scheme: str, server_name: str) -> 'MapAdapter':
        """Return an adapter bound only to a scheme and server name."""
        adapter = MapAdapter(self, scheme, server_name)
        return adapter
def _characterize_signal(beg, end):
    """
    Characterizes the available signal in a specific time interval.

    Parameters
    ----------
    beg:
        Starting time point of the interval.
    end:
        Last time point of the interval.

    Returns
    -------
    out:
        Sorted list with one ``LeadInfo`` entry per available lead, ordered by
        decreasing quality. Each entry holds the lead, the signal samples,
        the relevant points that represent the samples, the baseline level
        estimation for the fragment, and the quality of the fragment in that
        lead. ``None`` is returned as soon as one lead has no samples in the
        interval.
    """
    by_quality = SortedListWithKey(key=lambda v: -v.quality)
    for lead in sig_buf.get_available_leads():
        baseline, quality = characterize_baseline(lead, beg, end)
        fragment = sig_buf.get_signal_fragment(beg, end, lead=lead)[0]
        if len(fragment) == 0:
            return None
        # Simplified representation of the fragment, bounded by
        # C.RDP_MIN_DIST and C.RDP_NPOINTS (original note: at most 9 points,
        # minimum relevant deviation of 50 uV).
        simplified = RDP.arrayRDP(fragment, C.RDP_MIN_DIST, C.RDP_NPOINTS)
        by_quality.add(LeadInfo(lead, fragment, simplified, baseline, quality))
    return by_quality
def test_getitem_slice():
    """Slices of a ``modulo``-keyed sorted list must match a plain list sorted by ``modulo``."""
    random.seed(0)
    slt = SortedListWithKey(load=17, key=modulo)

    expected = []
    for _ in range(100):
        sample = random.random()
        slt.add(sample)
        expected.append(sample)
    expected.sort(key=modulo)

    bounds = [-75, -25, 0, 25, 75]
    steps = [-5, -1, 1, 5]

    for start in bounds:
        assert slt[start:] == expected[start:]

    for stop in bounds:
        assert slt[:stop] == expected[:stop]

    for step in steps:
        assert slt[::step] == expected[::step]

    for start in bounds:
        for stop in bounds:
            assert slt[start:stop] == expected[start:stop]

    for stop in bounds:
        for step in steps:
            assert slt[:stop:step] == expected[:stop:step]

    for start in bounds:
        for step in steps:
            assert slt[start::step] == expected[start::step]

    for start in bounds:
        for stop in bounds:
            for step in steps:
                assert slt[start:stop:step] == expected[start:stop:step]
def CreateString(n, k):
    """Best-first search for a string of length ``n`` that ``Strategy(k)`` scores 0.

    The search starts from ``'A' * n``.  The candidate with the lowest
    ``evaluate_node`` score is expanded on each round; children with a
    negative score are discarded.  Returns the first candidate scoring 0
    joined into a string, or ``''`` when the candidates run out or after
    ``k + 1`` rounds.
    """
    strategy = Strategy(k)
    frontier = SortedListWithKey(key=strategy.evaluate_node)
    frontier.add(list('A' * n))

    altura = 0
    while altura < k+1:
        try:
            candidate = frontier.pop(0)
            if strategy.evaluate_node(candidate) == 0:
                return ''.join(candidate)
            for child in expand(candidate):
                if strategy.evaluate_node(child) >= 0:
                    frontier.add(child)
            # NOTE(review): the counter is assumed to advance once per
            # expanded candidate -- confirm against the intended search depth.
            altura += 1
        except IndexError:
            # Raised by pop(0) when no candidate is left.
            return ''
    return ''
def solve(self, n_threads: int, n_individuals: int, topol=topology.unconnected()) -> SortedList:
    """Run the iterative solve loop and return every solution found.

    Starting from ``self._start_problem``, a solution is generated, stored
    and handed to ``self._iterate``, which returns the next problem and
    whether another round is required.

    :param n_threads: forwarded to ``_generate_solution``.
    :param n_individuals: forwarded to ``_generate_solution``.
    :param topol: topology forwarded to ``_generate_solution``.
    :return: the solutions found, sorted by ``Solution.get_cost``.  If an
        exception interrupts the loop it is logged and the solutions
        collected so far are returned.
    """
    solutions = SortedListWithKey(key=Solution.get_cost)
    try:
        problem = self._start_problem
        iteration = 0
        do_iteration = True
        while do_iteration:
            solution = self._generate_solution(problem, n_individuals,
                                               n_threads, topol)
            solutions.add(solution)
            iteration += 1
            self._logger.info(
                "({}) - New solution found.".format(iteration))
            problem, do_iteration = self._iterate(self._probl_factory,
                                                  solution)
        self._logger.info("Differential evolution completed.")
    # FIXME - is horrible to have to catch all possible exceptions but
    # requires a bit of time to understand all the possible exceptions
    # that can be thrown.
    except:  # noqa: E722 - deliberate catch-all, see FIXME above
        self._logger.exception("Exception occurred during solution...")
        self._logger.error("Returning solutions found so far")
    return solutions
def sort_list(scenario):
    """Write the results file back out sorted by its first field.

    Every line of the file configured as ``app.config['RESULTS']`` is split
    on commas, each field is passed through ``convertir`` and the resulting
    rows are ordered by their first element.  The sorted rows are written to
    ``app.config['OUT']`` below a banner naming ``scenario`` and a column
    header line.

    :param scenario: value interpolated into the banner of the output file.
    """
    rows = SortedListWithKey(key=lambda item: item[0])

    # Read the file holding the final results.
    results_path = os.path.join(app.config['RESULTS'])
    with open(results_path, 'r') as f:
        for line in f:
            rows.add([convertir(x) for x in line.split(',')])

    # Write the sorted rows to the final output file.  The context manager
    # guarantees the file is closed even if one of the writes fails.
    save_path = os.path.join(app.config['OUT'])
    with open(save_path, "w") as out:
        out.write(
            '###################################################################\n'
        )
        out.write(
            '# Escenario {} #\n'
            .format(scenario))
        out.write(
            '###################################################################\n\n'
        )
        out.write('#t,Sb,DCc,Vj,Vcpu,Vram,Vnet,Ucpu,Uram,Unet,R,SLA,tinit,tend\n')
        out.write('\n')
        for item in rows:
            out.write(','.join(map(str, item)))
            out.write('\n')
def test_key2():
    """Values that cannot be compared can still be stored when the key is constant."""
    class Incomparable:
        pass

    first, second = Incomparable(), Incomparable()

    slt = SortedListWithKey(key=lambda val: 1, value_orderable=False)
    for item in (first, second):
        slt.add(item)

    assert slt == [first, second]
def test_key2():
    """Values that cannot be compared can still be stored when the key is constant."""
    class Incomparable:
        pass

    first, second = Incomparable(), Incomparable()

    slt = SortedListWithKey(key=lambda val: 1)
    for item in (first, second):
        slt.add(item)

    assert slt == [first, second]
class OrderedNonUnique(Index):
    """Index that keeps its values sorted by key and allows repeated keys."""

    unique = False

    def __init__(self, key, name=None):
        super(OrderedNonUnique, self).__init__(key, name)
        self._data = SortedListWithKey(key=self._getkey)

    def _insert(self, value):
        self._data.add(value)

    def _update(self, old_value, new_value):
        """Replace ``old_value`` by deleting it and inserting ``new_value``."""
        self._delete(old_value)
        self._insert(new_value)

    def _delete(self, value):
        self._data.remove(value)

    def __getitem__(self, key):
        """Return an iterator over the values stored under ``key``."""
        return self.irange_key(key, key)

    def __len__(self):
        return len(self._data)

    def __contains__(self, value):
        return value in self._data

    def __reversed__(self):
        return reversed(self._data)

    def __iter__(self):
        return self._data.irange()

    def __repr__(self):
        return repr(self._data)

    def count(self, val):
        """Return how many times the value ``val`` is stored."""
        return self._data.count(val)

    def count_key(self, key):
        """Return how many stored values have exactly the key ``key``."""
        return sum(1 for _ in self._data.irange_key(key, key))

    def islice(self, start=None, stop=None, reverse=False):
        """Iterate over the values by position; see ``SortedListWithKey.islice``."""
        return self._data.islice(start, stop, reverse)

    def irange(self, minimum=None, maximum=None, inclusive=(True, True),
               reverse=False):
        """Iterate over a range of values; see ``SortedListWithKey.irange``."""
        return self._data.irange(minimum, maximum, inclusive, reverse)

    def irange_key(self, min_key=None, max_key=None, inclusive=(True, True),
                   reverse=False):
        """Iterate over a range of keys; see ``SortedListWithKey.irange_key``."""
        return self._data.irange_key(min_key, max_key, inclusive, reverse)
def __mul__(self, other):
    """Return a new set holding the elements present in both ``self`` and ``other``.

    The result reuses the key function of this set's internal list.  An
    empty left operand short-circuits to a fresh empty ``Set``.
    """
    assert isinstance(other, Set)
    if len(self.list) == 0:
        return Set()

    shared = SortedListWithKey(key=self.list._key)
    for element in self.list:
        if element in other.list:
            shared.add(element)
    return Set(list=shared)
class HighestReplay(Replay):
    """Replay that keeps at most ``max_size`` episodes, ordered by ``get_reward``."""

    def __init__(self, max_size: int):
        super().__init__()
        self.max_size = max_size
        self.episodes = SortedListWithKey(key=get_reward)

    def add(self, episode: Episode):
        """Record ``episode`` and evict the lowest-ranked one on overflow.

        The episode's total reward and step count are always appended to
        ``known_returns`` / ``known_horizons``, even if the episode itself is
        evicted right away.
        """
        self.episodes.add(episode)
        self.known_returns.append(episode.total_reward)
        self.known_horizons.append(episode.steps)

        over_capacity = len(self.episodes) > self.max_size
        if over_capacity:
            # Index 0 holds the episode with the smallest get_reward key.
            self.episodes.pop(0)
def test_count():
    """``count`` must report how many times each value was added."""
    slt = SortedListWithKey(load=7, key=negate)
    assert slt.count(0) == 0

    # Add every value `value` exactly `value` times.
    for value in range(100):
        for _ in range(value):
            slt.add(value)
        slt._check()

    for value in range(100):
        assert slt.count(value) == value
def test_count():
    """``count`` must report how many times each value was added."""
    slt = SortedListWithKey(load=7, key=modulo, value_orderable=False)
    assert slt.count(0) == 0

    # Add every value `value` exactly `value` times.
    for value in range(100):
        for _ in range(value):
            slt.add(value)
        slt._check()

    for value in range(100):
        assert slt.count(value) == value
class LRUCache:
    """Bounded key/value cache that evicts the least recently used key.

    Recency is tracked with integer stamps in ``_used`` (larger means more
    recent).  Two bookkeeping modes exist:

    * ``READ``: a use only bumps a counter; eviction scans ``_used`` with
      ``min`` to find the oldest key.
    * ``WRITE``: keys are additionally kept in a list sorted by stamp, so
      eviction pops the first entry of that list.
    """

    READ = 'read'
    WRITE = 'write'

    def __init__(self, size, mode):
        """
        :param size: number of entries at which ``set`` starts evicting.
        :param mode: ``LRUCache.READ`` or ``LRUCache.WRITE``.
        """
        self._size = size
        self._mode = mode
        self._cache = {}
        # key -> recency stamp
        self._used = {}
        if mode == self.READ:
            self._time = 0
        elif mode == self.WRITE:
            self._times = SortedListWithKey(key=self._used.get)

    def _use(self, key):
        """Mark ``key`` as the most recently used entry."""
        if self._mode == self.READ:
            self._used[key] = self._time
            self._time += 1
        elif self._mode == self.WRITE:
            if key in self._used:
                # Must happen while the old stamp is still in _used: the
                # sorted list locates the key through that stamp.
                self._times.discard(key)
            # Checked after the discard: if `key` was the only tracked key
            # the list is now empty and has no last element to read.
            if self._times:
                self._used[key] = self._used[self._times[-1]] + 1
            else:
                self._used[key] = 0
            self._times.add(key)

    def _remove(self):
        """Evict the least recently used entry."""
        if self._mode == self.READ:
            lru = min(self._used, key=self._used.get)
            del self._cache[lru]
            del self._used[lru]
        elif self._mode == self.WRITE:
            lru = self._times.pop(0)
            del self._cache[lru]
            del self._used[lru]

    def get(self, key):
        """Return the value cached for ``key`` (or ``None``) and refresh its recency.

        Membership is tested explicitly so that a cached ``None`` value still
        counts as a hit and gets its recency refreshed.
        """
        if key not in self._cache:
            return None
        self._use(key)
        return self._cache[key]

    def set(self, key, value):
        """Store ``value`` under ``key``, evicting the oldest entry when full."""
        if key not in self._cache:
            if len(self._cache) == self._size:
                self._remove()
        self._cache[key] = value
        self._use(key)

    def __repr__(self):
        return repr(self._cache)
class Map:
    """Collection of routing rules, indexed both globally and per endpoint."""

    default_converters = {
        'any': AnyConverter,
        'default': StringConverter,
        'float': FloatConverter,
        'int': IntegerConverter,
        'path': PathConverter,
        'string': StringConverter,
        'uuid': UUIDConverter,
    }

    def __init__(self, host_matching: bool = False) -> None:
        def by_match_key(rule):
            return rule.match_key

        def new_endpoint_rules():
            # Rules of one endpoint are ordered by their build_key.
            return SortedListWithKey(key=lambda rule: rule.build_key)

        self.host_matching = host_matching
        # Per-instance copy so converters can be changed without touching
        # the class-level defaults.
        self.converters = dict(self.default_converters)
        self.rules = SortedListWithKey(key=by_match_key)
        self.endpoints: Dict[str, SortedListWithKey] = defaultdict(
            new_endpoint_rules)

    def add(self, rule: 'Rule') -> None:
        """Bind ``rule`` to this map and register it in both indexes."""
        rule.bind(self)
        endpoint_rules = self.endpoints[rule.endpoint]
        endpoint_rules.add(rule)
        self.rules.add(rule)

    def bind_to_request(
            self,
            secure: bool,
            server_name: str,
            method: str,
            path: str,
            query_string: bytes,
            websocket: bool,
            root_path: str,
    ) -> 'MapAdapter':
        """Return an adapter for this map bound to the given request details."""
        request_details = (
            secure, server_name, method, path, query_string, websocket,
            root_path,
        )
        return MapAdapter(self, *request_details)

    def bind(self, secure: bool, server_name: str) -> 'MapAdapter':
        """Return an adapter bound only to a security flag and server name."""
        adapter = MapAdapter(self, secure, server_name)
        return adapter

    def iter_rules(self, endpoint: Optional[str] = None) -> Iterator['Rule']:
        """Iterate over the rules of ``endpoint``, or over all rules when it is ``None``."""
        if endpoint is None:
            return iter(self.rules)
        return iter(self.endpoints[endpoint])
def create_palette(color_depth=8):
    """
    Create palette of all colors for color_depth bit rate.

    Every combination of ``2**color_depth`` levels per channel is scaled by
    ``MAX_COLOR_DEPTH / 2**color_depth`` and stored as a ``Color``; the
    palette is sorted by each color's integer channel average.
    """
    palette = SortedListWithKey(load=1000, key=lambda c: c.avg)
    scale = (MAX_COLOR_DEPTH / 2**color_depth)
    levels = [step * scale for step in range(0, 2**color_depth)]

    for r in levels:
        for g in levels:
            for b in levels:
                mean = int(avg([r, g, b]))
                palette.add(Color(r=r, g=g, b=b, avg=mean))
    return palette
def __add__(self, other):
    """Return a new set holding the elements of ``self`` and ``other``.

    The elements are deep-copied, so the result does not reference the
    members of either operand.  When only ``self`` is empty a shallow copy
    of ``other`` is returned instead.
    """
    assert isinstance(other, Set)

    self_is_empty = len(self.list) == 0
    if self_is_empty and len(other.list) == 0:
        return Set()
    if self_is_empty:
        return copy.copy(other)

    merged = SortedListWithKey(key=self.list._key)
    merged.update(copy.deepcopy(self.list))
    for element in copy.deepcopy(other.list):
        if element not in merged:
            merged.add(element)
    return Set(list=merged)
class ExamRoom:
    """Seat allocator that tracks the empty gaps between occupied seats.

    A gap is a pair ``(a, b)`` of occupied seats with nothing in between;
    ``-1`` and ``N`` act as sentinels for the two walls.  ``p`` maps a gap's
    left end to the gap, ``q`` maps its right end to the gap, and ``pq``
    keeps all gaps sorted so the best one to seat in is last.
    """

    def __init__(self, N):
        self.N = N
        whole_row = (-1, N)
        self.p = {-1: whole_row}
        self.q = {N: whole_row}
        # If the distances are equal, pick the gap with the smallest index.
        self.pq = SortedListWithKey(
            [whole_row],
            key=lambda x: (self.distance(x[0], x[1]), -x[0]))

    def distance(self, a, b):
        """Return the distance to the nearest occupied seat when sitting in gap ``(a, b)``."""
        if a == -1:
            return b
        if b == self.N:
            return self.N - 1 - a
        return (b - a) // 2

    def _remove(self, a, b):
        """Forget the gap ``(a, b)`` in all three structures."""
        self.p.pop(a)
        self.q.pop(b)
        self.pq.remove((a, b))

    def _add(self, a, b):
        """Register the gap ``(a, b)`` in all three structures."""
        gap = (a, b)
        self.p[a] = gap
        self.q[b] = gap
        self.pq.add(gap)

    def seat(self):
        """Occupy the best seat of the best gap and return its index."""
        a, b = self.pq[-1]
        if a == -1:
            # Gap touching the left wall: take seat 0.
            p = 0
        elif b == self.N:
            # Gap touching the right wall: take the last seat.
            p = self.N - 1
        else:
            p = (b - a) // 2 + a
        self._remove(a, b)
        self._add(a, p)
        self._add(p, b)
        return p

    def leave(self, p):
        """Free seat ``p`` by merging the gaps on either side of it."""
        left_start, left_end = self.q[p]
        right_start, right_end = self.p[p]
        self._remove(left_start, left_end)
        self._remove(right_start, right_end)
        self._add(left_start, right_end)

    def reqp(self):
        """Return the sorted list of gaps."""
        return self.pq
def neiborDistances(self, node):
    """Return distances to neighbor nodes (including self).

    The distance between two node identifiers is the number of ``1`` digits
    in the binary form of their XOR, i.e. the number of differing bits.

    Args:
        node (int): The node identifier from which to calculate distances.

    Returns:
        :obj:`SortedListWithKey` of :obj:`list` of int: List of
        [neighbor, distance] lists sorted in ascending order of distance.
    """
    neighborDist = SortedListWithKey(key=lambda n: n[1])
    # Iterate the mapping directly: this yields its keys on both Python 2
    # and Python 3, whereas dict.iterkeys() only exists on Python 2.
    for n in self.__nodes:
        neighborDist.add([n, bin(node ^ n).count("1")])
    return neighborDist
def resize(self, size=None):
    """
    remove edges (and their corresponding nodes, in case they become
    disconnected from the other components), until self.size() == size.
    if no specific size parameter is provided, self.max_size is used.
    Edges with highest distances are removed first.

    :param int size: size of graph after finishing
    """
    #: fall back to max_size; do nothing if neither is given
    if size is None:
        if self.max_size is None:
            return
        size = self.max_size

    n_edges = self.number_of_edges()
    if n_edges <= size:
        return

    #: number of edges that have to go to reach the requested size
    size_diff = n_edges - size

    #: removal candidates, sorted by distance (lowest first)
    candidates = SortedListWithKey(key=lambda x: x[2])

    #: keep only the size_diff edges with the highest distance
    for node1, node2 in self.edges():
        dist = self.edge[node1][node2]['distance']
        if len(candidates) >= size_diff:
            if not (candidates[0][2] < dist):
                continue
            #: make room by dropping the candidate with lowest distance
            del candidates[0]
        candidates.add((node1, node2, dist))

    self.remove_edges_from(candidates)

    #: every node that was an endpoint of a removed edge
    nodes = {node for tup in candidates for node in tup[:2]}

    #: drop the ones that are left without any edge
    for node in nodes:
        if len(self.edge[node]) == 0:
            self.remove_node(node)
def test_getitem():
    """Positive and negative indexing must match a reverse-sorted plain list."""
    random.seed(0)
    slt = SortedListWithKey(load=17, key=negate)

    expected = []
    for _ in range(100):
        sample = random.random()
        slt.add(sample)
        expected.append(sample)
    expected.sort(reverse=True)

    for idx in range(100):
        assert slt[idx] == expected[idx]

    # idx - 99 runs over -99..0, exercising negative indexes.
    for idx in range(100):
        assert slt[idx - 99] == expected[idx - 99]
def test_count():
    """``count`` must report how many times each value was added, and 0 for absent values."""
    slt = SortedListWithKey(load=7, key=modulo, value_orderable=False)
    assert slt.count(0) == 0

    # Add every value `value` exactly `value` times.
    for value in range(100):
        for _ in range(value):
            slt.add(value)
        slt._check()

    for value in range(100):
        assert slt.count(value) == value

    small = SortedListWithKey(range(8), key=modulo, value_orderable=False)
    assert small.count(9) == 0
class GD_PQ(object):
    """Priority-queue cache policy: nodes are ordered by ``priority = H + cost``.

    ``H`` is set to the priority of the most recently evicted node, so the
    priorities of nodes added or touched later are offset by it.
    """

    def __init__(self, name = None, **kwargs):
        super(GD_PQ, self).__init__()
        self.name = name if name else self.__class__.__name__
        self.H = 0
        self.time = 0
        self.size_aware = False
        self.error_numer = 0
        self.error_denom = 1
        self.nodes = SortedListWithKey(key = attrgetter('priority'))

    def add_node(self, item, cost, size = 1):
        """Create a node for ``item``, give it a priority and queue it.

        When the policy is size aware and ``size`` is not 1, the cost is
        divided by the size first.
        """
        if self.size_aware and size != 1:
            cost = float(cost) / size
        created = PQNode(item, cost)
        created.priority = self.H + created.cost
        self.nodes.add(created)
        self.time += 1
        return created

    def touch(self, node):
        """Recompute the priority of ``node`` and move it to its new position."""
        # Remove before changing the priority: the list is sorted by it.
        self.nodes.remove(node)
        self.update_priority(node)
        self.nodes.add(node)
        self.time += 1

    def update_priority(self, node):
        """Set ``node.priority`` to the current ``H`` plus the node's cost."""
        node.priority = self.H + node.cost

    def evict_node(self):
        """Remove and return the lowest-priority node, raising ``H`` to its priority."""
        victim = self.nodes.pop(0)
        self.H = victim.priority
        return victim
def sorted_iterable(iterable, key=None, buffer=100):
    """sorts an "almost sorted" (infinite) iterable

    Items are held in a sorted window; once the window holds ``buffer``
    items, the smallest one is yielded before each new item is added.  With
    a falsy ``buffer`` nothing is yielded until the input is exhausted.

    :param iterable: iterable
    :param key: function used as sort key
    :param buffer: int size of buffer. elements to swap should not be further than that
    """
    key = key or identity
    from sortedcontainers import SortedListWithKey

    window = SortedListWithKey(key=key)
    for item in iterable:
        window_full = buffer and len(window) >= buffer
        if window_full:
            yield window.pop(0)
        window.add(item)

    # Flush what is left; never reached if the iterable is infinite.
    for leftover in window:
        yield leftover
def anysegmentsintersect(segments):
    """returns True or False

    Segments that share a first endpoint or share a second endpoint are
    reported as intersecting straight away.  Otherwise the endpoints are
    processed in sorted order and neighbouring entries of a sorted list are
    tested with ``segmentsintersect``.

    :param segments: line segments
    :type segments: list of pairs of tuples representing endpoints
    :return: whether there are any intersections
    """
    # first endpoint -> second endpoint, and the reverse mapping
    l_endpoints = {seg[0]:seg[1] for seg in segments}
    r_endpoints = {seg[1]:seg[0] for seg in segments}
    # A dict smaller than the input means two segments share that endpoint.
    if (len(l_endpoints) < len(segments)) or (len(r_endpoints) < len(segments)):
        return True
    # One (point, 0-or-1 flag, point[1]) triple per endpoint, sorted.
    endpoints = sorted(reduce(lambda xs,x:xs+[(x[0],0,x[0][1])]+[(x[1],1,x[1][1])],segments,[]))
    # NOTE(review): an empty `segments` reaches this line and endpoints[0]
    # raises IndexError.  Also, the first positional argument is treated as
    # an iterable of items, so the list is seeded with the elements of the
    # first triple rather than with the triple itself -- confirm intent.
    sweep = SortedListWithKey(endpoints[0], key=itemgetter(1))
    for e in endpoints[1:]:
        # NOTE(review): `e` is a triple while the dicts are keyed by bare
        # points, so these membership tests look like they cannot match for
        # ordinary point tuples -- verify.
        if e in l_endpoints and e in r_endpoints:
            return True
        elif e in l_endpoints:
            sweep.add(e)
            # well this is dumb
            ind = sweep.index(e)
            try:
                # Test against the next entry; fall back to the previous one
                # only when there is no next entry.
                if segmentsintersect((e,l_endpoints[e]), (sweep[ind+1],l_endpoints[sweep[ind+1]])):
                    return True
            except IndexError:
                try:
                    if segmentsintersect((e,l_endpoints[e]), (sweep[ind-1],l_endpoints[sweep[ind-1]])):
                        return True
                except IndexError:
                    pass
        elif e in r_endpoints:
            # well this is dumb
            ind = sweep.index(e)
            try:
                # NOTE(review): this branch is reached only when `e` is not a
                # key of l_endpoints, yet it looks `e` up there -- verify.
                if (segmentsintersect((e,l_endpoints[e]), (sweep[ind+1],l_endpoints[sweep[ind+1]])) and segmentsintersect((e,l_endpoints[e]), (sweep[ind-1],l_endpoints[sweep[ind-1]]))):
                    return True
            except IndexError:
                pass
            del sweep[ind]
    return False
class TwitterNode(getattr(collections, "abc", collections).Iterable):
    """Class representing a Twitter Node or Vertex on Graph.

    The ``Iterable`` base is taken from ``collections.abc`` when that is
    available, because the ``collections.Iterable`` alias was removed in
    Python 3.10; interpreters without ``collections.abc`` fall back to the
    old location.
    """

    def __init__(self, name, tweet):
        """initialize the node class.

        name:string
        tweet:Tweet
        """
        self.__nodename__ = name
        # Tweets of this node, kept ordered by their creation time.
        self.__tweets__ = SortedListWithKey(key=lambda d: d.created_at)
        self.add(tweet)

    @property
    def name(self):
        """get the name for the node."""
        return self.__nodename__

    @property
    def tweets(self):
        """get the tweets for node (as a new plain list)."""
        return list(self.__tweets__)

    def __iter__(self):
        """iterate through tweets."""
        # __iter__ must return an iterator; returning the container itself
        # makes iter(node) raise TypeError.
        return iter(self.__tweets__)

    def add(self, tweet):
        """add tweet for node."""
        self.__tweets__.add(tweet)

    def remove(self, tweet):
        """remove tweet from node."""
        self.__tweets__.remove(tweet)

    def __len__(self):
        """len of tweets for node."""
        return len(self.__tweets__)

    def __str__(self):
        """string representing node."""
        return "Name: %s Tweets: %s" % (self.name, self.tweets)
def test_add():
    """Adding ascending, descending and random values must keep the list valid."""
    random.seed(0)

    def fill_and_check(values):
        # Build a fresh list and validate its internals after every insert.
        slt = SortedListWithKey(key=modulo)
        for value in values:
            slt.add(value)
            slt._check()

    fill_and_check(range(1000))
    fill_and_check(range(1000, 0, -1))
    fill_and_check(random.random() for _ in range(1000))
class Set():
    """
    Collection of unique elements stored in a SortedListWithKey.

    Supports union (+, +=), difference (-), intersection (*), symmetric
    difference (%), the powerset (2 ** s) and subset comparisons.
    """

    def __init__(self, *args, list=None, keep_generators=False):
        """
        Constructor, to either pass elements/generators or an already
        existing SortedListWithKey containing the data (mostly used
        internally).

        Attention: if a list is passed, it is stored by reference and not
        copied.

        Generators among the positional arguments are unpacked element by
        element unless keep_generators is true.
        """
        if list is not None:
            assert isinstance(list, SortedListWithKey)
            self.list = list
            self.has_key = True
        else:
            self.list = SortedListWithKey()
            self.has_key = False

        for arg in args:
            if isinstance(arg, types.GeneratorType) \
                    and not keep_generators:
                for x in arg:
                    self._put(x)
            else:
                self._put(arg)

    def __add__(self, other):
        """
        Overrides the add operator. Creates a new set not referencing the
        added sets.
        """
        assert isinstance(other, Set)
        if len(self.list) == 0 and len(other.list) == 0:
            return Set()
        elif len(self.list) == 0:
            # A shallow copy would share other's internal list, so changes to
            # the result would leak back into `other`.
            return copy.deepcopy(other)
        else:
            merged = SortedListWithKey(key=self.list._key)
            merged.update(copy.deepcopy(self.list))
            other_list_copy = copy.deepcopy(other.list)
            for x in other_list_copy:
                if x not in merged:
                    merged.add(x)
            return Set(list=merged)

    def __iadd__(self, other):
        """
        Overrides the += operator. Adds all elements of an other set to the
        current set.
        """
        assert isinstance(other, Set)
        for x in other.list:
            self._put(x)
        return self

    def __sub__(self, other):
        """
        Overrides the subtraction operator. Returns a new set holding every
        element of the current set that is not in the other set.
        """
        assert isinstance(other, Set)
        if len(self.list) == 0:
            return Set()
        remaining = SortedListWithKey(key=self.list._key)
        for x in self.list:
            if x not in other.list:
                remaining.add(x)
        return Set(list=remaining)

    def __mul__(self, other):
        """
        Overrides the multiplication operator. Creates a new set only
        containing elements existing in both sets.
        """
        assert isinstance(other, Set)
        if len(self.list) == 0:
            return Set()
        common = SortedListWithKey(key=self.list._key)
        for x in self.list:
            if x in other.list:
                common.add(x)
        return Set(list=common)

    def __str__(self):
        """
        Method to represent the current set as string.
        """
        return '{ %s }' % ', '.join([str(x) for x in self.list])

    def __pow__(self, other):
        """
        Overrides the pow operator (set ** 2). Returns the powerset.
        """
        return self.__rpow__(other)

    def __rpow__(self, other):
        """
        Overrides the pow operator (2 ** set). Returns the powerset.

        Raises an Exception when the other operand is not 2.
        """
        assert isinstance(other, int)
        if other != 2:
            raise Exception('Lefthandside is not 2. To generate the powerset use "2 ** myset"')
        from copy import deepcopy
        # power() consumes its argument, so work on a private copy.
        copied_set = deepcopy(self)
        return Set.power(copied_set)

    def __mod__(self, other):
        """
        Overrides the modulo operator. Returns the symmetric difference:
        the elements contained in exactly one of the two sets.
        """
        assert isinstance(other, Set)
        return (self - other) + (other - self)

    def __iter__(self):
        """
        Returns the iterator of the internal list.
        """
        return self.list.__iter__()

    def __len__(self):
        """
        Returns the number of elements contained in the set.
        """
        return len(self.list)

    def __lt__(self, other):
        """
        Overrides the lower than operator, indicating if this set is
        contained in an other set but is not the same.
        """
        assert isinstance(other, Set)
        if len(self.list) >= len(other.list):
            return False
        return all(x in other.list for x in self.list)

    def __gt__(self, other):
        """
        Overrides the greater than operator, indicating if this set contains
        an other set but is not the same.
        """
        assert isinstance(other, Set)
        if len(self.list) <= len(other.list):
            return False
        return all(x in self.list for x in other.list)

    def __ge__(self, other):
        """
        Overrides the greater than equal operator, indicating if this set
        contains an other set or both sets are equal.
        """
        assert isinstance(other, Set)
        for x in other.list:
            if x not in self.list:
                return False
        return True

    def __le__(self, other):
        """
        Overrides the lower than equal operator, indicating if this set is
        contained in an other set or both sets are equal.
        """
        assert isinstance(other, Set)
        for x in self.list:
            if x not in other.list:
                return False
        return True

    def __eq__(self, other):
        """
        Overrides the equals operator. Indicates if two sets contain the
        same elements.
        """
        # Let Python fall back to its default comparison for foreign types
        # instead of failing an assertion (e.g. when a Set is an element of a
        # list that also holds other values).
        if not isinstance(other, Set):
            return NotImplemented
        if len(self.list) != len(other.list):
            return False
        for x in self.list:
            if x not in other.list:
                return False
        for x in other.list:
            if x not in self.list:
                return False
        return True

    def __ne__(self, other):
        """
        Overrides the not equals operator. Indicates if two sets are not
        equal.
        """
        return not self == other

    def __contains__(self, other):
        """
        Overrides the in operator, indicates if the set contains the element.
        """
        return other in self.list

    def __getitem__(self, key):
        """
        Returns the __getitem__ method result of the internal list, to
        support indexing and array slicing.
        """
        return self.list.__getitem__(key)

    def cartesian_product(self, other):
        """
        Returns the cartesian product of the current set with an other set
        as a set of (x, y) tuples.
        """
        assert isinstance(other, Set)
        s = Set()
        for x in self.list:
            for y in other.list:
                s.list.add((x, y))
        return s

    @staticmethod
    def power(s):
        """
        Creates a power set from a set.

        Attention: the passed set is emptied by this call (elements are
        popped off while recursing); pass a copy to keep the original.
        """
        if len(s) == 0:
            return Set(Set())
        x = s.pop()
        y = Set.power(s)
        z = Set(m + Set(x) for m in y)
        return y + z

    def arb(self):
        """
        Returns an arbitrary element from the set: the last element when the
        number of elements is even, the first one otherwise.
        """
        return self.list[-1] if len(self.list) % 2 == 0 else self.list[0]

    def rnd(self):
        """
        Returns a random element from set.
        """
        return self.list[random.randrange(0, len(self.list))]

    def put(self, other):
        """
        Adds an element to the current set.
        """
        self._put(other)

    def _put(self, other):
        """
        Internal _put method to keep track of the added element.
        The first element decides the sort key: if it is a Set, the internal
        list is switched to gen_set_key.
        """
        if self.has_key is False:
            self.has_key = True
            if isinstance(other, Set):
                self.list._key = gen_set_key
        if other not in self.list:
            self.list.add(other)

    def peek(self):
        """
        Returns the last element of the set.
        """
        return self.list[-1]

    def pop(self):
        """
        Returns the last element of the set and removes it.
        """
        x = self.list.pop()
        return x

    def sum(self):
        """
        Returns the sum of all elements inside the set, using the += operator
        on a deep copy of the first element. Returns None for an empty set.
        """
        import copy
        temp = None
        for x in self.list:
            if temp is None:
                temp = copy.deepcopy(x)
            else:
                temp += x
        return temp
def test_copy():
    """A copy made with .copy() is not affected by later adds to the source."""
    original = SortedListWithKey(range(100), load=7, key=negate)
    duplicate = original.copy()
    original.add(100)
    sizes = (len(original), len(duplicate))
    assert sizes == (101, 100)
# Index pairs assigned, in order, to the candidates in `new_cands` on the
# first pass of the loop below.
# NOTE(review): presumably (row, column) grid offsets -- confirm.
new_indices = [ (1,0), (0,1) ]
largest_33_found = None
largest_index_found = None
TO_FIND = 3
# NOTE(review): no exit condition is visible in this excerpt; it presumably
# follows further down in the file.
for n in itertools.count(2):
    # The commented-out lines appear to be a superseded version that kept
    # three parallel lists; `combined` now holds one entry per opening.
    ## openings.pop()
    ## openings_values.pop()
    ## openings_indices.pop()
    combined.pop()
    for new_opening, new_opening_index in zip(new_cands, new_indices):
        # Each entry is [opening, solve(opening)[0], index pair].
        combined.add([new_opening, solve(new_opening)[0], new_opening_index])
        ### value = solve(new_opening)[0]
        ### insertion_point = bisect.bisect_left(openings_values, value)
        ### insertion_point = combined.bisect_left(value)
        ### openings.insert(insertion_point, new_opening)
        ### openings_values.insert(insertion_point, value)
        ### openings_indices.insert(insertion_point, new_opening_index)
    # opening = openings[-1]
    # Continue from the last entry of `combined`.
    opening = combined[-1][0]
    ind = combined[-1][2]
    solved = solve(opening)
    new_selection = solved[0]
    new_cands = solved[1]
def export_hit_summary(multiClusterDict,outfile,hitDictID,maxJump = 100,minClusterSize = 2,
                       hitsToConsider = set(), hitsToIgnore = set(),writeFile=False):
    """Filter clusters by hit count and index gaps, optionally writing a report.

    A cluster is kept when its hit count is at least ``minClusterSize`` and
    the largest gap between the ``idx`` values of neighbouring proteins is
    below ``maxJump``. If the largest gap sits at the very start or end of a
    cluster, the outlying protein is popped off (as long as the hit count
    stays above ``minClusterSize``) and the cluster is re-evaluated.

    NOTE: trimming pops proteins from the cluster objects held in
    ``multiClusterDict``, i.e. the input is modified in place.

    :param multiClusterDict: mapping whose values are iterables of clusters;
        a cluster must support ``len``, iteration, indexing, ``pop(i)`` and
        ``size()``, and its proteins expose ``idx``, ``name``, ``species``
        and ``hit_dict[hitDictID].hits``
    :param outfile: path of the tab-separated report (used if ``writeFile``)
    :param hitDictID: key into each protein's ``hit_dict``
    :param maxJump: gap threshold between neighbouring protein indices
    :param minClusterSize: minimum hit count for a cluster to be considered
    :param hitsToConsider: if non-empty, only proteins sharing a hit with
        this set count as hits (only read, never modified)
    :param hitsToIgnore: proteins sharing a hit with this set are subtracted
        from the hit count (only read, never modified)
    :param writeFile: write the report to ``outfile`` when true
    :return: SortedListWithKey of kept clusters, most hits first
    """
    def count_hits(cluster):
        # Hit count of a cluster: considered (or all) proteins minus ignored ones.
        ignored = sum(1 for protein in cluster
                      if len(hitsToIgnore & protein.hit_dict[hitDictID].hits) > 0)
        if hitsToConsider:
            considered = sum(1 for protein in cluster
                             if len(hitsToConsider & protein.hit_dict[hitDictID].hits) > 0)
            return considered - ignored
        return len(cluster) - ignored

    def largest_gap(cluster):
        # Return (largest index gap, its position, number of gaps); a cluster
        # with fewer than two proteins has no gaps at all.
        proteinIdx = [protein.idx for protein in cluster]
        proteinIdxDiff = [abs(j-i) for i,j in zip(proteinIdx, proteinIdx[1:])]
        if not proteinIdxDiff:
            return 0, None, 0
        proteinMaxJump = max(proteinIdxDiff)
        return proteinMaxJump, proteinIdxDiff.index(proteinMaxJump), len(proteinIdxDiff)

    # The key is derived from the cluster itself. The previous key ignored its
    # argument and read the loop variable `clusterHits`, which only gave the
    # right value at the moment of insertion.
    filtered_clusters = SortedListWithKey(key=lambda cluster: -count_hits(cluster))

    # First unpack cluster analysis to filter eligible clusters
    for species_clusters in multiClusterDict.values():
        for cluster in species_clusters:
            clusterHits = count_hits(cluster)
            # Check size first
            if clusterHits < minClusterSize:
                continue
            proteinMaxJump, proteinMaxIdx, gapCount = largest_gap(cluster)
            # If the max gap happens at the start or end of the cluster, see if
            # removing that protein will have it fit the threshold
            while clusterHits > minClusterSize and proteinMaxJump > maxJump and \
                    proteinMaxIdx in (0, gapCount - 1):
                if proteinMaxIdx == 0:
                    cluster.pop(proteinMaxIdx)
                else:
                    cluster.pop(proteinMaxIdx+1)
                clusterHits = count_hits(cluster)
                proteinMaxJump, proteinMaxIdx, gapCount = largest_gap(cluster)
            if proteinMaxJump < maxJump:
                print(cluster)
                filtered_clusters.add(cluster)
    print("Found %i clusters" % len(filtered_clusters))

    if writeFile:
        with open(outfile,'w') as outHandle:
            outHandle.write('Protein Hit\tProtein Idx\tCluster Hit\n')
            for cluster in filtered_clusters:
                outHandle.write('# Species: %s\tNumber Proteins: %i\tDNA Size: %i\n' % (cluster[0].species,len(cluster),cluster.size()))
                outHandle.write('# Protein Hit\tProtein Idx\tCluster Hit\n')
                for protein in cluster:
                    hits = protein.hit_dict[hitDictID].hits
                    # Ignored hits are marked "##", considered hits "**".
                    if len(hitsToIgnore & hits) >= 1:
                        outHandle.write('%s\t%i\t##%s\n'% (protein.name,protein.idx,list(hits & hitsToIgnore)[0][0]))
                    elif len(hitsToConsider & hits) >= 1:
                        outHandle.write('%s\t%i\t**%s\n'% (protein.name,protein.idx,list(hits & hitsToConsider)[0][0]))
                    elif len(hits) >= 1:
                        outHandle.write('%s\t%i\t%s\n'% (protein.name,protein.idx,list(hits)[0][0]))
                    else:
                        outHandle.write('%s\t%i\tNo Hits\n'% (protein.name,protein.idx))
    return filtered_clusters
# openings.pop() # openings_values.pop() # openings_indices.pop() combined.pop() for new_opening, new_opening_index in zip(new_cands, new_indices): value = solve(new_opening)[0] ## insertion_point = bisect.bisect_left(openings_values, value) ### insertion_point = openings_values.bisect_left(value) # openings_values.insert(insertion_point, value) # openings_indices.insert(insertion_point, new_opening_index) combined.add( [new_opening, value, new_opening_index] ) index = combined[-1][2] opening = combined[-1][0] solved = solve(opening) new_selection = solved[0] new_cands = solved[1] new_indices = (( (index[0] + 1, index[1] + 0), (index[0] + 0, index[1] + 1) )) ## if largest_index_found is None or index_replaced > largest_index_found: ## largest_index_found = index_replaced if openings_indices[-1] == (TO_FIND, TO_FIND):
def test_len():
    """len() grows by exactly one with every add."""
    slt = SortedListWithKey(key=modulo, value_orderable=False)
    expected_size = 0
    for val in range(10000):
        slt.add(val)
        expected_size += 1
        assert len(slt) == expected_size
def _retrieve_raw_observations(self):
    """Read the raw observations from every raw-observation sheet.

    For each sheet (one per year) and each indicator column, one
    ``ExcelObservation`` is built per data row. The observations of a column
    are collected sorted by value, ranked through
    ``_update_observation_ranking`` and appended to
    ``self._excel_raw_observations``.

    Rows without a value in the check column are skipped, and that is logged
    only once per row. Per-row failures are logged and the row is skipped;
    they never abort the import.
    """
    self._log.info("\tRetrieving raw observations...")
    raw_obs_sheets = self._get_raw_obs_sheets()

    for raw_obs_sheet in raw_obs_sheets:  # Per year
        sheet_year = re.match(self._config.get("RAW_OBSERVATIONS", "SHEET_NAME_PATTERN"),
                              raw_obs_sheet.name).group("year")
        # Rows already reported as empty, so the message is logged once.
        empty_row_error_cache = {}
        year_column = get_column_number(self._config_get("RAW_OBSERVATIONS", "OBSERVATION_YEAR_COLUMN", sheet_year))
        iso3_column = get_column_number(self._config_get("RAW_OBSERVATIONS", "OBSERVATION_ISO3_COLUMN", sheet_year))
        observation_name_row = self._config_getint("RAW_OBSERVATIONS", "OBSERVATION_NAME_ROW", sheet_year)
        observation_start_row = self._config_getint("RAW_OBSERVATIONS", "OBSERVATION_START_ROW", sheet_year)
        observation_start_column = get_column_number(
            self._config_get("RAW_OBSERVATIONS", "OBSERVATION_START_COLUMN", sheet_year))
        check_column = get_column_number(
            self._config_get("RAW_OBSERVATIONS", "OBSERVATION_CHECK_COLUMN", sheet_year))

        for column_number in range(observation_start_column, raw_obs_sheet.ncols):  # Per indicator
            # Maintain sorted list with elements sorted by value
            # Elements are tuples of the form (ExcelObservation, Area, Indicator)
            # We're using tuples just to avoid some additional round trips to the db in order to get area and indicator
            per_indicator_observations = SortedListWithKey(
                key=lambda x: x[0].value if x[0].value is not None and na_to_none(x[0].value) is not None else 0)

            # HACK: Curate data by stripping year
            indicator_code_retrieved = raw_obs_sheet.cell(observation_name_row, column_number).value
            code_parts = indicator_code_retrieved.split()
            if len(code_parts) > 1:
                self._log.debug('Indicator %s in had to be stripped of year while parsing %s',
                                indicator_code_retrieved, raw_obs_sheet.name)
            if not code_parts:
                # Blank header cell: there is no indicator code to work with.
                self._log.warning(
                    'Wrong Indicator name %s while parsing %s[%s], skipping column' % (
                        indicator_code_retrieved, raw_obs_sheet.name, colname(column_number)))
                continue
            indicator_code = code_parts[0]

            try:
                indicator = self._indicator_repo.find_indicator_by_code(indicator_code)
            except IndicatorRepositoryError:
                self._log.warning(
                    "No indicator with code %s found while parsing %s" % (indicator_code, raw_obs_sheet.name))
                indicator = create_indicator(indicator=indicator_code)  # Orphan indicator

            for row_number in range(observation_start_row, raw_obs_sheet.nrows):  # Per country
                if not raw_obs_sheet.cell(row_number, check_column).value or row_number in empty_row_error_cache:
                    if row_number not in empty_row_error_cache:
                        self._log.debug(
                            "Skipping row while parsing %s[%s] (did not detect value on check column, additional errors regarding this row will be omitted)" % (
                                raw_obs_sheet.name, row_number))
                        empty_row_error_cache[row_number] = True
                    continue
                try:
                    year = int(raw_obs_sheet.cell(row_number, year_column).value)
                    iso3 = raw_obs_sheet.cell(row_number, iso3_column).value
                    area = self._area_repo.find_by_iso3(iso3)
                    value_retrieved = raw_obs_sheet.cell(row_number, column_number).value
                    value = na_to_none(value_retrieved)
                    excel_observation = ExcelObservation(iso3=iso3, indicator_code=indicator_code, value=value,
                                                         year=year)
                    per_indicator_observations.add((excel_observation, area, indicator))
                except AreaRepositoryError:
                    self._log.error("No area found with code %s for indicator %s while parsing %s" % (
                        iso3, indicator_code, raw_obs_sheet.name))
                except Exception:
                    # Deliberate best-effort: a bad row must not stop the
                    # import. `Exception` (not a bare except) lets
                    # KeyboardInterrupt and SystemExit propagate.
                    self._log.error("Unexpected error parsing %s[%s]" % (raw_obs_sheet.name, row_number))

            self._update_observation_ranking(per_indicator_observations, observation_getter=lambda x: x[0])
            self._excel_raw_observations.extend(per_indicator_observations)
class Schedule:
    """A quantum program whose operations happen at specific times.

    Point lookups use schedule[time]; slice lookups use
    schedule[inclusive_start_time:exclusive_end_time].

    Attributes:
        device: The hardware this will schedule on.
        scheduled_operations: A SortedListWithKey containing the
            ScheduledOperations for this schedule. The key is the start time
            of the ScheduledOperation.
    """

    def __init__(self,
                 device: Device,
                 scheduled_operations: Iterable[ScheduledOperation] = ()
                 ) -> None:
        """Initializes a new schedule.

        Args:
            device: The hardware this schedule will run on.
            scheduled_operations: Initial list of operations to apply. These
                will be moved into a sorted list, with a key equal to each
                operation's start time.
        """
        self.device = device
        self.scheduled_operations = SortedListWithKey(scheduled_operations,
                                                      key=lambda e: e.time)
        # Longest duration seen so far; query() uses it to bound how far back
        # in time an overlapping operation can start.
        durations = [op.duration for op in self.scheduled_operations]
        self._max_duration = max(durations or [Duration()])

    def __eq__(self, other):
        if isinstance(other, Schedule):
            return self.scheduled_operations == other.scheduled_operations
        return NotImplemented

    def __ne__(self, other):
        return not self == other

    __hash__ = None  # type: ignore

    def query(self, *,  # Forces keyword args.
              time: Timestamp,
              duration: Duration = Duration(),
              qubits: Iterable[QubitId] = None,
              include_query_end_time=False,
              include_op_end_times=False) -> List[ScheduledOperation]:
        """Finds operations by time and qubit.

        Args:
            time: Operations must end after this time to be returned.
            duration: Operations must start by time+duration to be
                returned.
            qubits: If specified, only operations touching one of the
                included qubits will be returned.
            include_query_end_time: Determines if the query interval
                includes its end time. Defaults to no.
            include_op_end_times: Determines if the scheduled operation
                intervals include their end times or not. Defaults to no.

        Returns:
            A list of scheduled operations meeting the specified conditions.
        """
        window_start = time - self._max_duration
        window_end = time + duration
        wanted_qubits = None if qubits is None else frozenset(qubits)

        matches = []
        for op in self.scheduled_operations.irange_key(window_start,
                                                       window_end):
            op_end = op.time + op.duration
            # An operation ending exactly at the query start only counts when
            # operation end times are inclusive.
            if op_end == time and not include_op_end_times:
                continue
            # An operation starting exactly at the query end only counts when
            # the query end time is inclusive.
            if op.time == end_time_of(window_end) and not include_query_end_time:
                continue
            if not (op_end >= time and op.time <= window_end):
                continue
            if (wanted_qubits is not None
                    and wanted_qubits.isdisjoint(op.operation.qubits)):
                continue
            matches.append(op)
        return matches

    def __getitem__(self, item: Union[Timestamp, slice]):
        """Finds operations overlapping a given time or time slice.

        Args:
            item: Either a Timestamp or a slice containing start and stop
                Timestamps.

        Returns:
            The scheduled operations that occurs during the given time.
        """
        if not isinstance(item, slice):
            return self.query(time=item, include_query_end_time=True)
        if item.step:
            raise ValueError('Step not supported.')
        start = cast(Timestamp, item.start)
        stop = cast(Timestamp, item.stop)
        return self.query(time=start, duration=stop - start)

    def operations_happening_at_same_time_as(
            self, scheduled_operation: ScheduledOperation
    ) -> List[ScheduledOperation]:
        """Finds operations happening at the same time as the given operation.

        Args:
            scheduled_operation: The operation specifying the time to query.

        Returns:
            Scheduled operations that overlap with the given operation.
        """
        concurrent = self.query(time=scheduled_operation.time,
                                duration=scheduled_operation.duration)
        return [op for op in concurrent if op != scheduled_operation]

    def include(self, scheduled_operation: ScheduledOperation):
        """Adds a scheduled operation to the schedule.

        Args:
            scheduled_operation: The operation to add.

        Raises:
            ValueError: The operation collided with something already in the
                schedule.
        """
        collisions = self.query(time=scheduled_operation.time,
                                duration=scheduled_operation.duration,
                                qubits=scheduled_operation.operation.qubits)
        if collisions:
            raise ValueError('Operation {} has collisions: {}'.format(
                scheduled_operation.operation, collisions))
        self.scheduled_operations.add(scheduled_operation)
        self._max_duration = max(self._max_duration,
                                 scheduled_operation.duration)

    def exclude(self, scheduled_operation: ScheduledOperation) -> bool:
        """Omits a scheduled operation from the schedule, if present.

        Args:
            scheduled_operation: The operation to try to remove.

        Returns:
            True if the operation was present and is now removed, False if it
            was already not present.
        """
        try:
            self.scheduled_operations.remove(scheduled_operation)
        except ValueError:
            return False
        else:
            return True

    def to_circuit(self) -> Circuit:
        """Convert the schedule to a circuit.

        This discards most timing information from the schedule, but does
        place operations that are scheduled at the same time in the same
        Moment.
        """
        circuit = Circuit()
        pending_ops = []  # type: List[Operation]
        current_time = None  # type: Optional[Timestamp]
        for scheduled in self.scheduled_operations:
            if scheduled.time != current_time:
                # A new start time: flush the batch collected so far.
                circuit.append(pending_ops)
                pending_ops = [scheduled.operation]
                current_time = scheduled.time
            else:
                pending_ops.append(scheduled.operation)
        circuit.append(pending_ops)
        return circuit


def end_time_of(window_end):
    """Return the query window's end time unchanged (readability helper)."""
    return window_end
class StreamHist(object):
    """A StreamHist implementation.

    Bins are kept in a SortedListWithKey ordered by bin value. Once more than
    `maxbins` bins exist, the closest adjacent bins are merged by `trim`.
    """

    def __init__(self, maxbins=64, weighted=False, freeze=None):
        """Create a Histogram with a max of n bins."""
        super(StreamHist, self).__init__()
        self.bins = SortedListWithKey(key=lambda b: b.value)
        self.maxbins = maxbins  # A useful property
        self.total = 0
        self.weighted = weighted
        self._min = None   # A useful property
        self._max = None   # A useful property
        self.freeze = freeze
        self.missing_count = 0

    def update(self, n, count=1):
        """Add a point to the histogram."""
        if n is None:
            # We simply keep a count of the number of missing values
            self.missing_count += count
            return self
        if isinstance(n, iterator_types):
            # Shortcut for updating a histogram with an iterable
            # This works for anything that supports iteration, including
            # file-like objects and readers
            # This also means that nested lists (and similar structures) will
            # be 'unpacked' and added to the histogram 'automatically'
            for p in n:
                self.update(p, count)  # Count is assumed to apply for all
        else:
            self.insert(n, count)
        return self.trim()

    def insert(self, n, count):
        """Inserts a point to the histogram.

        This method implements Steps 1-4 from Algorithm 1 (Update) in ref [1].

        Notes
        -----
        `insert` updates the total point count and the min/max, but it does
        not call `trim` and it returns None. Prefer `update`, which trims
        after the insertion. When calling `insert` directly for a large
        batch, call `trim` yourself afterwards.

        Examples
        --------
        >>> # Using insert
        >>> h = StreamHist()
        >>> for point in (1, 2, 3):
        ...     h.insert(point, 1)
        >>> h = h.trim()
        >>> # Using update
        >>> h = StreamHist().update([1, 2, 3])
        """
        self.update_total(count)
        if self._min is None or self._min > n:
            self._min = n
        if self._max is None or self._max < n:
            self._max = n
        b = Bin(value=n, count=count)
        if b in self.bins:
            index = self.bins.index(b)
            self.bins[index].count += count
        elif (self.freeze is not None and self.total >= self.freeze
                and len(self.bins)):
            # Frozen: no new bins, the count goes to the nearest existing bin.
            # (With no bins at all there is nothing to attach to, so the
            # point falls through to the branch below and gets its own bin.)
            index = self.bins.bisect(Bin(n, count))
            if index:
                prev_dist = n - self.bins[index-1].value
            else:
                prev_dist = sys.float_info.max
            if index and index < len(self.bins):
                next_dist = self.bins[index].value - n
            else:
                next_dist = sys.float_info.max
            if prev_dist < next_dist:
                self.bins[index-1].count += count
            else:
                self.bins[index].count += count
        else:
            self.bins.add(b)

    def cdf(self, x):
        """Return the value of the cumulative distribution function at x."""
        return self.sum(x) / self.total

    def pdf(self, x):
        """Return the value of the probability density function at x."""
        return self.density(x) / self.total

    def bounds(self):
        """Return the lower (min) and upper (max) bounds of the distribution."""
        if len(self):
            return (self._min, self._max)
        return (None, None)

    def count(self):
        """Return the total number of points added to this histogram."""
        return self.total

    def _value_at_rank(self, rank):
        """Return the bin value holding the point at 0-based sorted `rank`."""
        seen = 0
        value = None
        for b in self.bins:
            value = b.value
            seen += b.count
            if rank < seen:
                break
        return value

    def median(self):
        """Return a median for the points inserted into the histogram.

        This will be the true median whenever the histogram has less than
        the maximum number of bins, otherwise it will be an approximation.
        """
        if self.total == 0:
            return None
        if len(self.bins) >= self.maxbins:
            # Return the approximate median
            return self.quantiles(0.5)[0]
        # Return the 'exact' median when possible. Ranks are resolved through
        # the cumulative bin counts so that bins holding several identical
        # points are handled. Integer division keeps the rank usable on
        # Python 3.
        upper_rank = self.total // 2
        upper = self._value_at_rank(upper_rank)
        if self.total % 2 == 0:
            lower = self._value_at_rank(upper_rank - 1)
            return (lower + upper) / 2.0
        return upper

    def mean(self):
        """Return the sample mean of the distribution."""
        if self.total == 0:
            return None
        s = 0.0  # Sum
        for b in self.bins:
            s += b.value * b.count
        return s / float(self.total)

    def var(self):
        """Return the variance of the distribution."""
        if self.total < 2:
            return None
        s = 0.0
        m = self.mean()  # Mean
        for b in self.bins:
            s += (b.count * (b.value - m)**2)
        return s / float(self.total)

    def min(self):
        """Return the minimum value in the histogram."""
        return self._min

    def max(self):
        """Return the maximum value in the histogram."""
        return self._max

    def trim(self):
        """Merge adjacent bins to decrease bin count to the maximum value.

        This method implements Steps 5-6 from Algorithm 1 (Update) in ref [1].
        """
        while len(self.bins) > self.maxbins:
            index = argmin(bin_diff(self.bins, self.weighted))
            prv = self.bins.pop(index)
            self.bins[index] += prv
        return self

    def scale_down(self, exclude):
        pass  # By default, we do nothing

    def __str__(self):
        """Return a string representation of the histogram."""
        if len(self.bins):
            string = "Mean\tCount\n----\t-----\n"
            for b in self.bins:
                string += "%d\t%i\n" % (b.value, b.count)
            string += "----\t-----\n"
            string += "Missing values: %s\n" % self.missing_count
            string += "Total count: %s" % self.total
            return string
        return "Empty histogram"

    def to_dict(self):
        """Return a dictionary representation of the histogram."""
        bins = list()
        for b in self.bins:
            bins.append({"mean": b.value, "count": b.count})
        info = dict(missing_count=self.missing_count,
                    maxbins=self.maxbins,
                    weighted=self.weighted,
                    freeze=self.freeze)
        return dict(bins=bins, info=info)

    @classmethod
    def from_dict(cls, d):
        """Create a StreamHist object from a dictionary representation.

        The dictionary must be in the format given by `to_dict`. This class
        method, combined with the `to_dict` instance method, can facilitate
        communicating StreamHist objects across processes or networks.
        """
        info = d["info"]
        bins = d["bins"]
        hist = cls(info["maxbins"], info["weighted"], info["freeze"])
        hist.missing_count = info["missing_count"]
        for b in bins:
            count = b["count"]
            value = b["mean"]
            # add() keeps the list sorted whatever the order in the dict.
            hist.bins.add(Bin(value, count))
            # The total always equals the sum of the bin counts.
            hist.total += count
        if len(hist.bins):
            # The true extremes are not part of the to_dict format; the
            # outermost bin means are the closest values available.
            hist._min = hist.bins[0].value
            hist._max = hist.bins[-1].value
        return hist

    def __len__(self):
        """Return the number of bins in this histogram."""
        return len(self.bins)

    def update_total(self, size=1):
        """Update the internally-stored total number of points."""
        self.total += size

    def __add__(self, other):
        """Merge two StreamHist objects into one."""
        res = self.copy()
        return res.merge(other)

    def __iadd__(self, other):
        """Merge another StreamHist object into this one."""
        return self.merge(other)

    def __radd__(self, other):
        """Reverse merge two objects.

        This is useful for merging a list of histograms via sum or similar.
        """
        return self + other

    def merge(self, other, size=None):
        """Merge another StreamHist object into this one.

        This method implements Algorithm 2 (Merge) in ref [1].
        """
        if other == 0:  # Probably using sum here...
            return self  # This is a little hacky...
        # Take copies up front: the bins must not be shared with `other`
        # (later count updates would change both histograms), and the
        # snapshot also makes merging a histogram into itself safe.
        incoming = [Bin(value=b.value, count=b.count) for b in other.bins]
        for b in incoming:
            self.bins.add(b)
        self.total += other.total
        if size is not None:
            self.maxbins = size
        self.trim()

        if self._min is None:
            self._min = other._min
        else:
            if other._min is not None:
                self._min = min(self._min, other._min)
        if self._max is None:
            self._max = other._max
        else:
            if other._max is not None:
                self._max = max(self._max, other._max)
        self.missing_count += other.missing_count
        return self

    def copy(self):
        """Make a deep copy of this histogram."""
        res = type(self)(int(self.maxbins), bool(self.weighted))
        # Copy every bin: a shallow copy of the list would leave both
        # histograms updating the same Bin objects.
        for b in self.bins:
            res.bins.add(Bin(value=b.value, count=b.count))
        res._min = float(self._min) if self._min is not None else None
        res._max = float(self._max) if self._max is not None else None
        res.total = int(self.total)
        res.missing_count = int(self.missing_count)
        res.freeze = int(self.freeze) if self.freeze is not None else None
        return res

    def describe(self, quantiles=(0.25, 0.50, 0.75)):
        """Generate various summary statistics."""
        data = [self.count(), self.mean(), self.var(), self.min()]
        data += self.quantiles(*quantiles) + [self.max()]
        names = ["count", "mean", "var", "min"]
        names += ["%i%%" % round(q*100., 0) for q in quantiles] + ["max"]
        return dict(zip(names, data))

    def compute_breaks(self, n=50):
        """Return output like that of numpy.histogram."""
        last = 0.0
        counts = []
        bounds = linspace(*self.bounds(), num=n)
        for e in bounds[1:]:
            new = self.sum(e)
            counts.append(new-last)
            last = new
        return counts, bounds

    def print_breaks(self, num=50):
        """Print a string representation of the histogram."""
        lines = []
        for c, b in zip(*self.compute_breaks(num)):
            bar = "." * int(c/float(self.total)*200)
            lines.append(str(b) + "\t" + bar + "\n")
        print("".join(lines))

    def sum(self, x):
        """Return the estimated number of points in the interval [-inf, x]."""
        x = float(x)
        if x < self._min:
            ss = 0.0  # Sum is zero!
        elif x >= self._max:
            ss = float(self.total)
        elif x == self.bins[-1].value:
            # Shortcut for when i == max bin (see Steps 3-6)
            last = self.bins[-1]
            ss = float(self.total) - (float(last.count) / 2.0)
        else:
            bin_i = self.floor(x)
            if bin_i is None:
                bin_i = Bin(value=self._min, count=0)

            bin_i1 = self.higher(x)
            if bin_i1 is None:
                bin_i1 = Bin(value=self._max, count=0)

            if bin_i.value == self._min:
                prev_sum = self.bins[0].count / 2.0
            else:
                temp = bin_sums(self.bins, less=x)
                if len(temp):
                    prev_sum = sum(temp)
                else:
                    prev_sum = 0.0

            ss = _compute_sum(x, bin_i, bin_i1, prev_sum)
        return ss

    def density(self, p):
        """Return the estimated density at p (0.0 outside [min, max])."""
        p = float(p)
        if p < self._min or p > self._max:
            dd = 0.0
        elif p == self._min and p == self._max:
            dd = float('inf')
        elif Bin(value=p, count=0) in self.bins:
            # Exactly on a bin: average the densities just below and above.
            high = next_after(p, float("inf"))
            low = next_after(p, -float("inf"))
            dd = (self.density(low) + self.density(high)) / 2.0
        else:
            bin_i = self.lower(p)
            if bin_i is None:
                bin_i = Bin(value=self._min, count=0)

            bin_i1 = self.higher(p)
            if bin_i1 is None:
                bin_i1 = Bin(value=self._max, count=0)

            dd = _compute_density(p, bin_i, bin_i1)
        return dd

    def quantiles(self, *quantiles):
        """Return the estimated data value for the given quantile(s).

        The requested quantile(s) must be between 0 and 1. Note that even if a
        single quantile is input, a list is always returned.
        """
        temp = bin_sums(self.bins)
        sums = list(accumulate(temp))
        result = []
        for x in quantiles:
            target_sum = x * self.total
            if x <= 0:
                qq = self._min
            elif x >= 1:
                # x is a fraction, so the upper clamp is 1 (not the total
                # point count).
                qq = self._max
            else:
                index = bisect_left(sums, target_sum)
                bin_i = self.bins[index]
                if index < len(sums):
                    bin_i1 = self.bins[index+1]
                else:
                    bin_i1 = self.bins[index]
                if index:
                    prev_sum = sums[index-1]
                else:
                    prev_sum = 0.0
                qq = _compute_quantile(target_sum, bin_i, bin_i1, prev_sum+1)
            result.append(qq)
        return result

    def floor(self, p):
        """Return the bin at or immediately below p, or None."""
        hbin = Bin(p, 0)
        index = self.bins.bisect_left(hbin)
        if hbin not in self.bins:
            index -= 1
        return self.bins[index] if index >= 0 else None

    def ceiling(self, p):
        """Return the bin at or immediately above p, or None."""
        hbin = Bin(p, 0)
        index = self.bins.bisect_right(hbin)
        if hbin in self.bins:
            index -= 1
        return self.bins[index] if index < len(self.bins) else None

    def lower(self, p):
        """Return the bin strictly below p, or None."""
        index = self.bins.bisect_left(Bin(p, 0)) - 1
        return self.bins[index] if index >= 0 else None

    def higher(self, p):
        """Return the bin strictly above p, or None."""
        index = self.bins.bisect_right(Bin(p, 0))
        return self.bins[index] if index < len(self.bins) else None
def _trie_search(self, word, d, transducer=None,
                 allow_spaces=True, return_cost=True):
    """
    Find all words in the prefix trie whose distance from `word`,
    according to the given transducer, does not exceed `d`.

    Parameters
    ----------
    word : the query string to match against the dictionary trie.
    d : maximal allowed cost; candidates whose accumulated cost or
        estimated total cost exceeds it are discarded.
    transducer : transducer providing `operation_costs` and
        `max_up_length`; defaults to `self.transducer.inverse()`.
    allow_spaces : whether a " " operation may restart from the trie
        root; it is additionally and-ed with `self.allow_spaces`.
    return_cost : if true, return `(word, cost)` pairs, otherwise only
        the words.

    Returns
    -------
    A list sorted by ascending cost, holding either `(word, cost)` pairs
    or bare words, depending on `return_cost`.
    """
    if transducer is None:
        # TODO: sort out the handling of spaces
        transducer = self.transducer.inverse()
    allow_spaces &= self.allow_spaces
    trie = self.dictionary
    # variable initialization
    used_agenda_keys = set()
    # agenda items are (key, value) pairs ordered by the value tuple
    # (cost, g, h), so the item with the lowest cost is popped first
    agenda = SortedListWithKey(key=(lambda x:x[1]))
    h = self.h_func(word, trie.root)
    # agenda[self.agenda_key("", 0, trie.root)] = (0.0, 0.0, h)
    key, value = ("", 0, trie.root), (0.0, 0.0, h)
    agenda.add((key, value))
    answer = dict()
    k = 0
    # priority queue with intermediate results
    while len(agenda) > 0:
        key, value = agenda.pop(0)
        if key in used_agenda_keys:
            continue
        used_agenda_keys.add(key)
        low, pos, index = key
        cost, g, h = value
        # g is the current cost, h is a lower bound on the future cost
        # cost = g + h is a lower bound on the total cost
        k += 1
        max_upperside_length = min(len(word) - pos, transducer.max_up_length)
        for upperside_length in range(max_upperside_length + 1):
            new_pos = pos + upperside_length
            curr_up = word[pos: new_pos]
            if curr_up not in transducer.operation_costs:
                continue
            for curr_low, curr_cost in transducer.operation_costs[curr_up].items():
                new_g = g + curr_cost
                if new_g > d:  # if g > d there is no need to compute h
                    continue
                if curr_low == " ":
                    # a space is only accepted at the end of a complete
                    # word, after which the search restarts from the root
                    if allow_spaces and trie.is_final(index):
                        new_index = trie.root
                    else:
                        new_index = Trie.NO_NODE
                else:
                    new_index = trie.descend(index, curr_low)
                if new_index is Trie.NO_NODE:
                    continue
                new_low = low + curr_low
                new_h = self.h_func(word[new_pos: ], new_index)
                new_cost = new_g + new_h
                if new_cost > d:
                    continue
                new_key = (new_low, new_pos, new_index)
                new_value = (new_cost, new_g, new_h)
                if new_pos == len(word) and trie.is_final(new_index):
                    # whole input consumed at a final node: keep the
                    # cheapest cost seen for this candidate
                    old_g = answer.get(new_low, None)
                    if old_g is None or new_g < old_g:
                        answer[new_low] = new_g
                agenda.add((new_key, new_value))
    answer = sorted(answer.items(), key=(lambda x: x[1]))
    if return_cost:
        return answer
    else:
        return [elem[0] for elem in answer]
def test_len():
    """The reported length grows by exactly one after every `add`."""
    sorted_values = SortedListWithKey(key=modulo)
    for expected_size, item in enumerate(range(10000), start=1):
        sorted_values.add(item)
        assert len(sorted_values) == expected_size
def test_copy():
    """`copy()` is independent of its source: adding to the source
    afterwards does not change the copy's length."""
    source = SortedListWithKey(
        range(100),
        load=7,
        key=modulo,
        value_orderable=False,
    )
    duplicate = source.copy()

    source.add(100)

    source_size = len(source)
    duplicate_size = len(duplicate)
    assert source_size == 101
    assert duplicate_size == 100
def _retrieve_component_observations(self, structure_obs_sheet, subindex_name, component_short_name,
                                     component_scaled_column, sheet_year):
    """Read, rank and store the observations of one component from a sheet.

    Column and row positions are looked up in the "STRUCTURE_OBSERVATIONS"
    configuration section for `sheet_year`. Every row from the configured
    start row to the end of the sheet is turned into an `ExcelObservation`
    (rows with an empty check column are skipped), kept ordered by value,
    ranked through `self._update_observation_ranking` and finally appended
    to `self._excel_structure_observations`.

    :param structure_obs_sheet: sheet object exposing `name`, `nrows` and
        `cell(row, column)`.
    :param subindex_name: name of the subindex the component belongs to.
    :param component_short_name: short name of the component; replaced by
        its alias when `self._get_aliased_component` returns one.
    :param component_scaled_column: column holding the component's scaled
        values.
    :param sheet_year: year used to select the configuration values.
    :return: None. If a duplicate observation (same year, area and
        indicator) is found, a warning is logged and the method returns
        immediately without ranking or storing anything.
    """
    self._log.debug("\t\tRetrieving component %s from subindex %s observations in sheet %s..." % (
        component_short_name, subindex_name, structure_obs_sheet.name))
    year_column = get_column_number(
        self._config_get("STRUCTURE_OBSERVATIONS", "OBSERVATION_YEAR_COLUMN", sheet_year))
    iso3_column = get_column_number(
        self._config_get("STRUCTURE_OBSERVATIONS", "OBSERVATION_ISO3_COLUMN", sheet_year))
    observation_start_row = self._config_getint("STRUCTURE_OBSERVATIONS", "OBSERVATION_START_ROW", sheet_year)
    check_column = get_column_number(
        self._config_get("STRUCTURE_OBSERVATIONS", "OBSERVATION_CHECK_COLUMN", sheet_year))

    aliased_short_name = self._get_aliased_component(component_short_name, sheet_year)
    if aliased_short_name:
        self._log.info("Using alias %s for COMPONENT %s while parsing %s [%s]" % (
            aliased_short_name, component_short_name, structure_obs_sheet.name,
            colname(component_scaled_column)))
        short_name = aliased_short_name
    else:
        short_name = component_short_name

    # Set up sorted list to simplify ranking (components are not ranked in the spreadsheet).
    # Missing values (None, or anything na_to_none maps to None) sort as 0.
    sorted_observations = SortedListWithKey(
        key=lambda x: x[0].value if x[0].value is not None and na_to_none(x[0].value) is not None else 0)

    try:
        indicator = self._indicator_repo.find_component_by_short_name(short_name, subindex_name)
        for row_number in range(observation_start_row, structure_obs_sheet.nrows):  # Per country
            # Each row is visited exactly once per call, so no per-row
            # cache is needed to avoid repeating this message.
            if not structure_obs_sheet.cell(row_number, check_column).value:
                self._log.debug(
                    "Skipping row while parsing %s[%s] (did not detect value on check column, additional errors regarding this row will be omitted)" % (
                        structure_obs_sheet.name, row_number))
                continue
            try:
                year = int(structure_obs_sheet.cell(row_number, year_column).value)
                iso3 = structure_obs_sheet.cell(row_number, iso3_column).value
                area = self._area_repo.find_by_iso3(iso3)
                value = structure_obs_sheet.cell(row_number, component_scaled_column).value
                excel_observation = ExcelObservation(iso3=iso3, indicator_code=indicator.indicator,
                                                     year=year, value=value)
                # any() stops at the first match instead of building a list.
                is_duplicate = any(
                    t[0].year == year and t[1].iso3 == iso3 and t[2].indicator == indicator.indicator
                    for t in sorted_observations)
                if is_duplicate:
                    # NOTE(review): `warn` is a deprecated alias on standard
                    # library loggers; switch to `warning` if `_log` is one.
                    self._log.warn("Ignoring duplicate observations for COMPONENT %s while parsing %s [%s]" % (
                        indicator.indicator, structure_obs_sheet.name, colname(component_scaled_column)))
                    # Will not continue parsing, we could check this also at the beginning if we extract the
                    # year from the sheet name
                    return
                sorted_observations.add((excel_observation, area, indicator))
            except AreaRepositoryError:
                self._log.error("No area with code %s for indicator %s while parsing %s" % (
                    iso3, indicator.indicator, structure_obs_sheet.name))
            except Exception:
                # Narrowed from a bare `except:` so that KeyboardInterrupt
                # and SystemExit are no longer swallowed; a bad row is
                # still logged and skipped.
                self._log.error("Unexpected error parsing %s[%s]" % (structure_obs_sheet.name, row_number))
    except IndicatorRepositoryError:
        self._log.error(
            "No COMPONENT '%s' indicator found while parsing %s [%s]" % (
                short_name, structure_obs_sheet.name, colname(component_scaled_column)))

    # Rank them based on their scaled score
    self._update_observation_ranking(sorted_observations, observation_getter=lambda x: x[0])
    self._excel_structure_observations.extend(sorted_observations)