class Beam(object):
    """Bounded collection of hypotheses kept sorted by their ``'score'``.

    Each hypothesis is a mapping with a ``'score'`` entry. When the beam
    grows past ``size`` the last hypothesis in sort order is dropped: the
    highest score when ``lower_better`` is true, the lowest score otherwise.
    """

    # WORKING: testing optimization bugs
    def __init__(self, size, lower_better=True):
        """Create an empty beam.

        Args:
            size: maximum number of hypotheses retained.
            lower_better: if true, hypotheses sort by ascending score;
                otherwise by descending score (the key is the negated score).
        """
        if lower_better:
            self.hypotheses = SortedListWithKey(key=lambda x: x['score'])
        else:
            self.hypotheses = SortedListWithKey(key=lambda x: -x['score'])

        self.size = size

    def add(self, hyp, beam_constraints=()):
        """Insert ``hyp`` if it satisfies every constraint, then trim the beam.

        Args:
            hyp: hypothesis mapping with a ``'score'`` entry.
            beam_constraints: iterable of callables; ``hyp`` is only added
                when each of them returns a truthy value for it. The default
                is an immutable empty tuple so no state is shared between
                calls.
        """
        if all(check(hyp) for check in beam_constraints):
            self.hypotheses.add(hyp)
            if len(self.hypotheses) > self.size:
                # One insertion can overflow the beam by at most one item.
                assert len(self.hypotheses) == self.size + 1
                del self.hypotheses[-1]

    def __len__(self):
        return len(self.hypotheses)

    def __iter__(self):
        for hyp in self.hypotheses:
            yield hyp
Beispiel #2
0
class PQ_Frequency(Sampling_Frequency):
    """Sampling_Frequency variant that keeps its nodes in a sorted list.

    ``self.nodes`` is ordered by ``self.objective_f`` and eviction takes the
    node at index 0 of that list.
    """

    def __init__(self):
        super(PQ_Frequency, self).__init__(sampling=0)
        # Nodes ordered by the objective function.
        self.nodes = SortedListWithKey(key=self.objective_f)
        self.Nodes = DummyList()

    def add_node(self, item, cost):
        """Create a node via the parent class and register it in ``self.nodes``."""
        created = super(PQ_Frequency, self).add_node(item, cost)
        self.nodes.add(created)
        return created

    def touch(self, node):
        """Run the parent ``touch`` with the node taken out of the sorted list.

        ``node.is_restoring`` is checked separately before and after the
        parent call; presumably the parent call changes the node's sort
        key, which is why it is removed and re-added -- TODO confirm.
        """
        if not node.is_restoring:
            self.nodes.remove(node)

        super(PQ_Frequency, self).touch(node)

        if not node.is_restoring:
            self.nodes.add(node)

    def evict_node(self):
        """Pop and return the node at index 0, recording its objective value."""
        victim = self.nodes.pop(0)

        if self.saving_counts:
            self.historical_nodes[victim.value] = victim
        self.last_obj_f = self.objective_f(victim)

        return victim
def test_copy_copy():
    """A ``copy.copy`` clone must not see items added to the source later."""
    import copy
    source = SortedListWithKey(range(100), load=7, key=modulo)
    clone = copy.copy(source)
    source.add(100)
    assert len(source) == 101
    assert len(clone) == 100
Beispiel #4
0
class ChanThread:
    """Posts of one thread ordered by timestamp, plus an index of replies."""

    def __init__(self, chan, thread_id):
        self.posts = SortedListWithKey(key=lambda post: post.timestamp)
        # post_id -> set of ids of the posts replying to it
        self.replies_by_post_id = {}
        # Largest post timestamp seen so far (0 until a post is added).
        self.timestamp = 0
        self.thread_id = thread_id
        self.chan = chan

    def get_posts(self):
        """Return the sorted post container itself (not a copy)."""
        return self.posts

    def delete_post(self, post):
        """Remove ``post``; any failure is logged instead of raised."""
        try:
            self.posts.remove(post)
        except Exception as e:
            logger.exception("Exception removing post: {}".format(e))

    def add_post(self, post):
        """Store ``post``, index its links and advance the thread timestamp."""
        self.posts.add(post)
        self.update_post_links(post)
        newest = post.timestamp
        if newest > self.timestamp:
            self.timestamp = newest

    def update_post_links(self, post):
        """Record ``post.post_id`` as a reply to every id in ``post.target_posts``."""
        index = self.replies_by_post_id
        for target_id in post.target_posts:
            index.setdefault(target_id, set()).add(post.post_id)

    def get_post_replies(self, post_id):
        """Return the reply ids recorded for ``post_id`` (empty set if none)."""
        return self.replies_by_post_id.get(post_id, set())
def test_copy_copy():
    """A ``copy.copy`` clone must not see items added to the source later."""
    import copy
    source = SortedListWithKey(range(100), load=7, key=modulo)
    clone = copy.copy(source)
    source.add(100)
    assert len(source) == 101
    assert len(clone) == 100
Beispiel #6
0
class QueueTimer(TimerService):
    """Timer service backed by a list of events sorted by due timestamp."""

    TimerEvent = NamedTuple('TimerEvent', [('timestamp', float),
                                           ('callback', Callable)])

    def __init__(self, get_current_time=time.perf_counter):
        # Clock callable; injectable so a different time source can be used.
        self._get_current_time = get_current_time
        self._events = SortedListWithKey(key=lambda v: v.timestamp)

    def queue_size(self):
        """Return the number of pending events."""
        return len(self._events)

    def service(self):
        """Pop and invoke every event whose timestamp is not in the future.

        The clock is re-read before each event is considered.
        """
        while True:
            if not len(self._events):
                break
            if not (self._events[0].timestamp <= self._get_current_time()):
                break
            self._events.pop(0).callback()

    def get_current_time(self) -> float:
        return self._get_current_time()

    def schedule(self, delay: float, callback: Callable):
        """Queue ``callback`` to become due ``delay`` after the current time."""
        due = self._get_current_time() + delay
        event = self.TimerEvent(timestamp=due, callback=callback)
        self._events.add(event)

    def cancel(self, callback: Callable):
        """Remove every pending event whose callback equals ``callback``."""
        positions = [
            pos for pos, event in enumerate(self._events)
            if event.callback == callback
        ]
        # Delete from the back so earlier positions stay valid.
        while positions:
            del self._events[positions.pop()]
def test_copy():
    """``copy()`` must return a list unaffected by later adds to the source."""
    source = SortedListWithKey(range(100), key=negate)
    source._reset(7)
    clone = source.copy()
    source.add(100)
    assert len(source) == 101
    assert len(clone) == 100
Beispiel #8
0
    def __attrs_post_init__(self):
        """Replace the raw ``types`` and ``registers`` mappings with parsed objects.

        ``self.types`` keys have the form ``"<kind> <name>"`` and become a
        dict of ``name -> Type``. ``self.registers`` keys have the form
        ``"<kind> <location>"``; each value is either a name string or a
        mapping holding ``'name'`` (popped from it) and an optional
        ``'type'``. Registers end up in a list sorted by ``location``.

        On any failure a note naming the offending key is written to stderr
        and the original exception is re-raised.
        """
        parsed_types = {}
        for spec, options in self.types.items():
            try:
                kind, name = spec.split()
                assert kind in ('r32',)
                parsed_types[name] = Type(name, kind, **options)
            except:
                sys.stderr.write("Note: in type '{}':\n".format(spec))
                raise
        self.types = parsed_types

        parsed_registers = SortedListWithKey(key=lambda x: x.location)
        for spec, entry in self.registers.items():
            try:
                kind, location = spec.split(' ', 1)
                if isinstance(entry, str):
                    # Short form: the value is just the name.
                    name, config = entry, {}
                else:
                    name = entry.pop('name')
                    config = entry
                assert name in self.types
                parsed_registers.add(
                    Register(name, kind, location, config.get('type')))
            except:
                sys.stderr.write("Note: in register '{}':\n".format(spec))
                raise
        self.registers = parsed_registers
Beispiel #9
0
class ListEventStream(SimEventStream):
    """Event stream holding its events in a list sorted by timestamp."""

    def __init__(self, events: Iterable[SimEvent] = ()):
        self._events = SortedListWithKey(iterable=events,
                                         key=lambda ev: ev.timestamp)

    def add(self, event):
        """Insert a single event."""
        self._events.add(event)

    def extend(self, events):
        """Insert every event from ``events``."""
        self._events.update(events)

    def remove_all(self, predicate: Callable):
        """Delete every event for which ``predicate(event)`` is truthy."""
        doomed = [pos for pos, ev in enumerate(self._events) if predicate(ev)]
        # Delete from the back so earlier positions stay valid.
        while doomed:
            del self._events[doomed.pop()]

    @property
    def events(self):
        """Full slice of the stored events."""
        return self._events[:]

    def advance(self, _):
        """Drop the first event; the argument is ignored."""
        self._events.pop(0)

    def peek(self) -> Optional[SimEvent]:
        """Return the first event, or ``None`` when the stream is empty."""
        if len(self._events) > 0:
            return self._events[0]
        return None

    def sort(self):
        # Nothing to do: the container keeps itself sorted.
        pass
Beispiel #10
0
def _characterize_signal(beg, end):
    """
    Describes the signal available on every lead within a time interval.

    Parameters
    ----------
    beg:
        Starting time point of the interval.
    end:
        Last time point of the interval.

    Returns
    -------
    out:
        Sorted list with one LeadInfo entry per lead, ordered by decreasing
        quality. Each entry carries the lead, the signal samples, the
        simplified points, the baseline estimation and the quality of the
        fragment. None is returned as soon as a lead yields an empty
        fragment.
    """
    by_quality = SortedListWithKey(key=lambda v: -v.quality)
    for lead in sig_buf.get_available_leads():
        baseline, quality = characterize_baseline(lead, beg, end)
        fragment = sig_buf.get_signal_fragment(beg, end, lead=lead)[0]
        if len(fragment) == 0:
            return None
        # Simplify the fragment with RDP, bounded by the configured minimum
        # distance and number of points.
        simplified = RDP.arrayRDP(fragment, C.RDP_MIN_DIST, C.RDP_NPOINTS)
        entry = LeadInfo(lead, fragment, simplified, baseline, quality)
        by_quality.add(entry)
    return by_quality
Beispiel #11
0
def rangecover(whole: Range, covered: Iterable[Range]) -> Iterable[Range]:
    """Greedily yield ranges from ``covered`` that fill the gaps of ``whole``.

    On each round the candidate overlapping the still-uncovered gaps by the
    largest total length is yielded and the gaps are recomputed. The
    generator stops when no gaps or candidates remain, or when no candidate
    has a positive overlap.
    """
    uncovered = [whole]
    pending = set(covered)

    # Ranges chosen so far, ordered by their start.
    chosen = SortedListWithKey(key=lambda x: x[0])

    while uncovered and pending:
        top_score, top = 0.0, None
        for candidate in pending:
            score = 0.0
            for gap in uncovered:
                overlap = intersect(gap, candidate)
                if overlap:
                    score += overlap[1] - overlap[0]

            # Strict comparison: ties keep the candidate found first.
            if score > top_score:
                top_score, top = score, candidate

        if not top:
            return

        yield top

        chosen.add(top)
        pending.remove(top)

        uncovered = list(gaps(chosen, whole))
Beispiel #12
0
class PositionColumn(NumericColumn):
  """Numeric column that also tracks its row ids ordered by position value."""

  def __init__(self, table, col_id, col_info):
    super(PositionColumn, self).__init__(table, col_id, col_info)
    # Row ids, ordered by the value raw_get returns for each of them.
    self._sorted_rows = SortedListWithKey(key=self.raw_get)

  def set(self, row_id, value):
    # The row must leave the sorted list before its value (the sort key) changes.
    self._sorted_rows.discard(row_id)
    super(PositionColumn, self).set(row_id, value)
    # Rows holding the default value are not tracked.
    if value != self.getdefault():
      self._sorted_rows.add(row_id)

  def copy_from_column(self, other_column):
    super(PositionColumn, self).copy_from_column(other_column)
    copied_rows = other_column._sorted_rows[:]
    self._sorted_rows = SortedListWithKey(copied_rows, key=self.raw_get)

  def prepare_new_values(self, values, ignore_data=False, action_summary=None):
    # Adjusts positions and relabels existing rows with new positions (without changing the
    # sort order) to make room for the new ones. Also used when updating the position of an
    # existing row: a new value is found for it here, and when that value is set later the
    # old position is removed and the new one added.
    if ignore_data:
      rows = SortedListWithKey([], key=self.raw_get)
    else:
      rows = self._sorted_rows
    adjustments, new_values = relabeling.prepare_inserts(rows, values)
    # Translate adjustment indices back into row ids.
    relabeled = [(self._sorted_rows[i], pos) for (i, pos) in adjustments]
    return new_values, relabeled
Beispiel #13
0
def CreateString(n, k):
    """Best-first search for a string of length ``n`` scored 0 by ``Strategy(k)``.

    Starts from ``'A' * n`` and repeatedly takes the lowest-scored candidate.
    A candidate scoring 0 is returned as a string; otherwise its expansions
    with a non-negative score are queued. At most ``k + 1`` candidates are
    processed. Returns ``''`` when the budget is exhausted or an IndexError
    occurs (e.g. popping from an empty candidate list).
    """
    start = list('A' * n)
    h = Strategy(k)
    frontier = SortedListWithKey(key=h.evaluate_node)
    frontier.add(start)

    processed = 0
    try:
        while processed < k + 1:
            current = frontier.pop(0)
            if h.evaluate_node(current) == 0:
                return ''.join(current)

            for child in expand(current):
                if h.evaluate_node(child) >= 0:
                    frontier.add(child)

            processed += 1
    except IndexError:
        return ''
    return ''
    def extract_collocations(self, metric_class):
        """Score every bigram of the language model with ``metric_class``.

        Bigrams are skipped according to the ``exclude_punctuation``,
        ``exclude_conj`` and ``exclude_props`` flags.

        :param metric_class: subclass of ``Metric``; instantiated without arguments
        :return: sorted list of ``(metric_val, freq_first, freq_last,
            freq_bigram, first, last)`` tuples, highest metric value first
        """
        assert issubclass(metric_class, Metric)
        metric = metric_class()
        ranked = SortedListWithKey(key=lambda x: -x[0])

        unigram_counts = self.language_model.get_unigrams()
        bigram_counts = self.language_model.get_bigrams()

        for (first, last), freq_bigram in bigram_counts.items():

            if self.exclude_punctuation and (
                    first in self.PUNCT or last in self.PUNCT
                    or self.INITIALS.match(first)
                    or self.INITIALS.match(last)):
                continue

            if self.exclude_conj and (
                    first in self.CONJ_RU or last in self.CONJ_RU):
                continue

            if self.exclude_props and (
                    first in self.PROPOSITIONS_RU
                    or last in self.PROPOSITIONS_RU):
                continue

            freq_first = unigram_counts[first]
            freq_last = unigram_counts[last]

            score = metric.evaluate(freq_first, freq_last, freq_bigram,
                                    self.language_model.get_vocab_size())
            ranked.add(
                (score, freq_first, freq_last, freq_bigram, first, last))

        return ranked
    def search(self, word: str, distance=0) -> SortedListWithKey:
        """
        Collect stored words whose edit distance to the given word is within the limit.
        The trie is walked depth-first with an explicit stack; a branch is abandoned as soon
        as the minimum distance reported for its prefix exceeds the limit.
        :param word Misspelled word
        :param distance Maximum allowed distance for a candidate
        :return sorted list of (candidate, distance) tuples, ordered by distance and then by candidate
        """
        found = SortedListWithKey(key=lambda x: x[::-1])
        pending = [(children, [letter], None, list(range(self.__get_row_len(word))))
                   for letter, children in self.root.items()]

        while pending:
            node, prefix, pre_prev_row, prev_row = pending.pop()
            curr_row, min_dist = self.__calculate_distance(word, prefix, pre_prev_row, prev_row)

            if min_dist > distance:
                continue

            prefix_cost = curr_row[-1]
            if prefix_cost <= distance and self.__END in node:
                found.add((''.join(prefix), prefix_cost))

            for letter, children in node.items():
                if letter == self.__END:
                    continue
                # The row before the previous one is only carried along when the
                # Damerau modification is enabled.
                carried = prev_row if self.use_damerau_modification else None
                pending.append((children, prefix + [letter], carried, curr_row))

        return found
def test_getitem_slice():
    """Slicing a negate-keyed sorted list must match a reverse-sorted list."""
    random.seed(0)
    slt = SortedListWithKey(key=negate)
    slt._reset(17)

    lst = [random.random() for _ in range(100)]
    for val in lst:
        slt.add(val)

    lst.sort(reverse=True)

    bounds = [-75, -25, 0, 25, 75]
    steps = [-5, -1, 1, 5]

    assert all(slt[start:] == lst[start:] for start in bounds)

    assert all(slt[:stop] == lst[:stop] for stop in bounds)

    assert all(slt[::step] == lst[::step] for step in steps)

    assert all(slt[start:stop] == lst[start:stop]
               for start in bounds
               for stop in bounds)

    assert all(slt[:stop:step] == lst[:stop:step]
               for stop in bounds for step in steps)

    assert all(slt[start::step] == lst[start::step]
               for start in bounds for step in steps)

    assert all(slt[start:stop:step] == lst[start:stop:step]
               for start in bounds
               for stop in bounds for step in steps)
Beispiel #17
0
class Map:
    """Collection of routing rules kept sorted for matching and building.

    ``rules`` holds every rule ordered by ``match_key``; ``endpoints`` maps
    an endpoint to its rules ordered by ``build_key``.
    """

    default_converters = {
        'any': AnyConverter,
        'default': StringConverter,
        'float': FloatConverter,
        'int': IntegerConverter,
        'path': PathConverter,
        'string': StringConverter,
        'uuid': UUIDConverter,
    }

    def __init__(self, host_matching: bool = False) -> None:
        self.rules = SortedListWithKey(key=lambda rule: rule.match_key)
        # A sorted list is created on first access of an endpoint.
        self.endpoints: Dict[str, SortedListWithKey] = defaultdict(
            lambda: SortedListWithKey(key=lambda rule: rule.build_key))  # noqa
        # Per-instance copy, so changes do not leak into the class default.
        self.converters = self.default_converters.copy()
        self.host_matching = host_matching

    def add(self, rule: 'Rule') -> None:
        """Bind ``rule`` to this map and register it in both indexes."""
        rule.bind(self)
        endpoint_rules = self.endpoints[rule.endpoint]
        endpoint_rules.add(rule)
        self.rules.add(rule)

    def bind_to_request(
        self,
        scheme: str,
        server_name: str,
        method: str,
        path: str,
    ) -> 'MapAdapter':
        """Return an adapter for this map carrying the request details."""
        adapter = MapAdapter(self, scheme, server_name, method, path)
        return adapter

    def bind(self, scheme: str, server_name: str) -> 'MapAdapter':
        """Return an adapter for this map without method or path."""
        adapter = MapAdapter(self, scheme, server_name)
        return adapter
Beispiel #18
0
def _characterize_signal(beg, end):
    """
    Describes the signal available on every lead within a time interval.

    Parameters
    ----------
    beg:
        Starting time point of the interval.
    end:
        Last time point of the interval.

    Returns
    -------
    out:
        Sorted list with one LeadInfo entry per lead, ordered by decreasing
        quality. Each entry carries the lead, the signal samples, the
        simplified points, the baseline estimation and the quality of the
        fragment. None is returned as soon as a lead yields an empty
        fragment.
    """
    by_quality = SortedListWithKey(key=lambda v: -v.quality)
    for lead in sig_buf.get_available_leads():
        baseline, quality = characterize_baseline(lead, beg, end)
        fragment = sig_buf.get_signal_fragment(beg, end, lead=lead)[0]
        if len(fragment) == 0:
            return None
        # Simplify the fragment with RDP, bounded by the configured minimum
        # distance and number of points.
        simplified = RDP.arrayRDP(fragment, C.RDP_MIN_DIST, C.RDP_NPOINTS)
        entry = LeadInfo(lead, fragment, simplified, baseline, quality)
        by_quality.add(entry)
    return by_quality
def test_getitem_slice():
    """Slicing a modulo-keyed sorted list must match a list sorted by modulo."""
    random.seed(0)
    slt = SortedListWithKey(load=17, key=modulo)

    lst = [random.random() for _ in range(100)]
    for val in lst:
        slt.add(val)

    lst.sort(key=modulo)

    bounds = [-75, -25, 0, 25, 75]
    steps = [-5, -1, 1, 5]

    assert all(slt[start:] == lst[start:] for start in bounds)

    assert all(slt[:stop] == lst[:stop] for stop in bounds)

    assert all(slt[::step] == lst[::step] for step in steps)

    assert all(slt[start:stop] == lst[start:stop]
               for start in bounds
               for stop in bounds)

    assert all(slt[:stop:step] == lst[:stop:step]
               for stop in bounds for step in steps)

    assert all(slt[start::step] == lst[start::step]
               for start in bounds for step in steps)

    assert all(slt[start:stop:step] == lst[start:stop:step]
               for start in bounds
               for stop in bounds for step in steps)
Beispiel #20
0
def CreateString(n, k):
    """Best-first search for a string of length ``n`` scored 0 by ``Strategy(k)``.

    Starts from ``'A' * n`` and repeatedly takes the lowest-scored candidate.
    A candidate scoring 0 is returned as a string; otherwise its expansions
    with a non-negative score are queued. At most ``k + 1`` candidates are
    processed. Returns ``''`` when the budget is exhausted or an IndexError
    occurs (e.g. popping from an empty candidate list).
    """
    start = list('A' * n)
    h = Strategy(k)
    frontier = SortedListWithKey(key=h.evaluate_node)
    frontier.add(start)

    processed = 0
    try:
        while processed < k+1:
            current = frontier.pop(0)
            if h.evaluate_node(current) == 0:
                return ''.join(current)

            for child in expand(current):
                if h.evaluate_node(child) >= 0:
                    frontier.add(child)

            processed += 1
    except IndexError:
        return ''
    return ''
Beispiel #21
0
    def solve(self,
              n_threads: int,
              n_individuals: int,
              topol=topology.unconnected()) -> SortedList:
        """Generate solutions until ``_iterate`` says to stop.

        Starting from ``self._start_problem``, each round generates a
        solution, stores it, and asks ``self._iterate`` for the next problem
        and whether to continue. Any exception ends the run early; it is
        logged and the solutions found so far are still returned.

        :return: the solutions found, sorted by ``Solution.get_cost``
        """
        solutions = SortedListWithKey(key=Solution.get_cost)

        try:
            iteration = 0
            problem, do_iteration = self._start_problem, True
            while do_iteration:
                solution = self._generate_solution(problem, n_individuals,
                                                   n_threads, topol)
                solutions.add(solution)
                iteration += 1
                self._logger.info(
                    "({}) - New solution found.".format(iteration))
                problem, do_iteration = self._iterate(self._probl_factory,
                                                      solution)
            self._logger.info("Differential evolution completed.")
        # FIXME - is horrible to have to catch all possible exceptions but
        # requires a bit of time to understand all the possible exceptions
        # that can be thrown.
        except:
            self._logger.exception("Exception occurred during solution...")
            self._logger.error("Returning solutions found so far")
        return solutions
Beispiel #22
0
def sort_list(scenario):
    """Sort the rows of the results file by first field and write a report.

    Reads the comma-separated file at ``app.config['RESULTS']``, converts
    every field with ``convertir`` and orders the rows by their first field.
    The sorted rows are then written to ``app.config['OUT']`` below a banner
    naming ``scenario`` and a column header line.

    Both files are handled with ``with`` blocks so they are closed even if
    reading, converting or writing raises.

    Args:
        scenario: value interpolated into the banner line.
    """
    rows = SortedListWithKey(key=lambda item: item[0])

    # Read the file holding the final results.
    results_path = os.path.join(app.config['RESULTS'])
    with open(results_path, 'r') as f:
        for line in f:
            rows.add([convertir(x) for x in line.split(',')])

    # Write the sorted rows to the final output file.
    save_path = os.path.join(app.config['OUT'])
    with open(save_path, "w") as out:
        out.write(
            '###################################################################\n'
        )
        out.write(
            '#                         Escenario {}                             #\n'
            .format(scenario))
        out.write(
            '###################################################################\n\n'
        )
        out.write('#t,Sb,DCc,Vj,Vcpu,Vram,Vnet,Ucpu,Uram,Unet,R,SLA,tinit,tend\n')
        out.write('\n')
        for item in rows:
            out.write(','.join(map(str, item)))
            out.write('\n')
Beispiel #23
0
def test_key2():
    """Values without ordering can be stored when the key alone sorts them."""
    class Incomparable:
        pass
    first, second = Incomparable(), Incomparable()
    slt = SortedListWithKey(key=lambda val: 1, value_orderable=False)
    for item in (first, second):
        slt.add(item)
    assert slt == [first, second]
def test_key2():
    """Values without ordering can be stored when the key alone sorts them."""
    class Incomparable:
        pass
    first, second = Incomparable(), Incomparable()
    slt = SortedListWithKey(key=lambda val: 1)
    for item in (first, second):
        slt.add(item)
    assert slt == [first, second]
Beispiel #25
0
class OrderedNonUnique(Index):
    """Index that keeps its values in a key-sorted list and allows duplicates."""

    unique = False

    def __init__(self, key, name=None):
        super(OrderedNonUnique, self).__init__(key, name)
        # Values ordered by the index key function.
        self._data = SortedListWithKey(key=self._getkey)

    def _insert(self, value):
        self._data.add(value)

    def _update(self, old_value, new_value):
        # An update is a delete followed by an insert.
        self._delete(old_value)
        self._insert(new_value)

    def _delete(self, value):
        self._data.remove(value)

    def __getitem__(self, key):
        """Return an iterator over the values whose key equals ``key``."""
        return self.irange_key(key, key)

    def __len__(self):
        return len(self._data)

    def __contains__(self, value):
        return value in self._data

    def __reversed__(self):
        return reversed(self._data)

    def __iter__(self):
        return self._data.irange()

    def __repr__(self):
        return self._data.__repr__()

    def count(self, val):
        """Return how many times ``val`` occurs."""
        return self._data.count(val)

    def count_key(self, key):
        """Return how many values have a key equal to ``key``."""
        return sum(1 for _ in self._data.irange_key(key, key))

    def islice(self, start=None, stop=None, reverse=False):
        return self._data.islice(start, stop, reverse)

    def irange(self, minimum=None, maximum=None, inclusive=(True, True), reverse=False):
        return self._data.irange(minimum, maximum, inclusive, reverse)


    def irange_key(self, min_key=None, max_key=None, inclusive=(True, True),
                   reverse=False):
        return self._data.irange_key(min_key, max_key, inclusive, reverse)
Beispiel #26
0
 def __mul__(self, other):
     """Return a new Set with the items of this set also found in ``other``.

     The result reuses this set's key function; an empty left operand
     yields a default-constructed Set.
     """
     assert isinstance(other, Set)
     if not len(self.list):
         return Set()
     common = SortedListWithKey(key=self.list._key)
     for item in self.list:
         if item in other.list:
             common.add(item)
     return Set(list=common)
Beispiel #27
0
Datei: main.py Projekt: Kavka1/RL
class HighestReplay(Replay):
    """Replay holding at most ``max_size`` episodes, sorted by ``get_reward``.

    When the limit is exceeded the episode at index 0 (the smallest
    ``get_reward`` key) is dropped.
    """

    def __init__(self, max_size: int):
        super().__init__()
        self.episodes = SortedListWithKey(key=get_reward)
        self.max_size = max_size

    def add(self, episode: Episode):
        """Store ``episode``, record its return and horizon, then trim."""
        self.episodes.add(episode)
        # These statistics are recorded even if the episode is evicted below.
        self.known_returns.append(episode.total_reward)
        self.known_horizons.append(episode.steps)
        over_capacity = len(self.episodes) > self.max_size
        if over_capacity:
            self.episodes.pop(0)
def test_count():
    """``count`` must report each value's multiplicity (value n added n times)."""
    slt = SortedListWithKey(load=7, key=negate)

    assert slt.count(0) == 0

    for value in range(100):
        for _ in range(value):
            slt.add(value)
        # Validate the internal structure after each batch.
        slt._check()

    for value in range(100):
        assert slt.count(value) == value
def test_count():
    """``count`` must report each value's multiplicity (value n added n times)."""
    slt = SortedListWithKey(load=7, key=modulo, value_orderable=False)

    assert slt.count(0) == 0

    for value in range(100):
        for _ in range(value):
            slt.add(value)
    # Validate the internal structure once, after all inserts.
    slt._check()

    for value in range(100):
        assert slt.count(value) == value
def test_count():
    """``count`` must report each value's multiplicity (value n added n times)."""
    slt = SortedListWithKey(load=7, key=negate)

    assert slt.count(0) == 0

    for value in range(100):
        for _ in range(value):
            slt.add(value)
        # Validate the internal structure after each batch.
        slt._check()

    for value in range(100):
        assert slt.count(value) == value
Beispiel #31
0
class LRUCache:
    """Fixed-size key/value cache that evicts the least recently used key.

    Usage is tracked in one of two modes:

    * ``READ``: each use stamps the key with an increasing counter; eviction
      scans all stamps with ``min``.
    * ``WRITE``: keys are additionally kept in a list sorted by their stamp,
      so eviction pops the first entry.
    """

    READ = 'read'
    WRITE = 'write'

    def __init__(self, size, mode):
        """Create an empty cache.

        Args:
            size: number of entries at which ``set`` evicts before inserting.
            mode: ``LRUCache.READ`` or ``LRUCache.WRITE``.
        """
        self._size = size
        self._mode = mode
        self._cache = {}
        # key -> last-use stamp
        self._used = {}
        if mode == self.READ:
            self._time = 0
        elif mode == self.WRITE:
            # Keys ordered by their stamp in ``_used``.
            self._times = SortedListWithKey(key=self._used.get)

    def _use(self, key):
        """Mark ``key`` as the most recently used entry."""
        if self._mode == self.READ:
            self._used[key] = self._time
            self._time += 1
        elif self._mode == self.WRITE:
            # Take the key out while its old stamp is still in ``_used``:
            # the sorted list locates it through that stamp.
            if key in self._used:
                self._times.discard(key)
            # Decide on emptiness only after the discard. Re-using the sole
            # tracked key leaves the list empty, and indexing [-1] would
            # raise IndexError.
            if self._times:
                self._used[key] = self._used[self._times[-1]] + 1
            else:
                self._used[key] = 0
            self._times.add(key)

    def _remove(self):
        """Evict the least recently used entry."""
        if self._mode == self.READ:
            lru = min(self._used, key=self._used.get)
            del self._cache[lru]
            del self._used[lru]
        elif self._mode == self.WRITE:
            lru = self._times.pop(0)
            del self._cache[lru]
            del self._used[lru]

    def get(self, key):
        """Return the cached value for ``key`` and mark it used.

        Returns ``None`` on a miss; a stored ``None`` is treated as a miss
        as well and does not refresh the key.
        """
        item = self._cache.get(key)
        if item is None:
            return None
        self._use(key)
        return item

    def set(self, key, value):
        """Store ``value`` under ``key``, evicting first if the cache is full."""
        if key not in self._cache:
            if len(self._cache) == self._size:
                self._remove()
        self._cache[key] = value
        self._use(key)

    def __repr__(self):
        return repr(self._cache)
Beispiel #32
0
class Map:
    """Collection of routing rules kept sorted for matching and building.

    ``rules`` holds every rule ordered by ``match_key``; ``endpoints`` maps
    an endpoint to its rules ordered by ``build_key``.
    """

    default_converters = {
        'any': AnyConverter,
        'default': StringConverter,
        'float': FloatConverter,
        'int': IntegerConverter,
        'path': PathConverter,
        'string': StringConverter,
        'uuid': UUIDConverter,
    }

    def __init__(self, host_matching: bool = False) -> None:
        self.rules = SortedListWithKey(key=lambda rule: rule.match_key)
        # A sorted list is created on first access of an endpoint.
        self.endpoints: Dict[str, SortedListWithKey] = defaultdict(
            lambda: SortedListWithKey(key=lambda rule: rule.build_key))  # noqa
        # Per-instance copy, so changes do not leak into the class default.
        self.converters = self.default_converters.copy()
        self.host_matching = host_matching

    def add(self, rule: 'Rule') -> None:
        """Bind ``rule`` to this map and register it in both indexes."""
        rule.bind(self)
        endpoint_rules = self.endpoints[rule.endpoint]
        endpoint_rules.add(rule)
        self.rules.add(rule)

    def bind_to_request(
        self,
        secure: bool,
        server_name: str,
        method: str,
        path: str,
        query_string: bytes,
        websocket: bool,
        root_path: str,
    ) -> 'MapAdapter':
        """Return an adapter for this map carrying the request details."""
        adapter = MapAdapter(
            self,
            secure,
            server_name,
            method,
            path,
            query_string,
            websocket,
            root_path,
        )
        return adapter

    def bind(self, secure: bool, server_name: str) -> 'MapAdapter':
        """Return an adapter for this map without request details."""
        adapter = MapAdapter(self, secure, server_name)
        return adapter

    def iter_rules(self, endpoint: Optional[str] = None) -> Iterator['Rule']:
        """Iterate over all rules, or only those of ``endpoint`` when given."""
        if endpoint is None:
            return iter(self.rules)
        return iter(self.endpoints[endpoint])
Beispiel #33
0
def create_palette(color_depth=8):
    """ Build the palette of every color at the given bit depth, sorted by average. """
    palette = SortedListWithKey(load=1000, key=lambda c: c.avg)
    scale = (MAX_COLOR_DEPTH / 2**color_depth)
    levels = range(2**color_depth)

    # One color per (r, g, b) combination of scaled channel levels.
    for x in levels:
        r = x*scale
        for y in levels:
            g = y*scale
            for z in levels:
                b = z*scale
                mean = int(avg([r,g,b]))
                palette.add(Color(r=r, g=g, b=b, avg=mean))

    return palette
Beispiel #34
0
 def __add__(self, other):
     """Return the union of this set and ``other`` as a new Set.

     With an empty left operand the result is a shallow copy of ``other``
     (or a fresh Set when both are empty). Otherwise deep copies of the
     items of both operands are merged, skipping items already present.
     """
     assert isinstance(other, Set)
     if len(self.list) == 0:
         if len(other.list) == 0:
             return Set()
         return copy.copy(other)

     merged = SortedListWithKey(key=self.list._key)
     merged.update(copy.deepcopy(self.list))
     for item in copy.deepcopy(other.list):
         if item not in merged:
             merged.add(item)
     return Set(list=merged)
Beispiel #35
0
class ExamRoom:
    """Seat allocator tracking the free intervals between occupied seats.

    Intervals are ``(a, b)`` pairs; ``-1`` and ``N`` act as the boundaries
    beyond the first and last seat. ``p`` maps a left endpoint to its
    interval, ``q`` maps a right endpoint to its interval, and ``pq`` keeps
    the intervals sorted so the best one is last.
    """

    def __init__(self, N):
        self.N = N
        whole = (-1, N)
        self.p = {-1: whole}
        self.q = {N: whole}
        # On equal distance, prefer the smallest index.
        self.pq = SortedListWithKey([whole],
                                    key=lambda x:
                                    (self.distance(x[0], x[1]), -x[0]))

    def distance(self, a, b):
        """Return the distance used to rank the interval ``(a, b)``."""
        if a == -1:
            return b
        return self.N - 1 - a if b == self.N else (b - a) // 2

    def _remove(self, a, b):
        del self.p[a]
        del self.q[b]
        self.pq.remove((a, b))

    def _add(self, a, b):
        interval = (a, b)
        self.p[a] = interval
        self.q[b] = interval
        self.pq.add(interval)

    def seat(self):
        """Occupy a seat in the best-ranked interval and return its index."""
        left, right = self.pq[-1]
        if left == -1:
            spot = 0
        elif right == self.N:
            spot = self.N - 1
        else:
            spot = (right - left) // 2 + left

        # Split the chosen interval around the new seat.
        self._remove(left, right)
        self._add(left, spot)
        self._add(spot, right)
        return spot

    def leave(self, p):
        """Free seat ``p`` by merging the two intervals that touch it."""
        before_start, before_end = self.q[p]
        after_start, after_end = self.p[p]
        self._remove(before_start, before_end)
        self._remove(after_start, after_end)
        self._add(before_start, after_end)

    def reqp(self):
        return self.pq
 def neiborDistances(self, node):
     """Return the distance from ``node`` to every known node (itself included).

     The distance is the number of set bits in ``node ^ other``.

     Args:
         node (int): The node identifier from which to calculate distances.

     Returns:
         :obj:`SortedListWithKey` of :obj:`list` of int: ``[neighbor,
             distance]`` lists sorted by ascending distance.
     """
     by_distance = SortedListWithKey(key=lambda n: n[1])
     for other in self.__nodes.iterkeys():
         differing_bits = bin(node ^ other).count("1")
         by_distance.add([other, differing_bits])
     return by_distance
Beispiel #37
0
    def resize(self, size=None):
        """
        Shrink the graph to ``size`` edges by removing the edges with the
        highest distance, then drop every endpoint of a removed edge that is
        left without edges. Without an explicit ``size``, ``self.max_size``
        is used; if that is None too, nothing happens.
        :param int size: number of edges the graph should have afterwards
        """
        if size is None:
            if self.max_size is None:
                #: no limit configured at all
                return
            size = self.max_size

        n_edges = self.number_of_edges()
        if n_edges <= size:
            return

        #: number of edges that have to go
        excess = n_edges - size

        #: removal candidates, sorted by ascending distance
        candidates = SortedListWithKey(key=lambda x: x[2])

        #: keep only the ``excess`` edges with the highest distance
        for node1, node2 in self.edges():
            dist = self.edge[node1][node2]['distance']
            if len(candidates) >= excess:
                if not candidates[0][2] < dist:
                    continue
                # make room by dropping the candidate with lowest distance
                del candidates[0]
            candidates.add((node1, node2, dist))

        self.remove_edges_from(candidates)

        # endpoints of the removed edges
        endpoints = {node for tup in candidates for node in tup[:2]}

        # remove the ones that no longer have any edge
        for node in endpoints:
            if len(self.edge[node]) == 0:
                self.remove_node(node)
def test_getitem():
    """Integer indexing must agree with a plain list sorted in reverse."""
    random.seed(0)
    slt = SortedListWithKey(load=17, key=negate)

    # draw all samples first, then feed them to the sorted list in order
    values = [random.random() for _ in range(100)]
    for value in values:
        slt.add(value)

    # reference ordering: the same values sorted descending
    expected = sorted(values, reverse=True)

    # non-negative indices 0..99
    for idx in range(100):
        assert slt[idx] == expected[idx]

    # non-positive indices -99..0
    for idx in range(-99, 1):
        assert slt[idx] == expected[idx]
def test_count():
    """count() must report how often each value was inserted."""
    slt = SortedListWithKey(load=7, key=modulo, value_orderable=False)

    # nothing has been added yet
    assert slt.count(0) == 0

    # insert every value v exactly v times
    for value in range(100):
        for _ in range(value):
            slt.add(value)
    slt._check()

    for value in range(100):
        assert slt.count(value) == value

    # a value that was never inserted is counted zero times
    slt = SortedListWithKey(range(8), key=modulo, value_orderable=False)
    assert slt.count(9) == 0
def test_getitem():
    """Check positive and negative indexing against a reverse-sorted list."""
    random.seed(0)
    slt = SortedListWithKey(load=17, key=negate)

    samples = []
    for _ in range(100):
        sample = random.random()
        samples.append(sample)
        slt.add(sample)

    # the plain list, sorted descending, is the reference ordering
    samples.sort(reverse=True)

    forward = [slt[idx] == samples[idx] for idx in range(100)]
    assert all(forward)

    # idx - 99 walks the indices -99..0
    backward = [slt[idx - 99] == samples[idx - 99] for idx in range(100)]
    assert all(backward)
class GD_PQ(object):
    """Eviction policy that keeps its nodes sorted by ``priority``.

    ``H`` holds the priority of the most recently evicted node and is added
    to a node's cost whenever that node's priority is (re)computed.
    """

    def __init__(self, name = None, **kwargs):
        """Set up an empty queue; ``name`` defaults to the class name."""
        super(GD_PQ, self).__init__()
        # any falsy name falls back to the concrete class name
        self.name = name if name else self.__class__.__name__

        self.H = 0
        self.time = 0
        self.size_aware = False

        self.error_numer = 0
        self.error_denom = 1

        # ascending by priority, so index 0 is the next eviction candidate
        self.nodes = SortedListWithKey(key = attrgetter('priority'))

    def add_node(self, item, cost, size = 1):
        """Create a node for ``item``, enqueue it and return it.

        When the policy is size aware, the cost is divided by ``size``
        before the node is built.
        """
        effective_cost = cost
        if self.size_aware and size != 1:
            effective_cost = float(cost) / size

        node = PQNode(item, effective_cost)
        node.priority = self.H + node.cost
        self.nodes.add(node)

        self.time += 1
        return node

    def touch(self, node):
        """Refresh the priority of ``node`` and re-sort it into the queue."""
        # the node has to leave the sorted list before its key changes
        self.nodes.remove(node)
        self.update_priority(node)
        self.nodes.add(node)
        self.time += 1

    def update_priority(self, node):
        """Recompute the priority of ``node`` from ``H`` and its cost."""
        node.priority = self.H + node.cost

    def evict_node(self):
        """Remove and return the lowest-priority node, storing its priority in ``H``."""
        victim = self.nodes.pop(0)
        self.H = victim.priority
        return victim
Beispiel #42
0
def sorted_iterable(iterable, key=None, buffer=100):
    """lazily sorts an "almost sorted" (possibly infinite) iterable

    Items are collected in a sorted window; once the window holds ``buffer``
    items, the smallest one is emitted before the next item is admitted.

    :param iterable: iterable
    :param key: function used as sort key
    :param buffer: int size of the window. An item must not be further than
        that from its sorted position
    """
    sort_key = key or identity
    from sortedcontainers import SortedListWithKey
    window = SortedListWithKey(key=sort_key)
    for item in iterable:
        # a falsy buffer disables early emission: everything is kept back
        if buffer and len(window) >= buffer:
            yield window.pop(0)
        window.add(item)
    # drain what is left; never reached if iterable is infinite
    for item in window:
        yield item
Beispiel #43
0
def anysegmentsintersect(segments):
    """returns True or False

    Segments that share a first endpoint, or share a second endpoint, are
    reported as intersecting right away. Otherwise every pair of segments is
    tested with ``segmentsintersect``.

    :param segments: line segments
    :type segments: list of pairs of tuples representing endpoints
    :return: whether there are any intersections

    """
    from itertools import combinations

    # Collapsing the endpoints into sets drops duplicates, so a set smaller
    # than the input means two segments start (or end) at the same point.
    starts = {seg[0] for seg in segments}
    ends = {seg[1] for seg in segments}
    if len(starts) < len(segments) or len(ends) < len(segments):
        return True

    # Exhaustive pairwise test. This is quadratic in the number of segments,
    # but unlike a sweep keyed on a fixed per-endpoint value it cannot miss a
    # crossing between segments that are not neighbours in the sweep order.
    # An empty or single-segment input yields no pairs and therefore False.
    return any(
        segmentsintersect((first[0], first[1]), (second[0], second[1]))
        for first, second in combinations(segments, 2)
    )
# NOTE(review): the ``collections.Iterable`` alias was removed in Python 3.10;
# switch the base to ``collections.abc.Iterable`` if this must run there.
class TwitterNode(collections.Iterable):
    """Class representing a Twitter Node or Vertex on Graph.

    A node has a name and a collection of tweets kept sorted by their
    ``created_at`` attribute.
    """

    def __init__(self, name, tweet):
        """initialize the node class.

        name:string
        tweet:Tweet
        """
        self.__nodename__ = name
        self.__tweets__ = SortedListWithKey(key=lambda d: d.created_at)
        self.add(tweet)

    @property
    def name(self):
        """get the name for the node."""
        return self.__nodename__

    @property
    def tweets(self):
        """get the tweets for node as a new plain list."""
        return list(self.__tweets__)

    def __iter__(self):
        """iterate through tweets."""
        # __iter__ has to hand back an iterator; returning the container
        # itself makes iter(node) fail with a TypeError
        return iter(self.__tweets__)

    def add(self, tweet):
        """add tweet for node."""
        self.__tweets__.add(tweet)

    def remove(self, tweet):
        """remove tweet from node."""
        self.__tweets__.remove(tweet)

    def __len__(self):
        """len of tweets for node."""
        return len(self.__tweets__)

    def __str__(self):
        """string representing node."""
        return "Name: %s Tweets: %s" % (self.name, self.tweets)
def test_getitem_slice():
    """Every slice form of the sorted list must equal the same slice of a plain list."""
    random.seed(0)
    slt = SortedListWithKey(load=17, key=modulo)

    values = []
    for _ in range(100):
        sample = random.random()
        slt.add(sample)
        values.append(sample)

    # reference ordering: a plain list sorted with the same key
    values.sort(key=modulo)

    bounds = [-75, -25, 0, 25, 75]
    steps = [-5, -1, 1, 5]

    # open-ended slices
    for start in bounds:
        assert slt[start:] == values[start:]

    for stop in bounds:
        assert slt[:stop] == values[:stop]

    for step in steps:
        assert slt[::step] == values[::step]

    # two of the three slice fields given
    for start in bounds:
        for stop in bounds:
            assert slt[start:stop] == values[start:stop]

    for stop in bounds:
        for step in steps:
            assert slt[:stop:step] == values[:stop:step]

    for start in bounds:
        for step in steps:
            assert slt[start::step] == values[start::step]

    # fully specified slices
    for start in bounds:
        for stop in bounds:
            for step in steps:
                assert slt[start:stop:step] == values[start:stop:step]
def test_add():
    """add() must keep the list consistent for ascending, descending and random input."""
    random.seed(0)

    ascending = range(1000)
    descending = range(1000, 0, -1)
    # lazily drawn, so the random numbers are produced while adding
    randoms = (random.random() for _ in range(1000))

    for values in (ascending, descending, randoms):
        slt = SortedListWithKey(key=modulo)
        for value in values:
            slt.add(value)
        slt._check()
Beispiel #47
0
class Set():
    """Set type backed by a ``SortedListWithKey``.

    ``_put`` keeps elements unique by testing membership before inserting.
    ``has_key`` records whether the sort key of the backing list has been
    settled: this happens on the first ``_put`` (which switches the key to
    ``gen_set_key`` when that first element is itself a ``Set``) or right
    away when an existing list is adopted.
    """

    def __init__(self, *args, list=None, keep_generators=False):
        """Build a set from elements, generators or an existing sorted list.

        :param args: elements to insert; a generator argument is drained
            element by element unless ``keep_generators`` is true
        :param list: existing ``SortedListWithKey`` to adopt as backing
            storage (mostly used internally). Attention: the list is not
            copied, the new set references it directly.
        :param keep_generators: store generator objects as elements instead
            of draining them
        """
        if list is not None:
            assert isinstance(list, SortedListWithKey)
            self.list = list
            self.has_key = True
        else:
            self.list = SortedListWithKey()
            self.has_key = False

        for arg in args:
            if isinstance(arg, types.GeneratorType) and not keep_generators:
                for x in arg:
                    self._put(x)
            else:
                self._put(arg)

    def __add__(self, other):
        """Return the union as a new set that references neither operand."""
        assert isinstance(other, Set)
        if len(self.list) == 0 and len(other.list) == 0:
            return Set()
        elif len(self.list) == 0:
            # A shallow copy would share other's backing list, so mutating
            # the result would silently mutate ``other`` as well.
            return copy.deepcopy(other)
        else:
            merged = SortedListWithKey(key=self.list._key)
            merged.update(copy.deepcopy(self.list))
            for x in copy.deepcopy(other.list):
                if x not in merged:
                    merged.add(x)
            return Set(list=merged)

    def __iadd__(self, other):
        """In-place union: add every element of ``other`` to this set."""
        assert isinstance(other, Set)
        for x in other.list:
            self._put(x)
        return self

    def __sub__(self, other):
        """Return a new set with the elements of this set that are not in ``other``."""
        assert isinstance(other, Set)
        if len(self.list) == 0:
            return Set()
        remaining = SortedListWithKey(key=self.list._key)
        for x in self.list:
            if x not in other.list:
                remaining.add(x)
        return Set(list=remaining)

    def __mul__(self, other):
        """Return a new set with the elements contained in both sets."""
        assert isinstance(other, Set)
        if len(self.list) == 0:
            return Set()
        common = SortedListWithKey(key=self.list._key)
        for x in self.list:
            if x in other.list:
                common.add(x)
        return Set(list=common)

    def __str__(self):
        """Render the set as ``{ a, b, c }``."""
        return '{ %s }' % ', '.join([str(x) for x in self.list])

    def __pow__(self, other):
        """Support ``myset ** 2``; delegates to ``__rpow__``."""
        return self.__rpow__(other)

    def __rpow__(self, other):
        """Support ``2 ** myset``: return the powerset of this set.

        :raises Exception: if ``other`` is an int different from 2
        """
        assert isinstance(other, int)
        if other != 2:
            raise Exception('Lefthandside is not 2. To generate the powerset use "2 ** myset"')

        from copy import deepcopy
        # Set.power consumes its argument, so it works on a private copy
        copied_set = deepcopy(self)

        return Set.power(copied_set)

    def __mod__(self, other):
        """Return the symmetric difference: elements in exactly one of the two sets."""
        assert isinstance(other, Set)
        return (self - other) + (other - self)

    def __iter__(self):
        """Return the iterator of the internal list."""
        return self.list.__iter__()

    def __len__(self):
        """Return the number of elements contained in the set."""
        return len(self.list)

    def __lt__(self, other):
        """Proper-subset test: all elements are in ``other`` and ``other`` is larger."""
        assert isinstance(other, Set)
        if len(self.list) >= len(other.list):
            return False
        return all(x in other.list for x in self.list)

    def __gt__(self, other):
        """Proper-superset test: all elements of ``other`` are here and this set is larger."""
        assert isinstance(other, Set)
        if len(self.list) <= len(other.list):
            return False
        return all(x in self.list for x in other.list)

    def __ge__(self, other):
        """Superset-or-equal test: every element of ``other`` is in this set."""
        assert isinstance(other, Set)
        return all(x in self.list for x in other.list)

    def __le__(self, other):
        """Subset-or-equal test: every element of this set is in ``other``."""
        assert isinstance(other, Set)
        return all(x in other.list for x in self.list)

    def __eq__(self, other):
        """Indicate whether both sets contain the same elements."""
        assert isinstance(other, Set)
        if len(self.list) != len(other.list):
            return False
        return (all(x in other.list for x in self.list)
                and all(x in self.list for x in other.list))

    def __ne__(self, other):
        """Indicate whether the two sets differ."""
        assert isinstance(other, Set)
        return not self == other

    def __contains__(self, other):
        """Support the ``in`` operator."""
        return other in self.list

    def __getitem__(self, key):
        """Forward indexing and slicing to the internal list."""
        return self.list.__getitem__(key)

    def cartesian_product(self, other):
        """Return a set of all pairs ``(x, y)`` with x from this set and y from ``other``."""
        assert isinstance(other, Set)
        s = Set()
        for x in self.list:
            for y in other.list:
                # added straight to the backing list, bypassing _put
                s.list.add((x, y))
        return s

    @staticmethod
    def power(s):
        """Create the power set of ``s``. Elements are popped off ``s`` in the process."""
        if len(s) == 0:
            # the power set of the empty set holds just the empty set
            return Set(Set())
        x = s.pop()
        y = Set.power(s)
        # every subset without x, once more with x added
        z = Set(m + Set(x) for m in y)
        return y + z

    def arb(self):
        """Return an arbitrary element: the last one for an even size, else the first."""
        return self.list[-1] if len(self.list) % 2 == 0 else self.list[0]

    def rnd(self):
        """Return a random element from the set."""
        return self.list[random.randrange(0, len(self.list))]

    def put(self, other):
        """Add an element to the current set."""
        self._put(other)

    def _put(self, other):
        """Insert ``other`` unless it is already present.

        The first insertion settles the sort key: if that element is a
        ``Set``, the backing list is switched to ``gen_set_key``.
        """
        if self.has_key is False:
            self.has_key = True
            if isinstance(other, Set):
                self.list._key = gen_set_key
        if other not in self.list:
            self.list.add(other)

    def peek(self):
        """Return the last element of the set."""
        return self.list[-1]

    def pop(self):
        """Remove and return the last element of the set."""
        return self.list.pop()

    def sum(self):
        """Fold all elements together with ``+=`` and return the result.

        The accumulator starts as a deep copy of the first element, so no
        element is modified. Returns ``None`` for an empty set.
        """
        import copy
        temp = None
        for x in self.list:
            if temp is None:
                temp = copy.deepcopy(x)
            else:
                temp += x
        return temp
def test_copy():
    """A copy must not see elements added to the original afterwards."""
    original = SortedListWithKey(range(100), load=7, key=negate)
    duplicate = original.copy()

    # grow only the original
    original.add(100)

    assert len(original) == 101
    assert len(duplicate) == 100
Beispiel #49
0
# Index pairs that are zipped with the candidate openings in the loop below.
new_indices = [ (1,0), (0,1) ]

# NOTE(review): neither name is referenced again in this excerpt; presumably
# bookkeeping for the search result - confirm against the rest of the script.
largest_33_found = None
largest_index_found = None

# NOTE(review): not used within this excerpt - confirm its purpose.
TO_FIND = 3

# NOTE(review): `combined`, `new_cands` and `solve` are defined outside this
# excerpt. No exit condition is visible here, and `new_indices` is not
# updated inside the loop as shown - verify both against the full script.
for n in itertools.count(2):

	## openings.pop()
	## openings_values.pop()
	## openings_indices.pop()
	# Drop one entry from `combined` (presumably the last one - confirm the
	# container type).
	combined.pop()

	# Queue every new candidate as [opening, solve(opening)[0], index].
	for new_opening, new_opening_index in zip(new_cands, new_indices):
		combined.add([new_opening, solve(new_opening)[0], new_opening_index])
		### value = solve(new_opening)[0]

		### insertion_point = bisect.bisect_left(openings_values, value)
		### insertion_point = combined.bisect_left(value)

		### openings.insert(insertion_point, new_opening)
		### openings_values.insert(insertion_point, value)
		### openings_indices.insert(insertion_point, new_opening_index)

	# opening = openings[-1]
	# Take the last entry of `combined` and solve it again to obtain the
	# next selection and the next batch of candidates.
	opening = combined[-1][0]
	ind = combined[-1][2]
	solved = solve(opening)
	new_selection = solved[0]
	new_cands = solved[1]
Beispiel #50
0
def export_hit_summary(multiClusterDict,outfile,hitDictID,maxJump = 100,minClusterSize = 2, hitsToConsider = set(),
                       hitsToIgnore = set(),writeFile=False):
    """Filter clusters by hit count and index gaps, optionally writing a report.

    A cluster's hit count is the number of proteins with a hit in
    ``hitsToConsider`` (or simply the number of proteins when that set is
    empty) minus the number of proteins with a hit in ``hitsToIgnore``.
    Clusters with at least ``minClusterSize`` hits are examined: while the
    largest gap between consecutive protein indices exceeds ``maxJump`` and
    sits at either end of the cluster, the protein at that end is popped from
    the cluster (the input cluster is modified in place). A cluster is kept
    when its largest remaining gap is below ``maxJump``.

    :param multiClusterDict: mapping whose values are iterables of clusters
    :param outfile: path of the report, only used when ``writeFile`` is true
    :param hitDictID: key into each protein's ``hit_dict``
    :param maxJump: gap threshold between consecutive protein indices
    :param minClusterSize: minimum hit count for a cluster to be examined
    :param hitsToConsider: hits that count towards a cluster (never modified)
    :param hitsToIgnore: hits that count against a cluster (never modified)
    :param writeFile: write the tab-separated report to ``outfile``
    :return: the kept clusters, ordered by descending hit count
    """
    def count_hits(cluster):
        # proteins carrying an ignored hit always count against the cluster
        ignored = sum(1 for protein in cluster
                      if len(hitsToIgnore & protein.hit_dict[hitDictID].hits) > 0)
        if hitsToConsider:
            considered = sum(1 for protein in cluster
                             if len(hitsToConsider & protein.hit_dict[hitDictID].hits) > 0)
            return considered - ignored
        return len(cluster) - ignored

    def largest_jump(cluster):
        # returns (largest gap, position of its first occurrence, number of gaps)
        indices = [protein.idx for protein in cluster]
        jumps = [abs(j - i) for i, j in zip(indices, indices[1:])]
        if not jumps:
            # fewer than two proteins: there is no gap to measure
            return 0, None, 0
        biggest = max(jumps)
        return biggest, jumps.index(biggest), len(jumps)

    # The key is computed from the cluster being inserted rather than read
    # from a loop variable that merely happens to hold the right value.
    filtered_clusters = SortedListWithKey(key=lambda cluster: -count_hits(cluster))
    for species_clusters in multiClusterDict.values():
        for cluster in species_clusters:
            clusterHits = count_hits(cluster)
            # Check size first
            if clusterHits < minClusterSize:
                continue

            maxGap, maxGapIdx, nGaps = largest_jump(cluster)
            # if the max gap happens at the start or end of the cluster see if
            # removing that protein will have it fit the threshold
            while clusterHits > minClusterSize and maxGap > maxJump and \
                    maxGapIdx in (0, nGaps - 1):
                if maxGapIdx == 0:
                    cluster.pop(0)
                else:
                    # the gap is the last one, so drop the protein after it
                    cluster.pop(maxGapIdx + 1)
                clusterHits = count_hits(cluster)
                maxGap, maxGapIdx, nGaps = largest_jump(cluster)

            # NOTE(review): trimming triggers on gap > maxJump while acceptance
            # needs gap < maxJump, so a gap equal to maxJump is neither trimmed
            # nor accepted - confirm whether `<=` was intended.
            if maxGap < maxJump:
                print(cluster)
                filtered_clusters.add(cluster)
    print("Found %i clusters" % len(filtered_clusters))
    if writeFile:
        with open(outfile,'w') as outHandle:
            outHandle.write('Protein Hit\tProtein Idx\tCluster Hit\n')
            for cluster in filtered_clusters:
                outHandle.write('# Species: %s\tNumber Proteins: %i\tDNA Size: %i\n' % (cluster[0].species,len(cluster),cluster.size()))
                outHandle.write('# Protein Hit\tProtein Idx\tCluster Hit\n')
                for protein in cluster:
                    hits = protein.hit_dict[hitDictID].hits
                    # ignored hits are flagged with ##, considered hits with **
                    if len(hitsToIgnore & hits) >= 1:
                        outHandle.write('%s\t%i\t##%s\n'% (protein.name,protein.idx,list(hits & hitsToIgnore)[0][0]))
                    elif len(hitsToConsider & hits) >= 1:
                        outHandle.write('%s\t%i\t**%s\n'% (protein.name,protein.idx,list(hits & hitsToConsider)[0][0]))
                    elif len(hits) >= 1:
                        outHandle.write('%s\t%i\t%s\n'% (protein.name,protein.idx,list(hits)[0][0]))
                    else:
                        outHandle.write('%s\t%i\tNo Hits\n'% (protein.name,protein.idx))

    return filtered_clusters
Beispiel #51
0
	# openings.pop()
	# openings_values.pop()
	# openings_indices.pop()
	combined.pop()

	for new_opening, new_opening_index in zip(new_cands, new_indices):
		value = solve(new_opening)[0]

		## insertion_point = bisect.bisect_left(openings_values, value)
		### insertion_point = openings_values.bisect_left(value)

		# openings_values.insert(insertion_point, value)
		# openings_indices.insert(insertion_point, new_opening_index)

		combined.add( [new_opening, value, new_opening_index] )

	index = combined[-1][2]

	opening = combined[-1][0]
	solved = solve(opening)
	new_selection = solved[0]
	new_cands = solved[1]
	new_indices = (( (index[0] + 1, index[1] + 0), (index[0] + 0, index[1] + 1) ))



	## if largest_index_found is None or index_replaced > largest_index_found:
	## 	largest_index_found = index_replaced

	if openings_indices[-1] == (TO_FIND, TO_FIND):
def test_len():
    """len() must grow by exactly one with every added value."""
    slt = SortedListWithKey(key=modulo, value_orderable=False)

    for expected_len, value in enumerate(range(10000), 1):
        slt.add(value)
        assert len(slt) == expected_len
    def _retrieve_raw_observations(self):
        """Read the raw observations from every raw-observation sheet.

        Each sheet is walked column by column (one indicator per column) and
        row by row (one area per row). The observations of one column are
        collected in a list sorted by value, passed to
        ``_update_observation_ranking`` and then appended to
        ``self._excel_raw_observations`` as
        ``(ExcelObservation, area, indicator)`` tuples. Rows without a value
        in the check column are skipped, and a row that fails to parse is
        logged and skipped without aborting the import.
        """
        self._log.info("\tRetrieving raw observations...")
        raw_obs_sheets = self._get_raw_obs_sheets()

        for raw_obs_sheet in raw_obs_sheets:  # Per year
            # the year is taken from the sheet name and selects the layout settings
            sheet_year = re.match(self._config.get("RAW_OBSERVATIONS", "SHEET_NAME_PATTERN"),
                                  raw_obs_sheet.name).group("year")
            # rows already reported as empty, so each one is logged only once per sheet
            empty_row_error_cache = {}
            year_column = get_column_number(self._config_get("RAW_OBSERVATIONS", "OBSERVATION_YEAR_COLUMN", sheet_year))
            iso3_column = get_column_number(self._config_get("RAW_OBSERVATIONS", "OBSERVATION_ISO3_COLUMN", sheet_year))
            observation_name_row = self._config_getint("RAW_OBSERVATIONS", "OBSERVATION_NAME_ROW", sheet_year)
            observation_start_row = self._config_getint("RAW_OBSERVATIONS", "OBSERVATION_START_ROW", sheet_year)
            observation_start_column = get_column_number(
                self._config_get("RAW_OBSERVATIONS", "OBSERVATION_START_COLUMN", sheet_year))
            check_column = get_column_number(
                self._config_get("RAW_OBSERVATIONS", "OBSERVATION_CHECK_COLUMN", sheet_year))

            for column_number in range(observation_start_column, raw_obs_sheet.ncols):  # Per indicator
                # Maintain sorted list with elements sorted by value
                # Elements are tuples of the form (ExcelObservation, Area, Indicator)
                # We're using tuples just to avoid some additional round trips to the db in order to get area and indicator
                # Missing values (None, or whatever na_to_none maps to None) sort as 0
                per_indicator_observations = SortedListWithKey(
                    key=lambda x: x[0].value if x[0].value is not None and na_to_none(x[0].value) is not None else 0)
                # HACK: Curate data by stripping year
                indicator_code_retrieved = raw_obs_sheet.cell(observation_name_row, column_number).value
                if len(indicator_code_retrieved.split()) > 1:
                    self._log.debug('Indicator %s in had to be stripped of year while parsing %s',
                                    indicator_code_retrieved, raw_obs_sheet.name)
                try:
                    # only the first whitespace-separated token is the code
                    indicator_code = indicator_code_retrieved.split()[0]
                except IndexError:
                    # blank header cell: nothing to look up in this column
                    self._log.warn(
                        'Wrong Indicator name %s while parsing %s[%s], skipping column' % (
                            indicator_code_retrieved, raw_obs_sheet.name, colname(column_number)))
                    continue

                try:
                    indicator = self._indicator_repo.find_indicator_by_code(indicator_code)
                except IndicatorRepositoryError:
                    self._log.warn(
                        "No indicator with code %s found while parsing %s" % (indicator_code, raw_obs_sheet.name))
                    indicator = create_indicator(indicator=indicator_code)  # Orphan indicator

                for row_number in range(observation_start_row, raw_obs_sheet.nrows):  # Per country
                    if not raw_obs_sheet.cell(row_number, check_column).value or row_number in empty_row_error_cache:
                        if row_number not in empty_row_error_cache:
                            self._log.debug(
                                "Skipping row while parsing %s[%s] (did not detect value on check column, additional errors regarding this row will be omitted)" % (
                                    raw_obs_sheet.name, row_number))
                        empty_row_error_cache[row_number] = True
                        continue
                    try:
                        year = int(raw_obs_sheet.cell(row_number, year_column).value)
                        iso3 = raw_obs_sheet.cell(row_number, iso3_column).value
                        area = self._area_repo.find_by_iso3(iso3)
                        value_retrieved = raw_obs_sheet.cell(row_number, column_number).value
                        value = na_to_none(value_retrieved)
                        excel_observation = ExcelObservation(iso3=iso3, indicator_code=indicator_code, value=value,
                                                             year=year)
                        per_indicator_observations.add((excel_observation, area, indicator))
                    except AreaRepositoryError:
                        self._log.error("No area found with code %s for indicator %s while parsing %s" % (
                            iso3, indicator_code, raw_obs_sheet.name))
                    except Exception:
                        # Best effort: a broken row is logged and skipped, but
                        # KeyboardInterrupt and SystemExit are no longer swallowed.
                        self._log.error("Unexpected error parsing %s[%s]" % (raw_obs_sheet.name, row_number))

                self._update_observation_ranking(per_indicator_observations, observation_getter=lambda x: x[0])
                self._excel_raw_observations.extend(per_indicator_observations)
Beispiel #54
0
class Schedule:
    """A quantum program whose operations are pinned to specific times.

    ``schedule[time]`` performs a point lookup and
    ``schedule[inclusive_start_time:exclusive_end_time]`` a slice lookup.

    Attributes:
        device: The hardware this will schedule on.
        scheduled_operations: A SortedListWithKey holding the
            ScheduledOperations of this schedule, keyed by the start time
            of each ScheduledOperation.
    """

    def __init__(self,
            device: Device,
            scheduled_operations: Iterable[ScheduledOperation] = ()
            ) -> None:
        """Creates a schedule for a device.

        Args:
            device: The hardware this schedule will run on.
            scheduled_operations: Operations the schedule starts out with.
                They are moved into a sorted list that is keyed by each
                operation's start time.
        """
        self.device = device
        self.scheduled_operations = SortedListWithKey(scheduled_operations,
                                                      key=lambda op: op.time)
        durations = [op.duration for op in self.scheduled_operations]
        # An empty schedule falls back to a default-constructed Duration.
        self._max_duration = max(durations) if durations else Duration()

    def __eq__(self, other):
        if isinstance(other, Schedule):
            return self.scheduled_operations == other.scheduled_operations
        return NotImplemented

    def __ne__(self, other):
        return not self == other

    __hash__ = None  # type: ignore

    def query(self, *,  # Forces keyword args.
              time: Timestamp,
              duration: Duration = Duration(),
              qubits: Iterable[QubitId] = None,
              include_query_end_time=False,
              include_op_end_times=False) -> List[ScheduledOperation]:
        """Looks up operations by time window and, optionally, by qubit.

        Args:
            time: Start of the query window; an operation has to end after
                this time to be returned.
            duration: Length of the query window; an operation has to start
                by time+duration to be returned.
            qubits: When given, restricts the result to operations touching
                at least one of these qubits.
            include_query_end_time: Whether the end of the query window
                belongs to the window. Defaults to no.
            include_op_end_times: Whether the end time of a scheduled
                operation belongs to that operation's interval. Defaults
                to no.

        Returns:
            A list of the scheduled operations that satisfy the conditions.
        """
        window_end = time + duration
        # An operation overlapping `time` starts at most one maximal
        # duration before it, which bounds the key range to scan.
        scan_start = time - self._max_duration
        wanted_qubits = None if qubits is None else frozenset(qubits)

        def matches(op):
            op_end = op.time + op.duration
            if op_end == time and not include_op_end_times:
                return False
            if op.time == window_end and not include_query_end_time:
                return False
            if not (op_end >= time and op.time <= window_end):
                return False
            if wanted_qubits is None:
                return True
            return not wanted_qubits.isdisjoint(op.operation.qubits)

        candidates = self.scheduled_operations.irange_key(scan_start,
                                                          window_end)
        return [op for op in candidates if matches(op)]

    def __getitem__(self, item: Union[Timestamp, slice]):
        """Looks up operations overlapping a time or a time slice.

        Args:
            item: A Timestamp, or a slice whose start and stop are
                Timestamps.

        Returns:
            The scheduled operations that occur during the given time.
        """
        if not isinstance(item, slice):
            return self.query(time=item, include_query_end_time=True)
        if item.step:
            raise ValueError('Step not supported.')
        start = cast(Timestamp, item.start)
        stop = cast(Timestamp, item.stop)
        return self.query(time=start, duration=stop - start)

    def operations_happening_at_same_time_as(
        self, scheduled_operation: ScheduledOperation
    ) -> List[ScheduledOperation]:
        """Looks up the operations that overlap a given operation in time.

        Args:
            scheduled_operation: The operation whose time span is queried.

        Returns:
            The overlapping scheduled operations, excluding the given one.
        """
        concurrent = self.query(
            time=scheduled_operation.time,
            duration=scheduled_operation.duration)
        return [op for op in concurrent if op != scheduled_operation]

    def include(self, scheduled_operation: ScheduledOperation):
        """Inserts a scheduled operation into the schedule.

        Args:
            scheduled_operation: The operation to insert.

        Raises:
            ValueError:
                The operation collided with something already in the schedule.
        """
        clashing = self.query(time=scheduled_operation.time,
                              duration=scheduled_operation.duration,
                              qubits=scheduled_operation.operation.qubits)
        if clashing:
            raise ValueError('Operation {} has collisions: {}'.format(
                scheduled_operation.operation, clashing))
        self.scheduled_operations.add(scheduled_operation)
        # Keep the scan bound used by query() up to date.
        self._max_duration = max(self._max_duration,
                                 scheduled_operation.duration)

    def exclude(self, scheduled_operation: ScheduledOperation) -> bool:
        """Drops a scheduled operation from the schedule, if it is there.

        Args:
            scheduled_operation: The operation to try to remove.

        Returns:
            True when the operation was present and has been removed, False
            when it was not in the schedule.
        """
        try:
            self.scheduled_operations.remove(scheduled_operation)
        except ValueError:
            return False
        return True

    def to_circuit(self) -> Circuit:
        """Builds a circuit from the schedule.

        Most timing information is lost in the process, but operations that
        are scheduled at the same time end up in the same Moment.
        """
        circuit = Circuit()
        current_time = None  # type: Optional[Timestamp]
        batch = []  # type: List[Operation]
        for scheduled in self.scheduled_operations:
            if scheduled.time != current_time:
                # Start time changed: flush the batch collected so far
                # (empty on the first pass) and open a new one.
                circuit.append(batch)
                batch = [scheduled.operation]
                current_time = scheduled.time
            else:
                batch.append(scheduled.operation)
        circuit.append(batch)
        return circuit
Beispiel #55
0
class StreamHist(object):
    """A streaming histogram holding at most ``maxbins`` bins.

    Points are stored as ``Bin`` objects in ``self.bins``, a sorted list
    keyed on the bin value. Whenever more than ``maxbins`` bins exist,
    `trim` merges adjacent bins until the limit is respected again.
    """

    def __init__(self, maxbins=64, weighted=False, freeze=None):
        """Create a Histogram with a max of n bins.

        Parameters
        ----------
        maxbins : maximum number of bins kept after trimming.
        weighted : forwarded to ``bin_diff`` when choosing bins to merge.
        freeze : optional point count; once ``total`` reaches it, values
            that do not match an existing bin are folded into the nearest
            existing bin instead of creating a new one.
        """
        super(StreamHist, self).__init__()
        self.bins = SortedListWithKey(key=lambda b: b.value)
        self.maxbins = maxbins  # A useful property
        self.total = 0
        self.weighted = weighted
        self._min = None   # A useful property
        self._max = None   # A useful property
        self.freeze = freeze
        self.missing_count = 0

    def update(self, n, count=1):
        """Add a point (or an iterable of points) to the histogram.

        ``None`` is counted as a missing value. Returns ``self``.
        """
        if n is None:
            # We simply keep a count of the number of missing values
            self.missing_count += count
            return self
        if isinstance(n, iterator_types):
            # Shortcut for updating a histogram with an iterable
            # This works for anything that supports iteration, including
            # file-like objects and readers
            # This also means that nested lists (and similar structures) will
            # be 'unpacked' and added to the histogram 'automatically'
            for p in n:
                self.update(p, count)  # Count is assumed to apply for all
        else:
            self.insert(n, count)
        return self.trim()

    def insert(self, n, count=1):
        """Inserts a point to the histogram.

        This method implements Steps 1-4 from Algorithm 1 (Update) in ref [1].

        Notes
        -----
        It is better to use `update` when inserting data into the histogram,
        as `insert` does not call `trim` after the insertion. The total point
        count and the min/max bounds ARE updated here. For large batches of
        inserts, insert may be more efficient, but you are responsible for
        trimming the bins 'manually'.

        Returns ``self`` so that calls can be chained.

        Examples
        --------
        >>> # Using insert
        >>> h = StreamHist().insert(1).insert(2).insert(3)
        >>> h = h.trim()

        >>> # Using update
        >>> h = StreamHist().update([1, 2, 3])
        """
        self.update_total(count)
        if self._min is None or self._min > n:
            self._min = n
        if self._max is None or self._max < n:
            self._max = n
        b = Bin(value=n, count=count)
        frozen = self.freeze is not None and self.total >= self.freeze
        if b in self.bins:
            index = self.bins.index(b)
            self.bins[index].count += count
        elif frozen and len(self.bins):
            # Frozen: do not create a bin, credit the nearest neighbour.
            index = self.bins.bisect(b)
            if index:
                prev_dist = n - self.bins[index-1].value
            else:
                prev_dist = sys.float_info.max
            if index < len(self.bins):
                next_dist = self.bins[index].value - n
            else:
                next_dist = sys.float_info.max
            if prev_dist < next_dist:
                self.bins[index-1].count += count
            else:
                self.bins[index].count += count
        else:
            # Not frozen, or frozen with no bin to fold the point into.
            self.bins.add(b)
        return self

    def cdf(self, x):
        """Return the value of the cumulative distribution function at x."""
        return self.sum(x) / self.total

    def pdf(self, x):
        """Return the value of the probability density function at x."""
        return self.density(x) / self.total

    def bounds(self):
        """Return the lower (min) and upper (max) bounds of the distribution.

        Returns ``(None, None)`` when the histogram has no bins.
        """
        if len(self):
            return (self._min, self._max)
        return (None, None)

    def count(self):
        """Return the total number of points added to this histogram."""
        return self.total

    def median(self):
        """Return a median for the points inserted into the histogram.

        This will be the true median whenever the histogram has less than
        the maximum number of bins, otherwise it will be an approximation.
        Returns ``None`` for an empty histogram.
        """
        if self.total == 0:
            return None
        if len(self.bins) >= self.maxbins:
            # Return the approximate median
            return self.quantiles(0.5)[0]
        # Return the 'exact' median when possible. A bin may stand for
        # several identical points, so locate the middle point(s) by
        # cumulative count rather than by bin position.
        even = self.total % 2 == 0
        upper_rank = self.total // 2  # 0-based rank of the (upper) middle
        lower_rank = upper_rank - 1 if even else upper_rank
        lower_value = upper_value = None
        seen = 0
        for b in self.bins:
            seen += b.count
            if lower_value is None and seen > lower_rank:
                lower_value = b.value
            if seen > upper_rank:
                upper_value = b.value
                break
        if upper_value is None:
            # Bin counts do not add up to `total`; fall back to the estimate.
            return self.quantiles(0.5)[0]
        if even:
            return (lower_value + upper_value) / 2.0
        return upper_value

    def mean(self):
        """Return the sample mean of the distribution (None when empty)."""
        if self.total == 0:
            return None
        s = 0.0  # Sum
        for b in self.bins:
            s += b.value * b.count
        return s / float(self.total)

    def var(self):
        """Return the variance of the distribution.

        Returns ``None`` when fewer than two points have been added.
        """
        if self.total < 2:
            return None
        s = 0.0
        m = self.mean()  # Mean
        for b in self.bins:
            s += (b.count * (b.value - m)**2)
        return s / float(self.total)

    def min(self):
        """Return the minimum value in the histogram."""
        return self._min

    def max(self):
        """Return the maximum value in the histogram."""
        return self._max

    def trim(self):
        """Merge adjacent bins to decrease bin count to the maximum value.

        This method implements Steps 5-6 from Algorithm 1 (Update) in ref [1].
        Returns ``self``.
        """
        while len(self.bins) > self.maxbins:
            index = argmin(bin_diff(self.bins, self.weighted))
            # Take both neighbours out and re-insert the merged bin; this
            # avoids item assignment, which sorted lists may not support.
            lower = self.bins.pop(index)
            upper = self.bins.pop(index)
            upper += lower
            self.bins.add(upper)
        return self

    def scale_down(self, exclude):
        """Hook that does nothing by default."""
        pass  # By default, we do nothing

    def __str__(self):
        """Return a string representation of the histogram."""
        if len(self.bins):
            string = "Mean\tCount\n----\t-----\n"
            for b in self.bins:
                string += "%d\t%i\n" % (b.value, b.count)
            string += "----\t-----\n"
            string += "Missing values: %s\n" % self.missing_count
            string += "Total count: %s" % self.total
            return string
        return "Empty histogram"

    def to_dict(self):
        """Return a dictionary representation of the histogram.

        The result has a ``bins`` list of ``{"mean", "count"}`` entries and an
        ``info`` dict with the histogram settings and its min/max bounds.
        """
        bins = list()
        for b in self.bins:
            bins.append({"mean": b.value, "count": b.count})
        info = dict(missing_count=self.missing_count,
                    maxbins=self.maxbins,
                    weighted=self.weighted,
                    freeze=self.freeze,
                    min=self._min,
                    max=self._max)
        return dict(bins=bins, info=info)

    @classmethod
    def from_dict(cls, d):
        """Create a StreamHist object from a dictionary representation.

        The dictionary must be in the format given by `to_dict`. This class
        method, combined with the `to_dict` instance method, can facilitate
        communicating StreamHist objects across processes or networks.

        The total point count is rebuilt from the bin counts. The bounds are
        read from ``info`` when present; otherwise the smallest and largest
        bin values are used as the best available approximation.
        """
        info = d["info"]
        bins = d["bins"]
        hist = cls(info["maxbins"], info["weighted"], info["freeze"])
        hist.missing_count = info["missing_count"]
        for b in bins:
            count = b["count"]
            value = b["mean"]
            hist.bins.add(Bin(value, count))
            hist.total += count
        low = info.get("min")
        high = info.get("max")
        if len(hist.bins):
            if low is None:
                low = hist.bins[0].value
            if high is None:
                high = hist.bins[-1].value
        hist._min = low
        hist._max = high
        return hist

    def __len__(self):
        """Return the number of bins in this histogram."""
        return len(self.bins)

    def update_total(self, size=1):
        """Update the internally-stored total number of points."""
        self.total += size

    def __add__(self, other):
        """Merge two StreamHist objects into one (operands are not modified)."""
        res = self.copy()
        return res.merge(other)

    def __iadd__(self, other):
        """Merge another StreamHist object into this one."""
        return self.merge(other)

    def __radd__(self, other):
        """Reverse merge two objects.

        This is useful for merging a list of histograms via sum or similar.
        """
        return self + other

    def merge(self, other, size=None):
        """Merge another StreamHist object into this one.

        This method implements Algorithm 2 (Merge) in ref [1].

        Parameters
        ----------
        other : the histogram to merge in, or ``0`` (the start value used by
            the builtin ``sum``), in which case nothing happens.
        size : optional new value for ``maxbins``, applied before trimming.
        """
        if other == 0:   # Probably using sum here...
            return self  # This is a little hacky...
        for b in other.bins:
            # Add a private copy: bins are mutated in place by `insert`, so
            # sharing them would let later updates leak into `other`.
            self.bins.add(Bin(value=b.value, count=b.count))
        self.total += other.total
        if size is not None:
            self.maxbins = size
        self.trim()
        if self._min is None:
            self._min = other._min
        else:
            if other._min is not None:
                self._min = min(self._min, other._min)
        if self._max is None:
            self._max = other._max
        else:
            if other._max is not None:
                self._max = max(self._max, other._max)
        self.missing_count += other.missing_count
        return self

    def copy(self):
        """Make a deep copy of this histogram."""
        res = type(self)(int(self.maxbins), bool(self.weighted))
        for b in self.bins:
            # Copy each bin so the two histograms never share mutable state.
            res.bins.add(Bin(value=b.value, count=b.count))
        res._min = float(self._min) if self._min is not None else None
        res._max = float(self._max) if self._max is not None else None
        res.total = int(self.total)
        res.missing_count = int(self.missing_count)
        res.freeze = int(self.freeze) if self.freeze is not None else None
        return res

    def describe(self, quantiles=(0.25, 0.50, 0.75)):
        """Generate various summary statistics.

        Returns a dict with count, mean, var, min, the requested quantiles
        (keyed as percentages such as ``"25%"``) and max.
        """
        data = [self.count(), self.mean(), self.var(), self.min()]
        data += self.quantiles(*quantiles) + [self.max()]
        names = ["count", "mean", "var", "min"]
        names += ["%i%%" % round(q*100., 0) for q in quantiles] + ["max"]
        return dict(zip(names, data))

    def compute_breaks(self, n=50):
        """Return output like that of numpy.histogram.

        Returns ``(counts, bounds)`` where ``bounds`` holds ``n`` evenly
        spaced break points between the histogram bounds.
        """
        last = 0.0
        counts = []
        bounds = linspace(*self.bounds(), num=n)
        for e in bounds[1:]:
            new = self.sum(e)
            counts.append(new-last)
            last = new
        return counts, bounds

    def print_breaks(self, num=50):
        """Print a string representation of the histogram."""
        lines = []
        for c, b in zip(*self.compute_breaks(num)):
            # One dot per 0.5% of the total point count.
            bar = "." * int(c/float(self.total)*200)
            lines.append(str(b) + "\t" + bar + "\n")
        print("".join(lines))

    def sum(self, x):
        """Return the estimated number of points in the interval [−∞, x]."""
        x = float(x)
        if self._min is None or self._max is None:
            return 0.0  # Nothing has been added yet
        if x < self._min:
            ss = 0.0  # Sum is zero!
        elif x >= self._max:
            ss = float(self.total)
        elif x == self.bins[-1].value:
            # Shortcut for when i == max bin (see Steps 3-6)
            last = self.bins[-1]
            ss = float(self.total) - (float(last.count) / 2.0)
        else:
            bin_i = self.floor(x)
            if bin_i is None:
                bin_i = Bin(value=self._min, count=0)
            bin_i1 = self.higher(x)
            if bin_i1 is None:
                bin_i1 = Bin(value=self._max, count=0)
            if bin_i.value == self._min:
                prev_sum = self.bins[0].count / 2.0
            else:
                temp = bin_sums(self.bins, less=x)
                if len(temp):
                    prev_sum = sum(temp)
                else:
                    prev_sum = 0.0
            ss = _compute_sum(x, bin_i, bin_i1, prev_sum)
        return ss

    def density(self, p):
        """Return the estimated density at p (`pdf` divides it by the total).

        The density is 0 outside the histogram bounds and infinite when the
        histogram holds a single distinct value equal to p.
        """
        p = float(p)
        if self._min is None or self._max is None:
            return 0.0  # Nothing has been added yet
        if p < self._min or p > self._max:
            dd = 0.0
        elif p == self._min and p == self._max:
            dd = float('inf')
        elif Bin(value=p, count=0) in self.bins:
            # Exactly on a bin: average the densities just below and above.
            high = next_after(p, float("inf"))
            low = next_after(p, -float("inf"))
            dd = (self.density(low) + self.density(high)) / 2.0
        else:
            bin_i = self.lower(p)
            if bin_i is None:
                bin_i = Bin(value=self._min, count=0)
            bin_i1 = self.higher(p)
            if bin_i1 is None:
                bin_i1 = Bin(value=self._max, count=0)
            dd = _compute_density(p, bin_i, bin_i1)
        return dd

    def quantiles(self, *quantiles):
        """Return the estimated data value for the given quantile(s).

        The requested quantile(s) must be between 0 and 1. Note that even if a
        single quantile is input, a list is always returned. Quantiles at or
        below 0 give the minimum and at or above 1 give the maximum; an empty
        histogram yields ``None`` for every requested quantile.
        """
        if not len(self.bins):
            return [None for _ in quantiles]
        temp = bin_sums(self.bins)
        sums = list(accumulate(temp))
        result = []
        for x in quantiles:
            target_sum = x * self.total
            if x <= 0:
                qq = self._min
            elif x >= 1:
                qq = self._max
            else:
                index = bisect_left(sums, target_sum)
                bin_i = self.bins[index]
                if index < len(sums):
                    bin_i1 = self.bins[index+1]
                else:
                    bin_i1 = self.bins[index]
                if index:
                    prev_sum = sums[index-1]
                else:
                    prev_sum = 0.0
                qq = _compute_quantile(target_sum, bin_i, bin_i1, prev_sum+1)
            result.append(qq)
        return result

    def floor(self, p):
        """Return the bin with the greatest value <= p, or None.

        Relies on ``Bin`` membership tests matching on value -- TODO confirm.
        """
        hbin = Bin(p, 0)
        index = self.bins.bisect_left(hbin)
        if hbin not in self.bins:
            index -= 1
        return self.bins[index] if index >= 0 else None

    def ceiling(self, p):
        """Return the bin with the least value >= p, or None.

        Relies on ``Bin`` membership tests matching on value -- TODO confirm.
        """
        hbin = Bin(p, 0)
        index = self.bins.bisect_right(hbin)
        if hbin in self.bins:
            index -= 1
        return self.bins[index] if index < len(self.bins) else None

    def lower(self, p):
        """Return the bin with the greatest value strictly below p, or None."""
        index = self.bins.bisect_left(Bin(p, 0)) - 1
        return self.bins[index] if index >= 0 else None

    def higher(self, p):
        """Return the bin with the least value strictly above p, or None."""
        index = self.bins.bisect_right(Bin(p, 0))
        return self.bins[index] if index < len(self.bins) else None
 def _trie_search(self, word, d, transducer=None,
                  allow_spaces=True, return_cost=True):
     """
     Find every word in the prefix trie whose distance to ``word``,
     according to the given transducer, does not exceed ``d``.

     The search is a best-first traversal: states are kept in an agenda
     ordered by their (cost, g, h) value and expanded cheapest first.
     Returns the matches sorted by cost, as (word, cost) pairs when
     ``return_cost`` is true and as bare words otherwise.
     """
     if transducer is None:
         # TODO: sort out how spaces should be handled here
         transducer = self.transducer.inverse()
     allow_spaces &= self.allow_spaces
     trie = self.dictionary
     # variable initialisation
     expanded_keys = set()
     agenda = SortedListWithKey(key=(lambda x:x[1]))
     root_h = self.h_func(word, trie.root)
     agenda.add((("", 0, trie.root), (0.0, 0.0, root_h)))
     answer = dict()
     word_length = len(word)
     # priority queue holding the intermediate results
     while len(agenda) > 0:
         state_key, state_value = agenda.pop(0)
         if state_key in expanded_keys:
             continue
         expanded_keys.add(state_key)
         low, pos, index = state_key
         # g is the cost so far, h a lower bound on the remaining cost;
         # cost = g + h is a lower bound on the total cost
         _cost, g, _h = state_value
         longest_up = min(word_length - pos, transducer.max_up_length)
         for up_length in range(longest_up + 1):
             new_pos = pos + up_length
             curr_up = word[pos: new_pos]
             if curr_up not in transducer.operation_costs:
                 continue
             for curr_low, curr_cost in transducer.operation_costs[curr_up].items():
                 new_g = g + curr_cost
                 if new_g > d:  # when g > d there is no need to compute h
                     continue
                 if curr_low != " ":
                     new_index = trie.descend(index, curr_low)
                 elif allow_spaces and trie.is_final(index):
                     # a space restarts the walk from the root
                     new_index = trie.root
                 else:
                     new_index = Trie.NO_NODE
                 if new_index is Trie.NO_NODE:
                     continue
                 new_low = low + curr_low
                 new_h = self.h_func(word[new_pos: ], new_index)
                 new_cost = new_g + new_h
                 if new_cost > d:
                     continue
                 if new_pos == word_length and trie.is_final(new_index):
                     # keep the cheapest cost seen for each result
                     best_g = answer.get(new_low, None)
                     if best_g is None or new_g < best_g:
                         answer[new_low] = new_g
                 agenda.add(((new_low, new_pos, new_index),
                             (new_cost, new_g, new_h)))
     ranked = sorted(answer.items(), key=(lambda x: x[1]))
     if return_cost:
         return ranked
     return [elem[0] for elem in ranked]
def test_len():
    """Check that the length grows by one with every added value."""
    slt = SortedListWithKey(key=modulo)

    for expected_len, val in enumerate(range(10000), 1):
        slt.add(val)
        assert len(slt) == expected_len
def test_copy():
    """Check that a copy is unaffected by later additions to the source."""
    source = SortedListWithKey(range(100), load=7, key=modulo,
                               value_orderable=False)
    duplicate = source.copy()
    source.add(100)
    assert len(source) == 101
    assert len(duplicate) == 100
    def _retrieve_component_observations(self, structure_obs_sheet, subindex_name, component_short_name,
                                         component_scaled_column, sheet_year):
        """Read one component's observations from a sheet, rank and store them.

        Rows from the configured start row onwards are read; rows with an
        empty check column are skipped. Each remaining row becomes an
        (ExcelObservation, area, indicator) tuple kept in a list sorted by the
        observation value (missing values sort as 0). The sorted observations
        are then ranked and appended to ``self._excel_structure_observations``.

        If a duplicate (same year, iso3 and indicator) is found, a warning is
        logged and the method returns immediately without storing anything.

        Args:
            structure_obs_sheet: sheet object exposing ``name``, ``nrows`` and
                ``cell(row, col)``.
            subindex_name: name of the subindex the component belongs to.
            component_short_name: short name of the component; may be replaced
                by an alias configured for ``sheet_year``.
            component_scaled_column: column index holding the scaled value.
            sheet_year: year used to select the configuration values.
        """
        self._log.debug("\t\tRetrieving component %s from subindex %s observations in sheet %s..." % (
            component_short_name, subindex_name, structure_obs_sheet.name))
        empty_row_error_cache = {}
        year_column = get_column_number(
            self._config_get("STRUCTURE_OBSERVATIONS", "OBSERVATION_YEAR_COLUMN", sheet_year))
        iso3_column = get_column_number(
            self._config_get("STRUCTURE_OBSERVATIONS", "OBSERVATION_ISO3_COLUMN", sheet_year))
        observation_start_row = self._config_getint("STRUCTURE_OBSERVATIONS", "OBSERVATION_START_ROW", sheet_year)
        check_column = get_column_number(
            self._config_get("STRUCTURE_OBSERVATIONS", "OBSERVATION_CHECK_COLUMN", sheet_year))

        aliased_short_name = self._get_aliased_component(component_short_name, sheet_year)

        if aliased_short_name:
            self._log.info("Using alias %s for COMPONENT %s while parsing %s [%s]" % (
                aliased_short_name, component_short_name, structure_obs_sheet.name, colname(component_scaled_column)))
            short_name = aliased_short_name
        else:
            short_name = component_short_name

        # Set up sorted list to simplify ranking (components are not ranked in the spreadsheet)
        sorted_observations = SortedListWithKey(
            key=lambda x: x[0].value if x[0].value is not None and na_to_none(x[0].value) is not None else 0)

        try:
            indicator = self._indicator_repo.find_component_by_short_name(short_name, subindex_name)
            for row_number in range(observation_start_row, structure_obs_sheet.nrows):  # Per country
                if not structure_obs_sheet.cell(row_number, check_column).value or row_number in empty_row_error_cache:
                    if row_number not in empty_row_error_cache:
                        self._log.debug(
                            "Skipping row while parsing %s[%s] (did not detect value on check column, additional errors regarding this row will be omitted)" % (
                                structure_obs_sheet.name, row_number))
                    empty_row_error_cache[row_number] = True
                    continue
                try:
                    year = int(structure_obs_sheet.cell(row_number, year_column).value)
                    iso3 = structure_obs_sheet.cell(row_number, iso3_column).value
                    area = self._area_repo.find_by_iso3(iso3)
                    value = structure_obs_sheet.cell(row_number, component_scaled_column).value
                    excel_observation = ExcelObservation(iso3=iso3, indicator_code=indicator.indicator, year=year,
                                                         value=value)
                    # any() stops at the first match instead of building a
                    # throwaway list of every duplicate.
                    is_duplicate = any(
                        t[0].year == year and t[1].iso3 == iso3 and t[2].indicator == indicator.indicator
                        for t in sorted_observations)
                    if is_duplicate:
                        self._log.warn("Ignoring duplicate observations for COMPONENT %s while parsing %s [%s]" % (
                            indicator.indicator, structure_obs_sheet.name,
                            colname(component_scaled_column)))
                        # Will not continue parsing, we could check this also at the beginning if we extract the
                        # year from the sheet name
                        return
                    else:
                        sorted_observations.add((excel_observation, area, indicator))
                except AreaRepositoryError:
                    self._log.error("No area with code %s for indicator %s while parsing %s" % (
                        iso3, indicator.indicator, structure_obs_sheet.name))
                except Exception:
                    # Best effort per row: log and move on, but let
                    # KeyboardInterrupt / SystemExit propagate.
                    self._log.error("Unexpected error parsing %s[%s]" % (structure_obs_sheet.name, row_number))
        except IndicatorRepositoryError:
            self._log.error(
                "No COMPONENT '%s' indicator found while parsing %s [%s]" % (
                    short_name, structure_obs_sheet.name, colname(component_scaled_column)))

        # Rank them based on their scaled score
        self._update_observation_ranking(sorted_observations, observation_getter=lambda x: x[0])
        self._excel_structure_observations.extend(sorted_observations)