Example #1
0
    def _add_feff_to_file(self,file,crystal_structure):
        """Write the DEBYE, POTENTIALS and ATOMS sections to ``file``.

        The values come from ``self.optical_spectrum_options`` (keys 'temperature', 'debye temperature',
        'atom' and 'sphere radius'). The option 'atom' is one-based; the atom it designates receives
        potential number 0, every other atom gets the one-based rank of its atomic number among the
        species found inside the sphere.

        :param file: object with a ``write`` method that receives the text.
        :param crystal_structure: structure providing ``calc_absolute_coordinates()``; column 3 of the
            returned array is read as the atomic number.
        """
        options = self.optical_spectrum_options
        file.write('DEBYE {0} {1}  \n\n'.format(options['temperature'], options['debye temperature']))

        absorber_index = int(options['atom']) - 1
        radius = float(options['sphere radius'])

        cell_coords = crystal_structure.calc_absolute_coordinates()
        absorber_z = int(cell_coords[absorber_index, 3])
        absorber_position = cell_coords[absorber_index, :3]

        cluster = self._find_atoms_within_sphere(crystal_structure, radius, absorber_index)
        species = SortedSet(cluster[:, 3].astype('int'))

        file.write('POTENTIALS\n')
        file.write('  0 {}\n'.format(absorber_z))
        for potential_index, atomic_number in enumerate(species, start=1):
            file.write('  {0} {1}\n'.format(potential_index, atomic_number))

        file.write('\nATOMS\n')
        for row in cluster:
            position = row[:3]
            atomic_number = int(row[3])
            # An atom sitting (numerically) on the designated atom's position is the absorber itself.
            if np.linalg.norm(position - absorber_position) < 1e-6:
                potential = 0
            else:
                potential = species.index(atomic_number) + 1

            # Coordinates are multiplied by sst.bohr before printing (presumably a unit conversion -- confirm).
            fields = [component * sst.bohr for component in position] + [potential]
            file.write('  {0:1.10f} {1:1.10f} {2:1.10f} {3}\n'.format(*fields))
Example #2
0
class SortedSetKey:
    """Sorted set whose order is defined by a rank stored separately for every value.

    ``self.dict`` maps each value to its rank and ``self.sorted_set`` is a ``SortedSet`` that uses
    ``get_key`` (a lookup in ``self.dict``) as its key function. A value's rank must therefore be
    present in ``self.dict`` before the value is inserted into ``self.sorted_set``.
    """

    def __init__(self):
        self.dict = dict()
        self.sorted_set = SortedSet(key=self.get_key)

    def __getitem__(self, item):
        """Return the value (or list of values, for a slice) at position ``item``."""
        return self.sorted_set[item]

    def __len__(self):
        return len(self.sorted_set)

    def __str__(self):
        return str(self.sorted_set)

    def get_key(self, value):
        """Return the rank stored for ``value``; raises ``KeyError`` if it has none."""
        return self.dict[value]

    def get_reversed_list(self, index, count):
        """Return up to ``count`` values in descending order, skipping the ``index`` highest ones."""
        return self[-1 - index:-1 - index - count:-1]

    def values(self):
        """Iterate over the values in ascending rank order."""
        for value in self.sorted_set:
            yield value

    def clear(self):
        """Remove every value together with its rank."""
        self.sorted_set.clear()
        self.dict.clear()

    def destroy(self):
        """Drop the reference to the underlying sorted set."""
        self.sorted_set = None

    def index(self, value):
        """Return the position of ``value`` in rank order."""
        return self.sorted_set.index(value)

    def pop(self, index=-1):
        """Remove and return the value at ``index`` (the last one by default).

        The value's rank is removed as well, so that ``self.dict`` does not keep stale entries for
        values that are no longer in the set (this mirrors what ``remove`` does).
        """
        value = self.sorted_set.pop(index)
        self.dict.pop(value, None)
        return value

    def add(self, value, rank):
        """Insert ``value`` with ``rank``, repositioning it if it is already present."""
        # The old entry has to be taken out while its old rank is still in self.dict,
        # otherwise the sorted set could not locate it.
        if value in self.sorted_set:
            self.sorted_set.remove(value)
        self.dict[value] = rank
        self.sorted_set.add(value)

    def remove(self, value):
        """Remove ``value`` and its rank; raises ``KeyError`` if ``value`` is absent."""
        self.sorted_set.remove(value)
        del self.dict[value]

    def update(self, value_list, rank_list):
        """Insert or re-rank several values; ``rank_list[i]`` is the rank of ``value_list[i]``."""
        # Same ordering constraint as in add(): remove under the old ranks, then store the
        # new ranks, then re-insert.
        self.sorted_set.difference_update(value_list)
        for i, value in enumerate(value_list):
            self.dict[value] = rank_list[i]
        self.sorted_set.update(value_list)
def test_index():
    """In a set holding 0..99, every value must be found at the position equal to itself."""
    sorted_values = SortedSet(range(100), load=7)
    for expected_position in range(100):
        assert sorted_values.index(expected_position) == expected_position
Example #4
0
class ShotSelector(pg.LayoutWidget):
    """Layout widget combining a current-index field, a slider and an index-selection field.

    The selection field holds comma-separated index expressions (for example ``:`` or ``0:5, 7``)
    that are applied to ``np.arange(nshots)``; the resulting indices are kept sorted in
    ``self.idx_selected``. The slider moves through the positions of that selection.

    Signals:
        valueChanged: emitted by ``setLabelValue`` when the slider changes the current index.
        selectionChanged: emitted by ``update_selection`` after a selection was parsed successfully.
    """

    valueChanged = pyqtSignal()
    selectionChanged = pyqtSignal()

    def __init__(self, **kwargs):

        super().__init__(**kwargs)

        self.nshots = 1

        self.setFixedHeight(100)

        # Line edit holding the current shot index; -1 wraps to the last shot.
        self.current_idx_le = QLineEdit(self)

        self.current_idx_le.setMaximumWidth(30)
        self.current_idx_le.setValidator(QtGui.QIntValidator())

        self.current_idx_le.setText(str(-1))

        self.addWidget(self.current_idx_le)

        self.slider = QSlider(self)

        self.slider.setPageStep(1)
        self.slider.setOrientation(Qt.Horizontal)

        self.addWidget(self.slider, colspan=2)

        self.nextRow()

        self.addWidget(QLabel('index selector'))

        # Line edit holding the selection expression; ':' selects every shot.
        self.idx_select_le = QLineEdit(self)
        self.idx_select_le.setText(':')
        self.addWidget(self.idx_select_le)

        self.warning = QLabel()
        self.warning.setMargin(5)
        self.update_warning()
        self.addWidget(self.warning)

        self.update_nshots(self.nshots)

        self.idx_select_le.editingFinished.connect(self.update_selection)
        self.current_idx_le.editingFinished.connect(self.setSliderValue)
        self.slider.valueChanged.connect(self.setLabelValue)

    def update_nshots(self, nshots):
        """Set the number of shots and re-evaluate selection and slider position."""
        self.nshots = nshots
        self.idx = np.arange(self.nshots)
        self.slider.setRange(0, self.nshots - 1)

        self.update_selection()
        self.setSliderValue()

    def update_warning(self, warning=''):
        """Show ``warning`` on a red label, or 'all good' on a green one when it is empty."""
        if warning == '':
            self.warning.setStyleSheet("background-color: lightgreen")
            warning = 'all good'
        else:
            self.warning.setStyleSheet("background-color: red")

        self.warning.setText(warning)

    def update_selection(self):
        """Parse the selection field into ``self.idx_selected``.

        Returns 0 (after showing a warning) when an expression cannot be evaluated or when the
        selection turns out empty; otherwise adjusts the slider range, makes sure the current
        index is part of the selection and emits ``selectionChanged``.
        """
        self.update_warning()

        slice_text = self.idx_select_le.text()
        slices = slice_text.split(',')

        self.idx_selected = SortedSet([])
        for s in slices:
            try:
                # NOTE(security): eval() runs whatever was typed into the selection field as
                # Python code. That is tolerable only while the text comes from the local user;
                # never feed this method text from an untrusted source.
                scope = locals()

                select = eval('self.idx[' + s + ']', scope)

                if isinstance(select, np.ndarray):
                    for idx in select:
                        self.idx_selected.add(idx)
                else:
                    self.idx_selected.add(select)

            except Exception:
                # Exception (rather than a bare except) keeps KeyboardInterrupt/SystemExit alive.
                self.update_warning('problem in selected indeces')
                return 0

        if not self.idx_selected:
            # e.g. '5:5' evaluates fine but selects nothing; indexing [-1] below would raise.
            self.update_warning('selection is empty')
            return 0

        self.slider.setRange(0, len(self.idx_selected) - 1)

        if int(self.current_idx_le.text()
               ) % self.nshots not in self.idx_selected:
            # setText() requires a str; the selected index is a numpy integer.
            self.current_idx_le.setText(str(self.idx_selected[-1]))

            self.update_warning(
                'last index not in selection <br> -> setting last selected')

        self.selectionChanged.emit()

    def setLabelValue(self, value):
        """Slot for the slider: show the selected index at slider position ``value``."""
        newval = self.idx_selected[value]

        if newval != self.get_current_index():
            self.current_idx_le.setText(str(newval))

            self.valueChanged.emit()

    def setSliderValue(self):
        """Slot for the index field: move the slider to the position of the typed index."""
        self.update_warning()

        value = int(self.current_idx_le.text())

        try:
            value_sl = self.idx_selected.index(value % len(self.idx))
            self.slider.setValue(value_sl)
        except ValueError:
            self.update_warning('set index not in selection <br> ignore')

    def get_current_index(self):
        """Return the current index wrapped into ``[0, nshots)``."""
        return int(self.current_idx_le.text()) % self.nshots

    def get_selected_indices(self):
        """Return the selected indices as a one-element tuple holding a numpy array."""
        return (np.array(self.idx_selected), )
Example #5
0
def test_index():
    """In a set holding 0..99 (after ``_reset(7)``), every value sits at the position equal to itself."""
    sorted_values = SortedSet(range(100))
    sorted_values._reset(7)
    for expected_position in range(100):
        assert sorted_values.index(expected_position) == expected_position
Example #6
0
class Selection(IMutableGSlice):
    def __init__(
            self,
            universe: slice,
            revealed: list = None,
            intervals: Iterator = None,
            _length: Optional[int] = None  # For performance
    ):
        """Create a selection over @universe.

        :param universe: slice describing the full index range.
        :param revealed: revealed ranges as pairs or slices; used only when @intervals is not given.
        :param intervals: interval edges to store directly; takes precedence over @revealed.
        :param _length: precomputed number of revealed elements; computed from the edges when not an int.

        When neither @revealed nor @intervals is given, the range ``[0, universe.stop)`` is revealed.
        """
        #assert isinstance(universe, slice)  # Should universe even be visible/exist?
        #assert universe.start == 0
        #assert isinstance(universe.stop, int)
        #assert universe.stop >= 1  # TODO Do we need this?
        self.universe = universe
        if intervals is not None:
            edges = SortedSet(intervals)
        elif revealed is not None:
            edges = self.revealed2sortedset(revealed)
        else:
            edges = self.revealed2sortedset([slice(0, universe.stop)])
        self._intervals = edges
        if isinstance(_length, int):
            self._revealed_count = _length
        else:
            self._revealed_count = Selection._compute_len(self._intervals)

    @staticmethod
    def revealed2sortedset(revealed: List[Union[tuple, slice]]) -> SortedSet:
        """ Converts a list of revealed ranges (pairs or slices) to a sorted set of their end points.
        Both end points of every range are added to the sorted set, except 0.
        """
        # Examples (universe size, revealed ranges -> universe size, resulting edges):
        # 10, [] -> 10, []
        # 10, [(0, 10)] -> 10, [10]
        # 10, [(0, 7)] -> 10, [7]
        # 10, [(7, 10)] -> 10, [7, 10]
        # 10, [(3, 7)] -> 10, [3, 7]
        # 10, [(0, 3), (7, 10)] -> 10, [3, 7, 10]
        # 10, [(0, 1), (2, 3), (4, 5), (6, 7), (8, 9)] -> 10, [1, 2, 3, 4, 5, 6, 7, 8, 9]

        try:
            edges = SortedSet()
            for lower, upper in revealed:
                edges.add(lower)
                edges.add(upper)
        except TypeError:  # slice
            # Slices cannot be unpacked into a pair, so read their attributes instead.
            starts = SortedSet(item.start for item in revealed)
            edges = starts.union(item.stop for item in revealed)
        edges.discard(0)
        return edges

    @staticmethod
    def sortedset2slices(sortedset: SortedSet) -> List[slice]:
        """ Converts a sorted set of interval edges to the list of revealed slices.
        With an even number of edges, consecutive edges pair up: (first, second), (third, fourth), ...
        With an odd number of edges, the first slice runs from 0 to the first edge and the remaining
        edges pair up: (second, third), (fourth, fifth), ...
        """
        if len(sortedset) % 2 == 0:
            starts, stops = sortedset[::2], sortedset[1::2]
            return [slice(start, stop) for start, stop in zip(starts, stops)]
        leading = [slice(0, sortedset[0])]
        starts, stops = sortedset[1::2], sortedset[2::2]
        return leading + [slice(start, stop) for start, stop in zip(starts, stops)]

    def slices(self) -> List[slice]:
        """ :return The revealed ranges as a list of slices, built from the stored interval edges. """
        edges = self._intervals
        return self.sortedset2slices(edges)

    def pairs(self) -> Iterator[Tuple[int, int]]:
        """ :return An iterator of (start, stop) pairs, one per revealed range. With an odd number of
        edges the first pair starts at 0. """
        edges = self._intervals
        if len(edges) % 2 == 1:
            leading = [(0, edges[0])]
            remaining = zip(edges[1::2], edges[2::2])
            return itertools.chain(leading, remaining)
        return zip(edges[::2], edges[1::2])

    def gap_pairs(self) -> Iterator[Tuple[int, int]]:
        """ :return An iterator of (start, stop) pairs of the complement of this selection. """
        gaps = self.complement()
        return gaps.pairs()

    def intervals(self):
        """ :return The sorted set of interval edges itself (not a copy). """
        return self._intervals

    def exclude(self, from_index: Optional[int], to_index: Optional[int]):
        """ Covers the physical range [@from_index, @to_index), updating the interval edges and the revealed count.

        None for @from_index means self.universe.start and None for @to_index means self.universe.stop. Negative
        indices down to -self.universe.stop are wrapped modulo self.universe.stop; a @to_index greater than
        self.universe.stop is treated like None.

        :return The amount by which the revealed count decreased; 0 when there are no interval edges or when the
        normalized range is empty.
        """
        original_length = self._revealed_count
        if isinstance(from_index, int) and -self.universe.stop <= from_index < 0:
            from_index = from_index % self.universe.stop
        if isinstance(to_index, int):
            if to_index > self.universe.stop:
                return self.exclude(from_index, None)
            if -self.universe.stop <= to_index < 0:
                to_index = to_index % self.universe.stop
        assert from_index is None or self.universe.start <= from_index <= self.universe.stop
        assert to_index is None or self.universe.start <= to_index <= self.universe.stop
        if from_index is None:
            from_index = self.universe.start
        if to_index is None:
            to_index = self.universe.stop
        if len(self._intervals) == 0:
            return 0
        if from_index >= to_index:
            return 0

        # m / n: number of edges that are <= from_index / <= to_index.
        m = self._intervals.bisect_right(from_index)
        n = self._intervals.bisect_right(to_index)

        # Positions of the indices among the edges, or None when they are not edges themselves.
        try:
            from_index_index = self._intervals.index(from_index)
        except ValueError:
            from_index_index = None
        try:
            to_index_index = self._intervals.index(to_index)
        except ValueError:
            to_index_index = None
        # With an even number of edges the revealed ranges are (edge 0, edge 1), (edge 2, edge 3), ...; with an odd
        # number the first revealed range starts implicitly at 0. The parity tests below decide from m and n whether
        # the respective index lies inside a revealed range.
        from_index_is_included = (
            len(self._intervals) % 2 == 0 and m % 2 == 1 or len(self._intervals) % 2 == 1 and m % 2 == 0)
        to_index_is_included = (
            len(self._intervals) % 2 == 0 and n % 2 == 1 or len(self._intervals) % 2 == 1 and n % 2 == 0)
        from_index_is_leftmost_included = from_index == 0 and from_index_is_included or from_index_index is not None and (
                len(self._intervals) % 2 == 0 and from_index_index % 2 == 0
                or len(self._intervals) % 2 == 1 and (from_index == 0 or from_index_index % 2 == 1))
        to_index_right_of_excluded = to_index_index is not None and (
                len(self._intervals) % 2 == 0 and to_index_index % 2 == 1
                or len(self._intervals) % 2 == 1 and (to_index == 0 or to_index_index % 2 == 0))

        # Each branch below removes the edges swallowed by the excluded range, inserts the new boundary edges where
        # needed and subtracts the newly covered element count from self._revealed_count.
        if from_index_is_included:
            if from_index_is_leftmost_included:
                if to_index_is_included:
                    if m == 0:
                        to_remove = self._intervals[m:n]
                        endpoint = 0 if n == 0 else self._intervals[n - 1]
                        addendum = 0 if n == 0 else self._intervals[0]
                        self._revealed_count -= (to_index - endpoint) + addendum + sum(
                            b - a for a, b in zip(to_remove[1:-1:2], to_remove[2:-1:2]))
                        del self._intervals[m:n]
                        self._intervals.add(to_index)
                    else:
                        intermediates = self._intervals[m + 1:n - 1]
                        from_start, from_end = self._intervals[m - 1], self._intervals[m]
                        to_start, to_end = self._intervals[n - 1], self._intervals[n]
                        if m == n:
                            self._revealed_count -= to_index - from_start
                            self._intervals.remove(from_start)
                            self._intervals.add(to_index)
                        else:
                            self._revealed_count -= (from_end - from_start) + (to_index - self._intervals[n - 1]) + (
                                from_index - from_start) + sum(
                                b - a for a, b in zip(intermediates[::2], intermediates[1::2]))
                            del self._intervals[m + 1:n - 1]  # intermediates
                            self._intervals.remove(from_start)
                            self._intervals.remove(from_end)
                            self._intervals.remove(to_start)
                            self._intervals.add(to_index)
                else:
                    from_start = 0 if m == 0 else self._intervals[m - 1]
                    from_end = self._intervals[m]
                    self._revealed_count -= from_end - from_start
                    if from_start > 0:
                        self._intervals.remove(from_start)
                    self._intervals.remove(from_end)
            else:
                if to_index_is_included:
                    from_end = self._intervals[m]
                    to_start = self._intervals[n - 1]
                    if m == n:
                        self._revealed_count -= to_index - from_index
                        if from_index > 0:
                            self._intervals.add(from_index)
                        self._intervals.add(to_index)
                    else:
                        intermediates = self._intervals[m + 1:n - 1]
                        self._revealed_count -= (from_end - from_index) + (to_index - to_start) + sum(
                            b - a for a, b in zip(intermediates[::2], intermediates[1::2]))
                        del self._intervals[m + 1:n - 1]  # intermediates
                        if from_index > 0:
                            self._intervals.add(from_index)
                        self._intervals.add(to_index)
                        self._intervals.remove(from_end)
                        self._intervals.remove(to_start)
                else:
                    to_remove = self._intervals[m:n]
                    self._revealed_count -= self._intervals[m] - from_index + sum(b - a for a, b in zip(to_remove[1::2], to_remove[::2]))
                    del self._intervals[m:n]
                    if from_index != 0:
                        self._intervals.add(from_index)
        else:
            if to_index_is_included:
                if to_index_right_of_excluded:
                    to_remove = self._intervals[m:n - 1]
                    del self._intervals[m:n - 1]
                    self._revealed_count -= sum(b - a for a, b in zip(to_remove[::2], to_remove[1::2]))
                else:
                    to_remove = self._intervals[m:n]
                    del self._intervals[m:n]
                    self._intervals.add(to_index)
                    self._revealed_count -= (to_index - to_remove[0]) + sum(b - a for a, b in zip(to_remove[1::2], to_remove[::2]))
            else:
                to_remove = self._intervals[m:n]
                del self._intervals[m:n]
                self._revealed_count -= sum(b - a for a, b in zip(to_remove[::2], to_remove[1::2]))

        return original_length - self._revealed_count

    def exclude_virtual(self, from_index: Optional[int], to_index: Optional[int]):
        """ Like exclude, but the bounds are virtual indices. A bound that is None or lies outside
        [-len(self), len(self)) is passed on as None; any other bound is converted with virtual2physical. """
        physical_bounds = []
        for vindex in (from_index, to_index):
            if vindex is not None and -len(self) <= vindex < len(self):
                physical_bounds.append(self.virtual2physical(vindex))
            else:
                physical_bounds.append(None)
        p_from_index, p_to_index = physical_bounds
        return self.exclude(p_from_index, p_to_index)

    def include(self, from_index: Optional[int], to_index: Optional[int]):
        """ Reveals the physical range [@from_index, @to_index), updating the interval edges and the revealed count.

        None for @from_index means self.universe.start and None for @to_index means self.universe.stop. Negative
        indices down to -self.universe.stop are wrapped modulo self.universe.stop; a @to_index greater than
        self.universe.stop is treated like None.

        :return The number of newly revealed elements; 0 when the normalized range is empty.
        """
        original_length = len(self)
        if isinstance(from_index, int) and -self.universe.stop <= from_index < 0:
            from_index = from_index % self.universe.stop
        if isinstance(to_index, int):
            if to_index > self.universe.stop:
                return self.include(from_index, None)
            if -self.universe.stop <= to_index < 0:
                to_index = to_index % self.universe.stop
        assert from_index is None or self.universe.start <= from_index <= self.universe.stop
        assert to_index is None or self.universe.start <= to_index <= self.universe.stop
        if from_index is None:
            from_index = self.universe.start
        if to_index is None:
            to_index = self.universe.stop
        # An empty (or reversed) range reveals nothing. This has to be checked before the empty-selection shortcut
        # below: otherwise e.g. include(5, 5) on an empty selection would store the single edge 5, which reads as
        # the revealed range [0, 5). exclude() applies the same guard.
        if from_index >= to_index:
            return 0
        if not self._intervals:
            if from_index > 0:
                self._intervals.add(from_index)
            self._intervals.add(to_index)
            self._revealed_count += to_index - from_index
            return to_index - from_index

        # m / n: number of edges that are <= from_index / <= to_index.
        m = self._intervals.bisect_right(from_index)
        n = self._intervals.bisect_right(to_index)

        try:
            from_index_index = self._intervals.index(from_index)
        except ValueError:
            from_index_index = None

        # With an even number of edges the revealed ranges are (edge 0, edge 1), (edge 2, edge 3), ...; with an odd
        # number the first revealed range starts implicitly at 0. The parity tests decide whether an index lies
        # inside a revealed range.
        from_index_is_included = (
                len(self._intervals) % 2 == 0 and m % 2 == 1 or len(self._intervals) % 2 == 1 and m % 2 == 0)
        to_index_is_included = (
                len(self._intervals) % 2 == 0 and n % 2 == 1 or len(self._intervals) % 2 == 1 and n % 2 == 0)
        from_index_right_of_included = from_index_index is not None and (
                len(self._intervals) % 2 == 0 and from_index_index % 2 == 1
                or len(self._intervals) % 2 == 1 and from_index_index % 2 == 0)

        if from_index_is_included:
            if to_index_is_included:
                to_remove = self._intervals[m:n]
                del self._intervals[m:n]
                self._revealed_count += sum(b - a for a, b in zip(to_remove[::2], to_remove[1::2]))
            else:
                to_remove = self._intervals[m:n]
                del self._intervals[m:n]
                self._intervals.add(to_index)
                self._revealed_count += (to_index - to_remove[-1]) + sum(b - a for a, b in zip(to_remove[1::2], to_remove[::2]))
        else:
            if to_index_is_included:
                if from_index_right_of_included:
                    to_remove = self._intervals[m - 1:n]
                    del self._intervals[m - 1:n]
                    self._revealed_count += sum(b - a for a, b in zip(to_remove[::2], to_remove[1::2]))
                else:
                    to_remove = self._intervals[m:n]
                    del self._intervals[m:n]
                    self._intervals.add(from_index)
                    self._revealed_count += (to_remove[0] - from_index) + sum(b - a for a, b in zip(to_remove[1::2], to_remove[::2]))
            else:
                if from_index_right_of_included:
                    intermediates = self._intervals[m:n]
                    del self._intervals[m:n]  # intermediates
                    self._intervals.remove(from_index)
                    self._intervals.add(to_index)
                    self._revealed_count += (to_index - from_index) - sum(b - a for a, b in zip(intermediates[::2], intermediates[1::2]))
                else:
                    to_remove = self._intervals[m:n]
                    del self._intervals[m:n]
                    if from_index > 0:
                        self._intervals.add(from_index)
                    self._intervals.add(to_index)
                    self._revealed_count += (to_index - from_index) - sum(b - a for a, b in zip(to_remove[::2], to_remove[1::2]))

        return len(self) - original_length

    def include_partially(self, from_index: Optional[int], to_index: Optional[int], count: Union[int, tuple]):
        """ Reveals up to a given number of covered elements at each end of [@from_index, @to_index).

        :param count: (head, tail) numbers of elements to reveal from the left and from the right; a single int
        is used for both.
        :return The sum of the counts reported for the two ends.
        """
        if isinstance(count, int):
            count = (count, count)
        head_count, tail_count = count
        revealed_from_left = self._include_partially_from_left(from_index, to_index, head_count)
        revealed_from_right = self._include_partially_from_right(from_index, to_index, tail_count)
        return revealed_from_left + revealed_from_right

    def _include_partially_from_left(self, from_index: int, to_index: int, count: int):
        """ Reveals covered elements of [@from_index, @to_index) from left to right until @count elements have
        been revealed or no covered range is left.

        :return The number of elements accounted as revealed.
        """
        if count == 0:
            return 0
        from_index, to_index = self._normalized_range(from_index, to_index)
        spanning = self._spanning_subslice(from_index, to_index)
        covered = spanning.complement().subslice(from_index, to_index)

        remaining = count
        for gap_start, gap_stop in covered.pairs():
            gap_size = gap_stop - gap_start
            if gap_size >= remaining:
                # This gap is large enough to supply everything still missing.
                self.include(gap_start, gap_start + remaining)
                remaining = 0
                break
            self.include(gap_start, gap_stop)
            remaining -= gap_size
        return count - remaining

    def _include_partially_from_right(self, from_index: int, to_index: int, count: int):
        """ Reveals covered elements of [@from_index, @to_index) from right to left until @count elements have
        been revealed or no covered range is left.

        :return The number of elements accounted as revealed.
        """
        if count == 0:
            return 0
        from_index, to_index = self._normalized_range(from_index, to_index)
        spanning = self._spanning_subslice(from_index, to_index)
        covered = spanning.complement().subslice(from_index, to_index)

        remaining = count
        for gap_start, gap_stop in reversed(list(covered.pairs())):
            gap_size = gap_stop - gap_start
            if gap_size >= remaining:
                # This gap is large enough to supply everything still missing.
                self.include(gap_stop - remaining, gap_stop)
                remaining = 0
                break
            self.include(gap_start, gap_stop)
            remaining -= gap_size
        return count - remaining

    def include_expand(self, from_index: Optional[int], to_index: Optional[int], count: Union[int, Tuple[int, int]]):
        """ Reveals elements at the edges of every gap inside [@from_index, @to_index).

        For each gap that ends before self.universe.stop, up to head_count elements are revealed at its right
        end; for each gap that starts after self.universe.start, up to tail_count elements are revealed at its
        left end.

        :param count: (head_count, tail_count); a single int is used for both.
        :return The sum of the counts reported by the partial includes.
        """
        if isinstance(count, int):
            count = (count, count)
        if count == (0, 0):
            return 0
        head_count, tail_count = count
        total_revealed = 0
        covered = self.complement().subslice(from_index, to_index)
        for gap_start, gap_stop in covered.pairs():
            if gap_stop < self.universe.stop:
                total_revealed += self._include_partially_from_right(gap_start, gap_stop, head_count)
            if gap_start > self.universe.start:
                total_revealed += self._include_partially_from_left(gap_start, gap_stop, tail_count)
        return total_revealed

    def _previous_slice(self, sl: slice):
        """ :return The revealed or covered slice immediately to the left of @sl.
        :raise ValueError if there is none. """
        if sl.start == self.universe.start:
            raise ValueError("There is no slice to the left of {}.".format(sl))
        # self._intervals holds integer edges, not slices, so the candidates have to be built with slices():
        # the revealed slices of this selection plus those of its complement (the covered ones).
        # TODO O(n) -> O(1)
        candidates = self.slices() + self.complement().slices()
        zero_or_one = [s for s in candidates if s.stop == sl.start]
        if len(zero_or_one) == 1:
            return zero_or_one[0]
        else:
            raise ValueError("Slice not found: {}.".format(sl))

    def _next_slice(self, sl: slice):
        """ :return The revealed or covered slice immediately to the right of @sl.
        :raise ValueError if there is none. """
        if sl.stop == self.universe.stop:
            raise ValueError("There is no slice to the right of {}.".format(sl))
        # self._intervals holds integer edges, not slices, so the candidates have to be built with slices():
        # the revealed slices of this selection plus those of its complement (the covered ones).
        # TODO O(n)
        candidates = self.slices() + self.complement().slices()
        zero_or_one = [s for s in candidates if s.start == sl.stop]
        if len(zero_or_one) == 1:
            return zero_or_one[0]
        else:
            raise ValueError("Slice not found: {}.".format(sl))

    def include_virtual(self, from_index, to_index):
        """ Like include, but the bounds are virtual indices. A bound that is None or lies outside
        [-len(self), len(self)) is passed on as None; any other bound is converted with virtual2physical. """
        p_from_index = None
        if from_index is not None and -len(self) <= from_index < len(self):
            p_from_index = self.virtual2physical(from_index)
        p_to_index = None
        if to_index is not None and -len(self) <= to_index < len(self):
            p_to_index = self.virtual2physical(to_index)
        return self.include(p_from_index, p_to_index)

    def include_partially_virtual(self, from_index: Optional[int], to_index: Optional[int], count: Union[int, tuple]):
        """ Like include_partially, but the bounds are virtual indices. A bound that is None or lies outside
        [-len(self), len(self)) is passed on as None; any other bound is converted with virtual2physical. """
        p_from_index = None
        if from_index is not None and -len(self) <= from_index < len(self):
            p_from_index = self.virtual2physical(from_index)
        p_to_index = None
        if to_index is not None and -len(self) <= to_index < len(self):
            p_to_index = self.virtual2physical(to_index)
        return self.include_partially(p_from_index, p_to_index, count)

    # FIXME Inconsistent with reversed(selection). Should probably make this use the default implementation and instead
    # rewrite this one to iter_slices or something.
    def __iter__(self):
        """ Yields one (start, stop) tuple per revealed range. """
        # FIXME should probably generate slices instead, or every index
        for pair in self.pairs():
            start, stop = pair
            yield start, stop

    def complement(self):
        """ :return A new Selection over the same universe in which revealed and covered ranges are swapped.

        Toggling the edge at self.universe.stop flips the parity of the edge set, and thereby which ranges count
        as revealed.
        """
        edges = self._intervals
        stop = self.universe.stop
        covered_count = stop - len(self)
        if len(edges) >= 1 and edges[-1] == stop:
            flipped = edges[:-1]
        else:
            flipped = edges.union([stop])
        return Selection(universe=self.universe, intervals=flipped, _length=covered_count)

    def _normalized_range(self, from_index: Optional[int], to_index: Optional[int]) -> Tuple[int, int]:
        """ For any range [@from_index, @to_index) where the indices are either None or any integer, returns the
        equivalent range [x, y) such that either 0 <= x < y <= upper_bound or x = y = 0, where upper_bound is
        self.universe.stop. The ranges are equivalent in the sense that when using them to slice this selection,
        they produce the same sub-selection.

        Negative indices count from self.universe.stop, as in ordinary Python slicing (-1 is the last index);
        indices beyond either end are clamped.
        """
        if from_index is None or from_index <= -self.universe.stop:
            from_index = self.universe.start
        elif from_index > self.universe.stop:
            from_index = self.universe.stop
        elif -self.universe.stop <= from_index < 0:
            # A negative index is an offset from the end, so it is added to the stop (not subtracted, which
            # would land beyond the universe).
            from_index = self.universe.stop + from_index

        if to_index is None or to_index >= self.universe.stop:
            to_index = self.universe.stop
        elif -self.universe.stop <= to_index < 0:
            to_index = self.universe.stop + to_index
        elif to_index < -self.universe.stop:
            to_index = self.universe.start

        # Every empty or reversed range is reported as the canonical empty range.
        if from_index >= to_index:
            from_index, to_index = (0, 0)
        return from_index, to_index

    def subslice(self, from_index: Optional[int], to_index: Optional[int]):
        """ :return A new Selection obtained from _spanning_subslice for the normalized range
        [@from_index, @to_index), whose first and last revealed ranges are then trimmed where they extend beyond
        that range. The revealed count of the result is reduced by the trimmed amounts. """
        from_index, to_index = self._normalized_range(from_index, to_index)
        sel = self._spanning_subslice(from_index, to_index)
        if len(sel._intervals) % 2 == 0:
            # Even number of edges: the revealed ranges are (edge 0, edge 1), ..., (edge -2, edge -1).
            if len(sel) > 0:
                if sel._intervals[0] < from_index < sel._intervals[1]:
                    # from_index cuts into the first revealed range: move its start to from_index.
                    sel._revealed_count -= from_index - sel._intervals[0]
                    del sel._intervals[0]
                    sel._intervals.add(from_index)
                if sel._intervals[-2] < to_index < sel._intervals[-1]:
                    # to_index cuts into the last revealed range: move its stop to to_index.
                    sel._revealed_count -= sel._intervals[-1] - to_index
                    del sel._intervals[-1]
                    sel._intervals.add(to_index)
        else:
            # Odd number of edges: the first revealed range starts implicitly at 0.
            if 0 < from_index < sel._intervals[0]:
                # Adding from_index as an edge makes the first revealed range start there instead of at 0.
                sel._revealed_count -= from_index
                sel._intervals.add(from_index)
            if (len(sel._intervals) == 1 and to_index < sel._intervals[-1]
                    or len(sel._intervals) >= 2 and sel._intervals[-2] < to_index < sel._intervals[-1]):
                sel._revealed_count -= sel._intervals[-1] - to_index
                del sel._intervals[-1]
                sel._intervals.add(to_index)
        return sel

    def _spanning_subslice(self, from_index: int, to_index: int):
        """ :return A Selection whose set of revealed slices is a subset of that of this Selection such that every index
        in [from_index, to_index) is either on some slice in the subset, or on a gap.

        The result is built on a deep copy of the universe and on a slice of the edge set; an empty range
        (@from_index >= @to_index) yields a Selection without edges. """
        if from_index >= to_index:
            return Selection(universe=deepcopy(self.universe), intervals=[])
        # m: number of edges that are <= from_index.
        m = self._intervals.bisect_right(from_index)
        if len(self._intervals) % 2 == 0:
            n = self._intervals.bisect_left(to_index)
            # Round m down and n up to even positions so that only whole (start, stop) edge pairs are taken.
            intervals = self._intervals[m - (m % 2):n + (n % 2)]
        else:
            n = self._intervals.bisect_right(to_index)
            # With an odd number of edges the pairs begin at odd positions (edge 0 closes the range starting
            # at 0), hence the rounding to odd positions here.
            a = max(0, m - ((m + 1) % 2))
            b = n + ((n + 1) % 2)
            intervals = self._intervals[a:b]
        sel = Selection(universe=deepcopy(self.universe), intervals=intervals)
        return sel

    def _slow_subslice(self, from_index: Optional[int], to_index: Optional[int]):
        """ :return A deep copy of this selection from which everything before @from_index and everything from
        @to_index onwards has been excluded. A bound that is not an int leaves that side untouched. """
        trimmed = self.deepcopy()
        cuts = ((from_index, None, from_index), (to_index, to_index, None))
        for bound, cut_from, cut_to in cuts:
            if isinstance(bound, int):
                trimmed.exclude(cut_from, cut_to)
        return trimmed

    def _interval_index(self, pindex):
        """ :return n if the nth interval edge is the smallest number such that @pindex < n (zero-indexed).
        :raise IndexError if the search ends without finding an element whose [start, stop) contains @pindex.

        NOTE(review): the body binary-searches self._intervals and reads .start/.stop from each element, i.e. it
        treats the elements as slice-like objects, whereas revealed2sortedset fills the set with plain integers.
        Confirm whether this method is still in use and which of the two behaviours is intended.
        """
        lower = 0
        upper = len(self._intervals) - 1
        while lower <= upper:
            middle = (lower + upper) // 2
            midsl = self._intervals[middle]
            if pindex < midsl.start:
                upper = middle - 1
            elif midsl.stop <= pindex:
                lower = middle + 1
            else:  # midsl.start <= pindex < midsl.stop:
                return middle
        raise IndexError("{} is not in any interval.".format(pindex))

    def select(self, listlike):
        """ :return The revealed parts of @listlike, joined with the empty slice of @listlike as separator. """
        # TODO only works for stringlike objects
        pieces = [listlike[revealed] for revealed in self.slices()]
        separator = listlike[0:0]
        return separator.join(pieces)

    def physical2virtual(self, pindex: int):
        """ :return The number of revealed elements that precede the physical index @pindex.
        :raise IndexError if @pindex is not inside any revealed range. """
        revealed_before = 0
        for start, stop in self.pairs():
            if start <= pindex < stop:
                return revealed_before + (pindex - start)
            revealed_before += stop - start
        raise IndexError("Physical index {} out of bounds for selection {}".format(pindex, self))

    # TODO: O(n) -> O(log(n)) (using another sorted set for cumulative lengths?)
    def virtual2physical(self, vindex: int):  # TODO -> virtualint2physical
        """ :return the physical index of the @vindex'th revealed element. If @vindex < 0, @vindex is
        interpreted as (number of revealed elements) + @vindex.
        :raise IndexError if @vindex is smaller than -len(self) or not smaller than the number of revealed
        elements.
        """
        size = len(self)
        if vindex < -size:
            raise IndexError(
                "Got index {}, expected it to be within range [{},{})".format(vindex, -size, size))
        if vindex < 0:
            vindex = size + vindex
        revealed_so_far = 0
        for start, stop in self.pairs():
            revealed_so_far += stop - start
            if vindex >= revealed_so_far:
                continue
            # The wanted element lies in this range, counted back from its stop.
            candidate = stop - (revealed_so_far - vindex)
            if start <= candidate < stop:
                return candidate
            break
        raise IndexError("Virtual index {} out of bounds for selection {}".format(vindex, self))

    def virtual2physicalselection(self, vslice: slice) -> 'Selection':  # TODO -> virtualslice2physical
        """ :return the sub-Selection that is the intersection of this selection and @vslice. Only
        @vslice.start and @vslice.stop are read; @vslice.step is ignored. An empty Selection over the same
        universe is returned when this selection has no intervals, when the stop is 0 or -len(self), or
        when the resolved last index lies before the resolved first index.
        :raise ValueError if @vslice.start or @vslice.stop is smaller than -len(self). """
        total = len(self)
        # A stop of -len(self) addresses the position before the first revealed element, exactly like a
        # stop of 0. Resolving it through virtual2physical(stop - 1) would raise IndexError instead.
        if not self._intervals or vslice.stop == 0 or vslice.stop == -total:
            return Selection(self.universe, revealed=[])
        if vslice.start is None:
            a = self.virtual2physical(0)
        elif -total <= vslice.start < total:
            a = self.virtual2physical(vslice.start)
        elif vslice.start >= total:
            # start past the end: use the last interval edge, so that b < a below yields an empty result
            a = self._intervals[-1]
        else:
            raise ValueError("Unexpected slice start: {}".format(vslice))
        if vslice.stop is None or vslice.stop >= total:
            # open or oversized stop: the last element is the one just before the last interval edge
            b = self._intervals[-1] - 1
        elif -total <= vslice.stop < total:
            b = self.virtual2physical(vslice.stop - 1)
        else:
            raise ValueError("Unexpected slice stop: {}".format(vslice))
        # INV: a is the physical index of the first element, b is the physical index of the last element
        if b < a:
            return Selection(universe=self.universe, revealed=[])
        m = self._intervals.bisect_right(a)
        n = self._intervals.bisect_right(b)
        # keep the interval edges strictly between a and b and close the range with a and b + 1
        intervals = SortedSet([a] + self._intervals[m:n] + [b + 1])
        return Selection(universe=self.universe, intervals=intervals)

    def virtualselection2physical(self, vselection: 'Selection'):  # TODO -> virtualslice2physical
        """ :return the sub-Selection that is the intersection of this selection and @vselection. Every
        (start, stop) pair produced by iterating @vselection is mapped through virtual2physicalselection
        and the resulting (a, b) pairs are collected as the revealed slices of a new Selection. """
        physical_slices = [
            slice(a, b)
            for start, stop in vselection
            for a, b in self.virtual2physicalselection(slice(start, stop))
        ]
        return Selection(universe=self.universe, revealed=physical_slices)

    def stretched(self, from_index: Optional[int], to_index: Optional[int]):  # TODO remove?
        """ :return A new Selection over the universe [@from_index, @to_index) that keeps the interval
        edges of this selection located between bisect_right(@from_index) and bisect_right(@to_index). """
        first_kept = self._intervals.bisect_right(from_index)
        last_kept = self._intervals.bisect_right(to_index)
        kept_edges = self._intervals[first_kept:last_kept]
        new_universe = slice(from_index, to_index)
        return Selection(universe=new_universe, intervals=kept_edges)

    def __getitem__(self, item):
        return self.virtual2physical(item)

    @staticmethod
    def _compute_len(sortedset: SortedSet):
        """ :return The sum of the lengths of every slice in @slicelist. """
        if len(sortedset) == 0:
            return 0
        elif len(sortedset) % 2 == 0:
            return sum(sortedset[i + 1] - sortedset[i] for i in range(0, len(sortedset), 2))
        return sortedset[0] + sum(sortedset[i + 1] - sortedset[i] for i in range(1, len(sortedset), 2))

    def __len__(self):
        """ :return the value stored in self._revealed_count. """
        return self._revealed_count

    def __eq__(self, other):
        return repr(self) == repr(other)

    def __mul__(self, other: int):
        """ :return A new Selection whose universe bounds and interval edges are those of this selection
        multiplied by @other. Multiplying by 0 gives an empty Selection over the universe [0, 0). """
        if other == 0:
            return Selection(universe=slice(0, 0), revealed=[])
        universe = self.universe
        scaled_universe = slice(universe.start * other, universe.stop * other)
        scaled_edges = []
        for edge in self._intervals:
            scaled_edges.append(other * edge)
        return Selection(universe=scaled_universe, intervals=scaled_edges)

    def __rmul__(self, other):
        """ :return the same result as self * @other, so that @other * self works as well. """
        return self.__mul__(other)

    def __repr__(self):
        return "{}(universe={}, intervals={})".format(self.__class__.__name__, self.universe, self._intervals)

    def __str__(self):
        return repr(self)

    def deepcopy(self):
        """ :return A new Selection built from deep copies of this selection's universe and intervals. """
        universe_copy = deepcopy(self.universe)
        intervals_copy = deepcopy(self._intervals)
        return Selection(universe=universe_copy, intervals=intervals_copy)
Example #7
0
def preprocess_articles(langs,
                        date_start=None,
                        date_end=None,
                        pca_dim=300,
                        disallow_repeats=False):
    """Build, or load from the on-disk cache, the training data for a date range.

    Three groups of cache files live under ``REUTERS_DIRECTORY/preprocessed``
    and share the stem ``<date_start>--<date_end>`` (both formatted as
    ``%m%d%Y``): the preprocessed articles, the noun/verb vocabulary with its
    embeddings and per-article ids, and the named-entity vocabulary with its
    per-article ids. Each group is recomputed and rewritten only when one of
    its files is missing.

    Parameters
    ----------
    langs
        Forwarded to ``get_articles``.
    date_start, date_end
        Objects providing ``strftime``; despite the ``None`` defaults both are
        required, because the cache file names are derived from them.
    pca_dim : int
        Embeddings wider than this are reduced to ``pca_dim`` columns with PCA.
    disallow_repeats : bool
        When true, each article's id lists are de-duplicated (order is not
        preserved).

    Returns
    -------
    TrainingData
        Built from the articles, the noun/verb vocabulary, its embeddings, the
        per-article noun/verb ids, the named-entity vocabulary and the
        per-article named-entity ids.
    """
    global DONE_PROCESSING

    # Every cache file shares the same "<start>--<end>" stem.
    date_tag = '%s--%s' % (date_start.strftime("%m%d%Y"),
                           date_end.strftime("%m%d%Y"))

    def cache_path(suffix):
        """Return the cache file path for this date range and file suffix."""
        return os.path.join(REUTERS_DIRECTORY,
                            'preprocessed/%s%s' % (date_tag, suffix))

    # NOTE(review): pickle.load executes code embedded in the file; this is
    # only safe as long as the cache directory is trusted.
    extracted_path = cache_path('.pkl')
    if os.path.exists(extracted_path):
        with open(extracted_path, 'rb') as f:
            articles = pickle.load(f)
    else:
        warnings.filterwarnings(
            "ignore",
            message=
            "Due to multiword token expansion or an alignment issue, the original text has been replaced by space-separated expanded tokens."
        )
        warnings.filterwarnings(
            "ignore",
            message=
            "Can't set named entities because of multi-word token expansion or because the character offsets don't map to valid tokens produced by the Stanza tokenizer:"
        )

        articles = []
        q = queue.Queue()

        # The workers receive the queue and the shared result list.
        for _ in range(NUM_WORKERS):
            threading.Thread(target=preprocess_articles_worker,
                             args=(q, articles)).start()

        for article_num, article in enumerate(
                get_articles(langs, date_start, date_end)):
            q.put((article_num, article))

        # Wait until every queued article has been marked done, then raise the
        # module-level flag (presumably what tells the workers to stop).
        q.join()
        DONE_PROCESSING = True

        with open(extracted_path, 'wb') as f:
            pickle.dump(articles, f)

        print("Processed %s articles!" % len(articles))

    noun_and_verb_vocabulary_path = cache_path('-nouns-and-verbs.pkl')
    noun_and_verb_embeddings_path = cache_path('-noun-and-verb-embeddings.npy')
    article_nouns_and_verbs_path = cache_path('-article-nouns-and-verbs.pkl')

    if not all(
            map(os.path.exists, [
                noun_and_verb_vocabulary_path, noun_and_verb_embeddings_path,
                article_nouns_and_verbs_path
            ])):
        lang_counts = Counter(article.lang for article in articles)

        print("%s articles!" % (len(articles)))
        print(lang_counts)

        ###
        # NOUNS AND VERBS
        ###

        # now, for our nouns and verbs, we define a vocabulary.
        # keep all nouns and verbs that:
        # 	1) are in less than 30% of documents (ie, contain informative signal)
        #	2) aren't in our (rather aggressive) list of stopwords for each language (ie, contain information signal)
        #	3) are in the set of word embeddings published by MUSE

        noun_and_verb_counts = defaultdict(Counter)
        for article in articles:
            for noun_or_verb in article.nouns_and_verbs:
                # surface tokens that carry stray surrounding whitespace
                if noun_or_verb != noun_or_verb.strip():
                    print(noun_or_verb)
                noun_and_verb_counts[article.lang][noun_or_verb] += 1

        print("Unfiltered noun/verb vocab size")
        print({
            lang: len(noun_and_verb_counts[lang])
            for lang in noun_and_verb_counts.keys()
        })

        noun_and_verbs_by_lang = defaultdict(set)
        noun_and_verb_vocabulary = SortedSet()
        for lang in noun_and_verb_counts:
            for noun_or_verb, count in noun_and_verb_counts[lang].items():
                if count > 0.3 * lang_counts[lang]:
                    continue

                if noun_or_verb in STOP_WORDS[lang]:
                    continue

                if noun_or_verb not in EMBEDDINGS[lang]:
                    continue

                noun_and_verb_vocabulary.add((lang, noun_or_verb))
                noun_and_verbs_by_lang[lang].add(noun_or_verb)

            print("Filtered noun/verb vocab size for %s=%s" %
                  (lang, len(noun_and_verbs_by_lang[lang])))

        # Row k holds the embedding of the k-th vocabulary entry (sorted order).
        noun_and_verb_embeddings = np.array([
            EMBEDDINGS[lang][noun_or_verb]
            for lang, noun_or_verb in noun_and_verb_vocabulary
        ])

        # optionally reduce dimensionality (don't need to hold onto PCA matrix)
        if pca_dim < noun_and_verb_embeddings.shape[1]:
            print("Reducing embedding dimensionality from %s to %s" %
                  (noun_and_verb_embeddings.shape[1], pca_dim))
            noun_and_verb_embeddings = PCA(pca_dim).fit_transform(
                noun_and_verb_embeddings)

        # Each article becomes a list of vocabulary ids, one per occurrence.
        article_nouns_and_verbs = []
        for article in articles:
            article_nouns_and_verbs.append([])
            for noun_or_verb, count in article.nouns_and_verbs.items():
                if (article.lang,
                        noun_or_verb) not in noun_and_verb_vocabulary:
                    continue

                noun_or_verb_id = noun_and_verb_vocabulary.index(
                    (article.lang, noun_or_verb))
                for _ in range(count):
                    article_nouns_and_verbs[-1].append(noun_or_verb_id)

        with open(noun_and_verb_vocabulary_path, 'wb') as f:
            pickle.dump(noun_and_verb_vocabulary, f)

        # The path already ends in ".npy", so np.save uses it unchanged.
        # str.strip(".npy") must not be used here: it removes any of the
        # characters ".", "n", "p", "y" from BOTH ends of the path.
        np.save(noun_and_verb_embeddings_path, noun_and_verb_embeddings)

        with open(article_nouns_and_verbs_path, 'wb') as f:
            pickle.dump(article_nouns_and_verbs, f)

        print("Wrote nouns and verbs of size to %s" %
              (noun_and_verb_vocabulary_path))
    else:
        with open(noun_and_verb_vocabulary_path, 'rb') as f:
            noun_and_verb_vocabulary = pickle.load(f)

        noun_and_verb_embeddings = np.load(noun_and_verb_embeddings_path)

        with open(article_nouns_and_verbs_path, 'rb') as f:
            article_nouns_and_verbs = pickle.load(f)

    ###
    # NAMED ENTITIES
    ###

    named_entity_vocabulary_path = cache_path('-named-entities.pkl')
    article_named_entities_path = cache_path('-article-named-entities.pkl')

    if not all(
            map(os.path.exists,
                [named_entity_vocabulary_path, article_named_entities_path])):
        named_entities = set()
        for article in articles:
            for named_entity in article.named_entities:
                named_entities.add((article.lang, named_entity))
        named_entities = sorted(named_entities)

        print("Ungrouped named entities: %s" % len(named_entities))

        # Spanish and Russian entities are translated so that entities from
        # different languages can be grouped under one name.
        es_named_entities = [
            named_entity for (lang, named_entity) in named_entities
            if lang == 'es'
        ]
        es_named_entities_translated = translate_named_entities(
            'es', es_named_entities)
        ru_named_entities = [
            named_entity for (lang, named_entity) in named_entities
            if lang == 'ru'
        ]
        ru_named_entities_translated = translate_named_entities(
            'ru', ru_named_entities)

        grouped_named_entity_counts = Counter()
        for article in articles:
            for named_entity in article.named_entities:
                if article.lang == 'es':
                    named_entity = es_named_entities_translated[named_entity]
                elif article.lang == 'ru':
                    named_entity = ru_named_entities_translated[named_entity]

                grouped_named_entity_counts[named_entity] += 1

        # Keep entities counted more than 5 times but in fewer than 80% of the
        # articles, and drop anything mentioning 'reuters'.
        named_entity_vocabulary = SortedSet()
        for named_entity, count in grouped_named_entity_counts.items():
            if count > 5 and count < 0.8 * len(
                    articles) and 'reuters' not in named_entity:
                named_entity_vocabulary.add(named_entity)

        print("Grouped named entities: %s" % len(named_entity_vocabulary))

        article_named_entities = []
        for article in articles:
            article_named_entities.append([])
            for named_entity, count in article.named_entities.items():
                if article.lang == 'es':
                    named_entity = es_named_entities_translated[named_entity]
                elif article.lang == 'ru':
                    named_entity = ru_named_entities_translated[named_entity]

                if named_entity not in named_entity_vocabulary:
                    continue

                named_entity_id = named_entity_vocabulary.index(named_entity)
                for _ in range(count):
                    article_named_entities[-1].append(named_entity_id)

        with open(named_entity_vocabulary_path, 'wb') as f:
            pickle.dump(named_entity_vocabulary, f)

        with open(article_named_entities_path, 'wb') as f:
            pickle.dump(article_named_entities, f)
    else:
        with open(named_entity_vocabulary_path, 'rb') as f:
            named_entity_vocabulary = pickle.load(f)

        with open(article_named_entities_path, 'rb') as f:
            article_named_entities = pickle.load(f)

    if disallow_repeats:
        for article_id in range(len(articles)):
            article_nouns_and_verbs[article_id] = list(
                set(article_nouns_and_verbs[article_id]))
            article_named_entities[article_id] = list(
                set(article_named_entities[article_id]))

    return TrainingData(articles, noun_and_verb_vocabulary,
                        noun_and_verb_embeddings, article_nouns_and_verbs,
                        named_entity_vocabulary, article_named_entities)
Example #8
0
class SweepLineStatus:
    """Sweep-line driver that collects the intersection points of a set of lines.

    End points and discovered intersections are kept in ``events``; the lines
    currently crossed by the sweep line are kept in ``lines``. The module-level
    variable ``X`` holds the current sweep position and is updated on every
    event (presumably the ordering of ``Line`` objects depends on it, which is
    why ``lines`` sometimes has to be rebuilt -- see ``update_keys``).
    """

    def __init__(self, scenes=None):
        """Create an empty status structure.

        :param scenes: optional list stored as ``self.scenes``; a fresh list is
            created when omitted so that instances never share one.
        """
        super().__init__()
        self.lines: SortedSet = None
        # Keyed by -x so that pop() (which removes the last item) yields the
        # event with the smallest x first.
        self.events: SortedSet = SortedSet(key=lambda p: -p.x)
        self.scenes = [] if scenes is None else scenes
        self.dataset = {}
        self.results = set()

    def run(self, dataset: List[Line]):
        """Process every event of ``dataset`` and return the set of intersections found."""
        for l in dataset:
            # both end points map back to their line so events can be classified
            self.dataset[l.get_left()] = l
            self.dataset[l.get_right()] = l
            self.events.add(l.get_left())
            self.events.add(l.get_right())
        self.lines = SortedSet()
        while len(self.events) > 0:
            print('iteration')
            event = self.events.pop()

            self.event_happened(event)
        return self.results

    def find_intersection(self, line1: Line, line2: Line):
        """Return the result of ``line_intersection(line1, line2)``."""
        return line_intersection(line1, line2)

    def _index_of(self, line, report_error=False):
        """Return the position of ``line`` in ``self.lines``.

        When the lookup fails the set is rebuilt with ``update_keys`` and the
        lookup is retried once; a second failure propagates as ``ValueError``.
        """
        try:
            return self.lines.index(line)
        except ValueError:
            if report_error:
                print('error')
            self.update_keys(X)
            return self.lines.index(line)

    def insert_line(self, line: Line):
        """Add ``line`` to the status and update the events of its new neighbours."""
        print(X)
        self.lines.add(line)
        i = self._index_of(line, report_error=True)
        has_lower = i - 1 >= 0
        has_upper = i + 1 < len(self.lines)
        if has_lower and has_upper:
            # the two neighbours are no longer adjacent: drop their pending event
            intersection = self.find_intersection(self.lines[i - 1],
                                                  self.lines[i + 1])
            if intersection is not None and intersection in self.events:
                self.events.remove(intersection)
        if has_lower:
            intersection = self.find_intersection(self.lines[i - 1], line)
            if intersection is not None and intersection not in self.results and intersection not in self.events:
                self.events.add(intersection)
        if has_upper:
            intersection = self.find_intersection(line, self.lines[i + 1])
            if intersection is not None and intersection not in self.results and intersection not in self.events:
                self.events.add(intersection)

    def update_keys(self, x):
        """Rebuild ``self.lines`` from its current content (``x`` is not used)."""
        temp_lines = SortedSet()
        temp_lines.update(self.lines)
        self.lines = temp_lines

    def remove_line(self, line: Line):
        """Remove ``line`` from the status and schedule the intersection of its neighbours."""
        print(X)
        i = self._index_of(line)
        if i - 1 >= 0 and i + 1 < len(self.lines):
            # the neighbours become adjacent once the line is gone
            intersection = self.find_intersection(self.lines[i - 1],
                                                  self.lines[i + 1])
            if intersection is not None and intersection not in self.results:
                self.events.add(intersection)
        self.lines.remove(line)

    def intersection_event(self, intersection: Point):
        """Record ``intersection`` and re-insert its two lines on the other side of it."""
        global X
        print(X)
        # remove the lines just before the intersection ...
        X = intersection.x - 0.001
        self.results.add(intersection)
        line1 = intersection.line1
        line2 = intersection.line2

        self.remove_line(line1)
        self.remove_line(line2)

        # ... and insert them again just after it
        X = intersection.x + 0.001

        self.insert_line(line1)
        self.insert_line(line2)

    def event_happened(self, event: Point):
        """Move the sweep position to ``event`` and dispatch on the kind of event."""
        global X
        X = event.x
        print(X)
        if event in self.dataset:
            line = self.dataset[event]
            if event == line.get_left():
                self.insert_line(line)
            else:
                self.remove_line(line)
        else:
            self.intersection_event(event)
class FirstLastList:
    """Collection that answers "smallest/largest k" and "first/last k added" queries.

    ``sorted_elements`` holds one wrapper per distinct element, carrying a
    ``count`` of how often it was added; ``ordered_elements`` holds one key per
    individual addition, in insertion order.
    """

    def __init__(self):
        self.sorted_elements = SortedSet()
        self.ordered_elements = OrderedDict()

    def count(self):
        """Return the number of additions currently stored."""
        return len(self.ordered_elements)

    def clear(self):
        """Drop every stored element."""
        self.sorted_elements = SortedSet()
        self.ordered_elements = OrderedDict()

    def add(self, element):
        """Register one more occurrence of ``element``."""
        # add in the sorted container
        wrapper = ElementWrapper(element)
        if wrapper in self.sorted_elements:
            # already known: bump the count on the stored wrapper
            stored = self.sorted_elements[self.sorted_elements.index(wrapper)]
            stored.count += 1
        else:
            self.sorted_elements.add(wrapper)
            stored = wrapper

        # add in the ordered container, tagged with its occurrence number
        ordered_element = ElementOrderWrapper(element, stored.count)
        self.ordered_elements[ordered_element] = True

    @staticmethod
    def _take(wrappers, count):
        """Collect up to ``count`` values from ``wrappers``, repeating each value ``wrapper.count`` times."""
        if count <= 0:
            # nothing requested; without this guard the limit below is never reached
            return []
        taken = []
        for wrapper in wrappers:
            for _ in range(wrapper.count):
                taken.append(wrapper.value)
                if len(taken) == count:
                    return taken
        return taken

    def min(self, count):
        """Return up to ``count`` values in ascending order ([] when ``count`` <= 0)."""
        return self._take(self.sorted_elements, count)

    def max(self, count):
        """Return up to ``count`` values in descending order ([] when ``count`` <= 0)."""
        return self._take(reversed(self.sorted_elements), count)

    def first(self, count):
        """Return the ``count`` earliest added keys, oldest first."""
        return list(self.ordered_elements.keys())[:count]

    def last(self, count):
        """Return the ``count`` most recently added keys, newest first."""
        keys = list(self.ordered_elements.keys())
        start = max(len(keys) - count, 0)
        return keys[start:][::-1]

    def remove_all(self, element):
        """Remove every occurrence of ``element`` and return how many were removed."""
        el_obj = ElementWrapper(element)
        if el_obj not in self.sorted_elements:
            return 0
        # remove from the sorted collection
        el_idx = self.sorted_elements.index(el_obj)
        element_obj = self.sorted_elements[el_idx]
        self.sorted_elements.remove(element_obj)

        # remove from the order collection, one key per recorded occurrence
        for occurence in range(1, element_obj.count + 1):
            el_wrapper = ElementOrderWrapper(element, occurence)
            del self.ordered_elements[el_wrapper]

        return element_obj.count
class BunnyWars:
    """Registry of rooms and bunnies with lookups by name, team and name suffix."""

    def __init__(self):
        self.rooms_by_idx = SortedSet()  # integer ID only
        self.rooms = SortedDict()  # key: id, value: room
        # key: team id, value: SortedSet of Bunny objects
        self.bunnies_by_team = {}
        # trie over reversed names; its alphabet is ASCII letters plus digits
        self.bunnies_by_suffix = datrie.Trie(string.ascii_letters +
                                             string.digits)
        self.bunny_names = {}  # key: bunny name, value: Bunny

    def next_bunny(self, bunny_name):
        """Move the bunny one room forward (see ``_move_bunny``)."""
        self._move_bunny(bunny_name)

    def prev_bunny(self, bunny_name):
        """Move the bunny one room backward (see ``_move_bunny``)."""
        self._move_bunny(bunny_name, prev=True)

    def bunny_count(self):
        """Return the number of registered bunnies."""
        return len(self.bunny_names)

    def room_count(self):
        """Return the number of registered rooms."""
        return len(self.rooms)

    def list_bunnies_by_team(self, team_id):
        """
        ListBunniesByTeam teamId - returns all bunnies from the specified team in (sorted by name in descending order).

        Raises KeyError when no bunny was ever added for ``team_id``.
        """
        return reversed(self.bunnies_by_team[team_id])

    def list_bunnies_by_suffix(self, suffix):
        """
        ListBunniesBySuffix suffix -
            returns all bunnies ending with the specified suffix (sorted by the ASCII code of the reversed name
            in ascending order as a first criteria and by length in ascending order as a second criteria).
            Example Tpen < apen < aapen < bapen < bpen.
        """
        # names are stored reversed, so a suffix query is a prefix query on the trie
        return self.bunnies_by_suffix.values(''.join(reversed(suffix)))

    def detonate(self, bunny_name):
        """Detonate the named bunny in its room and unregister every bunny the room reports dead."""
        if bunny_name not in self.bunny_names:
            raise Exception('Bunny does not exist!')
        bunny = self.bunny_names[bunny_name]
        room = self.rooms[bunny.room]
        dead_bunnies = room.detonate(
            bunny)  # detonate the bunny and get all the bunnies that have died
        for dead_bunny in dead_bunnies:
            self._delete_bunny(dead_bunny)

    def add_room(self, id):
        """
        Add roomId – adds a room to the structure.
            Rooms have unique ids.
            Rooms should be situated according to their id in ascending order.
            If a room with the given Id exists the command should throw an exception.
        """
        if id in self.rooms:
            raise Exception(
                'Room with id {id} is already registered!'.format(id=id))
        self.rooms_by_idx.add(id)
        self.rooms[id] = Room(id)

    def add_bunny(self, bunny_name, team_id, room_id):
        """Create a bunny and register it in its room and in every lookup structure.

        Raises Exception when the room is unknown, the team id is outside
        0..4, or the name is already taken.
        """
        if room_id not in self.rooms or team_id > 4 or team_id < 0:
            raise Exception('Invalid room/team id!')
        if bunny_name in self.bunny_names:
            raise Exception('A bunny with the given name already exists!')
        bunny_obj = Bunny(name=bunny_name, teamid=team_id, room=room_id)
        # 1. Add to the room
        self.rooms[room_id].add_bunny(bunny_obj)
        # 2. Add to overall bunnies
        self.bunny_names[bunny_name] = bunny_obj
        # 3. Add to suffixes
        self.bunnies_by_suffix[bunny_obj.reversed_name] = bunny_obj
        # 4. Add to bunnies by team
        if bunny_obj.team not in self.bunnies_by_team:
            self.bunnies_by_team[bunny_obj.team] = SortedSet()
        self.bunnies_by_team[bunny_obj.team].add(bunny_obj)

    def remove_room(self, room_id):
        """Remove a room and unregister every bunny that was in it."""
        if room_id not in self.rooms:
            raise Exception(
                'A room with the id {id} does not exist!'.format(id=room_id))
        room = self.rooms[room_id]
        del self.rooms[room_id]
        self.rooms_by_idx.remove(room_id)

        # delete every bunny there
        for bunnies_from_team in room.bunnies.values():
            for bunny in bunnies_from_team.values():
                self._delete_bunny(bunny)

    def _move_bunny(self, bunny_name, prev=False):
        """Move the named bunny to the neighbouring room in id order.

        ``prev=True`` moves towards smaller ids, otherwise towards larger ids.
        Raises Exception when the bunny is unknown.
        """
        if bunny_name not in self.bunny_names:
            raise Exception('Bunny does not exist!')
        bunny = self.bunny_names[bunny_name]
        old_room_id = bunny.room
        old_room = self.rooms[old_room_id]
        old_room_index = self.rooms_by_idx.index(old_room_id)
        if prev:
            next_room_index = old_room_index - 1
        else:
            next_room_index = old_room_index + 1
        if next_room_index >= len(
                self.rooms_by_idx) or next_room_index < 0:  # is out of bounds
            # NOTE(review): stepping past either end currently lands on the
            # room the bunny is already in (first room for prev, last room
            # otherwise); confirm whether wrapping around was intended.
            next_room_index = 0 if prev else len(self.rooms_by_idx) - 1
        # get the new room id and assign it to the bunny
        new_room_id = self.rooms_by_idx[next_room_index]
        bunny.room = new_room_id
        new_room = self.rooms[new_room_id]
        # remove the bunny from the old room and move it to the new one
        old_room.remove_bunny(bunny)
        new_room.move_bunny_in(bunny)

    def _delete_bunny(self, bunny: Bunny):
        """Unregister ``bunny`` from the name, suffix and team lookups (rooms are not touched here)."""
        # 1.Remove from overall bunnies
        del self.bunny_names[bunny.name]
        # 2.Remove from suffixes
        del self.bunnies_by_suffix[bunny.reversed_name]
        # 3.Remove from bunnies by team
        self.bunnies_by_team[bunny.team].remove(bunny)
Example #11
0
    def cluster_(self, fX):
        """Compute complete dendrogram

        Clusters are merged greedily: at every step the pair with the smallest
        distance is merged, the representative of the surviving cluster becomes
        the sum of both representatives, and its distances to the remaining
        clusters are recomputed with ``self.metric``.

        Parameters
        ----------
        fX : (n_items, dimension) np.array
            Embeddings. The array passed in is left untouched.

        Returns
        -------
        dendrogram : list of (i, j, distance) tuples
            Dendrogram, one entry per merge (``n_items - 1`` entries); the
            cluster ``j`` is merged into the cluster ``i``.
        """

        # work on a private copy: representatives are summed and rows are
        # shifted in place below, which would otherwise corrupt the caller's data
        fX = np.array(fX)

        N = len(fX)

        # clusters contain the identifier of each cluster
        clusters = SortedSet(np.arange(N))

        squared = squareform(pdist(fX, metric=self.metric))
        # keyed by (smaller id, larger id) and ordered by distance
        distances = ValueSortedDict()
        for i, j in itertools.combinations(range(N), 2):
            distances[i, j] = squared[i, j]

        dendrogram = []

        for _ in range(N-1):

            # find most similar clusters
            (c_i, c_j), d = distances.peekitem(index=0)

            # keep track of this iteration
            dendrogram.append((c_i, c_j, d))

            # index of clusters in 'clusters' and 'fX'
            i = clusters.index(c_i)
            j = clusters.index(c_j)

            # update c_i representative
            fX[i] += fX[j]

            # remove c_j cluster (shift the following rows up by one)
            fX[j:-1, :] = fX[j+1:, :]
            fX = fX[:-1]

            # remove distances to c_j cluster
            for c in clusters[:j]:
                distances.pop((c, c_j))
            for c in clusters[j+1:]:
                distances.pop((c_j, c))

            clusters.remove(c_j)

            if len(clusters) < 2:
                continue

            # compute distance to new c_i cluster
            new_d = cdist(fX[i, :].reshape((1, -1)), fX, metric=self.metric).squeeze()
            for c_k, d in zip(clusters, new_d):

                # keys always list the smaller cluster id first; c_k == c_i is skipped
                if c_k < c_i:
                    distances[c_k, c_i] = d
                elif c_k > c_i:
                    distances[c_i, c_k] = d

        return dendrogram
Example #12
0
class SparseTimeSeriesDataSet:
    # A dataset designed for dealing with sparse time series data that needs to be kept in sync in time.
    def __init__(self, unique_timestamps = None, minimum_time_between_timestamps = None, mode='strict'):
        """Create an empty dataset.

        unique_timestamps: optional iterable of allowed timestamps, stored as a SortedSet.
        minimum_time_between_timestamps: optional minimum gap, verified immediately and on every add().
        mode: one of 'strict', 'remove_difference' or 'union'; selects how add() treats timestamps
            that are not in unique_timestamps.
        """
        # possible modes are strict, remove_difference, union
        self.mode = mode
        self.minimum_time_between_timestamps = minimum_time_between_timestamps

        if unique_timestamps is None:
            self.unique_timestamps = SortedSet()
        else:
            self.unique_timestamps = SortedSet(unique_timestamps)

        # raw data exactly as passed to add(), keyed by id
        self.all_raw_data = {}

        #dict of sorteddicts
        self.timestamp_indexed_data = {}

        self.check_minimum_timestamp_interval()


    def __len__(self):
        """Return the number of unique timestamps."""
        return len(self.unique_timestamps)

    @classmethod
    def sample_data_at_intervals(cls, start_timestamp, end_timestamp, interval, data):
        """Resample data at start, start + interval, ... up to and including end.

        Every sample is a copy of the row whose timestamp is the latest one not after the sample time,
        with its first field replaced by the sample time.
        NOTE(review): rows are looked up by position in the sorted timestamps, so data is presumably
        expected to be sorted by timestamp already - confirm against callers.
        """
        # extends previous datapoint if one is missing
        known_times = SortedList([row[0] for row in data])

        first_wanted = int(start_timestamp)
        last_wanted = int(end_timestamp)

        assert(known_times[0] <= first_wanted)
        assert(known_times[-1] >= last_wanted)

        def resample(moment):
            # row in effect at `moment`: the last one whose timestamp is <= moment
            row = data[known_times.bisect_right(moment)-1].copy()
            row[0] = moment
            return row

        return [resample(moment) for moment in range(first_wanted, last_wanted+1, interval)]

    @property
    def ids(self):
        return list(self.all_raw_data.keys())

    @property
    def first_timestamp(self):
        """First entry of unique_timestamps (IndexError when there are none)."""
        return self.unique_timestamps[0]

    def first_timestamp_for_id(self, id):
        return self.all_raw_data[id][0][0]

    @property
    def last_timestamp(self):
        """Last entry of unique_timestamps (IndexError when there are none)."""
        return self.unique_timestamps[-1]

    def last_timestamp_for_id(self, id):
        return self.all_raw_data[id][-1][0]

    def first_unpadded_index_for_id(self, id):
        first_timestamp = self.first_timestamp_for_id(id)
        return self.unique_timestamps.index(first_timestamp)

    def last_unpadded_index_for_id(self, id):
        last_timestamp = self.last_timestamp_for_id(id)
        return self.unique_timestamps.index(last_timestamp)


    def check_minimum_timestamp_interval(self):
        if self.minimum_time_between_timestamps is not None:
            prev_timestamp = 0
            for timestamp in self.unique_timestamps:
                if timestamp-prev_timestamp < self.minimum_time_between_timestamps:
                    raise InvalidTimestampsInDataError("Found timestamps that have less than the required {} between them".format(self.minimum_time_between_timestamps))
                prev_timestamp = timestamp

    def add(self, id: str, data):
        """Index the rows of ``data`` by timestamp and store them under ``id``.

        The first element of every row is used as its timestamp (converted
        with ``int`` for the index).  Rows with more than two elements keep
        all remaining elements as the value; otherwise the second element is
        the value.  Timestamps that are not already in
        ``self.unique_timestamps`` are handled according to ``self.mode``:

        * ``'strict'``: rejected with ``InvalidTimestampsInDataError``.
        * ``'remove_difference'``: the corresponding rows are dropped.
        * ``'union'``: they are added to ``self.unique_timestamps``.

        Nothing is recorded for ``id`` unless every check passes, so a
        rejected call can be retried instead of being reported as
        "already added" on the next attempt.

        Args:
            id: Key the data is stored under.
            data: Non-empty sequence of rows ``[timestamp, value, ...]``.

        Raises:
            ValueError: ``data`` is empty.
            InvalidTimestampsInDataError: in ``'strict'`` mode when ``data``
                contains a timestamp outside ``self.unique_timestamps``
                (the minimum-interval check may raise it as well).
            NotEnoughInputData: no rows are left after filtering.
        """
        if len(data) == 0:
            raise ValueError("Tried to add empty data for id {}".format(id))

        if id in self.all_raw_data and self.all_raw_data[id] == data:
            print("Data for id {} already added.".format(id))
            return

        if len(data[0]) > 2:
            # we have multidimensional data
            timestamp_indexed_data = SortedDict([[int(x[0]), x[1:]] for x in data])
        else:
            timestamp_indexed_data = SortedDict([[int(x[0]), x[1]] for x in data])

        # NOTE(review): rows are keyed by int(timestamp) while the sets below
        # hold the raw timestamps; the two only line up when they compare and
        # hash equal (e.g. integral values) -- confirm against callers.
        new_timestamps = {x[0] for x in data}
        difference = new_timestamps.difference(self.unique_timestamps)

        # Remembered so a failed interval check cannot leave the shared
        # timestamp set enlarged by data that was never accepted.
        previous_unique_timestamps = self.unique_timestamps

        if self.mode == 'strict':
            if len(difference) != 0:
                raise InvalidTimestampsInDataError("Tried to add new data with id {} that includes timestamps that are not in the set of allowed timestamps. "
                                                   "Difference = {}".format(id, difference))
            # Allowed timestamps that are absent from the new data are
            # tolerated here; they are not treated as an error.

        elif self.mode == 'remove_difference':
            for timestamp_to_remove in difference:
                del timestamp_indexed_data[timestamp_to_remove]

        elif self.mode == 'union':
            self.unique_timestamps = self.unique_timestamps.union(new_timestamps)

        try:
            self.check_minimum_timestamp_interval()
        except Exception:
            self.unique_timestamps = previous_unique_timestamps
            raise

        if len(timestamp_indexed_data) == 0:
            raise NotEnoughInputData("The data being added has zero length. If the mode is remove_difference, then this means that the new data has no timestamps in common with the required timestamps")

        # Commit both views together, only after validation succeeded.
        self.all_raw_data[id] = data
        self.timestamp_indexed_data[id] = timestamp_indexed_data


    def get_left_and_right_padding_required(self, ids):
        """Return ``[left, right]`` padding counts for every id in ``ids``.

        ``left`` is the position of the id's first timestamp inside
        ``self.unique_timestamps``; ``right`` is the number of positions
        between the id's last timestamp and the end, measured against
        ``len(self)``.  Each result is cross-checked against the first and
        last rows of the raw data with ``assert``.

        Returns:
            A list with one ``[left, right]`` pair per id, in input order.
        """
        result = []
        for series_id in ids:
            first_ts = self.first_timestamp_for_id(series_id)
            last_ts = self.last_timestamp_for_id(series_id)

            all_timestamps = self.unique_timestamps
            pad_left = all_timestamps.index(first_ts)
            pad_right = len(self) - all_timestamps.index(last_ts) - 1

            # Sanity checks: the raw rows must begin/end on the timestamps
            # that the computed padding points at.
            raw_rows = self.all_raw_data[series_id]
            assert(raw_rows[0][0] == all_timestamps[pad_left])
            assert(raw_rows[-1][0] == all_timestamps[-(pad_right + 1)])

            result.append([pad_left, pad_right])
        return result

    def get_data_extend_missing_internal(self, id: str):
        """Return the rows stored for ``id`` with interior gaps filled.

        Every timestamp of ``self.unique_timestamps`` that is missing from
        the series and lies strictly between its first and last entries is
        given the value of the entry just before it.  Timestamps before the
        first or after the last entry are left out (no left/right padding).

        The fill is written into the series stored in
        ``self.timestamp_indexed_data[id]`` itself, not into a copy.

        Returns:
            ``[[timestamp, *values], ...]`` when the first stored value is a
            list or tuple, otherwise a list of ``(timestamp, value)`` items.
        """
        series = self.timestamp_indexed_data[id]
        absent = self.unique_timestamps - set(series.keys())

        for ts in absent:
            position = series.bisect_right(ts)
            # position == 0 means "before the first entry" and
            # position == len(series) means "after the last one"; neither is
            # an interior gap, so both are skipped.
            if 0 < position < len(series):
                series[ts] = series.peekitem(position - 1)[1]

        first_value = series.peekitem(0)[1]
        if isinstance(first_value, (list, tuple)):
            return [[ts, *values] for ts, values in series.items()]
        return list(series.items())


    def get_padded_data_in_sync(self, padding_val = "extend"):
        """Pad every stored series so it covers all of ``self.unique_timestamps``.

        A missing timestamp that has an earlier entry takes that entry's
        value (this covers gaps in the middle and at the end).  A missing
        timestamp in front of the first entry takes the first entry's value
        when ``padding_val`` is ``'extend'``, otherwise ``padding_val``
        itself.

        The returned dict maps each id to the very same series object that
        is stored in ``self.timestamp_indexed_data``; the padding is written
        into it in place.
        """
        result = {}

        for ric, series in self.timestamp_indexed_data.items():
            result[ric] = series
            absent = self.unique_timestamps - set(series.keys())

            for ts in absent:
                position = series.bisect_right(ts)
                if position > 0:
                    # There is an earlier entry: carry its value forward.
                    fill = series.peekitem(position - 1)[1]
                elif padding_val == 'extend':
                    # Nothing earlier: reuse the current first entry.
                    fill = series.peekitem(0)[1]
                else:
                    fill = padding_val

                series[ts] = fill

        return result


    def get_start_and_end_index_for_concat_data(self, keys):
        """Return the ``[start, stop)`` row range of each key in the concatenation.

        The ranges describe where each key's rows fall when the series of
        ``keys`` are laid end to end in the given order.  Keys that are not
        in ``self.timestamp_indexed_data`` are reported with a printed
        warning and contribute no range.

        Args:
            keys: Ids in concatenation order.

        Returns:
            A list of ``[start, stop]`` pairs, one per key that was found.
        """
        start_stop = []
        current_position = 0
        for key in keys:
            if key not in self.timestamp_indexed_data:
                print("warning: tried to concat data for keys {} but key {} is missing".format(keys, key))
                continue

            # The next block starts where this one ends, so the offset has to
            # accumulate across all previous blocks.
            next_position = current_position + len(self.timestamp_indexed_data[key])
            start_stop.append([current_position, next_position])
            current_position = next_position

        return start_stop


    def concat_data_unpadded(self, keys, as_numpy = True, with_timestamps = True):
        """Concatenate the stored series of ``keys`` row-wise, without padding.

        Args:
            keys: Ids to concatenate, in order.  Ids that are not in
                ``self.timestamp_indexed_data`` are reported with a printed
                warning and skipped.
            as_numpy: Return a numpy array when true, otherwise a nested
                list (``ndarray.tolist()``).
            with_timestamps: Use ``(timestamp, value)`` items when true,
                otherwise only the values.

        Returns:
            The concatenated rows of all keys that were found.

        Raises:
            ValueError: raised by ``np.concatenate`` when none of ``keys``
                is present.
        """
        data_to_concat = []
        for key in keys:
            if key not in self.timestamp_indexed_data:
                print("warning: tried to concat data for keys {} but key {} is missing".format(keys, key))
                continue

            series = self.timestamp_indexed_data[key]
            rows = series.items()[:] if with_timestamps else series.values()[:]

            block = np.squeeze(rows)
            if len(rows) == 1:
                # np.squeeze drops every length-1 axis, including the row
                # axis of a one-row series.  Put it back so each series still
                # contributes exactly len(series) rows to the concatenation.
                block = block[np.newaxis, ...]
            data_to_concat.append(block)

        concatenated = np.concatenate(data_to_concat)
        return concatenated if as_numpy else concatenated.tolist()
class MySortedSet:
    """Custom class to abstract redis sorted sets.

    ``scoremap`` maps every member to its score; ``members`` is a
    ``SortedSet`` whose ordering key is that score.
    """
    def __init__(self):
        self.members = SortedSet(key=self.sortedset_key)
        self.scoremap = {}

    def sortedset_key(self, x):
        """Return the current score of ``x``; used as the ordering key."""
        return self.scoremap[x]

    def _detach(self, members):
        """Remove already-scored ``members`` from the ordering.

        Must run *before* ``scoremap`` is changed: the sorted container
        finds an element through its key, so once the score has been
        overwritten the old position can no longer be located and a stale
        entry would be left behind.
        """
        for member in members:
            if member in self.scoremap:
                self.members.discard(member)

    def update(self, iterable, ch_flag=False):
        """Set the score of each ``(score, member)`` pair in ``iterable``.

        Emulates set update.

        Args:
            iterable: Non-empty iterable of ``(score, member)`` pairs.
            ch_flag: When true, members whose score changed are counted in
                the result as well as newly added ones.

        Returns:
            The number of pairs minus the members that already existed
            (without ``ch_flag``) or that already existed with the same
            score (with ``ch_flag``).
        """
        scores, members = zip(*iterable)

        self._detach(members)

        exist_count = 0
        for score, key in zip(scores, members):
            if key in self.scoremap:
                if not ch_flag or self.scoremap[key] == score:
                    exist_count += 1
            self.scoremap[key] = score

        # Re-insert under the new scores.
        self.members.update(members)

        return len(members) - exist_count

    def incr_update(self, iterable):
        """Add each score to the member's current score.

        Members that have no score yet start at the given score.

        Args:
            iterable: Non-empty iterable of ``(score, member)`` pairs.

        Returns:
            The resulting score of the last member in ``iterable``.
        """
        scores, members = zip(*iterable)

        self._detach(members)

        for score, key in zip(scores, members):
            if key in self.scoremap:
                self.scoremap[key] += score
            else:
                self.scoremap[key] = score

        self.members.update(members)
        return self.scoremap[members[-1]]

    def rank(self, member):
        """Return the position of ``member`` in score order, or ``'(nil)'``.

        ``KeyError`` comes from the key function when the member has no
        score; ``ValueError`` is what the container's ``index`` raises when
        it has nothing to search (for example when it is empty).
        """
        try:
            return self.members.index(member)
        except (KeyError, ValueError):
            return '(nil)'

    def range(self, start, end, withscores):
        """Return the members in ``self.members[start:end]``.

        The bounds follow Python slice semantics (``end`` is exclusive).
        NOTE(review): Redis ZRANGE treats its stop index as inclusive;
        presumably the caller adjusts for that -- confirm.

        Returns:
            A list of ``(member, score)`` tuples when ``withscores`` is
            true, otherwise the sliced members.
        """
        range_members = self.members[start:end]
        if withscores:
            range_scores = [self.scoremap[member] for member in range_members]
            return list(zip(range_members, range_scores))
        else:
            return range_members