from sortedcontainers import SortedSet


def printnl(nl2, np2, l, v, atmi):
    """Append the neighbours of atom `atmi` that are not in `l` to nl2s.dat."""
    filename = 'nl2s.dat'
    # np2 holds the start offset of each atom's slice of the flat list nl2.
    n = SortedSet(nl2[np2[atmi]:np2[atmi + 1]])
    d = n.difference(l)

    with open(filename, "a") as myfile:
        myfile.write(str(list(d)))
        myfile.write('\n \n')
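For context, the difference call drops any already-listed entries from atom atmi's slice of the flat neighbour list nl2. A minimal sketch with invented inputs that mirror the parameter names (none of these values come from the source project):

from sortedcontainers import SortedSet

# Hypothetical flattened neighbour list: np2[i]..np2[i+1] bounds atom i's slice.
nl2 = [3, 5, 7, 2, 9, 4]
np2 = [0, 3, 6]   # atom 0 owns nl2[0:3], atom 1 owns nl2[3:6]
l = [5]           # entries to exclude

neighbours = SortedSet(nl2[np2[0]:np2[0 + 1]])   # SortedSet([3, 5, 7])
print(list(neighbours.difference(l)))            # [3, 7]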
Example #2
    def extract_FL1L(self):
        # Connect each length-one-loop transition to its place via F_L1L arcs.
        self.F_L1L = SortedSet()
        cpt = 1
        for transition1 in self.L1L:
            A = SortedSet()  # transitions that directly precede transition1
            B = SortedSet()  # transitions that directly follow transition1
            for transition2 in self.T_pr:
                if self.relations[transition2][transition1] == Relations.RIGHT_CAUSALITY:
                    print("for transition ", transition1, " : ", transition2)
                    A.add(transition2)
                if self.relations[transition1][transition2] == Relations.RIGHT_CAUSALITY:
                    print("for transition ", transition1, " : ", transition2)
                    B.add(transition2)
            '''
            The solution to tackle length-one loops in sound SWF-nets focuses on the
            pre- and post-processing phases of process mining. The key idea is to
            identify the length-one-loop tasks and the single place to which each task
            should be connected. Any length-one-loop task t can be identified by
            searching a loop-complete event log for traces containing the substring tt
            (a standalone sketch of this detection step follows this method). To
            determine the correct place p to which each t should be connected in the
            discovered net, we must check which transitions are directly followed by t
            but do not directly follow t (i.e. p is an output place of these
            transitions) and which transitions directly follow t but t does not
            directly follow them (i.e. p is an input place of these transitions).
            '''
            print(len(A) == len(B))

            place = 'p' + str(cpt)
            for transition in A.difference(B):
                # Arc from the loop transition into the place.
                transition_place = (transition1, place)
                self.F_L1L.add(transition_place)
            for transition in B.difference(A):
                # Arc from the place into the loop transition.
                transition_place = (place, transition1)
                self.F_L1L.add(transition_place)

            cpt += 1
        print(self.F_L1L)
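The quoted passage describes how length-one-loop tasks are detected: scan a loop-complete event log for a trace containing the substring tt. A standalone sketch of just that detection step (the log, trace format and function name are invented for illustration, not taken from the source project):

from sortedcontainers import SortedSet

def find_length_one_loop_tasks(log):
    # log: iterable of traces, each trace a sequence of task labels.
    l1l_tasks = SortedSet()
    for trace in log:
        for current_task, next_task in zip(trace, trace[1:]):
            if current_task == next_task:   # substring 'tt' found
                l1l_tasks.add(current_task)
    return l1l_tasks

# 'abccd' contains the substring 'cc', so 'c' is a length-one-loop task.
print(find_length_one_loop_tasks(['abccd', 'abd']))   # SortedSet(['c'])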
Example #3
def test_difference():
    # sortedcontainers 1.x API: the constructor accepted a load factor.
    temp = SortedSet(range(100), load=7)
    that = temp.difference(range(0, 10), range(10, 20))
    assert all(val == temp[val] for val in range(100))
    assert all((val + 20) == that[val] for val in range(80))
Example #4
def test_difference():
    temp = SortedSet(range(100))
    # sortedcontainers 2.x API: load was removed; tests call _reset instead.
    temp._reset(7)
    that = temp.difference(range(0, 10), range(10, 20))
    assert all(val == temp[val] for val in range(100))
    assert all((val + 20) == that[val] for val in range(80))
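Both variants exercise the same behaviour: difference accepts several iterables at once and returns a new SortedSet, leaving the original untouched. A minimal standalone demonstration with invented values:

from sortedcontainers import SortedSet

s = SortedSet(range(10))
d = s.difference(range(0, 3), range(3, 5))   # subtract several iterables at once
print(list(d))   # [5, 6, 7, 8, 9]
print(list(s))   # [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] -- s is unchanged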
Example #5
class Index:
    def __init__(self, tokenizer=None):
        self.postings = SortedDict()
        self.unit_list = SortedSet()
        self.unit_count = 0
        self.tokenizer = __default_tokenizer__ if tokenizer is None else tokenizer

    def add(self, unit: Unit):
        # Only units that carry at least one keyword are counted and indexed.
        if len(unit.keywords()) == 0:
            return
        self.unit_count += 1
        self.unit_list.add(unit)
        for word in unit.keywords():
            if word:
                if word in self.postings:
                    self.postings[word].add(unit)
                else:
                    self.postings[word] = SortedSet([unit])

    def count(self):
        return self.unit_count

    def search(self, query):
        """Search the index for the given query.

        :param query: string to search; it may contain the operators
            ``and``, ``or`` and ``not`` to refine the results.

        :returns: a set of document units that satisfy the query, or
            ``None`` if nothing matched (a usage sketch follows this class).
        """
        tokens = self.tokenizer(query)
        result, sub_result = None, None
        i = 0
        while i < len(tokens):
            if tokens[i] == 'not':
                # 'not word': every indexed unit except those containing word.
                i += 1
                if i < len(tokens) and tokens[i] in self.postings:
                    sub_result = self.unit_list.difference(self.postings[tokens[i]])
                else:
                    sub_result = self.unit_list
            else:
                sub_result = self.postings[tokens[i]] if tokens[i] in self.postings else None
            if i < len(tokens) and tokens[i] in ('and', 'or'):
                operator = tokens[i]
                i += 1
                if i < len(tokens) and tokens[i] == 'not':
                    i += 1
                    if i < len(tokens) and tokens[i] in self.postings:
                        sub_result = self.unit_list.difference(self.postings[tokens[i]])
                    else:
                        sub_result = self.unit_list
                else:
                    sub_result = self.postings[tokens[i]] if i < len(tokens) and tokens[i] in self.postings else None
                if result is not None and sub_result is not None:
                    # Fold the new operand into the running result.
                    if operator == 'and':
                        result = result.intersection(sub_result)
                    else:
                        result = result.union(sub_result)
            elif result is not None:
                if sub_result is not None:
                    result = result.union(sub_result)
            else:
                result = sub_result
            i += 1
        return result

    def keywords(self):
        return self.postings.keys()

    def __getitem__(self, word):
        return self.postings[word] if word in self.postings else None
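A small usage sketch of the index. The Unit stand-in, the tokenizer and the sample documents are invented; the source project defines its own Unit type and __default_tokenizer__. Note that Index's add annotation references Unit, so in a fully standalone run the stand-in must be defined before the class above:

from sortedcontainers import SortedDict, SortedSet

class Unit:
    # Minimal stand-in for the project's Unit type: a named bag of keywords.
    def __init__(self, name, words):
        self.name = name
        self._words = words

    def keywords(self):
        return self._words

    def __lt__(self, other):   # SortedSet needs a total order on its items
        return self.name < other.name

    def __repr__(self):
        return self.name

index = Index(tokenizer=str.split)
index.add(Unit('doc1', ['cat', 'dog']))
index.add(Unit('doc2', ['cat']))
print(index.search('cat and not dog'))   # SortedSet([doc2])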
Example #6
import numpy as np
from sortedcontainers import SortedDict, SortedList, SortedSet

# InvalidTimestampsInDataError and NotEnoughInputData are custom exception
# classes defined elsewhere in the source project.


class SparseTimeSeriesDataSet:
    # A dataset designed for dealing with sparse time series data that needs to be
    # kept in sync in time.
    def __init__(self, unique_timestamps=None, minimum_time_between_timestamps=None, mode='strict'):
        # Possible modes are 'strict', 'remove_difference' and 'union'
        # (a usage sketch follows this class).
        if unique_timestamps is not None:
            self.unique_timestamps = SortedSet(unique_timestamps)
        else:
            self.unique_timestamps = SortedSet()

        self.mode = mode
        self.all_raw_data = {}

        #dict of sorteddicts
        self.timestamp_indexed_data = {}

        self.minimum_time_between_timestamps = minimum_time_between_timestamps
        self.check_minimum_timestamp_interval()


    def __len__(self):
        return len(self.unique_timestamps)

    @classmethod
    def sample_data_at_intervals(cls, start_timestamp, end_timestamp, interval, data):
        # extends previous datapoint if one is missing
        timestamps = SortedList([x[0] for x in data])

        start_timestamp = int(start_timestamp)
        end_timestamp = int(end_timestamp)

        assert(timestamps[0] <= start_timestamp)
        assert(timestamps[-1] >= end_timestamp)
        sampled_data = []

        for timestamp in range(start_timestamp, end_timestamp + 1, interval):
            # Index of the last datapoint at or before this timestamp.
            index = timestamps.bisect_right(timestamp) - 1
            new_datapoint = data[index].copy()
            new_datapoint[0] = timestamp
            sampled_data.append(new_datapoint)

        return sampled_data

    @property
    def ids(self):
        return list(self.all_raw_data.keys())

    @property
    def first_timestamp(self):
        return self.unique_timestamps[0]

    def first_timestamp_for_id(self, id):
        return self.all_raw_data[id][0][0]

    @property
    def last_timestamp(self):
        return self.unique_timestamps[-1]

    def last_timestamp_for_id(self, id):
        return self.all_raw_data[id][-1][0]

    def first_unpadded_index_for_id(self, id):
        first_timestamp = self.first_timestamp_for_id(id)
        return self.unique_timestamps.index(first_timestamp)

    def last_unpadded_index_for_id(self, id):
        last_timestamp = self.last_timestamp_for_id(id)
        return self.unique_timestamps.index(last_timestamp)


    def check_minimum_timestamp_interval(self):
        if self.minimum_time_between_timestamps is not None:
            prev_timestamp = 0
            for timestamp in self.unique_timestamps:
                if timestamp-prev_timestamp < self.minimum_time_between_timestamps:
                    raise InvalidTimestampsInDataError("Found timestamps that have less than the required {} between them".format(self.minimum_time_between_timestamps))
                prev_timestamp = timestamp

    def add(self, id: str, data):
        if len(data) == 0:
            raise ValueError("Tried to add empty data for id {}".format(id))

        if id in self.all_raw_data and self.all_raw_data[id] == data:
            print("Data for id {} already added.".format(id))
            return

        self.all_raw_data[id] = data

        if len(data[0]) > 2:
            # we have multidimensional data
            timestamp_indexed_data = SortedDict([[int(x[0]), x[1:]] for x in data])
        else:
            timestamp_indexed_data = SortedDict([[int(x[0]), x[1]] for x in data])


        new_timestamps = {x[0] for x in data}
        difference = new_timestamps.difference(self.unique_timestamps)

        if self.mode == 'strict':
            if len(difference) != 0:
                raise InvalidTimestampsInDataError("Tried to add new data with id {} that includes timestamps that are not in the set of allowed timestamps. "
                                                   "Difference = {}".format(id, difference))
            opposite_difference = self.unique_timestamps.difference(new_timestamps)
            # for timestamp_current in opposite_difference:
            #     if timestamp_current > min(new_timestamps) and timestamp_current < max(new_timestamps):
            #         raise Exception("Missing timestamps in the middle of the data")

        elif self.mode == 'remove_difference':
            for timestamp_to_remove in difference:
                del timestamp_indexed_data[timestamp_to_remove]

        elif self.mode == 'union':
            self.unique_timestamps = self.unique_timestamps.union(new_timestamps)

        self.check_minimum_timestamp_interval()

        if len(timestamp_indexed_data) == 0:
            raise NotEnoughInputData("The data being added has zero length. If the mode is remove_difference, then this means that the new data has no timestamps in common with the required timestamps")

        self.timestamp_indexed_data[id] = timestamp_indexed_data


    def get_left_and_right_padding_required(self, ids):
        padding_required = []
        for id in ids:
            first_timestamp_for_id = self.first_timestamp_for_id(id)
            last_timestamp_for_id = self.last_timestamp_for_id(id)
            left_padding = self.unique_timestamps.index(first_timestamp_for_id)
            right_padding = len(self) - self.unique_timestamps.index(last_timestamp_for_id)-1

            assert(self.all_raw_data[id][0][0] == self.unique_timestamps[left_padding])
            assert(self.all_raw_data[id][-1][0] == self.unique_timestamps[-(right_padding+1)])

            padding_required.append([left_padding, right_padding])
        return padding_required

    def get_data_extend_missing_internal(self, id: str):
        # This function doesn't pad the left or right of the data, but it will fill
        # in any missing data in the middle using the previous value.
        timestamp_indexed_data = self.timestamp_indexed_data[id]

        timestamps_in_this_data = set(timestamp_indexed_data.keys())
        missing_timestamps = self.unique_timestamps - timestamps_in_this_data

        if len(missing_timestamps) > 0:
            for timestamp in missing_timestamps:
                entry_index = timestamp_indexed_data.bisect_right(timestamp)

                if entry_index != 0 and entry_index < len(timestamp_indexed_data):
                    # only pad in the middle of the data and not at the end
                    current_padded_value = timestamp_indexed_data.peekitem(entry_index - 1)[1]
                    timestamp_indexed_data[timestamp] = current_padded_value

        if isinstance(timestamp_indexed_data.peekitem(0)[1], (list, tuple)):
            to_return = [[x[0], *x[1]] for x in timestamp_indexed_data.items()]
        else:
            to_return = list(timestamp_indexed_data.items())
        return to_return


    def get_padded_data_in_sync(self, padding_val="extend"):
        # Always pads missing values in the middle or at the end of the data by
        # extending the previous value. The padding_val argument determines how to
        # pad the beginning, where there is no previous value to extend.
        padded_timestamp_indexed_data = {}

        for ric, timestamp_indexed_data in self.timestamp_indexed_data.items():
            # Note: this aliases the stored SortedDict, so the padding below also
            # mutates self.timestamp_indexed_data in place.
            padded_timestamp_indexed_data[ric] = timestamp_indexed_data

            timestamps_in_this_data = set(timestamp_indexed_data.keys())
            missing_timestamps = self.unique_timestamps - timestamps_in_this_data

            if len(missing_timestamps) > 0:
                for timestamp in missing_timestamps:
                    entry_index = padded_timestamp_indexed_data[ric].bisect_right(timestamp)
                    if entry_index == 0:
                        if padding_val == 'extend':
                            current_padded_value = padded_timestamp_indexed_data[ric].peekitem(entry_index)[1]
                        else:
                            current_padded_value = padding_val
                    else:
                        current_padded_value = padded_timestamp_indexed_data[ric].peekitem(entry_index-1)[1]

                    padded_timestamp_indexed_data[ric][timestamp] = current_padded_value

        return padded_timestamp_indexed_data


    def get_start_and_end_index_for_concat_data(self, keys):
        start_stop = []
        current_position = 0
        for id in keys:
            if id in self.timestamp_indexed_data:
                length_of_data = len(self.timestamp_indexed_data[id])
                start_stop.append([current_position,current_position+length_of_data])
                current_position += length_of_data
            else:
                print("warning: tried to concat data for keys {} but key {} is missing".format(keys, id))

        return start_stop


    def concat_data_unpadded(self, keys, as_numpy=True, with_timestamps=True):
        data_to_concat = []
        for id in keys:
            if id in self.timestamp_indexed_data:
                if with_timestamps:
                    data_to_concat.append(np.squeeze(self.timestamp_indexed_data[id].items()[:]))
                else:
                    data_to_concat.append(np.squeeze(self.timestamp_indexed_data[id].values()[:]))
            else:
                print("warning: tried to concat data for keys {} but key {} is missing".format(keys, id))


        if as_numpy:
            return np.concatenate(data_to_concat)
        else:
            return np.concatenate(data_to_concat).tolist()
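A hypothetical session showing the 'strict' mode and the forward-fill of internal gaps. The timestamps and values are invented, and this assumes the imports and exception classes noted at the top of the example are available:

ds = SparseTimeSeriesDataSet(unique_timestamps=[0, 10, 20, 30], mode='strict')

ds.add('a', [[0, 1.0], [10, 1.5], [30, 2.0]])   # ok: every timestamp is allowed
# ds.add('b', [[0, 1.0], [15, 9.9]])            # would raise: 15 is not allowed

# The gap at t=20 is filled with the previous value (from t=10).
print(ds.get_data_extend_missing_internal('a'))
# [(0, 1.0), (10, 1.5), (20, 1.5), (30, 2.0)]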