def rangeQuery(api, start, end):
    """Collect every 'Timestamp' record whose key lies in [start, end].

    Buckets strictly inside the range are taken wholesale; the two
    boundary buckets are decoded and filtered to the exact range.
    """
    TIMESTAMPE = 'Timestamp'
    collected = []

    # Interior buckets: every record qualifies, no filtering needed.
    for bucket in range(start // SCALE + 1, end // SCALE):
        hits = pointQuery(api, TIMESTAMPE, str(bucket))
        if hits:
            collected += hits

    # First (partial) bucket: keep only entries inside [start, end].
    raw = api.liststreamkeyitems(TIMESTAMPE, str(start // SCALE))['result']
    if raw:
        ordered = SortedList(getData(raw), key=lambda rec: rec.split(" ")[0])
        collected += list(ordered.irange(str(start), str(end)))

    # Last (partial) bucket: same trimming.
    raw = api.liststreamkeyitems(TIMESTAMPE, str(end // SCALE))['result']
    if raw:
        ordered = SortedList(getData(raw), key=lambda rec: rec.split(" ")[0])
        collected += list(ordered.irange(str(start), str(end)))

    return collected
def _find_general_vector(self):
    """Generate a (non-unit) vector that is general relative to edges.

    Collects the frangle of every half-edge together with its three
    quarter-turn rotations, then returns a vector whose frangle lies
    strictly between the two smallest collected frangles — so it is
    neither parallel nor perpendicular to any edge.

    Returns
    -------
    tuple(Fraction, Fraction)
        The general vector; ``(0, 1)`` when there are no edges.
    """
    frangles = SortedList()
    for half_edge in self.half_edges.values():
        frangle = half_edge.frangle
        # All four 90-degree rotations, so the midpoint below avoids
        # both parallel and perpendicular directions.
        for quarter in range(4):
            frangles.add((frangle + quarter * FRANGLE_90) % MAX_FRANGLE)
    if not frangles:
        # No edges: any direction is general; pick "up".
        return (Fraction(0), Fraction(1))
    first = frangles[0]
    # Smallest frangle strictly greater than `first`.
    second = next(frangles.irange(first, None, inclusive=(False, True)), None)
    assert second is not None
    assert first != second
    return frangle_unit_square_vector((first + second) / 2)
def rangeQuery(api, start, end):
    """Hierarchical range query.

    Fetches whole interior buckets from the coarsest level that spans at
    least one, then trims the two boundary buckets (read at the top
    level) down to [start, end].
    """
    matches = []

    # Coarsest level first; stop at the first level whose interior
    # covers at least one full bucket.
    for level in reversed(range(NLEVEL)):
        width = SCALE * STEP**level
        lo, hi = start // width + 1, end // width
        if lo < hi:
            for bucket in range(lo, hi):
                matches += pointQuery(api, PREFIX + str(width), str(bucket))
            break

    # Boundary buckets are always read at the coarsest level.
    width = SCALE * STEP**(NLEVEL - 1)
    for edge in (start, end):
        records = pointQuery(api, PREFIX + str(width), str(edge // width))
        if records:
            ordered = SortedList(records, key=lambda r: r.split(" ")[0])
            matches += list(ordered.irange(str(start), str(end)))
    return matches
def coincident_indices(list0, list1, delta):
    """Get indices of coincident times in both lists as dictionary.

    For each time in ``list0``, finds the closest time in ``list1``
    within ``delta`` (ties resolved toward the earlier candidate, as
    before). Indices refer to the *sorted* orders of both lists.

    Parameters
    ----------
    list0 : list
        List of times
    list1 : list
        List of times (preferably longer than ``list0``)
    delta : float
        Time-delta slice

    Returns
    -------
    coincidents : dict
        {index_list0: index_list1}
    """
    slist0 = SortedList(list0)
    slist1 = SortedList(list1)
    coincidents = {}
    for t0 in slist0:
        # All candidates within +/- delta of t0, in sorted order.
        times = list(slist1.irange(t0 - delta, t0 + delta))
        if times:
            # Closest candidate; min() keeps the first (earliest) on
            # ties, matching the previous np.argmin behaviour.
            closest = min(times, key=lambda t1: abs(t0 - t1))
            coincidents[slist0.index(t0)] = slist1.index(closest)
    return coincidents
def rangeQuery(api, start, end):
    """Return decoded records for every timestamp key in [start, end]."""
    stream = att_dict['T']
    key_entries = api.liststreamkeys(stream)["result"]
    # All keys are integer timestamps; sort them for range extraction.
    ordered = SortedList(int(entry['key']) for entry in key_entries)
    wanted = [str(ts) for ts in ordered.irange(start, end)]
    # One batched call for all matching keys (verbose=True).
    batch = api.liststreamkeys(stream, wanted, True)["result"]
    decoded = []
    for item in batch:
        decoded.append(bytes.fromhex(item["first"]["data"]).decode(ENCODE_FORMAT))
    return decoded
def rangeQuery(api, start, end):
    """Multi-level range query.

    Takes whole interior buckets from the first (coarsest) level that
    spans any, then adds the trimmed contents of the two boundary
    buckets read at the coarsest level.
    """
    found = []

    for level in reversed(range(NLEVEL)):
        span = SCALE * STEP**level
        first_full, past_last = start // span + 1, end // span
        if first_full < past_last:
            for bucket in range(first_full, past_last):
                found += pointQuery(api, PREFIX + str(span), str(bucket))
            break

    # Boundary buckets: filter entries down to [start, end].
    span = SCALE * STEP**(NLEVEL - 1)
    for boundary in (start // span, end // span):
        entries = pointQuery(api, PREFIX + str(span), str(boundary))
        if entries:
            keyed = SortedList(entries, key=lambda e: e.split(DELIMITER)[0])
            found += list(keyed.irange(str(start), str(end)))
    return found
def rangeQuery(api, start, end):
    """Fetch data for every timestamp key inside [start, end]."""
    stream = att_dict['T']
    key_entries = api.liststreamkeys(stream)["result"]
    ordered = SortedList(int(entry['key']) for entry in key_entries)
    output = []
    # One item-fetch per matching timestamp key.
    for ts in ordered.irange(start, end):
        output += getData(api.liststreamkeyitems(stream, str(ts))['result'])
    return output
def rangeQuery(api, start, end):
    """Gather every 'Timestamp' record with key in [start, end]."""
    TIMESTAMPE = 'Timestamp'
    gathered = []

    # Whole buckets strictly between the two boundary buckets.
    for bucket in range(start // SCALE + 1, end // SCALE):
        hits = pointQuery(api, TIMESTAMPE, str(bucket))
        if hits:
            gathered += hits

    # Boundary buckets: decode, then keep only entries in range.
    for boundary in (start // SCALE, end // SCALE):
        raw = api.liststreamkeyitems(TIMESTAMPE, str(boundary))['result']
        if raw:
            keyed = SortedList(getData(raw), key=lambda e: e.split(" ")[0])
            gathered += list(keyed.irange(str(start), str(end)))
    return gathered
def busiestServers(self, k: int, arrival: List[int], load: List[int]) -> List[int]:
    """Return the indices of the servers that handle the most requests.

    Request i prefers server i % k; if busy, the next free server with a
    larger index is used (wrapping to 0), otherwise the request drops.
    """
    free = SortedList(range(k))
    handled = [0] * k
    busy = []  # min-heap of [finish_time, server_index]
    for i, (start, duration) in enumerate(zip(arrival, load)):
        preferred = i % k
        # Release every server whose job finished by `start`.
        while busy and busy[0][0] <= start:
            _, freed = heappop(busy)
            free.add(freed)
        # First free server at or after the preferred one, wrapping.
        server = next(free.irange(preferred, k - 1), None)
        if server is None:
            server = next(free.irange(0, k - 1), None)
        if server is not None:
            handled[server] += 1
            free.remove(server)
            heappush(busy, [start + duration, server])
    top = max(handled)
    return [i for i, count in enumerate(handled) if count == top]
def containsNearbyAlmostDuplicate(self, nums, k, t):
    """True if some pair i != j has |nums[i]-nums[j]| <= t and |i-j| <= k.

    Keeps a sorted sliding window of the last k values and checks the
    nearest neighbours (floor and ceiling) of each incoming value.
    """
    if not nums:
        return False
    window = SortedList()
    for i, value in enumerate(nums):
        # Nearest window elements below and above `value`; the
        # sys.maxsize sentinel makes a missing neighbour fail the test.
        below = next(window.irange(maximum=value, reverse=True), sys.maxsize)
        above = next(window.irange(minimum=value, reverse=False), sys.maxsize)
        if abs(above - value) <= t or abs(value - below) <= t:
            return True
        window.add(value)
        if len(window) > k:
            window.discard(nums[i - k])
    return False
class HitCounter:
    """Records hit timestamps and answers count queries over them."""

    # Sorted multiset of all recorded timestamps.
    records: SortedList

    def __init__(self):
        self.records = SortedList()

    def record(self, timestamp: int):
        """Store one hit at `timestamp` (duplicates allowed)."""
        self.records.add(timestamp)

    def total(self) -> int:
        """Total number of hits recorded so far."""
        return len(self.records)

    def range(self, lower, upper) -> int:
        """Number of hits with lower <= timestamp <= upper.

        Uses two bisections, O(log n), instead of iterating the whole
        irange slice (O(answer size)).
        """
        return self.records.bisect_right(upper) - self.records.bisect_left(lower)
def loadprocessedcsvdata(validcsvpath):
    """Load pkmn stats from csv data.

    Reads the processed CSV at `validcsvpath`, keeps the best
    `maxresultsize` rows by score (plus seeded choices regardless of
    score threshold), and fills the module-level `processedcsv` table
    and `pkmnstats` numeric matrix.

    Exits the process when the file does not exist.
    """
    if not os.path.exists(validcsvpath):
        print('could not find ' + validcsvpath)
        sys.exit(1)
    scoreidx = processedheaders.index('Score')
    processedcsvdata = SortedList(key=lambda x: float(x[scoreidx]))  # ascending order
    csvheaders = dict()
    csvdata = []
    with open(validcsvpath, 'r') as csvdatafile:
        csvreader = csv.reader(csvdatafile, delimiter=',')
        # `is` comparison of ints replaced by `==`; enumerate replaces
        # the hand-rolled row/column counters.
        for i, row in enumerate(csvreader):
            if i == 0:
                for j, col in enumerate(row):
                    csvheaders[col] = j
            else:
                csvdata.append(row)
    for pkmndata in csvdata:
        # Skip malformed rows whose width does not match the header.
        if len(pkmndata) == len(csvheaders):
            if (float(pkmndata[scoreidx]) < pkmn_score_thresh
                    and pkmndata[csvheaders['Name']] not in seededchoices_names):
                continue
            processedcsvdata.add(pkmndata)
            if len(processedcsvdata) > maxresultsize:
                # Drop the current worst (highest-score) row.
                processedcsvdata.pop()
    global pkmnstats
    global processedcsv
    processedcsv = [processedheaders]
    hpidx = processedheaders.index('HP')
    for pkmnidx, somerow in enumerate(processedcsvdata.irange()):
        processedcsv.append(somerow)
        # Copy the numeric stat columns (HP onward) into the matrix.
        for i in range(len(processedheaders) - hpidx):
            pkmnstats[pkmnidx, i] = float(somerow[i + hpidx])
def getNumber(self, root: Optional[TreeNode], ops: List[List[int]]) -> int:
    """Count nodes that end up colored after applying `ops` in order.

    Scans the operations backwards: the last operation touching a value
    decides its final color, so each value is settled — and removed from
    the candidate set — the first time the reverse scan covers it.
    """
    values = []

    def inorder(node):
        if node is not None:
            inorder(node.left)
            values.append(node.val)
            inorder(node.right)

    inorder(root)
    remaining = SortedList(values)
    colored = 0
    for op, lo, hi in reversed(ops):
        settled = list(remaining.irange(lo, hi))
        if op == 1:  # this (latest) op paints [lo, hi]
            colored += len(settled)
        for value in settled:
            remaining.remove(value)
    return colored
def find(self, left, right):
    """Merge the interval [left, right] with overlapping tracked intervals.

    Assumes ``self.track`` maps interval start -> interval end — TODO
    confirm against callers. Absorbed entries are popped from
    ``self.track``; the merged (start, end) span is returned.
    """
    # Sorted view of all tracked interval starts.
    sl = SortedList(self.track.keys())
    l = sl.bisect_left(left)
    r = sl.bisect_left(right)
    if l > 0:
        # Peek at the interval just before `left`; keep it only if its
        # end reaches `left` (i.e. it actually overlaps).
        l -= 1
        if self.track[sl[l]] < left:
            l += 1
    if l == r:
        # No tracked interval overlaps [left, right].
        return (left, right)
    else:
        # Merged start: earlier of the first candidate's start and `left`.
        i = min(sl[l], left)
        if r == len(sl):
            r -= 1
        # Merged end: later of the last candidate's end and `right`.
        # NOTE(review): when sl[r] starts strictly beyond `right`, that
        # interval is still absorbed below — verify this is intended
        # (e.g. touching/adjacent intervals) rather than an off-by-one.
        j = max(self.track[sl[r]], right)
        # Drop every absorbed interval from the tracker.
        for it in list(sl.irange(sl[l], sl[r])):
            self.track.pop(it)
        return (i, j)
def avoidFlood(self, rains: List[int]) -> List[int]:
    """Schedule drying days so no full lake is rained on twice.

    Returns a plan where rain days are -1, each zero day names the lake
    dried that day (or 1 if unused), or [] when flooding is unavoidable.
    """
    plan = [1] * len(rains)
    last_rain = {}
    full = set()
    free_days = SortedList()
    for day, lake in enumerate(rains):
        if lake == 0:
            free_days.add(day)
            continue
        if lake in full:
            # Need a dry day strictly after the lake last filled.
            dry_day = next(
                free_days.irange(minimum=last_rain[lake],
                                 inclusive=(False, True)), None)
            if dry_day is None:
                return []
            plan[dry_day] = lake
            free_days.remove(dry_day)
        else:
            full.add(lake)
        plan[day] = -1
        last_rain[lake] = day
    return plan
def filter_maximal(itemsets):
    """filter maximal itemsets from a set of itemsets

    Parameters
    ----------
    itemsets: Iterator[frozenset]
        a set of itemsets

    Returns
    -------
    SortedList
    """
    maximals = SortedList(key=len)
    # Largest first: any superset of a candidate is already in `maximals`.
    for candidate in sorted(itemsets, key=len, reverse=True):
        # Only itemsets at least as large can be supersets.
        bigger = maximals.irange(candidate)
        if not any(kept > candidate for kept in bigger):
            maximals.add(candidate)  # O(log(len(maximals)))
    return maximals
def filter_minimal(itemsets):
    """filter minimal itemsets from a set of itemsets

    Parameters
    ----------
    itemsets: Iterator[frozenset]
        a set of itemsets

    Returns
    -------
    SortedList
    """
    minimals = SortedList(key=len)
    # Smallest first: any subset of a candidate is already in `minimals`.
    for candidate in sorted(itemsets, key=len):
        # Only itemsets no larger can be subsets.
        smaller = minimals.irange(None, candidate)
        if not any(kept < candidate for kept in smaller):
            minimals.add(candidate)
    return minimals
class Timeline:
    """
    Ordered set of segments.

    A timeline can be seen as an ordered set of non-empty segments (Segment).
    Segments can overlap -- though adding an already existing segment to a
    timeline does nothing.

    Parameters
    ----------
    segments : Segment iterator, optional
        initial set of (non-empty) segments
    uri : string, optional
        name of segmented resource

    Returns
    -------
    timeline : Timeline
        New timeline
    """

    @classmethod
    def from_df(cls, df: pd.DataFrame,
                uri: Optional[str] = None) -> 'Timeline':
        # Build a timeline from the PYANNOTE_SEGMENT column of a dataframe.
        segments = list(df[PYANNOTE_SEGMENT])
        timeline = cls(segments=segments, uri=uri)
        return timeline

    def __init__(self,
                 segments: Optional[Iterable[Segment]] = None,
                 uri: Optional[str] = None):
        if segments is None:
            segments = ()

        # set of segments (used for checking inclusion)
        segments_set = set(segments)

        if any(not segment for segment in segments_set):
            raise ValueError('Segments must not be empty.')

        self.segments_set_ = segments_set

        # sorted list of segments (used for sorted iteration)
        self.segments_list_ = SortedList(segments_set)

        # sorted list of (possibly redundant) segment boundaries
        boundaries = (boundary
                      for segment in segments_set
                      for boundary in segment)
        self.segments_boundaries_ = SortedList(boundaries)

        # path to (or any identifier of) segmented resource
        self.uri: Optional[str] = uri

    def __len__(self):
        """Number of segments

        >>> len(timeline)  # timeline contains three segments
        3
        """
        return len(self.segments_set_)

    def __nonzero__(self):
        # Python 2 truthiness protocol; delegates to __bool__.
        return self.__bool__()

    def __bool__(self):
        """Emptiness

        >>> if timeline:
        ...    # timeline is not empty
        ... else:
        ...    # timeline is empty
        """
        return len(self.segments_set_) > 0

    def __iter__(self) -> Iterable[Segment]:
        """Iterate over segments (in chronological order)

        >>> for segment in timeline:
        ...     # do something with the segment

        See also
        --------
        :class:`pyannote.core.Segment` describes how segments are sorted.
        """
        return iter(self.segments_list_)

    def __getitem__(self, k: int) -> Segment:
        """Get segment by index (in chronological order)

        >>> first_segment = timeline[0]
        >>> penultimate_segment = timeline[-2]
        """
        return self.segments_list_[k]

    def __eq__(self, other: 'Timeline'):
        """Equality

        Two timelines are equal if and only if their segments are equal.

        >>> timeline1 = Timeline([Segment(0, 1), Segment(2, 3)])
        >>> timeline2 = Timeline([Segment(2, 3), Segment(0, 1)])
        >>> timeline3 = Timeline([Segment(2, 3)])
        >>> timeline1 == timeline2
        True
        >>> timeline1 == timeline3
        False
        """
        return self.segments_set_ == other.segments_set_

    def __ne__(self, other: 'Timeline'):
        """Inequality"""
        return self.segments_set_ != other.segments_set_

    def index(self, segment: Segment) -> int:
        """Get index of (existing) segment

        Parameters
        ----------
        segment : Segment
            Segment that is being looked for.

        Returns
        -------
        position : int
            Index of `segment` in timeline

        Raises
        ------
        ValueError if `segment` is not present.
        """
        return self.segments_list_.index(segment)

    def add(self, segment: Segment) -> 'Timeline':
        """Add a segment (in place)

        Parameters
        ----------
        segment : Segment
            Segment that is being added

        Returns
        -------
        self : Timeline
            Updated timeline.

        Note
        ----
        If the timeline already contains this segment, it will not be added
        again, as a timeline is meant to be a **set** of segments (not a list).

        If the segment is empty, it will not be added either, as a timeline
        only contains non-empty segments.
        """
        segments_set_ = self.segments_set_
        if segment in segments_set_ or not segment:
            return self

        segments_set_.add(segment)
        self.segments_list_.add(segment)

        segments_boundaries_ = self.segments_boundaries_
        segments_boundaries_.add(segment.start)
        segments_boundaries_.add(segment.end)

        return self

    def remove(self, segment: Segment) -> 'Timeline':
        """Remove a segment (in place)

        Parameters
        ----------
        segment : Segment
            Segment that is being removed

        Returns
        -------
        self : Timeline
            Updated timeline.

        Note
        ----
        If the timeline does not contain this segment, this does nothing
        """
        segments_set_ = self.segments_set_
        if segment not in segments_set_:
            return self

        segments_set_.remove(segment)
        self.segments_list_.remove(segment)

        segments_boundaries_ = self.segments_boundaries_
        segments_boundaries_.remove(segment.start)
        segments_boundaries_.remove(segment.end)

        return self

    def discard(self, segment: Segment) -> 'Timeline':
        """Same as `remove`

        See also
        --------
        :func:`pyannote.core.Timeline.remove`
        """
        return self.remove(segment)

    def __ior__(self, timeline: 'Timeline') -> 'Timeline':
        # In-place union operator: timeline |= other.
        return self.update(timeline)

    def update(self, timeline: 'Timeline') -> 'Timeline':
        """Add every segments of an existing timeline (in place)

        Parameters
        ----------
        timeline : Timeline
            Timeline whose segments are being added

        Returns
        -------
        self : Timeline
            Updated timeline

        Note
        ----
        Only segments that do not already exist will be added, as a timeline
        is meant to be a **set** of segments (not a list).
        """
        segments_set = self.segments_set_
        segments_set |= timeline.segments_set_

        # sorted list of segments (used for sorted iteration)
        self.segments_list_ = SortedList(segments_set)

        # sorted list of (possibly redundant) segment boundaries
        boundaries = (boundary
                      for segment in segments_set
                      for boundary in segment)
        self.segments_boundaries_ = SortedList(boundaries)

        return self

    def __or__(self, timeline: 'Timeline') -> 'Timeline':
        # Union operator: timeline | other.
        return self.union(timeline)

    def union(self, timeline: 'Timeline') -> 'Timeline':
        """Create new timeline made of union of segments

        Parameters
        ----------
        timeline : Timeline
            Timeline whose segments are being added

        Returns
        -------
        union : Timeline
            New timeline containing the union of both timelines.

        Note
        ----
        This does the same as timeline.update(...) except it returns a new
        timeline, and the original one is not modified.
        """
        segments = self.segments_set_ | timeline.segments_set_
        return Timeline(segments=segments, uri=self.uri)

    def co_iter(self, other: 'Timeline') -> Iterator[Tuple[Segment, Segment]]:
        """Iterate over pairs of intersecting segments

        >>> timeline1 = Timeline([Segment(0, 2), Segment(1, 2), Segment(3, 4)])
        >>> timeline2 = Timeline([Segment(1, 3), Segment(3, 5)])
        >>> for segment1, segment2 in timeline1.co_iter(timeline2):
        ...     print(segment1, segment2)
        (<Segment(0, 2)>, <Segment(1, 3)>)
        (<Segment(1, 2)>, <Segment(1, 3)>)
        (<Segment(3, 4)>, <Segment(3, 5)>)

        Parameters
        ----------
        other : Timeline
            Second timeline

        Returns
        -------
        iterable : (Segment, Segment) iterable
            Yields pairs of intersecting segments in chronological order.
        """
        for segment in self.segments_list_:

            # iterate over segments that starts before 'segment' ends
            temp = Segment(start=segment.end, end=segment.end)
            for other_segment in other.segments_list_.irange(maximum=temp):
                if segment.intersects(other_segment):
                    yield segment, other_segment

    def crop_iter(self,
                  support: Support,
                  mode: CropMode = 'intersection',
                  returns_mapping: bool = False) \
            -> Iterator[Union[Tuple[Segment, Segment], Segment]]:
        """Like `crop` but returns a segment iterator instead

        See also
        --------
        :func:`pyannote.core.Timeline.crop`
        """

        if mode not in {'loose', 'strict', 'intersection'}:
            raise ValueError("Mode must be one of 'loose', 'strict', or "
                             "'intersection'.")

        if not isinstance(support, (Segment, Timeline)):
            raise TypeError("Support must be a Segment or a Timeline.")

        if isinstance(support, Segment):
            # corner case where "support" is empty
            if support:
                segments = [support]
            else:
                segments = []

            support = Timeline(segments=segments, uri=self.uri)
            for yielded in self.crop_iter(support, mode=mode,
                                          returns_mapping=returns_mapping):
                yield yielded
            return

        # loose mode
        if mode == 'loose':
            for segment, _ in self.co_iter(support):
                yield segment
            return

        # strict mode
        if mode == 'strict':
            for segment, other_segment in self.co_iter(support):
                if segment in other_segment:
                    yield segment
            return

        # intersection mode
        for segment, other_segment in self.co_iter(support):
            mapped_to = segment & other_segment
            if not mapped_to:
                continue
            if returns_mapping:
                yield segment, mapped_to
            else:
                yield mapped_to

    def crop(self,
             support: Support,
             mode: CropMode = 'intersection',
             returns_mapping: bool = False) \
            -> Union['Timeline', Tuple['Timeline', Dict[Segment, Segment]]]:
        """Crop timeline to new support

        Parameters
        ----------
        support : Segment or Timeline
            If `support` is a `Timeline`, its support is used.
        mode : {'strict', 'loose', 'intersection'}, optional
            Controls how segments that are not fully included in `support` are
            handled. 'strict' mode only keeps fully included segments. 'loose'
            mode keeps any intersecting segment. 'intersection' mode keeps any
            intersecting segment but replace them by their actual intersection.
        returns_mapping : bool, optional
            In 'intersection' mode, return a dictionary whose keys are segments
            of the cropped timeline, and values are list of the original
            segments that were cropped. Defaults to False.

        Returns
        -------
        cropped : Timeline
            Cropped timeline
        mapping : dict
            When 'returns_mapping' is True, dictionary whose keys are segments
            of 'cropped', and values are lists of corresponding original
            segments.

        Examples
        --------

        >>> timeline = Timeline([Segment(0, 2), Segment(1, 2), Segment(3, 4)])
        >>> timeline.crop(Segment(1, 3))
        <Timeline(uri=None, segments=[<Segment(1, 2)>])>

        >>> timeline.crop(Segment(1, 3), mode='loose')
        <Timeline(uri=None, segments=[<Segment(0, 2)>, <Segment(1, 2)>])>

        >>> timeline.crop(Segment(1, 3), mode='strict')
        <Timeline(uri=None, segments=[<Segment(1, 2)>])>

        >>> cropped, mapping = timeline.crop(Segment(1, 3), returns_mapping=True)
        >>> print(mapping)
        {<Segment(1, 2)>: [<Segment(0, 2)>, <Segment(1, 2)>]}
        """
        if mode == 'intersection' and returns_mapping:
            segments, mapping = [], {}
            for segment, mapped_to in self.crop_iter(support,
                                                     mode='intersection',
                                                     returns_mapping=True):
                segments.append(mapped_to)
                mapping[mapped_to] = mapping.get(mapped_to, list()) + [segment]
            return Timeline(segments=segments, uri=self.uri), mapping

        return Timeline(segments=self.crop_iter(support, mode=mode),
                        uri=self.uri)

    def overlapping(self, t: float) -> List[Segment]:
        """Get list of segments overlapping `t`

        Parameters
        ----------
        t : float
            Timestamp, in seconds.

        Returns
        -------
        segments : list
            List of all segments of timeline containing time t
        """
        return list(self.overlapping_iter(t))

    def overlapping_iter(self, t: float) -> Iterator[Segment]:
        """Like `overlapping` but returns a segment iterator instead

        See also
        --------
        :func:`pyannote.core.Timeline.overlapping`
        """
        # Probe segment at time t; only segments starting at or before t
        # can overlap it. (The loop variable deliberately reuses the name.)
        segment = Segment(start=t, end=t)
        for segment in self.segments_list_.irange(maximum=segment):
            if segment.overlaps(t):
                yield segment

    def __str__(self):
        """Human-readable representation

        >>> timeline = Timeline(segments=[Segment(0, 10), Segment(1, 13.37)])
        >>> print(timeline)
        [[ 00:00:00.000 -->  00:00:10.000]
         [ 00:00:01.000 -->  00:00:13.370]]
        """
        n = len(self.segments_list_)
        string = "["
        for i, segment in enumerate(self.segments_list_):
            string += str(segment)
            string += "\n " if i + 1 < n else ""
        string += "]"
        return string

    def __repr__(self):
        """Computer-readable representation

        >>> Timeline(segments=[Segment(0, 10), Segment(1, 13.37)])
        <Timeline(uri=None, segments=[<Segment(0, 10)>, <Segment(1, 13.37)>])>
        """
        return "<Timeline(uri=%s, segments=%s)>" % (self.uri,
                                                    list(self.segments_list_))

    def __contains__(self, included: Union[Segment, 'Timeline']):
        """Inclusion

        Check whether every segment of `included` does exist in timeline.

        Parameters
        ----------
        included : Segment or Timeline
            Segment or timeline being checked for inclusion

        Returns
        -------
        contains : bool
            True if every segment in `included` exists in timeline,
            False otherwise

        Examples
        --------
        >>> timeline1 = Timeline(segments=[Segment(0, 10), Segment(1, 13.37)])
        >>> timeline2 = Timeline(segments=[Segment(0, 10)])
        >>> timeline1 in timeline2
        False
        >>> timeline2 in timeline1
        True
        >>> Segment(1, 13.37) in timeline1
        True
        """
        if isinstance(included, Segment):
            return included in self.segments_set_

        elif isinstance(included, Timeline):
            return self.segments_set_.issuperset(included.segments_set_)

        else:
            raise TypeError(
                'Checking for inclusion only supports Segment and '
                'Timeline instances')

    def empty(self) -> 'Timeline':
        """Return an empty copy

        Returns
        -------
        empty : Timeline
            Empty timeline using the same 'uri' attribute.
        """
        return Timeline(uri=self.uri)

    def copy(self, segment_func: Optional[Callable[[Segment], Segment]] = None) \
            -> 'Timeline':
        """Get a copy of the timeline

        If `segment_func` is provided, it is applied to each segment first.

        Parameters
        ----------
        segment_func : callable, optional
            Callable that takes a segment as input, and returns a segment.
            Defaults to identity function (segment_func(segment) = segment)

        Returns
        -------
        timeline : Timeline
            Copy of the timeline
        """

        # if segment_func is not provided
        # just add every segment
        if segment_func is None:
            return Timeline(segments=self.segments_list_, uri=self.uri)

        # if is provided
        # apply it to each segment before adding them
        return Timeline(segments=[segment_func(s) for s in self.segments_list_],
                        uri=self.uri)

    def extent(self) -> Segment:
        """Extent

        The extent of a timeline is the segment of minimum duration that
        contains every segments of the timeline. It is unique, by definition.
        The extent of an empty timeline is an empty segment.

        A picture is worth a thousand words::

            timeline
            |------|    |------|     |----|
              |--|    |-----|     |----------|

            timeline.extent()
            |--------------------------------|

        Returns
        -------
        extent : Segment
            Timeline extent

        Examples
        --------
        >>> timeline = Timeline(segments=[Segment(0, 1), Segment(9, 10)])
        >>> timeline.extent()
        <Segment(0, 10)>
        """
        if self.segments_set_:
            segments_boundaries_ = self.segments_boundaries_
            start = segments_boundaries_[0]
            end = segments_boundaries_[-1]
            return Segment(start=start, end=end)
        else:
            # Empty timeline: return an (empty) inverted-infinite segment.
            import numpy as np
            return Segment(start=np.inf, end=-np.inf)

    def support_iter(self) -> Iterator[Segment]:
        """Like `support` but returns a segment generator instead

        See also
        --------
        :func:`pyannote.core.Timeline.support`
        """

        # The support of an empty timeline is an empty timeline.
        if not self:
            return

        # Principle:
        #   * gather all segments with no gap between them
        #   * add one segment per resulting group (their union |)
        # Note:
        #   Since segments are kept sorted internally,
        #   there is no need to perform an exhaustive segment clustering.
        #   We just have to consider them in their natural order.

        # Initialize new support segment
        # as very first segment of the timeline
        new_segment = self.segments_list_[0]

        for segment in self:

            # If there is no gap between new support segment and next segment,
            if not (segment ^ new_segment):
                # Extend new support segment using next segment
                new_segment |= segment

            # If there actually is a gap,
            else:
                yield new_segment

                # Initialize new support segment as next segment
                # (right after the gap)
                new_segment = segment

        # Add new segment to the timeline support
        yield new_segment

    def support(self) -> 'Timeline':
        """Timeline support

        The support of a timeline is the timeline with the minimum number of
        segments with exactly the same time span as the original timeline. It
        is (by definition) unique and does not contain any overlapping
        segments.

        A picture is worth a thousand words::

            timeline
            |------|    |------|     |----|
              |--|    |-----|     |----------|

            timeline.support()
            |------|  |--------|  |----------|

        Returns
        -------
        support : Timeline
            Timeline support
        """
        return Timeline(segments=self.support_iter(), uri=self.uri)

    def duration(self) -> float:
        """Timeline duration

        The timeline duration is the sum of the durations of the segments
        in the timeline support.

        Returns
        -------
        duration : float
            Duration of timeline support, in seconds.
        """

        # The timeline duration is the sum of the durations
        # of the segments in the timeline support.
        return sum(s.duration for s in self.support_iter())

    def gaps_iter(self, support: Optional[Support] = None) -> Iterator[Segment]:
        """Like `gaps` but returns a segment generator instead

        See also
        --------
        :func:`pyannote.core.Timeline.gaps`
        """

        if support is None:
            support = self.extent()

        if not isinstance(support, (Segment, Timeline)):
            raise TypeError("unsupported operand type(s) for -':"
                            "%s and Timeline." % type(support).__name__)

        # segment support
        if isinstance(support, Segment):

            # `end` is meant to store the end time of former segment
            # initialize it with beginning of provided segment `support`
            end = support.start

            # support on the intersection of timeline and provided segment
            for segment in self.crop(support, mode='intersection').support():

                # add gap between each pair of consecutive segments
                # if there is no gap, segment is empty, therefore not added
                gap = Segment(start=end, end=segment.start)
                if gap:
                    yield gap

                # keep track of the end of former segment
                end = segment.end

            # add final gap (if not empty)
            gap = Segment(start=end, end=support.end)
            if gap:
                yield gap

        # timeline support
        elif isinstance(support, Timeline):

            # yield gaps for every segment in support of provided timeline
            for segment in support.support():
                for gap in self.gaps_iter(support=segment):
                    yield gap

    def gaps(self, support: Optional[Support] = None) \
            -> 'Timeline':
        """Gaps

        A picture is worth a thousand words::

            timeline
            |------|    |------|     |----|
              |--|    |-----|     |----------|

            timeline.gaps()
                   |--|        |--|

        Parameters
        ----------
        support : None, Segment or Timeline
            Support in which gaps are looked for. Defaults to timeline extent

        Returns
        -------
        gaps : Timeline
            Timeline made of all gaps from original timeline, and delimited
            by provided support

        See also
        --------
        :func:`pyannote.core.Timeline.extent`
        """
        return Timeline(segments=self.gaps_iter(support=support),
                        uri=self.uri)

    def segmentation(self) -> 'Timeline':
        """Segmentation

        Create the unique timeline with same support and same set of segment
        boundaries as original timeline, but with no overlapping segments.

        A picture is worth a thousand words::

            timeline
            |------|    |------|     |----|
              |--|    |-----|     |----------|

            timeline.segmentation()
            |-|--|-|  |-|---|--|  |--|----|--|

        Returns
        -------
        timeline : Timeline
            (unique) timeline with same support and same set of segment
            boundaries as original timeline, but with no overlapping segments.
        """
        # COMPLEXITY: O(n)
        support = self.support()

        # COMPLEXITY: O(n.log n)
        # get all boundaries (sorted)
        # |------|    |------|     |----|
        #   |--|    |-----|     |----------|
        # becomes
        # | |  | |  | |   |  |  |  |    |  |
        timestamps = set([])
        for (start, end) in self:
            timestamps.add(start)
            timestamps.add(end)
        timestamps = sorted(timestamps)

        # create new partition timeline
        # | |  | |  | |   |  |  |  |    |  |
        # becomes
        # |-|--|-|  |-|---|--|  |--|----|--|

        # start with an empty copy
        timeline = Timeline(uri=self.uri)

        if len(timestamps) == 0:
            return Timeline(uri=self.uri)

        segments = []
        start = timestamps[0]
        for end in timestamps[1:]:
            # only add segments that are covered by original timeline
            segment = Segment(start=start, end=end)
            if segment and support.overlapping(segment.middle):
                segments.append(segment)
            # next segment...
            start = end

        return Timeline(segments=segments, uri=self.uri)

    def to_annotation(self,
                      generator: Union[str, Iterable[Label], None] = 'string',
                      modality: Optional[str] = None) \
            -> 'Annotation':
        """Turn timeline into an annotation

        Each segment is labeled by a unique label.

        Parameters
        ----------
        generator : 'string', 'int', or iterable, optional
            If 'string' (default) generate string labels. If 'int', generate
            integer labels. If iterable, use it to generate labels.
        modality : str, optional

        Returns
        -------
        annotation : Annotation
            Annotation
        """
        from .annotation import Annotation
        annotation = Annotation(uri=self.uri, modality=modality)
        if generator == 'string':
            from .utils.generators import string_generator
            generator = string_generator()
        elif generator == 'int':
            from .utils.generators import int_generator
            generator = int_generator()

        for segment in self:
            annotation[segment] = next(generator)

        return annotation

    def write_uem(self, file: TextIO):
        """Dump timeline to file using UEM format

        Parameters
        ----------
        file : file object

        Usage
        -----
        >>> with open('file.uem', 'w') as file:
        ...    timeline.write_uem(file)
        """

        uri = self.uri if self.uri else "<NA>"

        for segment in self:
            line = f"{uri} 1 {segment.start:.3f} {segment.end:.3f}\n"
            file.write(line)

    def for_json(self):
        """Serialization

        See also
        --------
        :mod:`pyannote.core.json`
        """

        data = {PYANNOTE_JSON: self.__class__.__name__}
        data[PYANNOTE_JSON_CONTENT] = [s.for_json() for s in self]

        if self.uri:
            data[PYANNOTE_URI] = self.uri

        return data

    @classmethod
    def from_json(cls, data):
        """Deserialization

        See also
        --------
        :mod:`pyannote.core.json`
        """

        uri = data.get(PYANNOTE_URI, None)
        segments = [Segment.from_json(s) for s in data[PYANNOTE_JSON_CONTENT]]
        return cls(segments=segments, uri=uri)

    def _repr_png_(self):
        """IPython notebook support

        See also
        --------
        :mod:`pyannote.core.notebook`
        """
        from .notebook import repr_timeline
        return repr_timeline(self)
def test_irange():
    """Exhaustively exercise SortedList.irange on 0..52 with a small load."""
    sl = SortedList()
    sl._reset(7)

    # Empty list yields nothing.
    assert list(sl.irange()) == []

    values = list(range(53))
    sl.update(values)

    # Every (lo, hi) pair, under every inclusivity combination.
    for lo in range(53):
        for hi in range(lo, 53):
            both = values[lo:(hi + 1)]
            assert list(sl.irange(lo, hi)) == both
            assert list(sl.irange(lo, hi, reverse=True)) == both[::-1]
            assert list(sl.irange(lo, hi, (True, False))) == list(range(lo, hi))
            assert list(sl.irange(lo, hi, (False, True))) == list(range(lo + 1, hi + 1))
            assert list(sl.irange(lo, hi, (False, False))) == list(range(lo + 1, hi))

    # Open-ended queries.
    for lo in range(53):
        assert list(sl.irange(lo)) == list(range(lo, 53))

    for hi in range(53):
        assert list(sl.irange(None, hi, (True, False))) == list(range(0, hi))

    # Bounds outside the stored values.
    assert list(sl.irange(inclusive=(False, False))) == values
    assert list(sl.irange(53)) == []
    assert list(sl.irange(None, 53, (True, False))) == values
def _all_segment_intersections_no_horizontal(segments):  # noqa
    """Yield (point, segments) pairs for every point where two or more of the
    given (non-horizontal, non-degenerate, unique) segments meet, via a
    Bentley-Ottmann-style sweep line moving upward along the Y axis.
    """
    # Must be unique
    assert len(set(segments)) == len(segments)
    segments = list(segments)
    # Must not be degenerate
    for segment in segments:
        assert segment[0] != segment[1]

    # Use the convention from the book: sweep on Y axis
    def event_key(pt):
        return (pt[1], pt[0])

    # From point to list of segments
    event_queue = SortedDict(event_key)

    def add_event(pt, segment_key=None):
        # segment_key is None for pure "segment ends / possible crossing" events
        if pt not in event_queue:
            event_queue[pt] = []
        if segment_key is not None:
            event_queue[pt].append(segment_key)

    # Seed the queue: each segment contributes a start event (carrying its
    # sweep key) at its lower endpoint and a bare event at its upper endpoint.
    for i, segment in enumerate(segments):
        if event_key(segment[0]) < event_key(segment[1]):
            add_event(segment[0], _SweepKey(segment, segment[0]))
            add_event(segment[1], None)
        else:
            add_event(segment[0], None)
            add_event(segment[1], _SweepKey(segment, segment[1]))

    # Sweep status: segments currently crossing the sweep line, ordered by x
    active = SortedList()
    y = -math.inf
    while len(event_queue) > 0:
        v = event_queue.popitem(0)
        pt, segstarts = v
        # Can't be > since while there are no horizontal segments,
        # there can still be points in horizontal relation to one another
        assert pt[1] >= y
        y = pt[1]

        # Find all segments within the event point
        fake_segment = ((pt[0], pt[1]), (pt[0], pt[1] + 1))
        fake_key = _SweepKey(fake_segment, pt)
        touches = []
        # The next lower / higher keys, respectively, to enter new events for
        neighbours = []
        if _extra_checks:
            _assert_fully_sorted(list(active), y)
        # Iterate on both sides: walk outward from the event point, collecting
        # keys whose x at the sweep line equals pt[0]; the first key that
        # doesn't match becomes that side's neighbour.
        for it in (
                active.irange(None, fake_key, inclusive=(True, True),
                              reverse=True),
                active.irange(fake_key, None, inclusive=(False, True)),
        ):
            neighbour = None
            for sweep_key in it:
                if sweep_key.at_y(y) != pt[0]:
                    neighbour = sweep_key
                    break
                touches.append(sweep_key)
            neighbours.append(neighbour)

        # Remove the old sweep keys
        for touch in touches:
            active.remove(touch)

        segments_at_pt = [
            sweep_key.segment for sweep_key in touches + segstarts
        ]
        if len(segments_at_pt) > 1:
            yield (pt, tuple(segments_at_pt))

        # Create new _SweepKeys, automatically sorts
        # according to order after point sweep
        sweep_keys = []
        for segment in segments_at_pt:
            # Is this segment still relevant?
            if max(segment[0][1], segment[1][1]) <= pt[1]:
                continue
            sweep_keys.append(_SweepKey(segment, pt))
        sweep_keys = list(sorted(sweep_keys))

        # Add new events for neighbours
        if len(sweep_keys) == 0:
            # If we just removed stuff, the neighbours might now meet...
            if neighbours[0] is not None and neighbours[1] is not None:
                ipt = _nonparallel_intersection_point(neighbours[0].segment,
                                                      neighbours[1].segment)
                # only future (strictly higher) intersections become events
                if ipt and ipt[1] > pt[1]:
                    add_event(ipt)
            continue
        if neighbours[0] is not None:
            ipt = _nonparallel_intersection_point(sweep_keys[0].segment,
                                                  neighbours[0].segment)
            # hyp.note(fstr('IPTL', ipt, pt))
            if ipt and ipt[1] > pt[1]:
                add_event(ipt)
        if neighbours[1] is not None:
            ipt = _nonparallel_intersection_point(sweep_keys[-1].segment,
                                                  neighbours[1].segment)
            # hyp.note(fstr('IPTR', ipt, pt))
            if ipt and ipt[1] > pt[1]:
                add_event(ipt)

        # Add them in and continue
        for sweep_key in sweep_keys:
            active.add(sweep_key)
class PeakDB:
    """Index of metabolite peaks, sorted for fast ppm-range lookups.

    Peaks are kept in a SortedList (ordered by MetabolitePeak's natural
    ordering -- presumably the peak position; TODO confirm) so that
    ``irange`` can retrieve all peaks within a tolerance window.
    """

    def __init__(self, peak_list: List[MetabolitePeak]):
        self._peaks = SortedList(peak_list)
        # id -> peak, for resolving matched peak ids back to peak objects
        self.peak_dict = {p.id: p for p in peak_list}
        # metabolite_id -> all peaks belonging to that metabolite
        metabolite_peaks = {}
        for peak in peak_list:
            if peak.metabolite_id not in metabolite_peaks:
                metabolite_peaks[peak.metabolite_id] = []
            metabolite_peaks[peak.metabolite_id].append(peak)
        self._metabolite_peaks = metabolite_peaks

    @property
    def metabolite_peaks(self) -> Dict[str, List[MetabolitePeak]]:
        return self._metabolite_peaks

    def query_n(self, qu: List[float], tolerance=0.0075, missing_thresh=0.8):
        """Match query positions to metabolites via bipartite matching.

        Builds, per metabolite, a bipartite graph between query positions
        (nodes 'l_<q>') and that metabolite's peaks found within
        ``tolerance`` of the query; scores each metabolite by the fraction
        of its peaks covered by a maximum matching.
        """
        graphs = {}
        for q in qu:
            # sentinel peaks used only as range endpoints for irange
            lower = MetabolitePeak(None, q - tolerance, None)
            upper = MetabolitePeak(None, q + tolerance, None)
            for retrieved_peak in self._peaks.irange(lower, upper):
                if retrieved_peak.metabolite_id not in graphs:
                    graphs[retrieved_peak.metabolite_id] = nx.Graph()
                # bipartite=0: query side; bipartite=1: peak side
                if f'l_{q:.5f}' not in graphs[retrieved_peak.metabolite_id]:
                    graphs[retrieved_peak.metabolite_id].add_node(f'l_{q:.5f}', bipartite=0)
                if retrieved_peak.id not in graphs[retrieved_peak.metabolite_id]:
                    graphs[retrieved_peak.metabolite_id].add_node(retrieved_peak.id, bipartite=1)
                graphs[retrieved_peak.metabolite_id].add_edge(f'l_{q:.5f}', retrieved_peak.id)
        result_map = []
        for met, graph in graphs.items():
            result = {
                'metabolite_id': met,
                'matches': {}
            }
            # maximum_matching is computed per connected component; keep only
            # the query-side half of each matched pair
            for i in nx.components.connected_components(graph):
                matching = nx.bipartite.maximum_matching(graph.subgraph(i))
                result['matches'].update({k: v for k, v in matching.items() if k.startswith("l_")})
            result['score'] = len(result['matches']) / len(self.metabolite_peaks[met])
            result['missing'] = []
            matched_peaks = [self.peak_dict[peak_id] for peak_id in result['matches'].values()]
            for peak in self.metabolite_peaks[met]:
                if peak not in matched_peaks and any([p.amp * missing_thresh <= peak.amp for p in matched_peaks]):
                    # if there is any peak larger than any seen peak
                    # this peak is considered missing
                    result['missing'].append(peak.id)
            result_map.append(result)
        return result_map

    def query(self, qu: List[float], tolerance=0.0075):
        """Score metabolites by how many distinct query positions hit them.

        Each query position counts at most once per metabolite; the score is
        the hit count divided by the metabolite's total peak count, and
        results are returned sorted by score, descending.
        """
        result_map = {}
        for q in qu:
            matched = set()
            lower = MetabolitePeak(None, q - tolerance, None)
            upper = MetabolitePeak(None, q + tolerance, None)
            for retrieved_peak in self._peaks.irange(lower, upper):
                if retrieved_peak.metabolite_id not in result_map:
                    result_map[retrieved_peak.metabolite_id] = 0
                # guard: count this metabolite only once for the current q
                if retrieved_peak.metabolite_id not in matched:
                    matched.add(retrieved_peak.metabolite_id)
                    result_map[retrieved_peak.metabolite_id] += 1
        metabolite_list = []
        for met_id, overlap in result_map.items():
            jaccard_score = overlap / len(self.metabolite_peaks[met_id])
            metabolite_list.append({
                "metabolite_id": met_id,
                "score": jaccard_score,
                "overlap": overlap
            })
        return sorted(metabolite_list, key=lambda i: i['score'], reverse=True)
# %% SortedList from sortedcontainers import SortedList, SortedDict, SortedSet sl = SortedList([100, 7, 10, 11, 13, 14]) print(sl) sl.add(99) sl.remove(7) print(sl) sl.update([1, 0, 2]) # add the entire list print(sl) # irange(minimum=None, maximum=None, inclusive=True, True, reverse=False) rangeList = list(sl.irange(10, 14, inclusive=[True, False])) print(rangeList) print(sl.index(10)) # 3 # print(sl.index(-99)) # Throw Error s2 = SortedList([1, 7, 7, 7, 7, 10, 11, 13, 14]) print(f"left most idx: {s2.bisect_left(7)}") print(f"right most idx: {s2.bisect_right(7)}") print(f"out of boundary < min, idx={s2.bisect_left(-100)}") print(f"out of boundary > max, len={len(s2)}, idx={s2.bisect_left(100)}") # %% SortedDict sd = SortedDict() sd["c"] = 3 sd["a"] = 1 sd["b"] = 2 del sd["a"] print(sd)
def test_irange():
    """Exhaustively exercise SortedList.irange over a small dense list.

    Fix: ``SortedList(load=7)`` uses the sortedcontainers v1 constructor;
    the ``load`` keyword was removed in v2. Use the supported
    ``_reset(7)`` test hook instead (same sublist load factor).
    """
    sl = SortedList()
    sl._reset(7)
    assert [] == list(sl.irange())
    values = list(range(53))
    sl.update(values)
    for start in range(53):
        for end in range(start, 53):
            # default inclusive on both ends, forward and reversed
            assert list(sl.irange(start, end)) == values[start:(end + 1)]
            assert list(sl.irange(start, end, reverse=True)) == values[start:(end + 1)][::-1]
    for start in range(53):
        for end in range(start, 53):
            # exclusive maximum
            assert list(range(start, end)) == list(sl.irange(start, end, (True, False)))
    for start in range(53):
        for end in range(start, 53):
            # exclusive minimum
            assert list(range(start + 1, end + 1)) == list(sl.irange(start, end, (False, True)))
    for start in range(53):
        for end in range(start, 53):
            # both endpoints exclusive
            assert list(range(start + 1, end)) == list(sl.irange(start, end, (False, False)))
    for start in range(53):
        assert list(range(start, 53)) == list(sl.irange(start))
    for end in range(53):
        assert list(range(0, end)) == list(sl.irange(None, end, (True, False)))
    # unbounded exclusive endpoints still cover everything
    assert values == list(sl.irange(inclusive=(False, False)))
    assert [] == list(sl.irange(53))
    assert values == list(sl.irange(None, 53, (True, False)))
class _Vertex:
    """A planar-subdivision vertex holding its outgoing half-edges in
    angular (frangle) order, supporting clockwise/counter-clockwise
    neighbour queries."""

    # position of this vertex
    coordinates: Point
    # outgoing half-edges, keyed by the frangle of their direction vector
    half_edges_by_order: SortedList

    def _key(self, half_edge):
        # sort key: frangle of the edge's direction leaving this vertex
        assert half_edge[0] == self.coordinates
        return vector_frangle(
            np.array(half_edge[1], dtype=object) - half_edge[0])

    def __init__(self, coordinates):
        self.coordinates = coordinates
        self.half_edges_by_order = SortedList(key=self._key)

    def add_half_edge(self, half_edge):
        # no two outgoing edges may share a direction (equal frangle)
        existing = list(
            self.half_edges_by_order.irange(half_edge, half_edge,
                                            inclusive=(True, True)))
        assert len(existing) == 0, fstr(half_edge, existing, approx=True)
        self.half_edges_by_order.add(half_edge)

    def get_next_cw(self, half_edge):
        """Return the outgoing edge that follows ``half_edge`` in the
        angular order (wrapping around to the first one)."""
        assert half_edge[0] == self.coordinates
        # strictly after half_edge in frangle order
        nxt = list(
            islice(
                self.half_edges_by_order.irange(half_edge, None,
                                                inclusive=(False, True)), 1))
        if len(nxt) == 0:
            # wrap around: take the overall first edge
            nxt = list(
                islice(
                    self.half_edges_by_order.irange(None, None,
                                                    inclusive=(True, True)),
                    1))
        assert len(nxt) == 1
        return nxt[0]

    def get_next_ccw(self, half_edge):
        """Return the outgoing edge that precedes ``half_edge`` in the
        angular order (wrapping around to the last one)."""
        assert half_edge[0] == self.coordinates
        # strictly before half_edge in frangle order
        nxt = list(
            islice(
                self.half_edges_by_order.irange(None, half_edge,
                                                inclusive=(True, False),
                                                reverse=True), 1))
        if len(nxt) == 0:
            # wrap around: take the overall last edge
            nxt = list(
                islice(
                    self.half_edges_by_order.irange(None, None,
                                                    inclusive=(True, True),
                                                    reverse=True), 1))
        assert len(nxt) == 1, (self.half_edges_by_order, nxt)
        # sanity: cw and ccw must be inverse operations
        prev = self.get_next_cw(nxt[0])
        assert prev == half_edge, fstr(half_edge, nxt[0], prev,
                                       self._key(half_edge),
                                       self._key(nxt[0]), self._key(prev))
        return nxt[0]
def processthread(trange):
    """Worker: enumerate team combinations in the index range
    ``trange = (istart, iend, doprinting, logfilename)``, keep the top
    ``maxresultsize`` teams by score, and return them as a single-element
    list. Periodically checkpoints progress to a pickle log so a run can
    be resumed.

    NOTE(review): relies on many module-level globals (processedcsv,
    setsize, seededchoices, maxresultsize, nprintsplit, resumethreads,
    filter/stat helpers); statement nesting below was reconstructed from a
    collapsed source line -- verify against the original file.
    """
    teamstatsScoreidx = teamstatsheaders.index('Score')
    # score column index within a combined [members... + teamstats...] row
    sortedScoreidx = setsize + teamstatsScoreidx
    # descending by score (negated key)
    threadSortedResults = SortedList(key=lambda x: -x[sortedScoreidx])  # todo check this index
    minScore = 0
    istart = trange[0]
    isize = trange[1] - trange[0]
    iend = trange[1]
    doprinting = trange[2]
    logfilepath = os.path.join(currentdir, trange[3])
    resumeset = istart
    # resume from a previous checkpoint if one exists and is in range
    if resumethreads and os.path.exists(logfilepath):
        with open(logfilepath, 'rb') as logfile:
            resumeset = pickle.load(logfile)
            if resumeset < istart or resumeset >= iend - 1:
                print('cant resume set ' + logfilepath)
                resumeset = istart
            else:
                print(
                    'resuming ' + os.path.basename(logfilepath) + ' from ' +
                    str(resumeset))
                # second pickle in the log holds the saved result rows
                for someset in pickle.load(logfile):
                    threadSortedResults.add(someset)
    map_subpool_to_csv = list(range(len(processedcsv) - 1))  # indexes of all the pokemon in csvdata and pkmnstats
    # reverse to generate 'reverse colexigraphical' order
    for somesetidx in seededchoices:  # remove the seeded choices if any
        map_subpool_to_csv.remove(somesetidx)
    subsetpool = list(range(len(processedcsv) - 1 - len(seededchoices)))[::-1]
    # make a separate index. ex: pkmnstats[ 1 + map_subpool_to_csv[setidx]]
    if len(map_subpool_to_csv) != len(subsetpool):
        print('error in map_subpool_to_csv')
        exit()
    subsetsize = setsize - len(seededchoices)
    threadcombinator = IndexedCombination(subsetsize, subsetpool)
    threadtimerstart = timeit.default_timer()
    skippedrange = 0
    # for setindex in range( resumeset, iend):
    setindex = resumeset
    while setindex < iend:
        # inner loop: nprintsplit combinations between progress prints
        for iprintsplit in range(nprintsplit):
            parentset = threadcombinator.get_nth_combination(setindex)
            if len(seededchoices) > 0:
                fullset = seededchoices + parentpool_to_csvdata(map_subpool_to_csv, parentset)
            else:
                fullset = parentpool_to_csvdata(map_subpool_to_csv, parentset)
            if len(threadSortedResults) < maxresultsize:
                # results not full yet: accept anything passing the filters
                if filtersetbyweakness(fullset) and filtersetbyattack(fullset):
                    teamcompare = list(fullset) + teamstats(fullset)
                    threadSortedResults.add(teamcompare)
            else:
                teamcompare = list(fullset) + teamstats(fullset)
                if teamcompare[sortedScoreidx] > minScore:  # todo check this index
                    # results full: displace the current worst entry
                    if filtersetbyweakness(fullset) and filtersetbyattack(fullset):
                        threadSortedResults.add(teamcompare)
                        threadSortedResults.pop()
                        minScore = threadSortedResults[-1][sortedScoreidx]
                else:
                    # skip to next combination where its possible for the stats to be high enough.
                    nextset = threadcombinator.skiptonextmaxima(parentset)
                    if len(nextset) == 0:
                        new_setindex = iend
                    else:
                        new_setindex = threadcombinator.get_n_from_combination(nextset) - 1
                    skippedrange += (new_setindex + 1 - setindex)
                    setindex = new_setindex
            setindex += 1
            if setindex >= iend:
                break
        if doprinting:
            threadtimernow = timeit.default_timer()
            combinationspersecond = (setindex - istart) / (threadtimernow - threadtimerstart)
            print("{0:.6f}".format((setindex - istart) / isize * 100), '%' + ' with ' + str(skippedrange) + ' skipped of ' + str(setindex - istart) + ' and SortedResults has ' + str(len(threadSortedResults)) + '/' + str(maxresultsize) + ' at ' + '{0:.1f}'.format(combinationspersecond) + ' combinations/s', end='\r')
        if resumethreads:
            # checkpoint: current index, then the result rows
            with open(logfilepath, 'wb') as logfile:
                pickle.dump(setindex, logfile, protocol=pickle.HIGHEST_PROTOCOL)
                pickle.dump(list(threadSortedResults.irange()), logfile, protocol=pickle.HIGHEST_PROTOCOL)
    if doprinting:
        print('')
    if resumethreads:
        # final checkpoint after the loop completes
        with open(logfilepath, 'wb') as logfile:
            pickle.dump(setindex, logfile, protocol=pickle.HIGHEST_PROTOCOL)
            pickle.dump(list(threadSortedResults.irange()), logfile, protocol=pickle.HIGHEST_PROTOCOL)
    return [list(threadSortedResults.irange())]
class ListWithAdjustments(object):
    """
    To prepare inserts, we adjust elements to be inserted and elements in the
    underlying list. We don't want to actually touch the underlying list, but
    we need to remember the adjustments, because later adjustments may depend
    on and readjust earlier ones.

    NOTE: this is Python 2 code (``xrange``); the relabeling scheme follows
    the [Bender] order-maintenance paper referenced in
    ``prep_inserts_at_index``.
    """

    def __init__(self, orig_list):
        """
        Orig_list must be a SortedListWithKey.
        """
        self._orig_list = orig_list
        self._key = orig_list._key
        # Stores pairs (i, new_key) where i is an index into orig_list.
        # Note that adjustments don't affect the order in the original list,
        # so the list is sorted both on keys an on indices; and a missing
        # index i means that (i, orig_key) fits into the adjustments list both
        # by key and by index.
        self._adjustments = SortedListWithKey(key=lambda pair: pair[1])
        # Stores keys for new insertions.
        self._insertions = SortedList()

    def get_insertions(self):
        return self._insertions

    def get_adjustments(self):
        return self._adjustments

    def _adj_bisect_key_left(self, key):
        """
        Works as bisect_key_left(key) on the orig_list as if all adjustments
        have been applied.
        """
        adj_index = self._adjustments.bisect_key_left(key)
        adj_next = (self._adjustments[adj_index][0]
                    if adj_index < len(self._adjustments) else len(
                        self._orig_list))
        adj_prev = self._adjustments[adj_index - 1][0] if adj_index > 0 else -1
        orig_index = self._orig_list.bisect_key_left(key)
        # the unadjusted answer is valid only if it lies strictly between the
        # surrounding adjusted indices
        if adj_prev < orig_index and orig_index < adj_next:
            return orig_index
        return adj_next

    def _adj_get_key(self, index):
        """
        Returns the key corresponding to the given index into orig_list as if
        all adjustments have been applied.
        """
        # (index, -inf) sorts before any (index, key) pair
        i = bisect.bisect_left(self._adjustments, (index, float('-inf')))
        if i < len(self._adjustments) and self._adjustments[i][0] == index:
            return self._adjustments[i][1]
        return self._key(self._orig_list[index])

    def count_range(self, begin, end):
        """
        Returns the number of elements with keys in the half-open interval
        [begin, end).
        """
        adj_begin = self._adj_bisect_key_left(begin)
        adj_end = self._adj_bisect_key_left(end)
        ins_begin = self._insertions.bisect_left(begin)
        ins_end = self._insertions.bisect_left(end)
        return (adj_end - adj_begin) + (ins_end - ins_begin)

    def _adjust_range(self, begin, end):
        """
        Make changes to stored adjustments and insertions to distribute them
        equally in the half-open interval of keys [begin, end).
        """
        adj_begin = self._adj_bisect_key_left(begin)
        adj_end = self._adj_bisect_key_left(end)
        ins_begin = self._insertions.bisect_left(begin)
        ins_end = self._insertions.bisect_left(end)
        self._do_adjust_range(adj_begin, adj_end, ins_begin, ins_end, begin,
                              end)

    def _adjust_all(self):
        """
        Renumber everything to be equally distributed in the open interval
        (new_begin, new_end).
        """
        orig_len = len(self._orig_list)
        ins_len = len(self._insertions)
        self._do_adjust_range(0, orig_len, 0, ins_len, 0.0,
                              orig_len + ins_len + 1.0)

    def _do_adjust_range(self, adj_begin, adj_end, ins_begin, ins_end,
                         new_begin_key, new_end_key):
        """
        Implements renumbering as used by _adjust_range() and _adjust_all().
        """
        count = (adj_end - adj_begin) + (ins_end - ins_begin)
        # merge existing (possibly adjusted) keys and pending insertions,
        # tagging each with whether it is an insertion and its index
        prev_keys = ([(self._adj_get_key(i), False, i)
                      for i in xrange(adj_begin, adj_end)] +
                     [(self._insertions[i], True, i)
                      for i in xrange(ins_begin, ins_end)])
        prev_keys.sort()
        new_keys = get_range(new_begin_key, new_end_key, count)
        for (old_key, is_insert, i), new_key in zip(prev_keys, new_keys):
            if is_insert:
                self._insertions.remove(old_key)
                self._insertions.add(new_key)
            else:
                # (i, old_key) pair may not be among _adjustments, so we
                # discard() rather than remove().
                self._adjustments.discard((i, old_key))
                self._adjustments.add((i, new_key))

    def prep_inserts_at_index(self, index, count):
        # This is the crux of the algorithm, inspired by the [Bender] paper
        # (cited above). Here's a brief summary of the algorithm, and of our
        # departures from it.
        # - The algorithm inserts keys while it is able. When there isn't
        #   enough space, it walks enclosing intervals around the key it wants
        #   to insert, doubling the interval each time, until it finds an
        #   interval that doesn't overflow. The overflow threshold is
        #   calculated in such a way that the bigger the interval, the smaller
        #   the density it seeks.
        # - The algorithm uses integers, picking the number of bits to work
        #   for list length between n/2 and 2n, and rebuilding from scratch
        #   any time length moves out of this range. We don't rebuild
        #   anything, don't change number of bits, and use floats. This breaks
        #   some of the theoretical results, and thinking about floats is much
        #   harder than about integers. So we are not on particularly solid
        #   ground with these changes (but it seems to work).
        # - We try different thresholds, which seems to perform better. This
        #   is mentioned in "Variable T" section of [Bender] paper, but our
        #   approach isn't quite the same. So it's also on shaky theoretical
        #   ground.
        assert count > 0
        begin = self._adj_get_key(index - 1) if index > 0 else 0.0
        end = self._adj_get_key(index) if index < len(
            self._orig_list) else begin + count + 1
        if begin < 0 or end <= 0 or math.isinf(max(begin, end)):
            # This should only happen if we have some invalid positions (e.g.
            # from before we started using this logic). In this case, just
            # renumber everything 1 through n (leaving space so that the count
            # insertions take the first count integers).
            self._insertions.update(
                [begin if index > 0 else float('-inf')] * count)
            self._adjust_all()
            return
        self._insertions.update(get_range(begin, end, count))
        if not is_valid_range(begin, self._insertions.irange(begin, end),
                              end):
            assert self.count_range(begin, end) > 0
            min_key, max_key = self._find_sparse_enough_range(begin, end)
            self._adjust_range(min_key, max_key)
            assert is_valid_range(begin, self._insertions.irange(begin, end),
                                  end)

    def _find_sparse_enough_range(self, begin, end):
        # frac is a parameter used for relabeling, corresponding to 2/T in
        # [Bender]. Its interpretation is that frac^i is the overflow limit
        # for intervals of size 2^i.
        for frac in (1.14, 1.3):
            thresh = 1
            for i in xrange(64):
                rbegin, rend = range_around_float(begin, i)
                assert self.count_range(rbegin, rend) > 0
                if end <= rend and self.count_range(rbegin, rend) < thresh:
                    return (rbegin, rend)
                thresh *= frac
        raise ValueError("This isn't expected")
def preprocesscsvdata(validcsvpath, csvsavepath):
    """Load raw pkmn stats from ``validcsvpath``, score and filter them, and
    write the processed table to ``csvsavepath``.

    Side effects: rebuilds the module-level ``processedcsv`` list and fills
    the ``pkmnstats`` array, in score-ascending order.

    Fixes: replaced identity comparisons (``i is 0``,
    ``len(a) is len(b)``) with ``==`` -- ``is`` on integers relies on
    CPython small-int caching and emits a SyntaxWarning on 3.8+; also
    ``not x in y`` -> ``x not in y``.
    """
    # Load pkmn stats from csv data
    if not os.path.exists(validcsvpath):
        print('could not find ' + validcsvpath)
        sys.exit(1)
    global customelements
    global processedheaders
    scoreidx = processedheaders.index('Score')
    # kept sorted ascending by score
    processedcsvdata = SortedList(key=lambda x: float(x[scoreidx]))
    csvheaders = dict()
    csvdata = [[]]  # starts with one empty row; guarded against below
    with open(validcsvpath, 'r') as csvdatafile:
        csvreader = csv.reader(csvdatafile, delimiter=',')
        for rownum, row in enumerate(csvreader):
            if rownum == 0:
                # header row: map column name -> column index
                for j, col in enumerate(row):
                    csvheaders[col] = j
            else:
                csvdata.append(row)
    # expectedcsvheaders = ['Name','Type1','Type2', 'HP','Atk','Def','SpecialAtk','SpecialDef','Speed', 'Ability1','Ability2','HiddenAbility']
    for pkmndata in csvdata:
        processedpkmndata = []
        # this protects against csvdata[0] = []. not sure why this happens
        if len(pkmndata) == len(csvheaders):
            processedpkmndata += list(
                pkmndata[csvheaders[name]] for name in ['Name', 'Type1', 'Type2'])
            abilityandandres = calcpkmnresistances(pkmndata, csvheaders)
            ability = abilityandandres[0]
            processedpkmndata += [ability]
            thispkmnstats = calcStats(pkmndata, csvheaders, ability)
            processedpkmndata += list(map(str, thispkmnstats))
            processedpkmndata += list(map(str, abilityandandres[1]))  # Resistances
            # drop low-scoring pkmn unless they were explicitly seeded
            if float(thispkmnstats[-1]) < pkmn_score_thresh and pkmndata[csvheaders['Name']] not in seededchoices_names:
                continue
            processedcsvdata.add(processedpkmndata)
    if generation >= 3:
        print('To adjust ability assumptions, edit supported abilities by each pkmn in the unprocessed input csv')
        # or disable unwanted abilities via the command line
    global pkmnstats
    global processedcsv
    processedcsv = [processedheaders]
    scoreidx = processedheaders.index('Score')
    with open(csvsavepath, 'w') as csvdatafile:
        csvwriter = csv.writer(csvdatafile, delimiter=',')
        csvwriter.writerow(processedheaders)
        pkmnidx = 0
        hpidx = processedheaders.index('HP')
        # emit rows in score order; mirror the numeric stat columns into
        # the pkmnstats array
        for somerow in processedcsvdata.irange():
            processedcsv.append(somerow)
            csvwriter.writerow(somerow)
            for statidx in range(len(processedheaders) - hpidx):
                pkmnstats[pkmnidx, statidx] = float(somerow[statidx + hpidx])
            pkmnidx += 1
class SCEngine:
    '''
    Fast tree-based implementation for indexing, using the
    ``sortedcontainers`` package.

    Parameters
    ----------
    data : Table
        Sorted columns of the original table
    row_index : Column object
        Row numbers corresponding to data columns
    unique : bool (defaults to False)
        Whether the values of the index must be unique
    '''

    def __init__(self, data, row_index, unique=False):
        node_keys = map(tuple, data)
        self._nodes = SortedList(starmap(Node, zip(node_keys, row_index)))
        self._unique = unique

    def add(self, key, value):
        '''
        Add a key, value pair.

        Raises
        ------
        ValueError
            If ``unique`` is set and ``key`` is already present.
        '''
        if self._unique and (key in self._nodes):
            # BUG FIX: '{0:!r}' is an invalid format spec ('!r' is a
            # conversion, not a spec) and raised
            # ValueError('Invalid format specifier') instead of the
            # intended duplicate-key message.
            message = 'duplicate {0!r} in unique index'.format(key)
            raise ValueError(message)
        self._nodes.add(Node(key, value))

    def find(self, key):
        '''
        Find rows corresponding to the given key.
        '''
        return [node.value for node in self._nodes.irange(key, key)]

    def remove(self, key, data=None):
        '''
        Remove data from the given key.

        Returns True if anything was removed, False otherwise.
        '''
        if data is not None:
            item = Node(key, data)
            try:
                self._nodes.remove(item)
            except ValueError:
                return False
            return True
        # no specific value: drop every node under this key
        items = list(self._nodes.irange(key, key))
        for item in items:
            self._nodes.remove(item)
        return bool(items)

    def shift_left(self, row):
        '''
        Decrement rows larger than the given row.
        '''
        for node in self._nodes:
            if node.value > row:
                node.value -= 1

    def shift_right(self, row):
        '''
        Increment rows greater than or equal to the given row.
        '''
        for node in self._nodes:
            if node.value >= row:
                node.value += 1

    def items(self):
        '''
        Return a list of key, data tuples.
        '''
        result = OrderedDict()
        for node in self._nodes:
            if node.key in result:
                result[node.key].append(node.value)
            else:
                result[node.key] = [node.value]
        return result.items()

    def sort(self):
        '''
        Make row order align with key order.
        '''
        for index, node in enumerate(self._nodes):
            node.value = index

    def sorted_data(self):
        '''
        Return a list of rows in order sorted by key.
        '''
        return [node.value for node in self._nodes]

    def range(self, lower, upper, bounds=(True, True)):
        '''
        Return row values in the given range.
        '''
        iterator = self._nodes.irange(lower, upper, bounds)
        return [node.value for node in iterator]

    def replace_rows(self, row_map):
        '''
        Replace rows with the values in row_map.
        '''
        nodes = [node for node in self._nodes if node.value in row_map]
        for node in nodes:
            node.value = row_map[node.value]
        self._nodes.clear()
        self._nodes.update(nodes)

    def __repr__(self):
        return '{0!r}'.format(list(self._nodes))
class SCEngine:
    '''
    Fast tree-based implementation for indexing, using the
    ``sortedcontainers`` package.

    Parameters
    ----------
    data : Table
        Sorted columns of the original table
    row_index : Column object
        Row numbers corresponding to data columns
    unique : bool (defaults to False)
        Whether the values of the index must be unique
    '''

    def __init__(self, data, row_index, unique=False):
        node_keys = map(tuple, data)
        self._nodes = SortedList(starmap(Node, zip(node_keys, row_index)))
        self._unique = unique

    def add(self, key, value):
        '''
        Add a key, value pair.

        Raises
        ------
        ValueError
            If ``unique`` is set and ``key`` is already present.
        '''
        if self._unique and (key in self._nodes):
            # BUG FIX: f'{key:!r}' treats '!r' as a format spec and raises
            # ValueError('Invalid format specifier'); the conversion form
            # f'{key!r}' produces the intended duplicate-key message.
            message = f'duplicate {key!r} in unique index'
            raise ValueError(message)
        self._nodes.add(Node(key, value))

    def find(self, key):
        '''
        Find rows corresponding to the given key.
        '''
        return [node.value for node in self._nodes.irange(key, key)]

    def remove(self, key, data=None):
        '''
        Remove data from the given key.

        Returns True if anything was removed, False otherwise.
        '''
        if data is not None:
            item = Node(key, data)
            try:
                self._nodes.remove(item)
            except ValueError:
                return False
            return True
        # no specific value: drop every node under this key
        items = list(self._nodes.irange(key, key))
        for item in items:
            self._nodes.remove(item)
        return bool(items)

    def shift_left(self, row):
        '''
        Decrement rows larger than the given row.
        '''
        for node in self._nodes:
            if node.value > row:
                node.value -= 1

    def shift_right(self, row):
        '''
        Increment rows greater than or equal to the given row.
        '''
        for node in self._nodes:
            if node.value >= row:
                node.value += 1

    def items(self):
        '''
        Return a list of key, data tuples.
        '''
        result = OrderedDict()
        for node in self._nodes:
            if node.key in result:
                result[node.key].append(node.value)
            else:
                result[node.key] = [node.value]
        return result.items()

    def sort(self):
        '''
        Make row order align with key order.
        '''
        for index, node in enumerate(self._nodes):
            node.value = index

    def sorted_data(self):
        '''
        Return a list of rows in order sorted by key.
        '''
        return [node.value for node in self._nodes]

    def range(self, lower, upper, bounds=(True, True)):
        '''
        Return row values in the given range.
        '''
        iterator = self._nodes.irange(lower, upper, bounds)
        return [node.value for node in iterator]

    def replace_rows(self, row_map):
        '''
        Replace rows with the values in row_map.
        '''
        nodes = [node for node in self._nodes if node.value in row_map]
        for node in nodes:
            node.value = row_map[node.value]
        self._nodes.clear()
        self._nodes.update(nodes)

    def __repr__(self):
        # abbreviate long node lists: first three, ellipsis, last three
        if len(self._nodes) > 6:
            nodes = list(self._nodes[:3]) + ['...'] + list(self._nodes[-3:])
        else:
            nodes = self._nodes
        nodes_str = ', '.join(str(node) for node in nodes)
        return f'<{self.__class__.__name__} nodes={nodes_str}>'