def test_eq():
    """Two lists compare unequal while their contents differ, equal once they match."""
    left = SortedList(range(10), load=4)
    right = SortedList(range(20), load=4)
    # Exercise ``==`` itself (not ``!=``) for the negative case.
    assert not (left == right)

    # Rebuild the second list so both hold exactly 0..9.
    right.clear()
    right.update(range(10))
    assert left == right
def test_irange():
    """Check ``irange`` over every bound pair and every inclusivity combination."""
    size = 53
    sl = SortedList(load=7)
    # An empty list yields nothing.
    assert list(sl.irange()) == []

    values = list(range(size))
    sl.update(values)

    for lo in range(size):
        for hi in range(lo, size):
            closed = values[lo:(hi + 1)]
            # Default bounds include both ends.
            assert list(sl.irange(lo, hi)) == closed
            assert list(sl.irange(lo, hi, reverse=True)) == closed[::-1]
            # The third argument selects (include_min, include_max).
            assert list(sl.irange(lo, hi, (True, False))) == list(range(lo, hi))
            assert list(sl.irange(lo, hi, (False, True))) == list(range(lo + 1, hi + 1))
            assert list(sl.irange(lo, hi, (False, False))) == list(range(lo + 1, hi))

    # Open-ended ranges: only a minimum, then only a maximum.
    for lo in range(size):
        assert list(sl.irange(lo)) == list(range(lo, size))
    for hi in range(size):
        assert list(sl.irange(None, hi, (True, False))) == list(range(0, hi))

    # Exclusive flags have no effect on a side that has no bound.
    assert list(sl.irange(inclusive=(False, False))) == values
    # Bounds beyond the stored values.
    assert list(sl.irange(size)) == []
    assert list(sl.irange(None, size, (True, False))) == values
def collect_matches():
    """Crawl outward from one seed summoner, alternating between summoners and matches.

    Each pass picks a random pending summoner, queues the ids of the matches
    returned by ``filter_match_history`` for the chosen patch, then drains the
    match queue; every participant not seen before is queued as a new summoner.
    Nothing is returned: the function ends once no summoner is pending.
    """
    region = "EUW"
    seed_name = "GustavEnk"
    seed = Summoner(name=seed_name, region=region)
    patch = Patch.from_str("8.9", region=region)

    summoners_todo = SortedList([seed.id])
    summoners_done = SortedList()
    matches_todo = SortedList()
    matches_done = SortedList()

    while summoners_todo:
        # Pick a random pending summoner and pull their match history.
        summoner_id = random.choice(summoners_todo)
        picked = Summoner(id=summoner_id, region=region)
        history = filter_match_history(picked, patch)
        matches_todo.update(entry.id for entry in history)
        summoners_todo.remove(summoner_id)
        summoners_done.add(summoner_id)

        while matches_todo:
            # Pick a random pending match.
            match_id = random.choice(matches_todo)
            match = Match(id=match_id, region=region)
            # Iterating over the participants triggers the match to load its
            # data; with a database in the datapipeline it is stored there
            # automatically.
            for participant in match.participants:
                participant_id = participant.summoner.id
                if participant_id in summoners_done or participant_id in summoners_todo:
                    continue
                summoners_todo.add(participant_id)
            matches_todo.remove(match_id)
            matches_done.add(match_id)
class DijkstraFixedPoint:
    """Iteratively explore candidate fixed points of a labelled automaton.

    Candidates are dicts (iterated as ``state -> label`` pairs) held in a
    ``SortedList`` keyed by ``-len(candidate)``; ``pop()`` takes the last
    element, i.e. a candidate with the fewest entries.

    The object is its own iterator. Both the Python 2 ``next`` and the
    Python 3 ``__next__`` spellings are provided.
    """

    def __init__(self, automaton, initial_set, accepted_set):
        """Store the automaton, queue ``initial_set`` and remember ``accepted_set``."""
        self.automaton = automaton
        self.set_to_visit = SortedList(initial_set, key=lambda d: -len(d))
        self.accepted_set = accepted_set

    def iter_fix_point_set(self, max_size=10):
        """Process the next pending candidate.

        Returns ``(accepted, candidate)``. ``accepted`` is True only when the
        candidate produced no new candidates and one of its successors lies in
        ``accepted_set``. When following the successors grows the candidate
        beyond ``max_size`` entries, ``(False, candidate)`` is returned and
        nothing is queued.

        Raises ``StopIteration`` when no candidate is pending.
        """
        if len(self.set_to_visit) == 0:
            raise StopIteration()

        F = self.set_to_visit.pop()
        # Every state of the candidate keeps its single chosen label.
        nF = {state: [label] for state, label in F.items()}
        reach_accepted_set = False
        for u, lu in F.items():
            successors = self.automaton.get_labelled_successors(u)[lu]
            for s in successors:
                if s in self.accepted_set:
                    reach_accepted_set = True
                elif s not in nF:
                    # A newly reached state may take any of its labels.
                    nF[s] = list(self.automaton.get_successor_labels(s))

        # nF only ever grows, so one check after the scan is enough.
        if len(nF) > max_size:
            return False, F

        newF = self.expand_successor_set(nF)
        if F in newF:
            newF.remove(F)
        self.set_to_visit.update(newF)
        accept_fix_point = (len(newF) == 0) and reach_accepted_set
        return accept_fix_point, F

    def expand_successor_set(self, nF):
        """Return every dict obtained by picking one label per state of ``nF``."""
        states = list(nF.keys())
        return [dict(zip(states, choice))
                for choice in itertools.product(*nF.values())]

    def __iter__(self):
        return self

    def next(self):
        """Python 2 iterator step; see ``iter_fix_point_set``."""
        return self.iter_fix_point_set()

    # Python 3 looks up ``__next__``; without it ``iter(self)`` is rejected.
    __next__ = next

    def next_fixed_point(self, max_size):
        """Advance until a candidate is accepted.

        Returns ``(True, candidate)`` for the first accepted candidate, or
        ``(False, None)`` once the pending candidates are exhausted.
        """
        try:
            while True:
                fp_found, fp = self.iter_fix_point_set(max_size)
                if fp_found:
                    return fp_found, fp
        except StopIteration:
            return False, None
def test_bisect_right():
    """``bisect_right`` returns the insertion point after any equal elements."""
    empty = SortedList()
    assert empty.bisect_right(10) == 0

    # Every value 0..99 is stored twice: once by the constructor, once by update.
    doubled = SortedList(range(100), load=17)
    doubled.update(range(100))
    doubled._check()
    # 0..10 inclusive, two copies each, precede the insertion point.
    assert doubled.bisect_right(10) == 22
    # A value above everything lands at the end.
    assert doubled.bisect_right(200) == 200
def test_bisect_left():
    """``bisect_left`` returns the insertion point before any equal elements."""
    empty = SortedList()
    assert empty.bisect_left(0) == 0

    # Every value 0..99 is stored twice: once by the constructor, once by update.
    doubled = SortedList(range(100), load=17)
    doubled.update(range(100))
    doubled._check()
    # 0..49, two copies each, precede the first 50.
    assert doubled.bisect_left(50) == 100
    # A value above everything lands at the end.
    assert doubled.bisect_left(200) == 200
def test_bisect():
    """``bisect`` gives the insertion point after any equal elements."""
    empty = SortedList()
    assert empty.bisect(10) == 0

    doubled = SortedList(range(100))
    doubled._reset(17)
    # A second copy of every value 0..99.
    doubled.update(range(100))
    doubled._check()
    # 0..10 inclusive, two copies each, precede the insertion point.
    assert doubled.bisect(10) == 22
    assert doubled.bisect(200) == 200
def test_bisect_right():
    """``bisect_right`` gives the insertion point after any equal elements."""
    empty = SortedList()
    assert empty.bisect_right(10) == 0

    doubled = SortedList(range(100))
    doubled._reset(17)
    # A second copy of every value 0..99.
    doubled.update(range(100))
    doubled._check()
    # 0..10 inclusive, two copies each, precede the insertion point.
    assert doubled.bisect_right(10) == 22
    assert doubled.bisect_right(200) == 200
def test_bisect_left():
    """``bisect_left`` gives the insertion point before any equal elements."""
    empty = SortedList()
    assert empty.bisect_left(0) == 0

    doubled = SortedList(range(100))
    doubled._reset(17)
    # A second copy of every value 0..99.
    doubled.update(range(100))
    doubled._check()
    # 0..49, two copies each, precede the first 50.
    assert doubled.bisect_left(50) == 100
    assert doubled.bisect_left(200) == 200
class MAE_AVL(object):
    """Running median kept as two sorted halves with running sums and counts.

    ``less_than`` holds the lower half (and the extra element when the total
    is odd), ``greater_than`` the upper half. ``*_sum`` and ``*_items`` track
    the sum and the number of elements of each half.
    """

    def __init__(self, y):
        ordered = sorted(y)
        self.median = np.median(ordered)
        # The lower half receives the extra element when the count is odd.
        split = int(math.ceil(len(ordered) / 2.))
        lower, upper = ordered[:split], ordered[split:]

        self.less_than = SortedList()
        self.less_than.update(lower)
        self.less_than_sum = sum(lower)
        self.less_than_items = len(self.less_than)

        self.greater_than = SortedList()
        self.greater_than.update(upper)
        self.greater_than_sum = sum(upper)
        self.greater_than_items = len(self.greater_than)

    def _update(self, values, add_or_remove_fn, add_or_subtract_values):
        """Apply ``add_or_remove_fn`` for each value, then rebalance the halves.

        ``add_or_subtract_values`` is the matching arithmetic operator used to
        keep the running sums and counts in step with the containers.
        """
        for value in values:
            # Values up to the current lower-half maximum belong to the lower half.
            if self.less_than and value <= self.less_than[-1]:
                add_or_remove_fn(self.less_than, value)
                self.less_than_sum = add_or_subtract_values(self.less_than_sum, value)
                self.less_than_items = add_or_subtract_values(self.less_than_items, 1)
            else:
                add_or_remove_fn(self.greater_than, value)
                self.greater_than_sum = add_or_subtract_values(self.greater_than_sum, value)
                self.greater_than_items = add_or_subtract_values(self.greater_than_items, 1)

        # Lower half too large: hand its largest elements to the upper half.
        while len(self.less_than) > len(self.greater_than) + 1:
            moved = self.less_than.pop(-1)
            self.greater_than.add(moved)
            self.less_than_sum -= moved
            self.greater_than_sum += moved
            self.less_than_items -= 1
            self.greater_than_items += 1

        # Upper half too large: hand its smallest elements to the lower half.
        while len(self.greater_than) > len(self.less_than):
            moved = self.greater_than.pop(0)
            self.less_than.add(moved)
            self.less_than_sum += moved
            self.greater_than_sum -= moved
            self.less_than_items += 1
            self.greater_than_items -= 1

        if len(self.less_than) > len(self.greater_than):
            # Odd total: the median is the largest element of the lower half.
            self.median = self.less_than[-1]
        else:
            self.median = (self.less_than[-1] + self.greater_than[0]) / 2.

    def remove(self, values):
        """Remove every value in ``values`` and refresh the median."""
        self._update(values, SortedList.remove, operator.sub)

    def add(self, values):
        """Insert every value in ``values`` and refresh the median."""
        self._update(values, SortedList.add, operator.add)
def create_initial_solutions(self):
    """
    Creates the initial list of solutions of a metaheuristic.

    Every clonal selection creates and evaluates its own initial solutions;
    the evaluated solutions of all of them are gathered into one list sorted
    by descending ``x[1]``.

    Returns:
        SortedList with the evaluated solutions of all clonal selections.
    """
    solution = SortedList([], key=lambda x: -x[1])
    for cs in self.clonal_selections:
        cs.solutions = cs.create_initial_solutions()
        cs.solutions = cs.evaluate(cs.solutions)
        # Collect what evaluate() returned, not the pre-evaluation list:
        # the sort key reads x[1] of each collected element.
        solution.update(cs.solutions)
    return solution
def _search_in_btrees(self, token: str) -> list:
    """Collect the documents stored under the ``*``-delimited prefixes of ``token``.

    Repeatedly cuts the token at its first ``'*'``, looks the text before it up
    in ``self.straight_btree`` and merges whatever ``get`` returns into one
    sorted collection.

    Returns the merged documents as a plain list in sorted order.

    Raises ``ValueError`` (from ``str.index``) when the loop is entered and the
    remaining text contains no ``'*'``.
    """
    sub_token = token
    wildcard_pos = -1
    results = SortedList()
    # NOTE(review): this condition slices the *current* remainder with the
    # wildcard position found in the *previous* remainder, so whether another
    # round runs depends on an unrelated offset. Confirm this is intended.
    while sub_token[wildcard_pos + 1:]:
        wildcard_pos = sub_token.index('*')
        # NOTE(review): assumes get() returns an iterable; a None result would
        # make update() fail. Verify against the btree implementation.
        docs = self.straight_btree.get(sub_token[:wildcard_pos])
        results.update(docs)
        # Continue with the text after the wildcard just handled.
        sub_token = sub_token[wildcard_pos + 1:]
    return list(results)
def intersect(self, other):
    """Return True when the merged event stream has both sides active at once.

    Events of ``self`` are merged with the events of ``other``, whose type is
    doubled so the two sources can be told apart by parity. Walking the merged
    events in sorted order, a running total is kept per parity; the answer is
    True as soon as both totals are nonzero together.
    """
    merged = SortedList()
    merged.update(self.times)  # work on a copy of our own events
    for moment, kind in other.times:
        merged.add((moment, 2 * kind))

    # balance[1] accumulates odd types, balance[0] the doubled (even) ones.
    balance = [0, 0]
    for _, kind in merged:
        balance[kind % 2] += kind
        if balance[0] != 0 and balance[1] != 0:
            return True
    return False
def test_update():
    """``update`` adds whole batches while keeping order and internal invariants."""
    slt = SortedList()

    slt.update(range(1000))
    assert all(actual == wanted for actual, wanted in zip(slt, list(range(1000))))
    assert len(slt) == 1000
    slt._check()

    # A much larger second batch on top of the first one.
    slt.update(range(10000))
    assert len(slt) == 11000
    slt._check()
def fast_generator(rotors: list):
    """Yield the intersection pairs found by a sweep over the rotors' semicircles.

    Every rotor centre contributes a left and a right ``SemiCircle`` plus two
    event points, one unit above and one unit below the centre. Events are
    consumed from a heap; ``refine_intersections`` is called after each one and
    is handed the pair/intersection sets, the status list and the event heap.
    Because this is a generator, the whole sweep runs on the first ``next()``;
    the collected pairs are yielded afterwards.

    ``rotors`` is indexed with ``[0]`` and ``[1]``, so each entry is a centre
    with at least two coordinates.
    """
    from sortedcontainers import SortedList
    intersection_pairs = set()
    intersections_set = set()
    status_array = SortedList()
    event_points: list = []
    for rotor_center in rotors:
        # Both event points of a rotor share the same pair of semicircles.
        affiliations: list = [
            SemiCircle(circle_center=rotor_center, side=side)
            for side in [SemiCircleSide.left, SemiCircleSide.right]
        ]
        heapq.heappush(
            event_points,
            EventPoint(coordinates=(rotor_center[0], rotor_center[1] + 1),
                       affiliations=affiliations,
                       event_type=EventPointType.upper))
        heapq.heappush(
            event_points,
            EventPoint(coordinates=(rotor_center[0], rotor_center[1] - 1),
                       affiliations=affiliations,
                       event_type=EventPointType.bottom))
    while event_points:
        next_event_point: EventPoint = heapq.heappop(event_points)
        # Publish the sweep position before touching the status list.
        # NOTE(review): presumably SemiCircle ordering reads this shared value;
        # confirm against SemiCircle's comparison methods.
        shared.sweep_line_progress = next_event_point.coordinates[1]
        if next_event_point.event_type == EventPointType.upper:
            status_array.update(next_event_point.affiliations)
        if next_event_point.event_type == EventPointType.intersection:
            # Take both semicircles out and put them back in.
            # NOTE(review): presumably this re-sorts them at the new sweep
            # position; confirm.
            status_array.discard(next_event_point.affiliations[0])
            status_array.discard(next_event_point.affiliations[1])
            status_array.update(next_event_point.affiliations)
        # Must be looked up before a bottom event removes its semicircles.
        left_semi_circle_position: int = status_array.index(
            min(next_event_point.affiliations))
        # assert max(next_event_point.affiliations) == status_array[left_semi_circle_position + (-1) ** (next_event_point.event_type == EventPointType.intersection)]
        if next_event_point.event_type == EventPointType.bottom:
            status_array.discard(next_event_point.affiliations[0])
            status_array.discard(next_event_point.affiliations[1])
        refine_intersections(
            intersection_pairs=intersection_pairs,
            intersections_set=intersections_set,
            status_array=status_array,
            event_points=event_points,
            left_semi_circle_position=left_semi_circle_position,
            deletion=next_event_point.event_type == EventPointType.bottom)
    for pair in intersection_pairs:
        yield pair
def test_contains():
    """Membership is False on an empty list and exact on a populated one."""
    limit = 10000
    slt = SortedList()
    assert 0 not in slt

    slt.update(range(limit))
    assert all(val in slt for val in range(limit))
    # The first value past the stored range is absent.
    assert limit not in slt
    slt._check()
def get_subscribed_articles_list(search_subscriptions, topic_subscriptions):
    """Return the articles matching a user's topic and search subscriptions.

    Without any subscription, a generator over up to 10000 articles ordered by
    descending ``published_time`` is returned instead. Otherwise the result is
    a ``SortedList`` ordered by descending article ``id`` that holds every
    article of each subscribed topic and of each subscribed search.
    """
    if not (topic_subscriptions or search_subscriptions):
        latest = Article.query.order_by(Article.published_time.desc()).limit(10000)
        return (article for article in latest)

    subscribed_articles = SortedList(key=lambda article: -article.id)
    for topic_sub in topic_subscriptions:
        subscribed_articles.update(topic_sub.topic.all_articles())
    for search_sub in search_subscriptions:
        keywords = search_sub.search.keywords
        subscribed_articles.update(get_articles_for_search_term(keywords))
    return subscribed_articles
def maxSlidingWindow(self, nums: List[int], k: int) -> List[int]:
    """Return the maximum of every length-``k`` window of ``nums``, left to right.

    Uses a monotonic deque of indices whose values are strictly decreasing, so
    the front is always the maximum of the current window. Every index is
    pushed and popped at most once, which makes the scan linear and removes
    the need for a sorted container.

    Returns an empty list when ``nums`` has fewer than ``k`` elements.

    Raises:
        ValueError: if ``k`` is not positive.
    """
    from collections import deque

    if k <= 0:
        raise ValueError("k must be positive")

    candidates = deque()  # indices into nums, values strictly decreasing
    result = []
    for i, value in enumerate(nums):
        # Anything not larger than the newcomer can never be a maximum again.
        while candidates and nums[candidates[-1]] <= value:
            candidates.pop()
        candidates.append(i)
        # Drop the front once it has slid out of the window ending at i.
        if candidates[0] <= i - k:
            candidates.popleft()
        # The first complete window ends at index k - 1.
        if i >= k - 1:
            result.append(nums[candidates[0]])
    return result
def bentley_ottmann(segments):
    """
    Bentley-Ottmann style sweep returning the set of intersection points.

    Every segment contributes a BEGIN event at ``min(*seg)`` and an END event
    at ``max(*seg)``. Intersection points are rounded to 15 decimals before
    being stored.

    NOTE(review): END events are created without a segments argument, so the
    removal loop only has an effect if ``Event`` supplies segments by default.
    Confirm against the ``Event`` definition.
    """
    # queue of events; ``segments`` is deliberately traversed twice
    events = SortedList()
    events.update(
        Event(min(*s), EventType.BEGIN, (s, )) for s in segments)
    events.update(Event(max(*s), EventType.END) for s in segments)

    # segments currently crossed by the sweep line
    status = SortedList()
    # intersection points found so far
    found = set()

    while events:
        event = events.pop(0)
        kind = event.type
        if kind == EventType.BEGIN:
            # Test the starting segments against the active ones first ...
            for starting in event.segments:
                for active in status:
                    hit = intersection(*starting,
                                       *active,
                                       restriction_1='segment',
                                       restriction_2='segment')
                    if hit is None:
                        continue
                    hit = (round(hit[0], 15), round(hit[1], 15))
                    if hit in found:
                        continue
                    found.add(hit)
                    events.add(Event(hit, EventType.INTERSECTION))
            # ... and only then make them active themselves.
            for starting in event.segments:
                status.add(starting)
        elif kind == EventType.END:
            for ending in event.segments:
                status.remove(ending)
        # INTERSECTION events need no handling.

    return found
def bentley_ottmann_generator(segments):
    """
    Bentley-Ottmann style sweep as a generator that yields animation frames.

    Before each event is handled a frame ``(pending event points,
    intersections found so far, first coordinate of the next event point)`` is
    yielded. A closing frame ``([], intersections, 1000)`` is yielded once the
    queue is empty.

    NOTE(review): END events are created without a segments argument, so the
    removal loop only has an effect if ``Event`` supplies segments by default.
    Confirm against the ``Event`` definition.
    """
    # queue of events; ``segments`` is deliberately traversed twice
    events = SortedList()
    events.update(
        Event(min(*s), EventType.BEGIN, (s, )) for s in segments)
    events.update(Event(max(*s), EventType.END) for s in segments)

    # segments currently crossed by the sweep line
    status = SortedList()
    # intersection points found so far
    found = set()

    while events:
        pending_points = [pending.point for pending in events]
        yield pending_points, list(found), events[0].point[0]

        event = events.pop(0)
        kind = event.type
        if kind == EventType.BEGIN:
            # Test the starting segments against the active ones first ...
            for starting in event.segments:
                for active in status:
                    hit = intersection(*starting,
                                       *active,
                                       restriction_1='segment',
                                       restriction_2='segment')
                    if hit is None:
                        continue
                    hit = (round(hit[0], 15), round(hit[1], 15))
                    if hit in found:
                        continue
                    found.add(hit)
                    events.add(Event(hit, EventType.INTERSECTION))
            # ... and only then make them active themselves.
            for starting in event.segments:
                status.add(starting)
        elif kind == EventType.END:
            for ending in event.segments:
                status.remove(ending)
        # INTERSECTION events need no handling.

    yield [], list(found), 1000
def _merge_subtrees(
        left_tree: SortedList, right_tree: SortedList,
        max_size: int) -> Tuple[SortedList, Optional[SortedList]]:
    """
    Combine the elements of two trees into one larger tree, splitting them
    again if the larger tree exceeds a given size

    Both trees are modified in place. The largest element of ``left_tree`` and
    the smallest element of ``right_tree`` are read by position (``[-1]`` and
    ``[0]``) rather than with ``max``/``min``, which would rescan the whole
    tree on every step of the rebalancing loop.

    :param left_tree: Tree containing smaller elements
    :param right_tree: Tree containing larger elements
    :param max_size: Maximum size the combined tree can be before splitting
    :return: The combined tree and None if both trees' elements fit within
        max_size, the tree with the smaller elements and the tree with the
        larger elements otherwise
    """
    combined_size = len(left_tree) + len(right_tree)
    if combined_size <= max_size:
        left_tree.update(right_tree)
        return left_tree, None

    # Locate the lower median of the two trees viewed as one sorted sequence.
    median_index = combined_size // 2 - 1
    if median_index < len(left_tree):
        median_value = left_tree[median_index]
    else:
        median_value = right_tree[median_index - len(left_tree)]

    # Taking advantage of the fact the number of elements in a subtree can only
    # be twice the bit length of the maximum element before splitting
    median_rep = YFastTrie._calculate_representative(
        median_value, max_size // 2)

    if median_rep <= left_tree[-1]:
        # Elements above the representative move from the top of the left tree.
        from_tree, to_tree, side = left_tree, right_tree, -1
    else:
        # Elements up to the representative move from the bottom of the right tree.
        from_tree, to_tree, side = right_tree, left_tree, 0

    while left_tree[-1] > median_rep or right_tree[0] <= median_rep:
        to_tree.add(from_tree.pop(side))

    return left_tree, right_tree
class SortedCollection:
    """Thin wrapper around a ``SortedList`` exposing its smallest element first."""

    def __init__(self):
        self.items = SortedList()

    def get_len(self):
        """Return the number of stored items."""
        return len(self.items)

    def _require_items(self):
        """Raise ``EmptyCollectionException`` when nothing is stored."""
        if not len(self.items):
            raise EmptyCollectionException

    def peek(self):
        """Return the first item without removing it."""
        self._require_items()
        return self.items[0]

    def pop(self):
        """Remove and return the first item."""
        self._require_items()
        return self.items.pop(0)

    def add(self, l):
        """Insert every element of the iterable ``l``; prints the types involved."""
        container_type = str(type(self.items))
        argument_type = str(type(l))
        print("Type info: " + container_type + argument_type)
        self.items.update(l)
def step(self) -> None:
    """
    Performs one iteration/step of the algorithm's loop.

    Each clonal selection advances by one step. Then, for every population,
    its ``mixes_number`` highest-ranked partners are each mixed with it with
    probability ``mix_rate`` and the two ranking entries are refreshed from
    the returned affinities. Finally the solutions of all clonal selections
    are gathered into ``self.solutions`` (sorted by descending ``x[1]``) and
    the history is updated.
    """
    for cs in self.clonal_selections:
        cs.step()

    for i in range(self.number_of_populations):
        # Snapshot of the partners, best-ranked first.
        by_rank = sorted(self.ranking[i].items(),
                         key=lambda entry: entry[1],
                         reverse=True)
        for j, _ in by_rank[:self.mixes_number]:
            # A population never mixes with itself; the random draw is only
            # made for a real partner.
            if i != j and random.random() < self.mix_rate:
                affinity_i, affinity_j = self.mix(self.clonal_selections[i],
                                                  self.clonal_selections[j])
                self.ranking[i][j] = affinity_j
                self.ranking[j][i] = affinity_i

    gathered = SortedList([], key=lambda x: -x[1])
    for cs in self.clonal_selections:
        gathered.update(cs.solutions)
    self.solutions = gathered
    self.update_history()
def update_segment(self, segment: SkylineSegment, y: int,
                   item: Item) -> List[SkylineSegment]:
    """
    Clips the skyline under the new item and returns the updated segments.

    When the waste map is enabled, the position of ``segment`` in the current
    skyline is recorded together with ``item`` and ``y`` first. Every skyline
    segment is then clipped against ``item``, and a new segment is added on
    top of the item when the item's top is still below ``self.height``.
    """
    if self.use_waste_map:
        self.add_to_wastemap(self.skyline.index(segment), item, y)

    new_segments = SortedList([])
    for existing in self.skyline:
        new_segments.update(self.clip_segment(existing, item))

    # Create new segment if room above item
    if item.height + item.y < self.height:
        item_top = item.y + item.height
        new_segments.add(SkylineSegment(segment.x, item_top, item.width))

    return new_segments
def test_irange():
    """Check ``irange`` over every bound pair and every inclusivity combination."""
    size = 53
    sl = SortedList()
    sl._reset(7)
    # An empty list yields nothing.
    assert list(sl.irange()) == []

    values = list(range(size))
    sl.update(values)

    for lo in range(size):
        for hi in range(lo, size):
            closed = values[lo:(hi + 1)]
            # Default bounds include both ends.
            assert list(sl.irange(lo, hi)) == closed
            assert list(sl.irange(lo, hi, reverse=True)) == closed[::-1]
            # The third argument selects (include_min, include_max).
            assert list(sl.irange(lo, hi, (True, False))) == list(range(lo, hi))
            assert list(sl.irange(lo, hi, (False, True))) == list(range(lo + 1, hi + 1))
            assert list(sl.irange(lo, hi, (False, False))) == list(range(lo + 1, hi))

    # Open-ended ranges: only a minimum, then only a maximum.
    for lo in range(size):
        assert list(sl.irange(lo)) == list(range(lo, size))
    for hi in range(size):
        assert list(sl.irange(None, hi, (True, False))) == list(range(0, hi))

    # Exclusive flags have no effect on a side that has no bound.
    assert list(sl.irange(inclusive=(False, False))) == values
    # Bounds beyond the stored values.
    assert list(sl.irange(size)) == []
    assert list(sl.irange(None, size, (True, False))) == values
class Rational:
    """Fraction stored as sorted factor lists for numerator and denominator."""

    def __init__(self):
        self.numerator = SortedList()
        self.denominator = SortedList()

    def multiply_by(self, f):
        """Append the factors ``primefac.primefac(f)`` yields to the numerator."""
        self.numerator.update(primefac.primefac(f))

    def divide_by(self, d):
        """Append the factors ``primefac.primefac(d)`` yields to the denominator."""
        self.denominator.update(primefac.primefac(d))

    def value(self):
        """Cancel common factors and return the quotient, capped at 1.

        Returns None when either factor list is empty and 0 when either
        product is not positive. The factor lists are modified: cancelled
        factors are deleted and a factor 1 is added to both on every call.
        """
        num = self.numerator
        den = self.denominator
        if not len(num) or not len(den):
            return None

        # Walk both sorted lists in step, dropping factors present in both.
        i = 0
        j = 0
        while i < len(num) and j < len(den):
            top = num[i]
            bottom = den[j]
            if top == bottom:
                del num[i]
                del den[j]
            elif top < bottom:
                i += 1
            else:
                j += 1

        # Guarantee that both products have at least one factor.
        num.add(1)
        den.add(1)
        num_product = reduce(lambda x, y: mpfr(x) * y, num)
        den_product = reduce(lambda x, y: mpfr(x) * y, den)
        if num_product <= 0 or den_product <= 0:
            return 0

        val = num_product / den_product
        return 1 if val > 1 else val
def test_SortedList(self):
    """Walk through the basic ``SortedList`` operations, printing intermediate states."""
    # construction, with and without initial values
    SortedList([1, 2, 3, 4])
    sorted_list = SortedList()

    # add single elements in descending order
    for value in (5, 4, 3, 2, 1):
        sorted_list.add(value)

    # add several elements at once with update()
    sorted_list.update([10, 9, 8, 7, 6])

    # the list prints in sorted order
    print('list after updating: ', sorted_list)

    # remove one element by value
    sorted_list.discard(8)

    # remove everything
    sorted_list.clear()
    print('list after removing all elements using clear: ', sorted_list)
class ListRouter(Router):
    """Router that tries its handlers one by one, in the order given by ``self._key``."""

    __slots__ = ("_handlers", )

    def __init__(self, priority=0):
        super().__init__(priority)
        self._handlers = SortedList(key=self._key)

    def add_handler(self, handler):
        """Register ``handler`` at its sorted position."""
        self._handlers.add(handler)

    def merge(self, other_router):
        """Take over all handlers of ``other_router`` after the compatibility check."""
        self._assert_routers_can_merge(other_router)
        self._handlers.update(other_router._handlers)

    async def handle(self, update, ctx):
        """Offer ``update`` to the handlers until one of them does not skip it.

        Returns ``hr.COMPLETE`` as soon as a handler returns anything other
        than ``hr.SKIPPED``; returns ``hr.SKIPPED`` when the router's own check
        rejects the update or every handler skipped it.
        """
        if self._check_update(update, ctx):
            for handler in self._handlers:
                outcome = await handler.handle(update, ctx)
                if outcome != hr.SKIPPED:
                    return hr.COMPLETE
        return hr.SKIPPED
class LeafNode(Node):
    """Leaf holding a sorted collection of unique records."""

    def __init__(self, records=None):
        """Create the leaf from an optional iterable of records.

        ``None`` (the default) creates an empty leaf; a mutable ``[]`` default
        is avoided so that no list object is shared between calls.
        """
        self.records = SortedList(() if records is None else records)

    def __contains__(self, item):
        return item in self.records

    def __len__(self):
        return len(self.records)

    def add(self, items):
        """Add one record, or a list of records, dropping duplicates.

        Anything that is not a ``list`` is treated as a single record.
        """
        if not isinstance(items, list):
            items = [items]
        self.records.update(items)
        # Passing through a SortedSet removes duplicate records.
        self.records = SortedList(SortedSet(self.records))

    def split(self):
        """Split the leaf in two and return ``(left_node, right_node, median)``.

        ``median`` is the record at index ``len // 2 - 1``; the left node gets
        the records before that index and the right node the median and
        everything after it.

        NOTE(review): with 2 or 3 records the split index is 0, so the left
        node comes out empty. Confirm this is the intended split point.
        """
        assert (len(self.records) > 1)
        idx = len(self.records) // 2 - 1
        median = self.records[idx]
        left_node = BTree.LeafNode(self.records[:idx])
        right_node = BTree.LeafNode(self.records[idx:])
        return left_node, right_node, median
def test_islice():
    """``islice`` must agree with plain list slicing, forwards and reversed."""
    size = 53
    sl = SortedList(load=7)
    # An empty list yields nothing.
    assert list(sl.islice()) == []

    values = list(range(size))
    sl.update(values)

    # Both bounds given, including pairs where stop precedes start.
    for start in range(size):
        for stop in range(size):
            expected = values[start:stop]
            assert list(sl.islice(start, stop)) == expected
            assert list(sl.islice(start, stop, reverse=True)) == expected[::-1]

    # Only a start.
    for start in range(size):
        tail = values[start:]
        assert list(sl.islice(start=start)) == tail
        assert list(sl.islice(start=start, reverse=True)) == tail[::-1]

    # Only a stop.
    for stop in range(size):
        head = values[:stop]
        assert list(sl.islice(stop=stop)) == head
        assert list(sl.islice(stop=stop, reverse=True)) == head[::-1]
def ok(t):
    """Check whether the first ``t`` tasks can be covered by the last ``t`` workers.

    Tasks are handled from the end of the slice backwards. If the strongest
    remaining worker can do the task unaided, that worker takes it. Otherwise
    a pill (adding ``strength``) goes to the weakest worker who can do the
    task with it. Returns False when a task cannot be done even with a pill or
    when the pills run out.

    NOTE(review): presumably ``tasks`` and ``workers`` are sorted ascending in
    the enclosing scope; verify at the call site.
    """
    chosen_tasks = tasks[:t]
    pool = SortedList()
    pool.update(workers[-t:])
    pills_left = pills

    for task in reversed(chosen_tasks):
        strongest = pool[-1]
        if strongest >= task:
            pool.pop()
            continue
        if strongest + strength < task:
            return False
        # Weakest worker who reaches the task once boosted.
        del pool[pool.bisect_left(task - strength)]
        pills_left -= 1
        if pills_left < 0:
            return False
    return True
def main():
    """Download the pending files in parallel and record the newest timestamp.

    The files are split into one group per worker thread (at most
    ``MAX_CONNECTIONS``). Successful and failed downloads are collected in
    lists ordered by file-name or modification timestamp, depending on
    ``USE_FILENAME_TS``. Unless ``FETCH_NMIN`` is set, the timestamp of the
    newest successful download is stored; when nothing succeeded, the stored
    timestamp is left untouched instead of failing on an empty list.
    """
    files = find_files_to_dwnld(FETCH_NMIN, LAST_NMINUTES)
    if not files:
        print('No files to download.')
        return

    num_files = len(files)
    print('Number Of Files:', num_files)
    num_threads = min(num_files, MAX_CONNECTIONS)
    files_grp = create_file_group(files, num_threads)

    def sort_ts(fi):
        # One ordering key shared by both result lists and the final lookup.
        return fi.file_name_ts if USE_FILENAME_TS else fi.file_mod_ts

    succ_dwnld = SortedList(key=sort_ts)
    failed_dwnld = SortedList(key=sort_ts)

    with ThreadPoolExecutor(max_workers=num_threads) as executor:
        futures = [
            executor.submit(download_files,
                            files_grp[i],
                            LOCAL_CUST_DIR,
                            threadname=str(i))
            for i in range(num_threads)
        ]
        for fut in futures:
            suc, fail = fut.result()
            succ_dwnld.update(suc)
            failed_dwnld.update(fail)

    print(
        f'Successful Downloads: {len(succ_dwnld)}, Failed_download: {len(failed_dwnld)}.'
    )

    # store last timestamp only if FETCH_NMIN is set to false
    if not FETCH_NMIN:
        if succ_dwnld:
            set_last_stored_ts(sort_ts(succ_dwnld[-1]))
        else:
            print('No successful downloads; last stored timestamp left unchanged.')
class Population:
    """Population of organisms kept in sorted order and capped in size."""

    def __init__(self, population_size, environment, fitness_func):
        """Create ``population_size`` organisms and score each with ``fitness_func``."""
        self.max_population_size = population_size
        self.environment = environment
        self.fitness_func = fitness_func
        initial_population = [
            PolygonOrganism(GENOME_SIZE, environment)
            for _ in range(population_size)
        ]
        # Fitness is assigned before sorting.
        # NOTE(review): presumably organisms compare by fitness; confirm
        # against PolygonOrganism.
        for organism in initial_population:
            organism.fitness = self.fitness_func(organism)
        self.population = SortedList(initial_population)

    def evolve(self):
        """Do one epoch of evolution and return the fittest organism.

        The fittest organism is taken to be the first one in sort order, since
        ``decimate`` discards from the end of the list.
        NOTE(review): confirm that ``PolygonOrganism`` orders best-first.
        Returns None when the population is empty.
        """
        offspring = self.procreate()
        self.population.update(offspring)
        self.decimate()
        return self.population[0] if self.population else None

    def procreate(self):
        """Mate organism ``i`` with organism ``i + NUM_REPRODUCTIONS`` and return the children.

        Each child gets its fitness assigned; the children are not added to
        the population here.
        """
        offspring = []
        for i in range(NUM_REPRODUCTIONS):
            left_parent = self.population[i]
            right_parent = self.population[i + NUM_REPRODUCTIONS]
            child = left_parent.mate(right_parent)
            child.fitness = self.fitness_func(child)
            offspring.append(child)
        return offspring

    def decimate(self):
        """Drop organisms from the end of the list until the size cap is met."""
        while self.max_population_size < len(self.population):
            self.population.pop()
class Rational:
    """Fraction stored as sorted factor lists for numerator and denominator."""

    def __init__(self):
        self.numerator = SortedList()
        self.denominator = SortedList()

    def multiply_by(self, f):
        """Append the factors ``primefac.primefac(f)`` yields to the numerator."""
        self.numerator.update(primefac.primefac(f))

    def divide_by(self, d):
        """Append the factors ``primefac.primefac(d)`` yields to the denominator."""
        self.denominator.update(primefac.primefac(d))

    def value(self):
        """Cancel common factors and return the quotient, capped at 1.

        Returns None when either factor list is empty and 0 when either
        product is not positive. The factor lists are modified: cancelled
        factors are deleted and a factor 1 is added to both on every call.
        """
        upper = self.numerator
        lower = self.denominator
        if not len(upper) or not len(lower):
            return None

        # Merge-style walk over both sorted lists; equal factors cancel out.
        up_pos = 0
        low_pos = 0
        while up_pos < len(upper) and low_pos < len(lower):
            up_factor = upper[up_pos]
            low_factor = lower[low_pos]
            if up_factor == low_factor:
                del upper[up_pos]
                del lower[low_pos]
            elif up_factor < low_factor:
                up_pos += 1
            else:
                low_pos += 1

        # Guarantee that both products have at least one factor.
        upper.add(1)
        lower.add(1)
        num_product = reduce(lambda x, y: mpfr(x)*y, upper)
        den_product = reduce(lambda x, y: mpfr(x)*y, lower)
        if num_product <= 0 or den_product <= 0:
            return 0

        val = num_product/den_product
        if val > 1:
            return 1
        return val
class TimeRange:
    """Set of time intervals stored as sorted ``(minute, type)`` events.

    A start is recorded with type ``-1`` and an end with type ``1``.
    """

    @staticmethod
    def __parse_time(timerange):
        """Convert ``'HH:MM?HH:MM'`` into ``(start, end)`` minutes since midnight.

        Only fixed character positions are read; the separator at index 5 is
        skipped without being checked.
        """
        def to_minutes(clock):
            return int(clock[:2]) * 60 + int(clock[3:5])

        return to_minutes(timerange), to_minutes(timerange[6:])

    def __init__(self, times):
        """Build the event list from an iterable of time-range strings."""
        self.times = SortedList()
        for entry in times:
            start, end = self.__parse_time(entry)
            self.times.add((start, -1))
            self.times.add((end, 1))

    def intersect(self, other):
        """Return True when an interval of ``self`` and one of ``other`` are open together.

        The events of ``other`` are merged in with doubled type so that parity
        tells the two sources apart. At equal minutes starts sort before ends,
        so intervals that merely touch count as intersecting.
        """
        merged = SortedList()
        merged.update(self.times)  # work on a copy of our own events
        for moment, kind in other.times:
            merged.add((moment, 2 * kind))

        # balance[1] tracks our own intervals, balance[0] those of ``other``.
        balance = [0, 0]
        for _, kind in merged:
            balance[kind % 2] += kind
            if balance[0] != 0 and balance[1] != 0:
                return True
        return False
def test_update():
    """Successive ``update`` calls accumulate and leave the list fully sorted."""
    slt = SortedList()

    expected_len = 0
    for batch in (1000, 100, 10000):
        slt.update(range(batch))
        expected_len += batch
        assert len(slt) == expected_len
        slt._check()

    # The content must equal the sorted concatenation of all batches.
    values = sorted(chain(range(1000), range(100), range(10000)))
    assert all(actual == wanted for actual, wanted in zip(slt, values))
class SCEngine:
    '''
    Fast tree-based implementation for indexing, using the
    ``sortedcontainers`` package.

    Parameters
    ----------
    data : Table
        Sorted columns of the original table
    row_index : Column object
        Row numbers corresponding to data columns
    unique : bool (defaults to False)
        Whether the values of the index must be unique
    '''

    def __init__(self, data, row_index, unique=False):
        node_keys = map(tuple, data)
        self._nodes = SortedList(starmap(Node, zip(node_keys, row_index)))
        self._unique = unique

    def add(self, key, value):
        '''
        Add a key, value pair.

        Raises
        ------
        ValueError
            If the index is unique and ``key`` is already present.
        '''
        if self._unique and (key in self._nodes):
            # ``!r`` is a conversion and goes before any ':'; the previous
            # spelling '{0:!r}' made format() itself fail.
            message = 'duplicate {0!r} in unique index'.format(key)
            raise ValueError(message)
        self._nodes.add(Node(key, value))

    def find(self, key):
        '''
        Find rows corresponding to the given key.
        '''
        return [node.value for node in self._nodes.irange(key, key)]

    def remove(self, key, data=None):
        '''
        Remove data from the given key.

        With ``data`` given, only the node ``(key, data)`` is removed.
        Otherwise every node with that key is removed. Returns whether
        anything was removed.
        '''
        if data is not None:
            item = Node(key, data)
            try:
                self._nodes.remove(item)
            except ValueError:
                return False
            return True
        # Materialize first: nodes cannot be removed while iterating irange.
        items = list(self._nodes.irange(key, key))
        for item in items:
            self._nodes.remove(item)
        return bool(items)

    def shift_left(self, row):
        '''
        Decrement rows larger than the given row.
        '''
        for node in self._nodes:
            if node.value > row:
                node.value -= 1

    def shift_right(self, row):
        '''
        Increment rows greater than or equal to the given row.
        '''
        for node in self._nodes:
            if node.value >= row:
                node.value += 1

    def items(self):
        '''
        Return key, data pairs, with the rows of equal keys grouped in a list.
        '''
        result = OrderedDict()
        for node in self._nodes:
            result.setdefault(node.key, []).append(node.value)
        return result.items()

    def sort(self):
        '''
        Make row order align with key order.
        '''
        for index, node in enumerate(self._nodes):
            node.value = index

    def sorted_data(self):
        '''
        Return a list of rows in order sorted by key.
        '''
        return [node.value for node in self._nodes]

    def range(self, lower, upper, bounds=(True, True)):
        '''
        Return row values in the given range.
        '''
        iterator = self._nodes.irange(lower, upper, bounds)
        return [node.value for node in iterator]

    def replace_rows(self, row_map):
        '''
        Replace rows with the values in row_map.

        Nodes whose row is not a key of ``row_map`` are dropped.
        '''
        nodes = [node for node in self._nodes if node.value in row_map]
        for node in nodes:
            node.value = row_map[node.value]
        # Rebuild the container from the surviving, renumbered nodes.
        self._nodes.clear()
        self._nodes.update(nodes)

    def __repr__(self):
        return '{0!r}'.format(list(self._nodes))
from sortedcontainers import SortedList

# SortedList demo: the container keeps its elements in sorted order.
# http://www.grantjenks.com/docs/sortedcontainers/introduction.html#sorted-list
# Elements here are (value, "key") tuples, so they sort by value first.
# NOTE(review): the original notes give insertion as O(N) and access as O(1);
# check these figures against the sortedcontainers documentation.

# Start with several (value, key) pairs at once ...
sorted_data = SortedList([(100, "MSFT"), (400, "APPL"), (200, "GOOGL")])

# ... then add a single pair.
sorted_data.add((300, "AMZN"))

print(sorted_data)


def first_n_traded_stock(sorted_list, n):
    """Print the last ``n`` entries of ``sorted_list``, largest first."""
    print(sorted_list[:-(n + 1):-1])


first_n_traded_stock(sorted_data, 3)

# A heapq-based alternative also exists (noted as O(NlogN)):
# https://stackoverflow.com/a/38833175/9209546
import heapq
from operator import itemgetter
class Analyse():
    '''
    Collects and aggregates gap-trading statistics for a list of stocks.

    The constructor copies its settings out of ``config`` and builds the
    ``dataStore`` that supplies the price rows.  ``getGapStats`` produces
    one statistics dict per (stock, day, hold period) and ``compileResults``
    folds those into per-hold count/profit matrices.

    NOTE(review): ``dataStore``, ``debug``, ``minInDay``, ``grandDict``,
    ``bucketTradeListGlobal`` and ``processStatMatrix`` are used but not
    defined in this class; they are assumed to be module-level names.
    NOTE(review): price rows are assumed to hold the timestamp in column 0
    and Open, Close, Low, High, Volume in columns 5..9 (per the comment in
    ``getVolAvgPrice``) -- confirm against the data loader.
    '''

    def __init__(self, config):
        '''
        Read the run configuration and build the price data store.

        Args:
            config: mapping indexed with the upper-case keys read below.
                ``config['STOCK_LIST']`` is temporarily replaced by the
                hedge-inclusive list while ``dataStore`` is constructed and
                restored afterwards.
        '''
        self.year = config['YEAR']
        self.startTime = config['START_TIME']
        self.endTime = config['END_TIME']
        self.stockList = config['STOCK_LIST']
        self.mode = config['MODE']
        self.logBucket = config['LOG_BUCKET_DATA']
        self.hedgeFlag = config['HEDGE']
        self.hedgeStock = config['HEDGE_STOCK']
        self.divideByVol = config['DIVIDE_BY_VOLATILITY']

        # With hedging on, the benchmark is analysed first, ahead of the stocks.
        self.modStockList = self.stockList
        if (self.hedgeFlag):
            self.betaCorrelation = config['BETA_CORR']
            self.modStockList = [config['HEDGE_STOCK']] + self.stockList
            self.corrFlag = config['BETA_CORR_TYPE']

        if (self.mode == 'bucket'):
            self.bucketSize = config['BUCKET_SIZE']
            self.numBucket = config['NUM_BUCKET']
        elif (self.mode == 'percentile'):
            self.bucketSize = config['BUCKET_SIZE']
            self.minSize = config['MIN_SIZE']
            self.maxSize = config['MAX_SIZE']

        # getGapStats() reads absFlag in every mode, so it must always exist;
        # it defaults to False when the configuration does not provide it.
        self.absFlag = config['ABS_FLAG'] if 'ABS_FLAG' in config else False

        config['STOCK_LIST'] = self.modStockList
        # Datastore contains functions to read and update prices
        self.dataStore = dataStore(config)
        config['STOCK_LIST'] = self.stockList

        # Class members containing relevant Statistics
        # self.results: Dictionary containing stock names as keys
        #               Maps to a list of lists, where each list member
        #               contains gapSize, timeStamp, Open/Close prices
        #               along with holding periods, etc
        self.results = {}
        self.gapListNormalized = []
        self.prevCloseVWAPWindow = config['VWAP_PREV_CLOSE_WINDOW']
        self.currOpenVWAPWindow = config['VWAP_CURR_OPEN_WINDOW']
        self.posEntryVWAPWindow = config['VWAP_POSITION_ENTRY_WINDOW']
        self.posExitVWAPWindow = config['VWAP_POSITION_EXIT_WINDOW']
        self.printFlag = 0
        self.stopLoss = config['STOP_LOSS']
        self.targetPrice = config['TARGET_PRICE']
        self.tTestFlag = config['T_TEST_FLAG']

        # Always present so getProfitGapPercentile() cannot fail; the 100
        # percentile slots are only created when the t-test is enabled.
        self.profitByGapPercentile = {}
        if (self.tTestFlag):
            for i in range(0, 100):
                self.profitByGapPercentile[i] = []
        self.stockReturns = {}

    def loadData(self):
        '''
        Loads price data for the specified year and stock list and caches
        the row-to-row returns of column 6 for every stock (and for the
        hedge stock when hedging is enabled).

        Returns:
            None, only class members are modified
        '''
        self.dataStore.loadPriceData()
        for stock in self.stockList:
            price = pd.DataFrame(
                self.dataStore.priceDataList[stock][:]).iloc[:, 6]
            # First element is dropped: it has no predecessor to compare to.
            returns = ((price / price.shift(1)) - 1)[1:]
            self.stockReturns[stock] = returns
        if (self.hedgeFlag):
            price = pd.DataFrame(
                self.dataStore.priceDataList[self.hedgeStock][:]).iloc[:, 6]
            returns = ((price / price.shift(1)) - 1)[1:]
            self.stockReturns[self.hedgeStock] = returns

    def getRetList(self, stock):
        '''
        Return the returns of column 6 sampled every ``minInDay`` rows.
        '''
        price = pd.DataFrame(
            self.dataStore.priceDataList[stock][::minInDay]).iloc[:, 6]
        price = ((price / price.shift(1)) - 1)[1:]
        return price

    def getBenchmarkVolatility(self):
        '''
        Return the returns of the hedge stock sampled every ``minInDay`` rows.

        NOTE(review): ``self.hedgePriceList`` is not assigned anywhere in
        this class; this presumably should read from
        ``self.dataStore.priceDataList`` -- confirm before calling.
        '''
        price = pd.DataFrame(
            self.hedgePriceList[self.hedgeStock][::minInDay]).iloc[:, 6]
        price = ((price / price.shift(1)) - 1)[1:]
        return price

    def getVolatilityNDays(self, stock, n, currTimeRow):
        """
        Gets the volatility as the standard deviation of the cached returns
        in the ``n * 375`` rows that end just before ``currTimeRow``.
        """
        returns = self.stockReturns[stock].iloc[currTimeRow - 1 -
                                                (n * 375):currTimeRow - 1]
        volatility = np.std(returns)
        if (debug):
            print("Volatility: " + str(volatility))
        return volatility

    def getCorrelation(self, stock1, stock2, i1, i2, n):
        """
        Takes the cached returns of two stocks over the ``n * 375`` rows
        ending just before ``i1`` / ``i2`` and gives their correlation.
        """
        returns1 = self.stockReturns[stock1].iloc[i1 - 1 - (n * 375):i1 - 1]
        returns2 = self.stockReturns[stock2].iloc[i2 - 1 - (n * 375):i2 - 1]
        print(i1, i2)
        correlation = np.corrcoef(returns1, returns2)[1][0]
        return correlation

    def getVolAvgPrice(self, stock, left, right):
        '''
        Computes the volume weighted price for the range [left, right)
        price = (low + high + open + close)/4
        '''
        if (debug):
            print('\n' + ''.join(['*'] * 50))
            print("Stock prices")
            print(left, right)
            print("Left price: " +
                  str(self.dataStore.priceDataList[stock][left]))
            print("Right price: " +
                  str(self.dataStore.priceDataList[stock][right]))
        price = np.array(
            self.dataStore.priceDataList[stock][left:right])[:, 5:]
        price = price.astype(np.float64)
        # 5, 6, 7, 8, 9: Open, Close, Low, High, Volume
        # After trimming off strings, 0, 1, 2, 3, 4: Open, Close, Low, High, Volume
        avgPrice = (price[:, 0] + price[:, 1] + price[:, 2] +
                    price[:, 3]) / 4.0
        volume = price[:, 4]
        volAvgPrice = np.average(avgPrice, weights=volume)
        return volAvgPrice

    def getTTestScores(self,
                       boundary,
                       profitByGapPercentileLocal,
                       verbose=False):
        '''
        Returns the T test score and p-value of the profits at percentiles
        1..boundary versus the profits at percentiles boundary+1..98.

        NOTE(review): percentiles 0 and 99 end up in neither sample even
        though 100 slots (0..99) are created -- confirm this is intended.
        '''
        arr1 = []
        arr2 = []
        for i in range(1, boundary + 1):
            arr1 += profitByGapPercentileLocal[i]
        for i in range(boundary + 1, 99):
            arr2 += profitByGapPercentileLocal[i]
        tTest = ttest_ind(arr1, arr2)
        tValue, pValue = tTest[0], tTest[1]
        if (verbose):
            print("Boundary: " + str(boundary))
            print("T Value: " + str(tValue))
            print("P Value: " + str(pValue))
        return tValue, pValue

    def getGapStats(self, holdPeriodList, volType='nGapVol', verbose=False):
        '''
        Gives the statistics (Gap trading) for all hold periods specified
        The stats include timestamp, curr open price (after VWAP),
        prev close price (after VWAP), volatility holding period (H),
        min price/max price in interval, closing price after H etc

        Args:
            holdPeriodList: Contains holding periods as number of minutes
            volType: 'stdVol' uses the standard deviation of the sampled
                returns over 70 samples; any other value uses the standard
                deviation of the last 30 gaps
            verbose: print the per-day hedging figures

        Returns:
            Dictionary mapping each analysed stock to its list of stat
            dicts (also stored in ``self.results``)
        '''
        statList = {}
        priceList = {}
        gapList = {}
        if (self.hedgeFlag):
            # BM is benchmark
            gapListBM = []
            volListBM = []
            timeListBM = []
            # retList contains daily returns
            retListBM = []
            priceListBM = []
            priceTimeBM = []
            # Stores all the timestamps for which the benchmark is indexed
            benchmarkTimeStamps = [
                eachList[0]
                for eachList in self.dataStore.priceDataList[self.hedgeStock]
            ]

        volN = 70  # For standard volatility calculation of gapsize
        if (volType != 'stdVol'):
            volN = 30
        volDays = 70  # For standard volatility of entire calculation of returns

        for stock in self.modStockList:
            # Perform analysis for each stock
            infoList = self.dataStore.priceDataList[stock]
            statList[stock] = []
            priceList[stock] = []
            gapList[stock] = []
            retList = self.getRetList(stock)
            prevTime = 0
            # Written as a call so the module parses on Python 3 as well;
            # the printed text is unchanged.
            print('Currently analysing: ' + str(stock))
            for i in range(len(infoList)):
                currTime = infoList[i][0]
                currTimeStamp = datetime.fromtimestamp(currTime)
                currDay = currTimeStamp.date()
                currHour = currTimeStamp.time().hour
                currMins = currTimeStamp.time().minute

                # Account for duplicates
                if (prevTime == currTime):
                    continue
                prevTime = currTime

                if (not (self.startTime <= currTime <= self.endTime)):
                    # Check if it is in the valid range
                    continue

                if ((currHour == 9) and (currMins == 15)):
                    # Checking for day starting time
                    if (stock == 'SBIN' and currTimeStamp.date().day == 9
                            and currTimeStamp.date().month == 11
                            and self.year == 2016):
                        self.printFlag = 1
                    if (debug):
                        print('\n' + ''.join(['*'] * 50))

                    # getting prices for stock
                    currOpen = self.getVolAvgPrice(
                        stock, i, i + self.currOpenVWAPWindow)
                    prevClose = self.getVolAvgPrice(
                        stock, i - self.prevCloseVWAPWindow, i)
                    posEntryPrice = self.getVolAvgPrice(
                        stock, i + self.currOpenVWAPWindow,
                        i + self.currOpenVWAPWindow + self.posEntryVWAPWindow)

                    if ((self.hedgeFlag) and (self.hedgeStock == stock)):
                        priceListBM.append(currOpen)
                        priceTimeBM.append(currTime)

                    priceList[stock].append(currOpen)
                    gapList[stock].append((currOpen - prevClose) / prevClose)

                    # Not enough samples to compute std dev, added five to handle edge cases
                    if (len(gapList[stock]) < volN + 5):
                        continue

                    # Refers to the stats common across the holding periods
                    commStats = {}
                    commStats['time'] = currTime
                    commStats['readableTime'] = datetime.fromtimestamp(
                        currTime)
                    commStats['ticker'] = stock
                    commStats['currOpen'] = currOpen
                    commStats['prevClose'] = prevClose
                    commStats['posEntryPrice'] = posEntryPrice
                    commStats['gapSize'] = ((currOpen - prevClose) / prevClose)
                    if (self.absFlag):
                        commStats['gapSize'] = np.abs(commStats['gapSize'])

                    if (volType == 'stdVol'):
                        commStats['volatility'] = np.std(
                            retList[len(gapList[stock]) -
                                    volN:len(gapList[stock])])
                    else:
                        commStats['volatility'] = np.std(
                            gapList[stock][-volN:])
                    commStats['gapRatio'] = (commStats['gapSize'] /
                                             commStats['volatility'])

                    # correct volatility using stDev of returns for 70 days of per minute returns
                    commStats['stockVolatility'] = self.getVolatilityNDays(
                        stock, volDays, i)

                    if (self.hedgeFlag):
                        if (stock != self.hedgeStock):
                            # Binary search in the timeStamps of the benchmark
                            row = bisect(timeListBM, currTime) - 1
                            retBM = retListBM[row]
                            volBM = volListBM[row]
                            bmI = bisect_left(benchmarkTimeStamps, currTime)
                            posEntryBM = self.getVolAvgPrice(
                                self.hedgeStock,
                                bmI + self.currOpenVWAPWindow,
                                bmI + self.currOpenVWAPWindow +
                                self.posEntryVWAPWindow)
                            # modifying volatility
                            commStats[
                                'indexVolatility'] = self.getVolatilityNDays(
                                    self.hedgeStock, volDays, bmI)
                            if (debug):
                                # Prints the timestamps of both the current stock row and the current benchmark row
                                print(
                                    self.dataStore.priceDataList[stock][i][0],
                                    self.dataStore.priceDataList[
                                        self.hedgeStock][bmI][0])
                            commStats['posEntryPriceBM'] = posEntryBM

                            if (self.corrFlag != 'constant'):
                                # Re-estimate the correlation from the last
                                # volN opening prices of stock and benchmark.
                                priceRow = bisect(priceTimeBM, currTime)
                                self.betaCorrelation = np.corrcoef(
                                    priceList[stock][-volN:],
                                    priceListBM[-volN +
                                                priceRow:priceRow])[1][0]

                            beta = self.betaCorrelation * (
                                commStats['stockVolatility'] /
                                commStats['indexVolatility'])
                            if (debug):
                                print("Stock Volatility: " +
                                      str(commStats['stockVolatility']))
                                print("Index Volatility: " +
                                      str(commStats['indexVolatility']))
                            commStats['betaCorr'] = self.betaCorrelation
                            commStats['Beta'] = beta

                            if (verbose):
                                print(''.join(['*'] * 50))
                                print("Beta : " + str(beta))
                                print("Stock : " + stock)
                                print("Stock currOpen: " + str(currOpen))
                                print("Stock prevClose: " + str(prevClose))
                                print("Stock Return: " +
                                      str(commStats['gapSize']))
                                print("Stock Volatility: " +
                                      str(commStats['volatility']))
                                print("Stock Normalized Return: " +
                                      str(commStats['gapRatio']))
                                print("Benchmark Return: " + str(retBM))
                                print("Benchmark Volatility: " + str(volBM))
                                print("Benchmark Normalized Return: " +
                                      str(retBM / volBM))
                        else:
                            # The benchmark itself: record its gap and
                            # volatility for the stocks analysed after it.
                            timeListBM.append(currTime)
                            retListBM.append(commStats['gapSize'])
                            volListBM.append(commStats['volatility'])

                    # Running minimum / maximum of column 6 from row i on.
                    minPriceList = [float(infoList[i][6])]
                    maxPriceList = [float(infoList[i][6])]
                    # Identifying the array index limit
                    holdLim = min(max(holdPeriodList), len(infoList) - i - 1)
                    for j in range(holdLim):
                        minPriceList.append(
                            min(minPriceList[-1], float(infoList[i + j][6])))
                        maxPriceList.append(
                            max(maxPriceList[-1], float(infoList[i + j][6])))

                    # Appending volatility normalized gap value for determining distribution plot
                    self.gapListNormalized.append(commStats['gapSize'] /
                                                  commStats['volatility'])

                    # Once a stop-loss/target is hit for a hold period, that
                    # return is reused for every later hold period of the day.
                    reachedStopOrTarget = 0
                    stopOrTargetRelReturn = 0
                    for hold in holdPeriodList:
                        tmpStats = commStats.copy()
                        minPrice = minPriceList[min(hold, holdLim)]
                        maxPrice = maxPriceList[min(hold, holdLim)]
                        tmpStats['holdPeriod'] = hold
                        tmpStats['min'] = minPrice
                        tmpStats['max'] = maxPrice
                        tmpStats['finClose'] = infoList[min(
                            (i + self.currOpenVWAPWindow +
                             self.posEntryVWAPWindow + hold),
                            len(infoList) - 1)][6]
                        # Normalizing the volatility based on hold period
                        tmpStats['stockVolAfterNorm'] = commStats[
                            'stockVolatility'] * np.sqrt(hold)

                        if (self.hedgeFlag):
                            bmI = bisect_left(benchmarkTimeStamps, currTime)
                            bmRows = self.dataStore.priceDataList[
                                self.hedgeStock]
                            tmpStats['finCloseBM'] = bmRows[min(
                                (bmI + self.currOpenVWAPWindow +
                                 self.posEntryVWAPWindow + hold),
                                len(bmRows) - 1)][6]

                        if (not (stock == self.hedgeStock)):
                            # Calculating profits and all
                            tmpStats['profit'] = (
                                (-np.sign(tmpStats['currOpen'] -
                                          tmpStats['prevClose'])) *
                                ((tmpStats['finClose'] -
                                  tmpStats['posEntryPrice']) /
                                 tmpStats['posEntryPrice']))
                            tmpStats['absReturn'] = tmpStats['profit']
                            tmpStats['absReturnPerUnitVol'] = tmpStats[
                                'absReturn'] / tmpStats['stockVolAfterNorm']

                            if (self.hedgeFlag):
                                tmpStats['marketReturn'] = (
                                    (tmpStats['finCloseBM'] -
                                     tmpStats['posEntryPriceBM']) /
                                    tmpStats['posEntryPriceBM'])
                                tmpStats['returnOffset'] = (
                                    (tmpStats['finCloseBM'] -
                                     tmpStats['posEntryPriceBM']) /
                                    tmpStats['posEntryPriceBM']
                                ) * tmpStats['Beta']
                                tmpStats['relReturn'] = tmpStats['profit'] + (
                                    np.sign(tmpStats['currOpen'] -
                                            tmpStats['prevClose']) *
                                    tmpStats['returnOffset'])
                                tmpStats['relReturnPerUnitVol'] = tmpStats[
                                    'relReturn'] / tmpStats['stockVolAfterNorm']

                                if ((tmpStats['relReturn'] <= self.stopLoss
                                     or tmpStats['relReturn'] >=
                                     self.targetPrice)
                                        and reachedStopOrTarget == 0):
                                    reachedStopOrTarget = 1
                                    stopOrTargetRelReturn = tmpStats[
                                        'relReturn']
                                if (reachedStopOrTarget):
                                    tmpStats[
                                        'relReturnWithStopLoss'] = stopOrTargetRelReturn
                                else:
                                    tmpStats[
                                        'relReturnWithStopLoss'] = tmpStats[
                                            'relReturn']
                            else:
                                tmpStats['relReturn'] = tmpStats['profit']
                                tmpStats['relReturnPerUnitVol'] = tmpStats[
                                    'absReturnPerUnitVol']

                        if (self.printFlag == 1):
                            for key in tmpStats:
                                print(key + ": " + str(tmpStats[key]))

                        statList[stock].append(tmpStats)
                        if (not (stock == self.hedgeStock)):
                            grandDict[stock].append(tmpStats)

                    self.printFlag = 0

        self.results = statList
        return statList

    def compileResults(self, holdPeriodList):
        '''
        Compile the results extracted from getGapStats()
        The rows are indexed with RELATIVE RANK
        The columns are Count (For all stocks), also compute stock based results.
        E, P, R fraction.
        Win Rate: The fraction of actual fades
        Anti: Average profit on winning fade trades
        With: Average loss on losing fade trades
        Exp: Expectation of profit

        Args:
            Hold period list, should be consistent with getGapStats()

        Returns:
            Nothing; fills ``self.cumStats[hold]`` with a matrix using the
            following column convention
            0: Count, 1: E, 2: P, 3: R, 4: P(S), 5: Anti, 6: With, 7:Exp
        '''
        self.timeWiseStats = {}
        self.cumStats = {}
        for hold in holdPeriodList:
            # numStocks rows, column mapping is given above
            if self.mode == 'relative':
                numRows = len(self.stockList)
            elif self.mode == 'percentile':
                numRows = int(100 / self.bucketSize)
            else:
                numRows = (2 * self.numBucket) + 1
            self.cumStats[hold] = np.zeros((numRows, 8))
            self.timeWiseStats[hold] = {}

            if (self.logBucket):
                # bucketTimeList: the timestamps that fell into each bucket
                # bucketTradeList: per bucket, the profits of each stock
                self.bucketTimeList = [list() for _ in range(numRows)]
                # Every bucket needs its own lists: copying one template
                # dict is shallow and would make all buckets share them.
                self.bucketTradeList = [{key: []
                                         for key in self.stockList}
                                        for _ in range(numRows)]

        for stockId, stock in enumerate(self.stockList):
            for tmpStats in self.results[stock]:
                time = tmpStats['time']
                hold = tmpStats['holdPeriod']
                currOpen = tmpStats['currOpen']
                prevClose = tmpStats['prevClose']
                minPrice = tmpStats['min']
                maxPrice = tmpStats['max']
                finClose = tmpStats['finClose']
                gapRatio = tmpStats['gapRatio']
                gapSize = tmpStats['gapSize']
                posEntry = tmpStats['posEntryPrice']
                volatility = tmpStats['stockVolAfterNorm']

                # Hedging support, not technically hedging, just offsetting with respect to the index return
                if (self.hedgeFlag):
                    hedge = ((tmpStats['finCloseBM'] -
                              tmpStats['posEntryPriceBM']) /
                             tmpStats['posEntryPriceBM']) * tmpStats['Beta']
                    if (self.divideByVol):
                        hedge /= volatility

                # Initial 8 elements represent the standard stats
                # The last ones will be used for ranking later
                tmpArr = np.zeros(12)
                tmpArr[0] += 1
                tmpArr[8] = stockId
                tmpArr[9] = gapSize
                tmpArr[10] = gapRatio
                tmpArr[11] = stockId

                targetPrice = finClose
                profit = ((-np.sign(currOpen - prevClose)) *
                          ((targetPrice - posEntry) / posEntry))
                if (self.divideByVol):
                    profit /= volatility
                if (self.hedgeFlag):
                    profit -= (-np.sign(currOpen - prevClose)) * hedge

                fillFlag = np.sign(profit)
                if (fillFlag < 0):
                    # Refers to the E case i.e. extension
                    tmpArr[1] += 1
                    tmpArr[6] += profit
                else:
                    if ((currOpen - prevClose) *
                            (prevClose - targetPrice) < 0):
                        # Refers to the P case i.e. partial fill
                        tmpArr[2] += 1
                    else:
                        # Refers to the R case i.e. reversal
                        tmpArr[3] += 1
                    # Adding profits
                    tmpArr[5] += profit

                # Adding the result to the corresponding time in the dict
                if (time not in self.timeWiseStats[hold]):
                    self.timeWiseStats[hold][time] = []
                self.timeWiseStats[hold][time].append(tmpArr)

        for hold in holdPeriodList:
            if self.mode == 'percentile':
                minSize = self.minSize
                maxSize = self.maxSize
                # Sliding window of recent gap ratios (arrival order) and
                # the same values kept sorted for rank look-ups.
                self.gapQueue = deque([], maxlen=maxSize)
                self.orderedGaps = SortedList(load=50)

            for time in sorted(self.timeWiseStats[hold].keys()):
                entries = self.timeWiseStats[hold][time]
                if (self.mode == 'relative'):
                    # Sort the list according to the magnitude of the gap.
                    # Slot 10 holds gapRatio; the last slot (11) is the
                    # stock id, so sorting on x[-1] ranked by stock id.
                    entries.sort(key=lambda stats: np.abs(stats[10]),
                                 reverse=True)
                    for rank, entry in enumerate(entries):
                        self.cumStats[hold][rank] += entry[:8]
                elif (self.mode == 'percentile'):
                    newGapLen = len(entries)
                    # If there are enough elements for identifying percentile
                    if (len(self.gapQueue) >= minSize):
                        for entry in entries:
                            searchKey = entry[10]
                            percentile = self.orderedGaps.bisect_left(
                                searchKey)
                            currSize = len(self.gapQueue)
                            # To avoid having percentile as 1.0, since percentile <= percSize + 1
                            percentile = percentile / (currSize + 2.0)
                            row = int(percentile * int(100 / self.bucketSize))
                            self.cumStats[hold][row] += entry[:8]
                            if (self.tTestFlag):
                                self.profitByGapPercentile[int(
                                    percentile * 100)].append(entry[5] +
                                                              entry[6])
                            if (self.logBucket):
                                # Adding time to this bucket's list
                                self.bucketTimeList[row].append(time)
                                # Since at least one of these is zero, by construction
                                profit = entry[5] + entry[6]
                                stockId = int(entry[11])
                                self.bucketTradeList[row][
                                    self.stockList[stockId]].append(profit)
                                bucketTradeListGlobal[row][
                                    self.stockList[stockId]].append(profit)
                        # Updating the queue and removing elements from the tree
                        for _ in range(newGapLen):
                            lastVal = self.gapQueue.popleft()
                            self.orderedGaps.remove(lastVal)

                    newValList = [entry[10] for entry in entries]
                    # Adding the new values to the queue simultaneously
                    self.gapQueue.extend(newValList)
                    # Adding the new values to the tree simultaneously
                    self.orderedGaps.update(newValList)
                else:
                    for entry in entries:
                        gapRatio = entry[10]
                        # Get the position in the matrix, note that the bucket sizes are of size 10%
                        bucket = int(
                            np.sign(gapRatio) *
                            int(np.abs(gapRatio * 10) / self.bucketSize))
                        # Clamp outliers into the outermost bucket.
                        if np.abs(bucket) >= self.numBucket:
                            bucket = int(np.sign(bucket) * self.numBucket)
                        row = self.numBucket + bucket
                        self.cumStats[hold][row] += entry[:8]

    def tTestWrapper(self, profitByGapPercentile, verbose=True):
        """
        Tries boundary values 10, 20, ..., 90 for the percentile and
        performs the T test of getTTestScores() for each of them.
        Does nothing beyond printing the header when the t-test is disabled.
        """
        print(''.join(['*'] * 50))
        print("Cumulative Stats")
        if (self.tTestFlag):
            for i in range(10, 100, 10):
                tValue, pValue = self.getTTestScores(i, profitByGapPercentile)
                if (verbose):
                    print("Boundary: " + str(i))
                    print("T Value: " + str(tValue))
                    print("P Value: " + str(pValue))

    def getProfitGapPercentile(self):
        '''
        Return the percentile -> list-of-profits mapping (empty when the
        t-test is disabled).
        '''
        return self.profitByGapPercentile

    def finalizeStats(self, holdPeriodList):
        '''
        Finally processes the stats matrices, note that the resulting matrices
        cannot be compiled again directly as frequencies have become probs
        '''
        for hold in holdPeriodList:
            self.cumStats[hold] = processStatMatrix(self.cumStats[hold])

    def plotDistribution(self, plotSeries, saveAsFile=False, logValues=False):
        '''
        Plots a histogram for the given plotSeries

        Args:
            saveAsFile: Whether to save to file or plotting on screen
            logValues: Whether the y axis is log scaled

        Return:
            None, side effects could include saving a file
        '''
        stDev = np.std(plotSeries)
        # xLabels from -3*sigma to 3*sigma
        xLabels = np.array(range(-3, 4)) * stDev
        # NOTE(review): this figure is superseded by the subplots() call
        # right below and never drawn on -- confirm whether it is needed.
        plt.figure(figsize=(100, 100))
        fig, ax = plt.subplots(1, 1)
        axes = plt.gca()
        plt.hist(plotSeries, bins=100, log=logValues)
        plt.xlabel("Normalized Gap Size")
        plt.ylabel("Number of Gap Sizes")
        axes.set_xlim([xLabels[0] - 0.5, xLabels[-1] + 0.5])
        ax.set_xticks(xLabels)
        plt.tight_layout()
        if (saveAsFile):
            plt.savefig("results/gapDistribution.svg")
        else:
            plt.show()