def most_recent_data(bundle_name, timestamp, environ=None):
    """Get the path to the most recent data on or before ``timestamp`` for
    the given bundle.

    Parameters
    ----------
    bundle_name : str
        The name of the bundle to lookup.
    timestamp : datetime
        The timestamp to begin searching on or before.
    environ : dict, optional
        An environment dict to forward to zipline_root.
    """
    if bundle_name not in bundles:
        raise UnknownBundle(bundle_name)

    try:
        candidates = os.listdir(pth.data_path([bundle_name], environ=environ))
        return pth.data_path(
            [bundle_name,
             max(filter(complement(pth.hidden), candidates),
                 key=from_bundle_ingest_dirname)],
            environ=environ,
        )
    except (ValueError, OSError) as e:
        if getattr(e, "errno", errno.ENOENT) != errno.ENOENT:
            raise
        raise ValueError("no data for bundle %r on or before %s"
                         % (bundle_name, timestamp))
def _expect_element(collection):
    template = (
        "%(funcname)s() expected a value in {collection} "
        "for argument '%(argname)s', but got %(actual)s instead."
    ).format(collection=collection)
    return make_check(
        ValueError,
        template,
        complement(op.contains(collection)),
        repr,
    )
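# Note: ``op`` in the snippet above is presumably toolz's curried operator
# module, so ``op.contains(collection)`` is a partial application awaiting the
# candidate value. A minimal, self-contained sketch of that predicate
# (``make_check`` is not shown here, so only the membership test is exercised):
from toolz import complement
from toolz.curried import operator as op

allowed = ("daily", "minute")
is_not_element = complement(op.contains(allowed))

assert is_not_element("hourly")      # "hourly" not in allowed -> True
assert not is_not_element("daily")   # "daily" in allowed -> False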
def clean_dir_info(obj):
    """
    If you need to display a dictionary for a class that doesn't have
    ``__dict__``, ``boltons.dir_dict`` will work; this cleans it up for
    serializing.
    """
    try:
        d = dir_dict(obj)
    except Exception:
        d = dict()
    d1 = keyfilter(complement(_is_dunder), d)
    return dictfilter(d1, factory=AttributeDict)
def read_results_frame(output_dir: str) -> DataFrame:
    """
    Reads the results.json file into a pandas DataFrame.

    :param output_dir: The name of the output directory of the grid search.
    :returns: The results as a pandas data frame. Excludes the cross
        validation run lists (but keeps the cross validation mean values).
    :raises ValueError: If the output directory doesn't have a
        ``results.json`` file in it.
    """
    results = read_results(output_dir)
    return DataFrame(data=listmap(
        lambda r: keyfilter(complement(_frame_exclude_col), r), results))
def compare_local_remote_versions(
    local_versions: Iterable[str],
    remotes: Iterable[Remote],
    worker_count: int,
) -> Tuple[Tuple[Remote, str]]:
    """Returns the remotes_with_new_versions entries whose versions are
    greater than the maximum local one."""
    max_version_local = max(
        set(filter(complement(_is_live_version), local_versions)), default="")
    return tuple(
        filter(
            lambda remote_version: version_compare(
                remote_version[1], max_version_local) > 0,
            process_remotes_list(remotes, worker_count=worker_count),
        ))
def _expect_element(collection):
    if isinstance(collection, (set, frozenset)):
        # Special case the error message for set and frozen set to make it
        # less verbose.
        collection_for_error_message = tuple(sorted(collection))
    else:
        collection_for_error_message = collection

    template = (
        "%(funcname)s() expected a value in {collection} "
        "for argument '%(argname)s', but got %(actual)s instead."
    ).format(collection=collection_for_error_message)
    return make_check(
        ValueError,
        template,
        complement(op.contains(collection)),
        repr,
    )
def make_while_loop_test_expr(loop_body_instrs):
    """
    Make an expression in the context of a while-loop test.

    Code of the form::

        while <expr>:
            <body>

    generates a POP_JUMP_IF_FALSE for the loop test, while code of the form::

        while not <expr>:
            <body>

    generates a POP_JUMP_IF_TRUE for the loop test.

    Code of the form::

        while True:
            <body>

    generates no jumps at all.
    """
    bottom_of_loop = loop_body_instrs[-1]
    is_jump_to_bottom = compose(op.is_(bottom_of_loop), op.attrgetter('arg'))

    # Consume instructions until we find a jump to the bottom of the loop.
    test_builders = deque(
        popwhile(complement(is_jump_to_bottom), loop_body_instrs, side='left')
    )
    # If we consumed the entire loop body without finding a jump, assume this
    # is a while True loop.  Return the rest of the instructions as the loop
    # body.
    if not loop_body_instrs:
        return ast.NameConstant(value=True), test_builders

    # Top of the body is either a POP_JUMP_IF_TRUE or POP_JUMP_IF_FALSE.
    jump = loop_body_instrs.popleft()
    expr = make_expr(test_builders)
    if isinstance(jump, instrs.POP_JUMP_IF_TRUE):
        return ast.UnaryOp(op=ast.Not(), operand=expr), loop_body_instrs
    else:
        return expr, loop_body_instrs
def _dict_contains(small_dict: Dict[str, Any], big_dict: Dict[str, Any]) -> bool:
    """
    Returns true if ``small_dict`` is contained by ``big_dict``.

    :param small_dict: The smaller dict that may or may not be contained by
        the bigger dict.
    :param big_dict: The bigger dict that may or may not contain the smaller
        dict.
    :returns: True if the bigger dict contains the smaller dict.
    """
    # Remove the list values from big_dict because they are unhashable.
    # The implicit assumption is that small_dict doesn't have list elements.
    # Since this is called in the context of model hyperparameters I think
    # this is a safe assumption.
    return set(small_dict.items()) <= \
        set(valfilter(complement(listels), big_dict).items())
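# ``listels`` is not shown above; it presumably tests whether a value is a
# list. A self-contained sketch of the same containment check, with a
# stand-in predicate:
from toolz import complement, valfilter

def listels(value):
    return isinstance(value, list)

small = {"lr": 0.1, "depth": 3}
big = {"lr": 0.1, "depth": 3, "cv_scores": [0.8, 0.9]}

# Drop unhashable list values before comparing item sets.
assert set(small.items()) <= set(valfilter(complement(listels), big).items())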
def most_recent_data(bundle_name, timestamp=None, environ=None):
    """Get the path to the most recent data on or before ``timestamp`` for
    the given bundle.

    Parameters
    ----------
    bundle_name : str
        The name of the bundle to lookup.
    timestamp : datetime
        The timestamp to begin searching on or before.
    environ : dict, optional
        An environment dict to forward to zipline_root.
    """
    if timestamp is None:
        timestamp = pd.Timestamp.utcnow()

    if bundle_name not in bundles:
        raise UnknownBundle(bundle_name)

    try:
        candidates = os.listdir(
            pth.data_path([bundle_name], environ=environ),
        )
        return pth.data_path(
            [
                bundle_name,
                max(
                    filter(complement(pth.hidden), candidates),
                    key=from_bundle_ingest_dirname,
                )
            ],
            environ=environ,
        )
    except (ValueError, OSError) as e:
        if getattr(e, 'errno', errno.ENOENT) != errno.ENOENT:
            raise
        raise ValueError(
            'no data for bundle {bundle!r} on or before {timestamp}\n'
            'maybe you need to run: $ zipline ingest -b {bundle}'.format(
                bundle=bundle_name,
                timestamp=timestamp,
            ),
        )
def dichotomize(predicate, iterable):
    """Take a predicate and an iterable and return the pair of iterables of
    elements which do and do not satisfy the predicate.

    Parameters
    ----------
    predicate : callable[any, bool]
        The predicate function to partition with.
    iterable : iterable[any]
        The elements to partition.

    Returns
    -------
    trues : iterable[any]
        The sequence of values where the predicate evaluated to True.
    falses : iterable[any]
        The sequence of values where the predicate evaluated to False.

    Notes
    -----
    This is a lazy version of:

    .. code-block:: Python

       def partition(predicate, sequence):
           sequence = list(sequence)
           return (
               filter(predicate, sequence),
               filter(complement(predicate), sequence),
           )
    """
    true_queue = deque()
    false_queue = deque()
    it = iter(iterable)
    return (
        _predicate_iter(it, predicate, true_queue, false_queue),
        _predicate_iter(it, complement(predicate), false_queue, true_queue),
    )
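# The lazy helper ``_predicate_iter`` is not shown above; the eager equivalent
# given in the Notes section can be exercised directly:
from toolz import complement

def partition(predicate, sequence):
    sequence = list(sequence)
    return (
        filter(predicate, sequence),
        filter(complement(predicate), sequence),
    )

evens, odds = partition(lambda n: n % 2 == 0, range(6))
assert list(evens) == [0, 2, 4]
assert list(odds) == [1, 3, 5]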
def most_recent_data(bundle_name, timestamp, environ=None):
    """Get the path to the most recent data on or before ``timestamp`` for
    the given bundle.

    Parameters
    ----------
    bundle_name : str
        The name of the bundle to lookup.
    timestamp : datetime
        The timestamp to begin searching on or before.
    environ : dict, optional
        An environment dict to forward to catalyst_root.
    """
    if bundle_name not in bundles:
        raise UnknownBundle(bundle_name)

    try:
        candidates = os.listdir(
            pth.data_path([bundle_name], environ=environ),
        )
        return pth.data_path(
            [bundle_name,
             max(
                 filter(complement(pth.hidden), candidates),
                 key=from_bundle_ingest_dirname,
             )],
            environ=environ,
        )
    except (ValueError, OSError) as e:
        if getattr(e, 'errno', errno.ENOENT) != errno.ENOENT:
            raise
        raise ValueError(
            'no data for bundle {bundle!r} on or before {timestamp}\n'
            'maybe you need to run: $ catalyst ingest -b {bundle}'.format(
                bundle=bundle_name,
                timestamp=timestamp,
            ),
        )
def get_higher_versions_in_repology(
    package: Package, repology_cache: dict
) -> Optional[Iterable]:
    """Returns all versions from repology which are higher than the ones in
    package. If the package is not in the repology cache, returns None."""
    repology_name = _get_repology_name_for_pkg(
        atomname=package.atomname, cache=repology_cache
    )
    if repology_name is None:
        return None
    my_versions = tuple(
        map(
            compose(Version, strip_revision),
            filter(complement(_is_live_version), package.versions),
        )
    )
    if my_versions:
        max_version = max(my_versions).value
        op = compose(lambda x: x < 0, partial(version_compare, max_version))
        return filter(op, _get_versions_from_repology_repos(repology_name))
    else:
        return _get_versions_from_repology_repos(repology_name)
def get_dirs_and_files_in_path(path):
    # filter function
    def isdir(a):
        return os.path.isdir(a)

    # gives the opposite result of the above
    not_isdir = toolz.complement(isdir)

    if not path and platform.system() == 'Windows':
        import win32api
        drives = win32api.GetLogicalDriveStrings()
        drives = [d for d in drives.split('\000') if d]
        return drives
    elif os.path.exists(path):
        r = os.listdir(path)
        # accessed twice below, so materialize the generator into a list
        f = [os.path.join(path, a) for a in r]
        dirs = filter(isdir, f)
        files = filter(not_isdir, f)
    else:
        try:
            head, tail = os.path.split(path)
            r = os.listdir(head)
            filtered_everything = filter(lambda a: a.startswith(tail), r)
            # accessed twice below, so materialize the generator into a list
            filtered_everything = [os.path.join(head, a)
                                   for a in filtered_everything]
            dirs = filter(isdir, filtered_everything)
            files = filter(not_isdir, filtered_everything)
        except Exception as e:
            print('{0} does not exist'.format(head))
            return None

    result = (sorted(list(toolz.take(100, dirs))),
              sorted(list(toolz.take(100, files))))
    return result
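# The directory/file split above boils down to filtering one listing with
# ``os.path.isdir`` and its complement. A minimal, self-contained sketch
# (assumes the current working directory is readable):
import os
from toolz import complement, take

entries = [os.path.join('.', name) for name in os.listdir('.')]
dirs = sorted(take(100, filter(os.path.isdir, entries)))
files = sorted(take(100, filter(complement(os.path.isdir), entries)))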
class HQRules(RuleBasedStateMachine):
    items_received = Bundle('received items')

    def __init__(self):
        super().__init__()
        self.items_added = []
        self.n_items_received = 0
        self.hq = None

    @property
    def n_items_added(self):
        return len(self.items_added)

    def has_hq(self):
        return self.hq is not None

    def backlog_is_empty(self):
        return self.n_items_received == self.n_items_added

    def backlog_is_too_full(self):
        return (
            self.has_hq()
            and self.hq.max_backlog
            and (self.n_items_added - self.n_items_received
                 == self.hq.max_backlog)
        )

    def history_is_full(self):
        return self.has_hq() and self.hq.history_full()

    @precondition(complement(has_hq))
    @rule(
        history_size=st.one_of(st.just(None),
                               st.integers(min_value=0, max_value=4)),
        max_backlog=st.integers(min_value=0, max_value=4),
    )
    def initialize(self, history_size, max_backlog):
        self.hq = HistoryQueue(history_size, max_backlog=max_backlog)

    @preconditions(has_hq, complement(backlog_is_too_full))
    @rule(item=anything())
    def put(self, item):
        self.hq.put_nowait(item)
        self.items_added.append(item)

    @precondition(backlog_is_too_full)
    @rule(item=anything())
    def attempt_to_put(self, item):
        assert self.hq.backlog_full()
        with pytest.raises(asyncio.QueueFull):
            self.hq.put_nowait(item)

    @preconditions(has_hq, backlog_is_empty)
    def attempt_to_get(self):
        assert self.hq.backlog_empty()
        with pytest.raises(asyncio.QueueEmpty):
            self.hq.get_nowait()

    @precondition(complement(backlog_is_empty))
    @rule(target=items_received)
    def get(self):
        item = ReceivedItem(self.n_items_received, self.hq.get_nowait())
        print(item)
        self.n_items_received += 1
        return item

    @precondition(history_is_full)
    def check_full(self):
        assert self.hq.history_full()

    @rule(item=items_received)
    def check_item(self, item):
        if self.hq.history_size is None or item[0] <= self.hq.history_size:
            self.check_item_not_filled_yet(item)
        else:
            self.check_item_filled(item)

    def check_item_filled(self, item_off_bundle):
        i, item = item_off_bundle
        assert len(item) == self.hq.history_size + 1
        for item_added, item_received in zip(
                self.items_added[i - self.hq.history_size:], reversed(item)):
            assert item_added is item_received

    def check_item_not_filled_yet(self, item_off_bundle):
        i, item = item_off_bundle
        assert len(item) == i + 1
        for item_added, item_received in zip(self.items_added, reversed(item)):
            assert item_added is item_received
def clean(name, before=None, after=None, keep_last=None, environ=os.environ):
    """Clean up data that was created with ``ingest`` or
    ``$ python -m zipline ingest``

    Parameters
    ----------
    name : str
        The name of the bundle to remove data for.
    before : datetime, optional
        Remove data ingested before this date.
        This argument is mutually exclusive with: keep_last
    after : datetime, optional
        Remove data ingested after this date.
        This argument is mutually exclusive with: keep_last
    keep_last : int, optional
        Remove all but the last ``keep_last`` ingestions.
        This argument is mutually exclusive with: before, after
    environ : mapping, optional
        The environment variables. Defaults to os.environ.

    Returns
    -------
    cleaned : set[str]
        The names of the runs that were removed.

    Raises
    ------
    BadClean
        Raised when ``before`` and/or ``after`` are passed with ``keep_last``.
        This is a subclass of ``ValueError``.
    """
    try:
        all_runs = sorted(
            filter(
                complement(pth.hidden),
                os.listdir(pth.data_path([name], environ=environ)),
            ),
            key=from_bundle_ingest_dirname,
        )
    except OSError as e:
        if e.errno != errno.ENOENT:
            raise
        raise UnknownBundle(name)

    if (before is not None or after is not None) and keep_last is not None:
        raise BadClean(before, after, keep_last)

    if keep_last is None:
        def should_clean(name):
            dt = from_bundle_ingest_dirname(name)
            return (
                (before is not None and dt < before) or
                (after is not None and dt > after)
            )
    else:
        last_n_dts = set(all_runs[-keep_last:])

        def should_clean(name):
            return name not in last_n_dts

    cleaned = set()
    for run in all_runs:
        if should_clean(run):
            path = pth.data_path([name, run], environ=environ)
            shutil.rmtree(path)
            cleaned.add(path)

    return cleaned
def not_correct_order(lines):
    valid = pipe(
        all_indices(lines),
        complement(monotonic),
    )
    return (valid, 'Start and end tags are not in the correct order.')
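# ``all_indices`` and ``monotonic`` are not shown above. A self-contained
# sketch of the same pattern, assuming ``monotonic`` simply checks that a
# sequence is non-decreasing:
from toolz import complement, pipe

def monotonic(seq):
    # stand-in for the helper used above: non-decreasing check
    return all(a <= b for a, b in zip(seq, seq[1:]))

not_monotonic = complement(monotonic)
assert pipe([1, 3, 2], not_monotonic)        # out of order -> True
assert not pipe([1, 2, 3], not_monotonic)    # in order -> False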
def complement(x, object=None):
    return λ[toolz.complement(x)] if object is None else x[toolz.complement(object)]
def ingest(name,
           environ=os.environ,
           timestamp=None,
           assets_versions=(),
           show_progress=False,
           writer="bcolz"):
    """Ingest data for a given bundle.

    Parameters
    ----------
    name : str
        The name of the bundle.
    environ : mapping, optional
        The environment variables. By default this is os.environ.
    timestamp : datetime, optional
        The timestamp to use for the load. By default this is the current
        time.
    assets_versions : Iterable[int], optional
        Versions of the assets db to which to downgrade.
    show_progress : bool, optional
        Tell the ingest function to display the progress where possible.
    incremental : bool, optional
        Tell the ingest function to perform an incremental ingest.
    """
    try:
        bundle = bundles[name]
    except KeyError:
        raise UnknownBundle(name)

    calendar = get_calendar(bundle.calendar_name)

    start_session = bundle.start_session
    end_session = bundle.end_session

    if start_session is None or start_session < calendar.first_session:
        start_session = calendar.first_session

    if end_session is None or end_session > calendar.last_session:
        end_session = calendar.last_session

    try:
        candidates = os.listdir(
            pth.data_path([name], environ=environ),
        )
        timestr = max(
            filter(complement(pth.hidden), candidates),
            key=from_bundle_ingest_dirname,
        )
    except Exception:
        if timestamp is None:
            timestamp = pd.Timestamp.utcnow()
        timestamp = timestamp.tz_convert('utc').tz_localize(None)
        timestr = to_bundle_ingest_dirname(timestamp)

    cachepath = cache_path(name, environ=environ)
    pth.ensure_directory(pth.data_path([name, timestr], environ=environ))
    pth.ensure_directory(cachepath)

    with dataframe_cache(cachepath, clean_on_failure=False) as cache, \
            ExitStack() as stack:
        # we use `cleanup_on_failure=False` so that we don't purge the
        # cache directory if the load fails in the middle
        if bundle.create_writers:
            wd = stack.enter_context(
                working_dir(pth.data_path([], environ=environ)))
            daily_bars_path = daily_equity_path(name, timestr, environ=environ)
            pth.ensure_directory(daily_bars_path)
            daily_bar_writer = BcolzDailyBarWriter(
                daily_bars_path,
                calendar,
                start_session,
                end_session,
            )
            # Do an empty write to ensure that the daily ctables exist
            # when we create the SQLiteAdjustmentWriter below. The
            # SQLiteAdjustmentWriter needs to open the daily ctables so
            # that it can compute the adjustment ratios for the dividends.
            daily_bar_writer.write(())
            minute_bars_path = minute_equity_path(name, timestr,
                                                  environ=environ)
            pth.ensure_directory(minute_bars_path)
            if writer == "rocksdb":
                minute_bar_writer = RocksdbMinuteBarWriter(
                    minute_bars_path,
                    calendar,
                    start_session,
                    end_session,
                    minutes_per_day=bundle.minutes_per_day,
                )
            else:
                minute_bar_writer = BcolzMinuteBarWriter(
                    minute_bars_path,
                    calendar,
                    start_session,
                    end_session,
                    minutes_per_day=bundle.minutes_per_day,
                )
            wd.ensure_dir(
                name,
                timestr,
            )
            asset_db_writer = AssetDBWriter(
                wd.getpath(*asset_db_relative(
                    name, timestr, environ=environ,
                )))
            fundamental_db_writer = FundamentalWriter(
                wd.getpath(*fundamental_db_releative(
                    name, timestr, environ=environ)))
            adjustment_db_writer = stack.enter_context(
                SQLiteAdjustmentWriter(
                    wd.getpath(*adjustment_db_relative(
                        name, timestr, environ=environ)),
                    BcolzDailyBarReader(daily_bars_path),
                    calendar.all_sessions,
                    overwrite=True,
                ))
        else:
            daily_bar_writer = None
            minute_bar_writer = None
            asset_db_writer = None
            adjustment_db_writer = None
            fundamental_db_writer = None
            if assets_versions:
                raise ValueError('Need to ingest a bundle that creates '
                                 'writers in order to downgrade the assets'
                                 ' db.')

        bundle.ingest(
            environ,
            asset_db_writer,
            minute_bar_writer,
            daily_bar_writer,
            adjustment_db_writer,
            fundamental_db_writer,
            calendar,
            start_session,
            end_session,
            cache,
            show_progress,
            pth.data_path([name, timestr], environ=environ),
        )

        for version in sorted(set(assets_versions), reverse=True):
            version_path = wd.getpath(*asset_db_relative(
                name, timestr, environ=environ, db_version=version,
            ))
            with working_file(version_path) as wf:
                shutil.copy2(assets_db_path, wf.path)
                downgrade(wf.path, version)
def not_a(type_):
    """More curryable version of ``not isinstance``."""
    return complement(is_a(type_))
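# ``is_a`` is not defined in this snippet; it presumably curries ``isinstance``
# over the type. A self-contained sketch of the same idea:
from toolz import complement, flip

is_a = flip(isinstance)          # stand-in: is_a(int)(3) -> True
not_an_int = complement(is_a(int))

assert not_an_int("three")
assert not not_an_int(3)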
def clean_text(text: str) -> str:
    return filter_text(text, complement(is_control_char))
def remove_control_chars(string):
    return "".join(filter(complement(is_control_char), string))
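# ``is_control_char`` is not shown in the two snippets above; a common
# definition uses unicodedata's "C*" categories. A self-contained sketch:
import unicodedata
from toolz import complement

def is_control_char(char):
    return unicodedata.category(char).startswith("C")

printable_only = "".join(filter(complement(is_control_char), "ab\x00c\nd"))
assert printable_only == "abcd"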
def default_if_true(predicate, default):
    """Implements the rule: ``default if v else v``."""
    predicate = complement(predicate)
    return default_if_false(predicate, default)
# -*- encoding: utf-8 -*-
import toolz

print_list = lambda x: print(list(x))

l = list(range(25))


# 1. use toolz.curry instead of functools.partial
@toolz.curry
def not_multiple_of(x, y):
    return y % x != 0


print_list(filter(not_multiple_of(3), l))
print_list(filter(not_multiple_of(7), l))

print('-' * 20, '\n')

# 2. use toolz.remove
print_list(toolz.remove(toolz.complement(not_multiple_of(3)), l))
print_list(toolz.remove(toolz.complement(not_multiple_of(7)), l))
def check(condition, exception):
    return do_if(toolz.complement(condition), make_raise(exception))
def clean(name, before=None, after=None, keep_last=None, environ=os.environ):
    """Clean up data that was created with ``ingest`` or
    ``$ python -m zipline ingest``

    Parameters
    ----------
    name : str
        The name of the bundle to remove data for.
    before : datetime, optional
        Remove data ingested before this date.
        This argument is mutually exclusive with: keep_last
    after : datetime, optional
        Remove data ingested after this date.
        This argument is mutually exclusive with: keep_last
    keep_last : int, optional
        Remove all but the last ``keep_last`` ingestions.
        This argument is mutually exclusive with: before, after
    environ : mapping, optional
        The environment variables. Defaults to os.environ.

    Returns
    -------
    cleaned : set[str]
        The names of the runs that were removed.

    Raises
    ------
    BadClean
        Raised when ``before`` and/or ``after`` are passed with ``keep_last``.
        This is a subclass of ``ValueError``.
    """
    try:
        all_runs = sorted(
            filter(
                complement(pth.hidden),
                os.listdir(pth.data_path([name], environ=environ)),
            ),
            key=from_bundle_ingest_dirname,
        )
    except OSError as e:
        if e.errno != errno.ENOENT:
            raise
        raise UnknownBundle(name)

    if (before is not None or after is not None) and keep_last is not None:
        raise BadClean(before, after, keep_last)

    if keep_last is None:
        def should_clean(name):
            dt = from_bundle_ingest_dirname(name)
            return ((before is not None and dt < before) or
                    (after is not None and dt > after))
    elif keep_last >= 0:
        last_n_dts = set(take(keep_last, reversed(all_runs)))

        def should_clean(name):
            return name not in last_n_dts
    else:
        raise BadClean(before, after, keep_last)

    cleaned = set()
    for run in all_runs:
        if should_clean(run):
            path = pth.data_path([name, run], environ=environ)
            shutil.rmtree(path)
            cleaned.add(path)

    return cleaned
from zipline.utils.numpy_utils import repeat_last_axis

AD_FIELD_NAME = 'asof_date'
TS_FIELD_NAME = 'timestamp'
SID_FIELD_NAME = 'sid'
valid_deltas_node_types = (
    bz.expr.Field,
    bz.expr.ReLabel,
    bz.expr.Symbol,
)
traversable_nodes = (
    bz.expr.Field,
    bz.expr.Label,
)
is_invalid_deltas_node = complement(flip(isinstance, valid_deltas_node_types))
getname = op.attrgetter('__name__')


class _ExprRepr(object):
    """Box for repring expressions with the str of the expression.

    Parameters
    ----------
    expr : Expr
        The expression to box for repring.
    """
    __slots__ = 'expr',

    def __init__(self, expr):
        self.expr = expr
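# ``complement(flip(isinstance, ...))`` builds a "not one of these node types"
# predicate. A simplified, self-contained analogue using builtin types in
# place of the blaze expression nodes used above:
from toolz import complement, flip

valid_types = (str, bytes)
is_invalid_node = complement(flip(isinstance, valid_types))

assert is_invalid_node(42)          # int is not a valid type -> True
assert not is_invalid_node("text")  # str is valid -> False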