def compute_down(expr, ec, profiler_output=None, compute_kwargs=None,
                 odo_kwargs=None, **kwargs):
    """Compute down for blaze clients.

    Parameters
    ----------
    expr : Expr
        The expression to send to the server.
    ec : Client
        The blaze client to compute against.
    namespace : dict[Symbol -> any], optional
        The namespace to compute the expression in. This will be amended to
        include that data for the server. By default this will just be the
        client mapping to the server's data.
    compute_kwargs : dict, optional
        Extra kwargs to pass to compute on the server.
    odo_kwargs : dict, optional
        Extra kwargs to pass to odo on the server.
    profile : bool, optional
        Should blaze server run cProfile over the computation of the
        expression and the serialization of the response.
    profiler_output : file-like object, optional
        A file like object to hold the profiling output from the server.
        If this is not passed then the server will write the data to the
        server's filesystem.
    """
    from .server import to_tree

    kwargs = keymap(u8, kwargs)

    tree = to_tree(expr)
    serial = ec.serial
    if profiler_output is not None:
        kwargs[u'profile'] = True
        kwargs[u'profiler_output'] = ':response'

    kwargs[u'compute_kwargs'] = keymap(u8, compute_kwargs or {})
    kwargs[u'odo_kwargs'] = keymap(u8, odo_kwargs or {})

    r = post(
        ec,
        '/compute',
        data=serial.dumps(assoc(kwargs, u'expr', tree)),
        auth=ec.auth,
        headers=mimetype(serial),
    )

    if not ok(r):
        raise ValueError("Bad response: %s" % reason(r))
    response = serial.loads(content(r))
    if profiler_output is not None:
        profiler_output.write(response[u'profiler_output'])
    return serial.data_loads(response[u'data'])
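# A minimal, self-contained sketch (not taken from compute_down above) of the
# toolz.keymap contract that the snippets in this collection rely on: apply a
# function to every key of a mapping and get a new dict back, with the values
# and the original dict untouched.
from toolz import keymap

kwargs = {'profile': True, 'profiler_output': ':response'}

# compute_down uses its own u8 helper to coerce keys to unicode text; plain
# str stands in for it here.
normalized = keymap(str, kwargs)

assert normalized == {'profile': True, 'profiler_output': ':response'}
assert kwargs == {'profile': True, 'profiler_output': ':response'}  # not mutated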
def to_dict(self): return { 'is_started': self.is_started, 'title': self.title, 'grid': { str(i): [chain.to_dict() if chain else None for chain in column] for i, column in enumerate(self.grid) }, 'player_order': self.player_order, 'current_turn_player': self.current_turn_player, 'current_action_player': self.current_action_player, 'current_action_type': self.current_action_type.value, 'current_action_details': self.current_action_details, 'stock_availability': toolz.keymap(lambda b: b.value, self.stock_availability), 'money_by_player': self.money_by_player, 'stock_by_player': toolz.valmap( lambda stock_map: toolz.keymap(lambda brand: brand.value, stock_map), self.stock_by_player), 'user_data_by_id': self.user_data_by_id, 'tiles_remaining': self.tiles_remaining, 'cost_by_brand': toolz.keymap(lambda b: b.value, self.cost_by_brand), 'inactive_brands': [brand.value for brand in self.inactive_brands], 'active_brands': [brand.value for brand in self.active_brands], 'most_recently_placed_tile': None if not self.most_recently_placed_tile else self.most_recently_placed_tile.to_dict(), 'most_recent_actions': self.most_recent_actions, 'acquisition_resolution_queue': [{ 'player_id': details['player_id'], 'acquirer': details['acquirer'].value, 'acquiree': details['acquiree'].value, 'acquiree_cost_at_acquisition_time': details['acquiree_cost_at_acquisition_time'] } for details in self.acquisition_resolution_queue] }
def _scatter(self, data, workers=None, broadcast=False):
    """ Scatter data to local data dictionary

    Rather than send data out to the cluster we keep data local.  However
    we do report to the scheduler that the local worker has the scattered
    data.  This allows other workers to come by and steal this data if
    desired.

    Keywords like ``broadcast=`` do not work, however operations like
    ``.replicate`` work fine after calling scatter, which can fill in for
    this functionality.
    """
    with log_errors():
        if not (workers is None and broadcast is False):
            raise NotImplementedError(
                "Scatter from worker doesn't support workers or broadcast keywords"
            )

        if isinstance(data, dict) and not all(isinstance(k, (bytes, str))
                                              for k in data):
            d = yield self._scatter(keymap(tokey, data), workers, broadcast)
            raise gen.Return({k: d[tokey(k)] for k in data})

        if isinstance(data, (list, tuple, set, frozenset)):
            keys = []
            for x in data:
                try:
                    keys.append(tokenize(x))
                except Exception:
                    # fall back to a random key for values we cannot tokenize
                    keys.append(str(uuid.uuid1()))
            data2 = dict(zip(keys, data))
        elif isinstance(data, dict):
            keys = set(data)
            data2 = data
        else:
            raise TypeError("Don't know how to scatter %s" % type(data))

        nbytes = valmap(sizeof, data2)

        # self.worker.data.update(data2)  # thread safety matters
        self.worker.loop.add_callback(self.worker.data.update, data2)

        yield self.scheduler.update_data(
            who_has={key: [self.worker.address] for key in data2},
            nbytes=nbytes,
            client=self.id)

        if isinstance(data, dict):
            out = {k: Future(k, self) for k in data}
        elif isinstance(data, (tuple, list, set, frozenset)):
            out = type(data)([Future(k, self) for k in keys])
        else:
            raise TypeError("Input to scatter must be a list or dict")

        for key in keys:
            self.futures[key].finish(type=None)

        raise gen.Return(out)
def loads(b):
    """ Transform bytestream back into Python value """
    header_length, = struct.unpack('I', b[:4])
    if header_length:
        header = msgpack.loads(b[4:header_length + 4], encoding='utf8')
    else:
        header = {}

    payload = b[header_length + 4:]

    if header.get('compression'):
        try:
            decompress = compressions[header['compression']]['decompress']
            payload = decompress(payload)
        except KeyError:
            raise ValueError("Data is compressed as %s but we don't have this"
                             " installed" % header['compression'].decode())

    msg = msgpack.loads(payload, encoding='utf8')

    if header.get('decode'):
        if isinstance(msg, dict) and msg:
            msg = keymap(bytes.decode, msg)
        elif isinstance(msg, bytes):
            msg = msg.decode()
        else:
            raise TypeError("Asked to decode a %s" % type(msg).__name__)

    return msg
def create_branch(self, branch_name, files, parents=[], message='',
                  signature=None):
    # 1. create tree
    files = toolz.keymap(lambda path: tuple(path.split('/')), files)
    files = unflatten(files)
    tree_id = self.create_tree(files)

    # 2. create commit with the tree created above
    # TODO(kszucs): pass signature explicitly
    author = committer = self.signature
    commit_id = self.repo.create_commit(None, author, committer, message,
                                        tree_id, parents)
    commit = self.repo[commit_id]

    # 3. create branch pointing to the previously created commit
    branch = self.repo.create_branch(branch_name, commit)

    # append to the pushable references
    self._updated_refs.append('refs/heads/{}'.format(branch_name))

    return branch
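# Hedged sketch of the flat-path -> nested-tree step used in create_branch.
# `unflatten` lives elsewhere in that repository; the minimal stand-in below is
# an assumption about its behaviour, shown only to illustrate the keymap step.
import toolz


def unflatten(mapping):
    # Build a nested dict from tuple keys, e.g. ('dir', 'file') -> {'dir': {'file': ...}}
    result = {}
    for path, value in mapping.items():
        node = result
        for part in path[:-1]:
            node = node.setdefault(part, {})
        node[path[-1]] = value
    return result


files = {'docs/index.md': '# Title', 'src/main.py': 'print("hi")'}
files = toolz.keymap(lambda path: tuple(path.split('/')), files)
assert unflatten(files) == {
    'docs': {'index.md': '# Title'},
    'src': {'main.py': 'print("hi")'},
}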
def requirejson_wrapper(*args, **kwargs):
    # TODO(vishesh): malformed JSON gives 500 error, should give 400,
    #   can't seem to catch the ValueError from json.loads
    try:
        # GET/DELETE have no body. PUT/PATCH/POST have bodies.
        r = None
        if (request.method in ['GET', 'DELETE'] or
                (request.method in ['POST', 'PUT', 'PATCH'] and
                 'json' not in request.content_type)):
            r = {k: request.params[k] for k in request.params}
        else:
            r = request.json
    except ValueError as e:
        jsonabort(400, ('Request should be parseable json, got error: '
                        '' + str(e.args)))

    if r is None:
        # the only time that r will be None is if the json part fails.
        # request.params being empty will give an empty dictionary instead,
        # so this logic is okay (don't need to change the expected
        # content-type based on the request method).
        jsonabort(400, ('Content-Type should be application/json, got '
                        '' + str(request.content_type)))

    if type(r) is not dict:
        jsonabort(400, 'Request must be a JSON object, not {}'.format(
            typename(r)))

    if not all(k in r for k in keys):
        jsonabort(400, 'Request is missing keys: ' +
                  str(list(set(keys) - r.keys())))

    if strict and not all(p in keys or p in opts for p in r):
        # since we know that all k in keys is present in r
        # if the lengths are unequal then for sure there are extra keys.
        jsonabort(400, 'Strict mode: request has unrecognized keys: ' +
                  str(list(r.keys() - set(keys))))

    p = t.keymap(lambda k: k.replace('-', '_'), t.merge(opts, r))

    # python 3.5+ type checking, replace known types with variables.
    if sys.version_info >= (3, 5):
        ann = req_fun.__annotations__
        for (k, v) in p.items():
            if k in ann:
                try:
                    p[k] = ann[k](v)
                except Exception:
                    jsonabort(400,
                              'Parameter {} should be type {}, got {}'.format(
                                  k, ann[k], type(v)))

    overlap = set(kwargs) & set(p)
    if len(overlap) > 0:
        raise ValueError(
            'keyword args being clobbered by json params: ' + str(overlap))

    return req_fun(*args, **t.merge(kwargs, p))
def test_novel_deltas_macro(self): asset_info = asset_infos[0][0] base_dates = pd.DatetimeIndex([ pd.Timestamp('2014-01-01'), pd.Timestamp('2014-01-04') ]) baseline = pd.DataFrame({ 'value': (0, 1), 'asof_date': base_dates, 'timestamp': base_dates, }) expr = bz.Data(baseline, name='expr', dshape=self.macro_dshape) deltas = bz.Data(baseline, name='deltas', dshape=self.macro_dshape) deltas = bz.transform( deltas, value=deltas.value + 10, timestamp=deltas.timestamp + timedelta(days=1), ) nassets = len(asset_info) expected_views = keymap(pd.Timestamp, { '2014-01-03': repeat_last_axis( np.array([10.0, 10.0, 10.0]), nassets, ), '2014-01-06': repeat_last_axis( np.array([10.0, 10.0, 11.0]), nassets, ), }) cal = pd.DatetimeIndex([ pd.Timestamp('2014-01-01'), pd.Timestamp('2014-01-02'), pd.Timestamp('2014-01-03'), # omitting the 4th and 5th to simulate a weekend pd.Timestamp('2014-01-06'), ]) with tmp_asset_finder(equities=asset_info) as finder: expected_output = pd.DataFrame( list(concatv([10] * nassets, [11] * nassets)), index=pd.MultiIndex.from_product(( sorted(expected_views.keys()), finder.retrieve_all(asset_info.index), )), columns=('value',), ) self._run_pipeline( expr, deltas, expected_views, expected_output, finder, calendar=cal, start=cal[2], end=cal[-1], window_length=3, compute_fn=op.itemgetter(-1), )
def _sniff_dialect(self, path):
    kwargs = self._kwargs
    dialect = sniff_dialect(path, self._sniff_nbytes,
                            encoding=self.encoding)
    kwargs = merge(dialect, keymap(alias, kwargs))
    return valfilter(lambda x: x is not None,
                     dict((d, kwargs[d]) for d in dialect_terms
                          if d in kwargs))
def _scatter(self, data, workers=None, broadcast=False):
    """ Scatter data to local data dictionary

    Rather than send data out to the cluster we keep data local.  However
    we do report to the scheduler that the local worker has the scattered
    data.  This allows other workers to come by and steal this data if
    desired.

    Keywords like ``broadcast=`` do not work, however operations like
    ``.replicate`` work fine after calling scatter, which can fill in for
    this functionality.
    """
    with log_errors():
        if not (workers is None and broadcast is False):
            raise NotImplementedError(
                "Scatter from worker doesn't support workers or broadcast keywords")

        if isinstance(data, dict) and not all(isinstance(k, (bytes, str))
                                              for k in data):
            d = yield self._scatter(keymap(tokey, data), workers, broadcast)
            raise gen.Return({k: d[tokey(k)] for k in data})

        if isinstance(data, (list, tuple, set, frozenset)):
            keys = []
            for x in data:
                try:
                    keys.append(tokenize(x))
                except Exception:
                    # fall back to a random key for values we cannot tokenize
                    keys.append(str(uuid.uuid1()))
            data2 = dict(zip(keys, data))
        elif isinstance(data, dict):
            keys = set(data)
            data2 = data
        else:
            raise TypeError("Don't know how to scatter %s" % type(data))

        nbytes = valmap(sizeof, data2)

        # self.worker.data.update(data2)  # thread safety matters
        self.worker.loop.add_callback(self.worker.data.update, data2)

        yield self.scheduler.update_data(
            who_has={key: [self.worker.address] for key in data2},
            nbytes=nbytes,
            client=self.id)

        if isinstance(data, dict):
            out = {k: Future(k, self) for k in data}
        elif isinstance(data, (tuple, list, set, frozenset)):
            out = type(data)([Future(k, self) for k in keys])
        else:
            raise TypeError("Input to scatter must be a list or dict")

        for key in keys:
            self.futures[key]['status'] = 'finished'
            self.futures[key]['event'].set()

        raise gen.Return(out)
def _scatter(self, data, workers=None, broadcast=False, direct=None): """ Scatter data to local data dictionary Rather than send data out to the cluster we keep data local. However we do report to the scheduler that the local worker has the scattered data. This allows other workers to come by and steal this data if desired. Keywords like ``broadcast=`` do not work, however operations like ``.replicate`` work fine after calling scatter, which can fill in for this functionality. """ with log_errors(): if not (workers is None and broadcast is False): raise NotImplementedError("Scatter from worker doesn't support workers or broadcast keywords") if isinstance(data, dict) and not all(isinstance(k, (bytes, str)) for k in data): d = yield self._scatter(keymap(tokey, data), workers, broadcast) raise gen.Return({k: d[tokey(k)] for k in data}) if isinstance(data, type(range(0))): data = list(data) input_type = type(data) names = False unpack = False if isinstance(data, (set, frozenset)): data = list(data) if not isinstance(data, (dict, list, tuple, set, frozenset)): unpack = True data = [data] if isinstance(data, (list, tuple)): names = list(map(tokenize, data)) data = dict(zip(names, data)) types = valmap(type, data) assert isinstance(data, dict) self.worker.update_data(data=data, report=False) yield self.scheduler.update_data( who_has={key: [self.worker.address] for key in data}, nbytes=valmap(sizeof, data), client=self.id) out = {k: self._Future(k, self) for k in data} for key, typ in types.items(): self.futures[key].finish(type=typ) if issubclass(input_type, (list, tuple, set, frozenset)): out = input_type(out[k] for k in names) if unpack: assert len(out) == 1 out = list(out.values())[0] raise gen.Return(out)
def test_deltas(self, asset_info): expr = bz.Data(self.df, name='expr', dshape=self.dshape) deltas = bz.Data(self.df, dshape=self.dshape) deltas = bz.Data( odo( bz.transform( deltas, value=deltas.value + 10, timestamp=deltas.timestamp + timedelta(days=1), ), pd.DataFrame, ), name='delta', dshape=self.dshape, ) expected_views = keymap(pd.Timestamp, { '2014-01-02': np.array([[10.0, 11.0, 12.0], [1.0, 2.0, 3.0]]), '2014-01-03': np.array([[11.0, 12.0, 13.0], [2.0, 3.0, 4.0]]), '2014-01-04': np.array([[12.0, 13.0, 14.0], [12.0, 13.0, 14.0]]), }) nassets = len(asset_info) if nassets == 4: expected_views = valmap( lambda view: np.c_[view, [np.nan, np.nan]], expected_views, ) with tmp_asset_finder(equities=asset_info) as finder: expected_output = pd.DataFrame( list(concatv([12] * nassets, [13] * nassets, [14] * nassets)), index=pd.MultiIndex.from_product(( sorted(expected_views.keys()), finder.retrieve_all(asset_info.index), )), columns=('value',), ) dates = self.dates dates = dates.insert(len(dates), dates[-1] + timedelta(days=1)) self._run_pipeline( expr, deltas, expected_views, expected_output, finder, calendar=dates, start=dates[1], end=dates[-1], window_length=2, compute_fn=np.nanmax, )
def read(cls, rootdir):
    path = cls.metadata_path(rootdir)
    with open(path) as fp:
        raw_data = json.load(fp)

    try:
        version = raw_data['version']
    except KeyError:
        # Version was first written with version 1, assume 0,
        # if version does not match.
        version = 0

    default_ohlc_ratio = raw_data['ohlc_ratio']

    if version >= 1:
        minutes_per_day = raw_data['minutes_per_day']
    else:
        # version 0 always assumed US equities.
        minutes_per_day = US_EQUITIES_MINUTES_PER_DAY

    if version >= 2:
        calendar = get_calendar(raw_data['calendar_name'])
        start_session = pd.Timestamp(raw_data['start_session'], tz='UTC')
        end_session = pd.Timestamp(raw_data['end_session'], tz='UTC')
    else:
        # No calendar info included in older versions, so
        # default to the SZSH calendar.
        calendar = get_calendar()
        start_session = pd.Timestamp(raw_data['first_trading_day'], tz='UTC')
        end_session = calendar.minute_to_session_label(
            pd.Timestamp(raw_data['market_closes'][-1], unit='m', tz='UTC'))

    if version >= 3:
        ohlc_ratios_per_sid = raw_data['ohlc_ratios_per_sid']
        if ohlc_ratios_per_sid is not None:
            ohlc_ratios_per_sid = keymap(int, ohlc_ratios_per_sid)
    else:
        ohlc_ratios_per_sid = None

    return cls(
        default_ohlc_ratio,
        ohlc_ratios_per_sid,
        calendar,
        start_session,
        end_session,
        minutes_per_day,
        version=version,
    )
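# Hedged sketch of why read() re-keys ohlc_ratios_per_sid with keymap(int, ...):
# JSON object keys are always strings, so integer sids come back from
# json.load as text and have to be converted back to ints.
import json
from toolz import keymap

ohlc_ratios_per_sid = {1: 1000, 24: 100}
raw_data = json.loads(json.dumps({'ohlc_ratios_per_sid': ohlc_ratios_per_sid}))

assert raw_data['ohlc_ratios_per_sid'] == {'1': 1000, '24': 100}
assert keymap(int, raw_data['ohlc_ratios_per_sid']) == ohlc_ratios_per_sid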
def probabilities(self, probabilities: dict):
    if probabilities is None:
        self._probabilities = None
    elif isinstance(probabilities, str):
        self._probabilities = {validate_str(probabilities): 1.0}
    elif isinstance(probabilities, dict):
        if abs(sum(probabilities.values()) - 1.0) > 1.0e-9:
            raise ValueError("probabilities must sum to 1.0")
        self._probabilities = keymap(validate_str, probabilities)
    else:
        raise TypeError("probabilities must be dict or single value")
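# Hedged sketch of the key-normalization pattern above. `validate_str` is
# defined elsewhere in that codebase; the stand-in here (strip + lower-case)
# is an assumption used only to show how keymap cleans the dict keys.
from toolz import keymap


def validate_str(value):
    if not isinstance(value, str) or not value.strip():
        raise ValueError("expected a non-empty string")
    return value.strip().lower()


probabilities = {'  Heads ': 0.5, 'Tails': 0.5}
assert abs(sum(probabilities.values()) - 1.0) <= 1.0e-9
assert keymap(validate_str, probabilities) == {'heads': 0.5, 'tails': 0.5}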
def read(cls, rootdir): path = cls.metadata_path(rootdir) with open(path) as fp: raw_data = json.load(fp) try: version = raw_data["version"] except KeyError: # Version was first written with version 1, assume 0, # if version does not match. version = 0 default_ohlc_ratio = raw_data["ohlc_ratio"] if version >= 1: minutes_per_day = raw_data["minutes_per_day"] else: # version 0 always assumed US equities. minutes_per_day = US_EQUITIES_MINUTES_PER_DAY if version >= 2: calendar = get_calendar(raw_data["calendar_name"]) start_session = pd.Timestamp(raw_data["start_session"], tz="UTC") end_session = pd.Timestamp(raw_data["end_session"], tz="UTC") else: # No calendar info included in older versions, so # default to NYSE. calendar = get_calendar("XNYS") start_session = pd.Timestamp(raw_data["first_trading_day"], tz="UTC") end_session = calendar.minute_to_session_label( pd.Timestamp(raw_data["market_closes"][-1], unit="m", tz="UTC")) if version >= 3: ohlc_ratios_per_sid = raw_data["ohlc_ratios_per_sid"] if ohlc_ratios_per_sid is not None: ohlc_ratios_per_sid = keymap(int, ohlc_ratios_per_sid) else: ohlc_ratios_per_sid = None return cls( default_ohlc_ratio, ohlc_ratios_per_sid, calendar, start_session, end_session, minutes_per_day, version=version, )
def read(cls, rootdir): path = cls.metadata_path(rootdir) with open(path) as fp: raw_data = json.load(fp) try: version = raw_data['version'] except KeyError: # Version was first written with version 1, assume 0, # if version does not match. version = 0 default_ohlc_ratio = raw_data['ohlc_ratio'] if version >= 1: minutes_per_day = raw_data['minutes_per_day'] else: # version 0 always assumed US equities. minutes_per_day = US_EQUITIES_MINUTES_PER_DAY if version >= 2: calendar = get_calendar(raw_data['calendar_name']) start_session = pd.Timestamp( raw_data['start_session'], tz='UTC') end_session = pd.Timestamp(raw_data['end_session'], tz='UTC') else: # No calendar info included in older versions, so # default to NYSE. calendar = get_calendar('NYSE') start_session = pd.Timestamp( raw_data['first_trading_day'], tz='UTC') end_session = calendar.minute_to_session_label( pd.Timestamp( raw_data['market_closes'][-1], unit='m', tz='UTC') ) if version >= 3: ohlc_ratios_per_sid = raw_data['ohlc_ratios_per_sid'] if ohlc_ratios_per_sid is not None: ohlc_ratios_per_sid = keymap(int, ohlc_ratios_per_sid) else: ohlc_ratios_per_sid = None return cls( default_ohlc_ratio, ohlc_ratios_per_sid, calendar, start_session, end_session, minutes_per_day, version=version, )
def __init__(self, path, has_header='no-input', encoding='utf-8', **kwargs):
    self.path = path
    if has_header == 'no-input':
        if not os.path.exists(path):
            self.has_header = True
        else:
            self.has_header = None
    else:
        self.has_header = has_header
    self.encoding = encoding
    kwargs = keymap(alias, kwargs)
    self.dialect = dict((d, kwargs[d]) for d in dialect_terms
                        if d in kwargs)
def test_deltas_only_one_delta_in_universe(self, asset_info): expr = bz.Data(self.df, name='expr', dshape=self.dshape) deltas = pd.DataFrame({ 'sid': [65, 66], 'asof_date': [self.dates[1], self.dates[0]], 'timestamp': [self.dates[2], self.dates[1]], 'value': [10, 11], }) deltas = bz.Data(deltas, name='deltas', dshape=self.dshape) expected_views = keymap(pd.Timestamp, { '2014-01-02': np.array([[0.0, 11.0, 2.0], [1.0, 2.0, 3.0]]), '2014-01-03': np.array([[10.0, 2.0, 3.0], [2.0, 3.0, 4.0]]), '2014-01-04': np.array([[2.0, 3.0, 4.0], [2.0, 3.0, 4.0]]), }) nassets = len(asset_info) if nassets == 4: expected_views = valmap( lambda view: np.c_[view, [np.nan, np.nan]], expected_views, ) with tmp_asset_finder(equities=asset_info) as finder: expected_output = pd.DataFrame( columns=[ 'value', ], data=np.array([11, 10, 4]).repeat(len(asset_info.index)), index=pd.MultiIndex.from_product(( sorted(expected_views.keys()), finder.retrieve_all(asset_info.index), )), ) dates = self.dates dates = dates.insert(len(dates), dates[-1] + timedelta(days=1)) self._run_pipeline( expr, deltas, expected_views, expected_output, finder, calendar=dates, start=dates[1], end=dates[-1], window_length=2, compute_fn=np.nanmax, )
def __init__(self, path, has_header=None, encoding='utf-8',
             sniff_nbytes=10000, **kwargs):
    self.path = path
    if has_header is None:
        self.has_header = (not os.path.exists(path) or
                           infer_header(path, sniff_nbytes))
    else:
        self.has_header = has_header
    self.encoding = encoding if encoding is not None else 'utf-8'
    kwargs = merge(sniff_dialect(path, sniff_nbytes, encoding=encoding),
                   keymap(alias, kwargs))
    self.dialect = valfilter(bool,
                             dict((d, kwargs[d]) for d in dialect_terms
                                  if d in kwargs))
def from_dict(state_data): new_state = GameState(state_data['title']) new_state.is_started = state_data['is_started'] new_state.grid = GameState._build_grid_from_firestore_map( state_data['grid']) new_state.player_order = state_data['player_order'] new_state.current_turn_player = state_data['current_turn_player'] new_state.current_action_player = state_data['current_action_player'] new_state.current_action_type = ActionType( state_data['current_action_type']) new_state.current_action_details = state_data['current_action_details'] new_state.stock_availability = { Brand(brand): stock_count for brand, stock_count in state_data['stock_availability'].items() } new_state.money_by_player = state_data['money_by_player'] new_state.stock_by_player = { player: { Brand(brand_value): amount for brand_value, amount in stock_map.items() } for player, stock_map in state_data['stock_by_player'].items() } new_state.user_data_by_id = state_data['user_data_by_id'] new_state.tiles_remaining = state_data['tiles_remaining'] new_state.cost_by_brand = toolz.keymap(Brand, state_data['cost_by_brand']) new_state.inactive_brands = [ Brand(brand_value) for brand_value in state_data['inactive_brands'] ] new_state.active_brands = [ Brand(brand_value) for brand_value in state_data['active_brands'] ] new_state.most_recently_placed_tile = None if not state_data[ 'most_recently_placed_tile'] else Tile( **state_data['most_recently_placed_tile']) new_state.most_recent_actions = state_data['most_recent_actions'] new_state.acquisition_resolution_queue = [{ 'player_id': details['player_id'], 'acquirer': Brand(details['acquirer']), 'acquiree': Brand(details['acquiree']), 'acquiree_cost_at_acquisition_time': details['acquiree_cost_at_acquisition_time'] } for details in state_data['acquisition_resolution_queue']] return new_state
def get_rights_positions(self, dts):
    # Positions whose rights-issue record date falls on this day are sold,
    # because the trading halt around the rights issue carries an
    # opportunity cost.
    assets = set(self.positions)
    # print('ledger assets', assets)
    rights = self.position_tracker.retrieve_equity_rights(assets, dts)
    # print('ledger rights', rights)
    mapping_protocol = keymap(lambda x: x.sid, self.positions)
    # print('ledger mapping_protocol', mapping_protocol)
    union_assets = set(mapping_protocol) & set(rights.index)
    # print('ledger union_assets', union_assets)
    union_positions = keyfilter(lambda x: x in union_assets,
                                mapping_protocol) if union_assets else None
    # print('ledger union_positions', union_positions)
    right_positions = list(union_positions.values()) if union_positions else []
    # print('right_positions', right_positions)
    return right_positions
def get_teams(event_comment: str) -> Teams:
    """
    Parse the event comment to figure out who was playing on which team.

    :param event_comment: The comment from meetup saying what the teams are.
    :return: Teams dict with 'Red' and 'Blue' keys, the values are lists of
        player names.
    """
    pattern = re.compile(r'\s*(red|blue):([^$]*)$', flags=re.IGNORECASE)
    teams = dict(
        pattern.findall(line) | to(list) | to(first)
        for line in event_comment.split('\n')
        if pattern.match(line))
    teams = keymap(str.title, teams)
    teams = valmap(lambda x: [y.strip() for y in x.split(',')], teams)
    return teams
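# Hedged usage sketch for the normalization half of get_teams. The custom
# `to(...)` pipe helper is defined elsewhere in that project, so this example
# starts from the raw matches it would produce and only exercises the
# keymap/valmap clean-up.
from toolz import keymap, valmap

raw_teams = {'red': ' Alice, Bob ', 'BLUE': 'Carol,Dave'}

teams = keymap(str.title, raw_teams)
teams = valmap(lambda x: [y.strip() for y in x.split(',')], teams)
assert teams == {'Red': ['Alice', 'Bob'], 'Blue': ['Carol', 'Dave']}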
def patch_cacheops(g): REDIS_URL = g.get('REDIS_URL') if not REDIS_URL: return log_setting('CACHEOPS', 'is enabled') g['CACHEOPS_REDIS'] = keymap(str.lower, dj_redis_url.parse(REDIS_URL)) g['INSTALLED_APPS'].append('cacheops') g['CACHEOPS_DEGRADE_ON_FAILURE'] = True g['CACHEOPS_DEFAULTS'] = {'timeout': IN_SECONDS.FIFTEEN_MINUTES} g['CACHEOPS'] = { # Automatically cache any User.objects.get() calls for 15 minutes # This includes request.user or post.author access, # where Post.author is a foreign key to auth.User 'auth.user': {'ops': 'get'}, 'core.user': {'ops': 'get'}, # Automatically cache all gets and queryset fetches # to other django.contrib.auth models for an hour 'auth.*': {'ops': ('fetch', 'get'), 'timeout': IN_SECONDS.ONE_HOUR}, # Cache gets, fetches, counts and exists to Permission # 'all' is just an alias for ('get', 'fetch', 'count', 'exists') 'auth.permission': {'ops': 'all', 'timeout': IN_SECONDS.ONE_HOUR}, # Basically Never changing objects. Allow local_get (in memory) 'event.event': {'ops': 'all', 'local_get': True}, 'ticket.tickettype': {'ops': 'all', 'local_get': True}, 'ticket.tickettier': {'ops': 'all', 'local_get': True}, 'ticket.ticketaddontype': {'ops': 'all', 'local_get': False}, # Enable manual caching on all other models with default timeout of an hour # Use Post.objects.cache().get(...) # or Tags.objects.filter(...).order_by(...).cache() # to cache particular ORM request. # Invalidation is still automatic '*.*': {'ops': (), 'timeout': IN_SECONDS.ONE_HOUR}, # And since ops is empty by default you can rewrite last line as: '*.*': {'timeout': IN_SECONDS.ONE_HOUR}, }
def validated_payouts(payouts_in):
    """
    This method validates json transactions. It ensures `recipient`
    addresses are valid ETH addresses, and expands `bucket` aliases into
    proper bucket_id's.
    """
    # swap bucket name with matching ID
    payouts = [{
        **x,
        'bucket': buckets[x['bucket']],
        'amount': float(x['amount'].replace(',', '')),
    } for x in (keymap(rename_field, y) for y in payouts_in)]

    # validate addresses
    for payout in payouts:
        validate_address(payout['recipient'])

    return payouts
def update_matches(date: datetime.date, teams: Teams, winner: typing.Literal['r', 'b', 'd']) -> List[PlayerMatch]: """ Update the matches json file containing a history of the matches that were played. If the match for a particular date already exists those values are overwritten with the new values. :param date: Date that the match took place. :param teams: Which players played on which colour team. :param winner: Which team was the winner (or draw) :return: The records from all matches that have been played. """ winner = {'b': 'Blue', 'r': 'Red', 'd': 'Draw'}[winner] with open(DIR / '../data/matches.json') as f: matches = json.load(f) matches_by_date = groupby(lambda x: x['date'], matches | to(list)) matches_by_date = keymap( lambda x: datetime.datetime.strptime(x, '%Y-%m-%d').date(), matches_by_date, ) matches_by_date[date] = [{ 'date': date.strftime('%Y-%m-%d'), 'name': name, 'team': team, 'points': { team: 3, 'Draw': 1 }.get(winner, 0), } for team, names in teams.items() for name in names] with open(DIR / '../data/matches.json', 'w') as f: matches = [ match for date, matches in matches_by_date.items() for match in matches ] json.dump(sorted(matches, key=lambda x: x['date'], reverse=True), f, indent=2) return matches
def current_portfolio_weights(self):
    """
    Compute each asset's weight in the portfolio by calculating its held
    value divided by the total value of all positions.

    Each equity's value is its price times the number of shares held. Each
    futures contract's value is its unit price times number of shares held
    times the multiplier.
    """
    if self.positions:
        # assets vary by tag name --- different pipelines may hold
        # positions with the same sid
        p_values = valmap(lambda x: x.last_sync_price * x.amount,
                          self.positions)
        p_values = keymap(lambda x: x.sid, p_values)
        aggregate = merge_with(sum, p_values)
        weights = pd.Series(aggregate) / self.portfolio_value
    else:
        weights = pd.Series(dtype='float')
    return weights.to_dict()
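# Hedged sketch of the valmap -> keymap -> merge_with chain above on toy data.
# The Asset and Position namedtuples are stand-ins for the real objects, which
# are assumed to expose `sid`, `last_sync_price`, and `amount`.
from collections import namedtuple
from toolz import valmap, keymap, merge_with

Asset = namedtuple('Asset', ['sid', 'tag'])
Position = namedtuple('Position', ['last_sync_price', 'amount'])

# keys are asset objects (which carry a sid), values are position records
positions = {
    Asset('000001', 'pipeline_a'): Position(10.0, 100),
    Asset('600000', 'pipeline_b'): Position(20.0, 50),
}

p_values = valmap(lambda p: p.last_sync_price * p.amount, positions)  # value held per asset
p_values = keymap(lambda asset: asset.sid, p_values)                  # re-key by sid
aggregate = merge_with(sum, p_values)                                 # sum values sharing a sid
assert aggregate == {'000001': 1000.0, '600000': 1000.0}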
def test_deltas_macro(self): asset_info = asset_infos[0][0] expr = bz.Data(self.macro_df, name='expr', dshape=self.macro_dshape) deltas = bz.Data( self.macro_df.iloc[:-1], name='deltas', dshape=self.macro_dshape, ) deltas = bz.transform( deltas, value=deltas.value + 10, timestamp=deltas.timestamp + timedelta(days=1), ) nassets = len(asset_info) expected_views = keymap( pd.Timestamp, { '2014-01-02': repeat_last_axis(np.array([10.0, 1.0]), nassets), '2014-01-03': repeat_last_axis(np.array([11.0, 2.0]), nassets), }) with tmp_asset_finder(equities=asset_info) as finder: expected_output = pd.DataFrame( list(concatv([10] * nassets, [11] * nassets)), index=pd.MultiIndex.from_product(( sorted(expected_views.keys()), finder.retrieve_all(asset_info.index), )), columns=('value', ), ) dates = self.dates self._run_pipeline( expr, deltas, expected_views, expected_output, finder, calendar=dates, start=dates[1], end=dates[-1], window_length=2, compute_fn=np.nanmax, )
def create_sentiment():
    sa = vader.SentimentIntensityAnalyzer()

    import unicodecsv as csv

    comments_with_sentiment = (
        toolz.merge(
            dict(
                id=d["id"],
                time=dt.datetime.utcfromtimestamp(
                    float(d["created_utc"])).strftime("%Y-%m-%d %H:%M:%S"),
                ups=d["ups"],
                contr=d["controversiality"],
            ),
            toolz.keymap(
                lambda x: "vader_" + x,
                sa.polarity_scores(d["body"])
            ),
            dict(zip(
                ["pattern_polarity", "pattern_subjectivity"],
                pattern.sentiment(d["body"])
            )),
        )
        for d in corpus.load_json(
            p=None,  # just do all
            include_is=True,
            include_oos=True,
            # filter_deleted=False
        )
    )

    with open("data-sentiment/sentiment.csv", "w") as o:
        c = next(comments_with_sentiment)
        writer = csv.DictWriter(o, c.keys())
        writer.writeheader()
        writer.writerow(c)
        for c in comments_with_sentiment:
            writer.writerow(c)
def schema(self): """ Examples -------- >>> from blaze import symbol >>> t = symbol('t', 'var * {name: string, amount: int}') >>> s = symbol('t', 'var * {name: string, id: int}') >>> join(t, s).schema dshape("{name: string, amount: int32, id: int32}") >>> join(t, s, how='left').schema dshape("{name: string, amount: int32, id: ?int32}") Overlapping but non-joined fields append _left, _right >>> a = symbol('a', 'var * {x: int, y: int}') >>> b = symbol('b', 'var * {x: int, y: int}') >>> join(a, b, 'x').fields ['x', 'y_left', 'y_right'] """ option = lambda dt: dt if isinstance(dt, Option) else Option(dt) on_left = self.on_left if not isinstance(on_left, list): on_left = on_left, on_right = self.on_right if not isinstance(on_right, list): on_right = on_right, right_types = keymap( dict(zip(on_right, on_left)).get, self.rhs.dshape.measure.dict, ) joined = ( (name, promote(dt, right_types[name], promote_option=False)) for n, (name, dt) in enumerate(filter( compose(op.contains(on_left), first), self.lhs.dshape.measure.fields, )) ) left = [ (name, dt) for name, dt in zip( self.lhs.fields, types_of_fields(self.lhs.fields, self.lhs) ) if name not in on_left ] right = [ (name, dt) for name, dt in zip( self.rhs.fields, types_of_fields(self.rhs.fields, self.rhs) ) if name not in on_right ] # Handle overlapping but non-joined case, e.g. left_other = set(name for name, dt in left if name not in on_left) right_other = set(name for name, dt in right if name not in on_right) overlap = left_other & right_other left_suffix, right_suffix = self.suffixes left = ((name + left_suffix if name in overlap else name, dt) for name, dt in left) right = ((name + right_suffix if name in overlap else name, dt) for name, dt in right) if self.how in ('right', 'outer'): left = ((name, option(dt)) for name, dt in left) if self.how in ('left', 'outer'): right = ((name, option(dt)) for name, dt in right) return dshape(Record(chain(joined, left, right)))
def append(self, data, lock=None):
    log('Client appends', self.address, str(len(data)) + ' keys')
    data = keymap(serialize_key, data)
    payload = list(chain.from_iterable(data.items()))
    self.send(b'append', payload)
        dt.Struct: np.object_,
    },
)

_numpy_dtypes = toolz.keymap(
    np.dtype,
    {
        'bool': dt.boolean,
        'int8': dt.int8,
        'int16': dt.int16,
        'int32': dt.int32,
        'int64': dt.int64,
        'uint8': dt.uint8,
        'uint16': dt.uint16,
        'uint32': dt.uint32,
        'uint64': dt.uint64,
        'float16': dt.float16,
        'float32': dt.float32,
        'float64': dt.float64,
        'double': dt.double,
        'unicode': dt.string,
        'str': dt.string,
        'datetime64': dt.timestamp,
        'datetime64[ns]': dt.timestamp,
        'timedelta64': dt.interval,
        'timedelta64[ns]': dt.Interval('ns'),
    },
)

_inferable_pandas_dtypes = {
    'boolean': dt.boolean,
    'string': dt.string,
MAPPING = {'object': 'String',
           'uint64': 'UInt64',
           'uint32': 'UInt32',
           'uint16': 'UInt16',
           'uint8': 'UInt8',
           'float64': 'Float64',
           'float32': 'Float32',
           'int64': 'Int64',
           'int32': 'Int32',
           'int16': 'Int16',
           'int8': 'Int8',
           'datetime64[D]': 'Date',
           'datetime64[ns]': 'DateTime'}

PD2CH = keymap(np.dtype, MAPPING)

CH2PD = itemmap(reversed, MAPPING)
CH2PD['Null'] = 'object'
CH2PD['Nothing'] = 'object'

NULLABLE_COLS = ['UInt64', 'UInt32', 'UInt16', 'UInt8',
                 'Float64', 'Float32',
                 'Int64', 'Int32', 'Int16', 'Int8',
                 'String', 'DateTime']

for col in NULLABLE_COLS:
    CH2PD['Nullable({})'.format(col)] = CH2PD[col]

PY3 = sys.version_info[0] == 3


def normalize(df, index=True):
    if index:
        df = df.reset_index()
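# Hedged usage sketch for the tables above (assumes PD2CH and CH2PD as defined
# there are in scope): because PD2CH is keyed by np.dtype objects via keymap,
# a pandas column dtype can be looked up directly, and equivalent spellings of
# the same dtype resolve to the same entry.
import numpy as np
import pandas as pd

df = pd.DataFrame({'id': np.arange(3, dtype='int64'), 'value': [0.1, 0.2, 0.3]})

assert PD2CH[df['id'].dtype] == 'Int64'
assert PD2CH[np.dtype('f8')] == PD2CH[np.dtype('float64')] == 'Float64'
assert CH2PD['Nullable(Int64)'] == 'int64'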
def test_novel_deltas(self, asset_info): base_dates = pd.DatetimeIndex( [pd.Timestamp('2014-01-01'), pd.Timestamp('2014-01-04')]) repeated_dates = base_dates.repeat(3) baseline = pd.DataFrame({ 'sid': self.sids * 2, 'value': (0, 1, 2, 1, 2, 3), 'asof_date': repeated_dates, 'timestamp': repeated_dates, }) expr = bz.Data(baseline, name='expr', dshape=self.dshape) deltas = bz.Data(baseline, name='deltas', dshape=self.dshape) deltas = bz.transform( deltas, value=deltas.value + 10, timestamp=deltas.timestamp + timedelta(days=1), ) expected_views = keymap( pd.Timestamp, { '2014-01-03': np.array([[10.0, 11.0, 12.0], [10.0, 11.0, 12.0], [10.0, 11.0, 12.0]]), '2014-01-06': np.array([[10.0, 11.0, 12.0], [10.0, 11.0, 12.0], [11.0, 12.0, 13.0]]), }) if len(asset_info) == 4: expected_views = valmap( lambda view: np.c_[view, [np.nan, np.nan, np.nan]], expected_views, ) expected_output_buffer = [10, 11, 12, np.nan, 11, 12, 13, np.nan] else: expected_output_buffer = [10, 11, 12, 11, 12, 13] cal = pd.DatetimeIndex([ pd.Timestamp('2014-01-01'), pd.Timestamp('2014-01-02'), pd.Timestamp('2014-01-03'), # omitting the 4th and 5th to simulate a weekend pd.Timestamp('2014-01-06'), ]) with tmp_asset_finder(equities=asset_info) as finder: expected_output = pd.DataFrame( expected_output_buffer, index=pd.MultiIndex.from_product(( sorted(expected_views.keys()), finder.retrieve_all(asset_info.index), )), columns=('value', ), ) self._run_pipeline( expr, deltas, expected_views, expected_output, finder, calendar=cal, start=cal[2], end=cal[-1], window_length=3, compute_fn=op.itemgetter(-1), )
def append(self, data, lock=None):
    logger.debug('Client appends %s %s', self.address,
                 str(len(data)) + ' keys')
    data = keymap(serialize_key, data)
    payload = list(chain.from_iterable(data.items()))
    self.send(b'append', payload)
def scalar_type(t):
    # compatibility
    return dtype(t).scalar_type()


_numpy_to_ibis = toolz.keymap(
    np.dtype,
    {
        'bool': boolean,
        'int8': int8,
        'int16': int16,
        'int32': int32,
        'int64': int64,
        'uint8': uint8,
        'uint16': uint16,
        'uint32': uint32,
        'uint64': uint64,
        'float16': float16,
        'float32': float32,
        'float64': float64,
        'double': double,
        'str': string,
        'datetime64': timestamp,
        'datetime64[ns]': timestamp,
        'timedelta64': interval,
        'timedelta64[ns]': Interval('ns'),
    })

dtype = Dispatcher('dtype')

validate_type = dtype
def get(self):
    resp = keymap(str, valmap(sizeof, self.server.data))
    self.write(resp)
def test_novel_deltas(self, asset_info): base_dates = pd.DatetimeIndex([ pd.Timestamp('2014-01-01'), pd.Timestamp('2014-01-04') ]) repeated_dates = base_dates.repeat(3) baseline = pd.DataFrame({ 'sid': self.sids * 2, 'value': (0., 1., 2., 1., 2., 3.), 'int_value': (0, 1, 2, 1, 2, 3), 'asof_date': repeated_dates, 'timestamp': repeated_dates, }) expr = bz.data(baseline, name='expr', dshape=self.dshape) deltas = bz.data( odo( bz.transform( expr, value=expr.value + 10, timestamp=expr.timestamp + timedelta(days=1), ), pd.DataFrame, ), name='delta', dshape=self.dshape, ) expected_views = keymap(pd.Timestamp, { '2014-01-03': np.array([[10.0, 11.0, 12.0], [10.0, 11.0, 12.0], [10.0, 11.0, 12.0]]), '2014-01-06': np.array([[10.0, 11.0, 12.0], [10.0, 11.0, 12.0], [11.0, 12.0, 13.0]]), }) if len(asset_info) == 4: expected_views = valmap( lambda view: np.c_[view, [np.nan, np.nan, np.nan]], expected_views, ) expected_output_buffer = [10, 11, 12, np.nan, 11, 12, 13, np.nan] else: expected_output_buffer = [10, 11, 12, 11, 12, 13] cal = pd.DatetimeIndex([ pd.Timestamp('2014-01-01'), pd.Timestamp('2014-01-02'), pd.Timestamp('2014-01-03'), # omitting the 4th and 5th to simulate a weekend pd.Timestamp('2014-01-06'), ]) with tmp_asset_finder(equities=asset_info) as finder: expected_output = pd.DataFrame( expected_output_buffer, index=pd.MultiIndex.from_product(( sorted(expected_views.keys()), finder.retrieve_all(asset_info.index), )), columns=('value',), ) self._run_pipeline( expr, deltas, expected_views, expected_output, finder, calendar=cal, start=cal[2], end=cal[-1], window_length=3, compute_fn=op.itemgetter(-1), )