def _fetch_queries(self):
    """
    Fetch every aggregated query, attach the resulting series to each
    query info, then run each processor's ``calculate`` coroutine to completion.
    """
    aggregated = aggregate_queries(self._data_queries)

    # Phase 1: one fetch per aggregated query, then slice the frame for each
    # set of dimensions that was folded into it.
    for _dataset_id, dimension_queries in aggregated.items():
        for aggregated_query in dimension_queries.values():
            frame = fetch_query(aggregated_query)
            for dimensions, infos in aggregated_query['queries'].items():
                if not valid_dimensions(dimensions, frame):
                    # Dimensions cannot be applied to this frame: hand out empty series.
                    for info in infos:
                        info.data = Series()
                    continue

                subset = frame.query(build_query_string(dimensions))
                for info in infos:
                    measure = info.query.coordinate.measure
                    # The measure is either a plain column name or an object exposing ``.value``.
                    column = measure if isinstance(measure, str) else measure.value
                    info.data = subset[column]

    # Phase 2: notify each processor, blocking until its coroutine finishes.
    for info in self._data_queries:
        if info.data is None or len(info.data) == 0:
            outcome = ProcessorResult(
                False, f'No data found for Coordinate {info.query.coordinate}')
        else:
            outcome = ProcessorResult(True, info.data)
        asyncio.get_event_loop().run_until_complete(
            info.processor.calculate(info.attr, outcome, self.rule_cache))
def process(self, entity: Entity) -> ProcessorResult:
    """
    Resolve ``self.field`` for the given entity.

    The value is looked up on the entity dictionary first and, failing that,
    among the entity's ``identifiers`` entries whose ``type`` equals the field.

    :param entity: entity providing ``get_entity()`` and ``get_marquee_id()``
    :return: a successful result holding the value, or a failed result with a message
    """
    try:
        entity_dict = entity.get_entity()

        # Prefer a (truthy) value stored directly on the entity.
        direct_value = get(entity_dict, self.field)
        if direct_value:
            return ProcessorResult(True, direct_value)

        # Otherwise take the first identifier whose type matches the field.
        matching = next(
            (item for item in entity_dict.get('identifiers', [])
             if item['type'] == self.field),
            None)
        if matching:
            return ProcessorResult(True, matching['value'])

        # Neither the entity nor its identifiers carry the field.
        return ProcessorResult(
            False,
            f'Unable to find {self.field} in identifiers for entity {entity.get_marquee_id()}'
        )
    except ValueError:
        return ProcessorResult(False, "Could not get field on entity")
def process(self):
    """
    Restrict the 'a' series to the configured date range and store the
    outcome in ``self.value``.

    Missing ``start`` / ``end`` bounds default to the min / max of the series'
    'date' column; ``RelativeDate`` bounds are resolved with yesterday as the
    pricing date. A non-boolean ``weekdays_only`` yields a failed result.
    """
    a_data = self.children_data.get('a')
    if not isinstance(a_data, ProcessorResult):
        self.value = ProcessorResult(
            False, "DateRangeProcessor does not have 'a' series yet")
        return
    if not a_data.success:
        self.value = ProcessorResult(
            False, "DateRangeProcessor does not have 'a' series values yet")
        return

    date_array = a_data.data.reset_index()['date']
    if self.end is None:
        self.end = max(date_array)
    if self.start is None:
        self.start = min(date_array)

    if not isinstance(self.weekdays_only, bool):
        self.value = ProcessorResult(
            False,
            "DateRangeProcessor requires weekdays_only argument to be a boolean."
        )
        # Stop here: continuing would overwrite this failure with a success
        # result computed from the invalid argument.
        return

    yesterday = date.today() - timedelta(days=1)
    with PricingContext(pricing_date=yesterday):
        # for EOD datasets latest datapoint is T-1,
        # relative dates will be evaluated using yesterday as base_date
        if isinstance(self.end, RelativeDate):
            self.end = self.end.apply_rule()
        if isinstance(self.start, RelativeDate):
            self.start = self.start.apply_rule()

    result = date_range(a_data.data,
                        start_date=self.start,
                        end_date=self.end,
                        weekdays_only=self.weekdays_only)
    self.value = ProcessorResult(True, result)
def process(self):
    """
    Combine the 'minimum' and 'maximum' child results with the marker results
    into a single dictionary stored in ``self.value``.

    Markers rejected by ``validate_markers_data`` are kept, with an extra
    'invalidReason' entry.
    """
    minimum = self.children_data.get('minimum')
    maximum = self.children_data.get('maximum')
    marker_results = [self.children_data.get(marker.name) for marker in self.markers]

    if not (isinstance(minimum, ProcessorResult) and isinstance(maximum, ProcessorResult)):
        self.value = ProcessorResult(
            False, "Processor does not have min, max data yet")
        return
    if not (minimum.success and maximum.success):
        self.value = ProcessorResult(
            False, "Processor does not have min, max values yet")
        return

    lower = minimum.data.get(0)
    upper = maximum.data.get(0)
    markers = []
    for marker_result in marker_results:
        # Skip markers that are missing, failed, or carry no data.
        if not (marker_result and marker_result.success and marker_result.data):
            continue
        valid, reason = validate_markers_data(lower, upper, marker_result.data)
        if valid:
            markers.append(marker_result.data)
        else:
            markers.append({**marker_result.data, 'invalidReason': reason})

    self.value = ProcessorResult(True, {'min': lower, 'max': upper, 'markers': markers})
def _fetch_queries(self):
    """
    Fetch every aggregated query, attach the resulting series to each
    query info, then hand the outcome to each processor's ``calculate``.
    """
    aggregated = aggregate_queries(self._data_queries)

    # Phase 1: one fetch per aggregated query, then slice the frame for each
    # set of dimensions that was folded into it.
    for _dataset_id, dimension_queries in aggregated.items():
        for aggregated_query in dimension_queries.values():
            frame = fetch_query(aggregated_query)
            for dimensions, infos in aggregated_query['queries'].items():
                if not valid_dimensions(dimensions, frame):
                    # Dimensions cannot be applied to this frame: hand out empty series.
                    for info in infos:
                        info.data = Series()
                    continue

                subset = frame.query(build_query_string(dimensions))
                for info in infos:
                    info.data = subset[info.query.coordinate.measure]

    # Phase 2: notify each processor of success or of missing data.
    for info in self._data_queries:
        if info.data is None or len(info.data) == 0:
            outcome = ProcessorResult(
                False, f'No data found for Coordinate {info.query.coordinate}')
        else:
            outcome = ProcessorResult(True, info.data)
        info.processor.calculate(info.attr, outcome)
def process(self):
    """
    Express the latest return of the 'a' series as a multiple of the standard
    deviation of the preceding returns, and store it in ``self.value``.

    :return: ``self.value``; a failed result when the ratio cannot be computed
        (missing/NaN change, NaN or zero standard deviation)
    """
    a_data = self.children_data.get('a')
    if isinstance(a_data, ProcessorResult):
        if a_data.success:
            data_series = a_data.data

            # Latest move: return computed over the last two observations.
            change = returns(data_series.tail(2)).iloc[-1]

            # The baseline excludes the final observation so the latest move
            # is not part of its own standard deviation.
            returns_series = returns(data_series.head(-1))
            std_result = std(
                returns_series,
                w=Window(None, 0) if self.w is None else self.w).iloc[-1]

            # pd.isna covers both None and NaN; a plain ``is not None`` /
            # ``!= 0`` check lets NaN through and reports it as a success.
            computable = (not pd.isna(change)
                          and not pd.isna(std_result)
                          and std_result != 0)
            if computable:
                self.value = ProcessorResult(
                    True, pd.Series([change / std_result]))
            else:
                self.value = ProcessorResult(
                    False, "StdMoveProcessor returns a NaN")
        else:
            self.value = ProcessorResult(
                False, "StdMoveProcessor does not have 'a' series values yet")
    else:
        self.value = ProcessorResult(
            False, "StdMoveProcessor does not have 'a' series yet")
    return self.value
def process(self):
    """
    Compute percentiles of the 'a' series, relative to the optional 'b'
    series when that child is configured, and store the outcome in ``self.value``.

    :return: ``self.value``
    """
    a_data = self.children_data.get('a')
    if not isinstance(a_data, ProcessorResult):
        self.value = ProcessorResult(
            False, "PercentilesProcessor does not have 'a' series yet")
        return self.value
    if not a_data.success:
        self.value = ProcessorResult(
            False, "PercentilesProcessor does not have 'a' series values yet")
        return self.value

    b_data = self.children_data.get('b')
    # Need to check if the child node b was set in the first place.
    if self.children.get('b') and isinstance(b_data, ProcessorResult):
        if b_data.success:
            result = percentiles(a_data.data, b_data.data, w=self.w)
            self.value = ProcessorResult(True, result)
        else:
            # Reported as a failure: consumers that check ``success`` go on
            # to treat ``data`` as a series, which an error string is not.
            self.value = ProcessorResult(
                False, 'PercentilesProcessor: b is not a valid series.')
    else:
        # Single-series fallback. It must live in this ``else`` branch,
        # otherwise it overwrites the two-series outcome computed above.
        result = percentiles(a_data.data, w=self.w)
        self.value = ProcessorResult(True, result)
    return self.value
def process(self):
    """
    Look up the configured dimension on the 'a' child coordinate.

    :return: a successful result holding the dimension value, or a failed
        result when the coordinate or a truthy value for the dimension is missing
    """
    dimension = self.dimension
    # Enum dimensions are keyed by their value; anything else is used as-is.
    key: str = dimension.value if isinstance(dimension, Enum) else dimension

    coordinate = self.children.get('a')
    dimension_value = None
    if coordinate:
        dimension_value = coordinate.dimensions.get(key)

    if not dimension_value:
        return ProcessorResult(False, f'Dimension {key} not in given coordinate')
    return ProcessorResult(True, dimension_value)
def process(self):
    """
    Apply ``diff`` with ``self.obs`` to the 'a' series and store the outcome
    in ``self.value``; a failed result is stored when 'a' is missing or unsuccessful.
    """
    a_data = self.children_data.get('a')

    if not isinstance(a_data, ProcessorResult):
        self.value = ProcessorResult(False, "DiffProcessor does not have 'a' series yet")
        return
    if not a_data.success:
        self.value = ProcessorResult(False, "DiffProcessor does not have 'a' series values yet")
        return

    self.value = ProcessorResult(True, diff(a_data.data, self.obs))
def process(self):
    """
    Compute ``volatility`` of the 'a' series using ``self.w`` and
    ``self.returns_type`` and store the outcome in ``self.value``.
    """
    a_data = self.children_data.get('a')

    if not isinstance(a_data, ProcessorResult):
        self.value = ProcessorResult(False, 'Processor does not have data')
        return
    if not a_data.success:
        self.value = ProcessorResult(False, 'Could not compute volatility')
        return

    self.value = ProcessorResult(True, volatility(a_data.data, self.w, self.returns_type))
def update(self, result: ProcessorResult) -> None:
    """
    Set the value of the cell from a processor result and stamp the update time.

    A Series collapses to its last element (an empty Series becomes a failed
    result); any other payload is stored unchanged as a successful result.
    """
    payload = result.data

    if not isinstance(payload, Series):
        self.value = ProcessorResult(True, payload)
    elif payload.empty:
        self.value = ProcessorResult(False, 'Empty series as a result of processing.')
    else:
        self.value = ProcessorResult(True, payload.iloc[-1])

    self.updated_time = get_utc_now()
def process(self):
    """
    Compute ``correlation`` between the 'a' and 'benchmark' series using
    ``self.w`` and ``self.type_`` and store the outcome in ``self.value``.
    """
    a_data = self.children_data.get('a')
    benchmark_data = self.children_data.get('benchmark')

    # Both inputs must be present and successful; either shortfall yields
    # the same failure message.
    inputs_ready = (isinstance(a_data, ProcessorResult)
                    and isinstance(benchmark_data, ProcessorResult)
                    and a_data.success
                    and benchmark_data.success)
    if not inputs_ready:
        self.value = ProcessorResult(False, "Processor does not have A and Benchmark data yet")
        return

    result = correlation(a_data.data, benchmark_data.data, w=self.w, type_=self.type_)
    self.value = ProcessorResult(True, result)
def process(self) -> None:
    """
    Concatenate the 'b' series after the 'a' series and store the outcome in
    ``self.value``; a failed result is stored when either input is missing
    or unsuccessful.
    """
    # Function-scope import keeps this block self-contained.
    import pandas as pd

    a_data = self.children_data.get('a')
    b_data = self.children_data.get('b')
    if isinstance(a_data, ProcessorResult) and isinstance(b_data, ProcessorResult):
        if a_data.success and b_data.success:
            # Series.append was removed in pandas 2.0; pd.concat is the
            # documented replacement and keeps both indexes as they are.
            result = pd.concat([a_data.data, b_data.data])
            self.value = ProcessorResult(True, result)
        else:
            self.value = ProcessorResult(False, "Processor does not have A and B data yet")
    else:
        self.value = ProcessorResult(False, "Processor does not have A and B data yet")
def process(self) -> None:
    """
    Store the minimum of the 'a' series, wrapped in a one-element Series,
    as the processor value.
    """
    a = self.children_data.get('a')

    if not isinstance(a, ProcessorResult):
        self.value = ProcessorResult(False, "Processor does not have series yet")
        return

    if not (a.success and isinstance(a.data, Series)):
        self.value = ProcessorResult(
            False, "Processor does not data series yet")
        return

    smallest = min(a.data)
    self.value = ProcessorResult(True, pd.Series(smallest))
def process(self) -> None:
    """
    Compute returns for the 'a' series and store the outcome in ``self.value``.

    With ``self.observations`` set, the work is delegated to ``returns``.
    Otherwise a single first-to-last change is computed, which needs at least
    two data points. ``self.value`` is left untouched when 'a' is missing or
    unsuccessful.
    """
    a_data = self.children_data.get('a')
    if not isinstance(a_data, ProcessorResult) or not a_data.success:
        return

    data = a_data.data
    if self.observations is not None:
        value = returns(data, self.observations, self.type_)
        self.value = ProcessorResult(True, value)
        return

    if len(data) > 1:
        # NOTE(review): the change is divided by the last value, not the
        # first - confirm this is the intended convention.
        self.value = ProcessorResult(
            True, Series([(data.iloc[-1] - data.iloc[0]) / data.iloc[-1]]))
    else:
        # Not enough data is a failure: flagging it as a success would hand
        # an error string to consumers expecting series data.
        self.value = ProcessorResult(False, 'Series has is less than 2.')
def process(self):
    """
    Compute ``zscores`` of the 'a' series and store the outcome in ``self.value``.

    A truthy ``self.w`` is capped at the length of the series; otherwise no
    window (``None``) is passed.
    """
    a_data = self.children_data.get('a')

    if not isinstance(a_data, ProcessorResult):
        self.value = ProcessorResult(False, "ZscoresProcessor does not have 'a' series yet")
        return
    if not a_data.success:
        self.value = ProcessorResult(False, "ZscoresProcessor does not have 'a' series values yet")
        return

    series_length = len(a_data.data)
    window = None
    if self.w:
        fits = self.w <= series_length
        window = self.w if fits else series_length

    self.value = ProcessorResult(True, zscores(a_data.data, w=window))
def process(self, cross1: Entity) -> ProcessorResult:
    """
    Compute ``fx_implied_correlation`` between ``cross1`` and ``self.cross2``
    for ``self.tenor`` inside a ``DataContext`` spanning ``self.start`` to ``self.end``.

    :param cross1: first cross; must be a ``Cross`` instance
    :return: ``self.value``; failed when either input is not a ``Cross`` or
        when the calculation raises (the exception text becomes the message)
    """
    both_crosses = isinstance(cross1, Cross) and isinstance(self.cross2, Cross)
    if not both_crosses:
        self.value = ProcessorResult(
            False, "Processor does not have valid crosses as inputs")
        return self.value

    try:
        with DataContext(self.start, self.end):
            result = fx_implied_correlation(cross1, self.cross2, self.tenor)
            self.value = ProcessorResult(True, result)
    except Exception as e:
        # Any failure in the calculation is reported as a failed result.
        self.value = ProcessorResult(False, str(e))
    return self.value
def process(self):
    """
    Build the marker dictionary (name, value, shape) from the 'a' result and
    store it in ``self.value``.
    """
    a_data = self.children_data.get('a')

    if not isinstance(a_data, ProcessorResult):
        self.value = ProcessorResult(False, 'Processor does not have data')
        return
    if not a_data.success:
        self.value = ProcessorResult(False, 'Could not compute pipe marker')
        return

    marker = {
        'name': self.name,
        'value': a_data.data.get(0),
        'shape': self.shape.value
    }
    self.value = ProcessorResult(True, marker)
async def update(self, attribute: str, result: ProcessorResult, rdate_entity_map: Dict[str, date],
                 pool: ProcessPoolExecutor = None):
    """
    Handle the update of a single coordinate and recalculate the value

    :param attribute: Attribute aligning to data coordinate in the processor
    :param result: Processor result including success and series from data query
    :param rdate_entity_map: passed through to the date-range handling step
    :param pool: optional executor; when provided, ``process`` runs through
        the running loop's ``run_in_executor`` and its return value becomes the value
    """
    self.__handle_date_range(result, rdate_entity_map)
    self.children_data[attribute] = result

    if not isinstance(result, ProcessorResult):
        return

    if not result.success:
        # Propagate the failed child result unchanged.
        self.value = result
        return

    try:
        if pool:
            self.value = await asyncio.get_running_loop().run_in_executor(pool, self.process)
        else:
            self.process()
        self.post_process()
    except Exception as e:
        self.value = ProcessorResult(False,
                                     f'Error Calculating processor {self.__class__.__name__} due to {e}')
def __init__(self, name: str, processor: BaseProcessor, entity: Entity, dimension_overrides: List[Override],
             column_index: int, row_index: int, row_group: str = None):
    """
    Create a cell with its own copy of the root processor.

    :param name: name of the cell
    :param processor: root processor; deep-copied so the cell owns unique objects
    :param entity: entity the cell refers to
    :param dimension_overrides: overrides stored on the cell
    :param column_index: column position of the cell
    :param row_index: row position of the cell
    :param row_group: optional row group label
    """
    # Identity and placement
    self.cell_id = str(uuid.uuid4())
    self.name: str = name
    self.column_index = column_index
    self.row_index = row_index
    self.row_group = row_group

    # Inputs. The deep copy keeps the processor and its children unique to this cell.
    self.processor: BaseProcessor = copy.deepcopy(processor)
    self.entity: Entity = entity
    self.dimension_overrides = dimension_overrides

    # State: not yet calculated, never updated, no data queries collected
    self.value: ProcessorResult = ProcessorResult(False, DATA_CELL_NOT_CALCULATED)
    self.updated_time: Optional[str] = None
    self.data_queries: List[DataQueryInfo] = []
def process(self):
    """
    Store the n-th last element of the 'a' series, wrapped in a one-element
    Series, as the processor value.

    :return: ``self.value``; failed when 'a' is missing, unsuccessful, or not a Series
    """
    a_data = self.children_data.get('a')
    has_series = (isinstance(a_data, ProcessorResult)
                  and a_data.success
                  and isinstance(a_data.data, Series))
    if has_series:
        # .iloc is explicitly positional. Plain ``series[-n]`` is a label
        # lookup on integer indexes and otherwise depends on a deprecated
        # positional fallback.
        nth_last = a_data.data.iloc[-1 * self.n]
        self.value = ProcessorResult(True, pd.Series(nth_last))
    else:
        self.value = ProcessorResult(
            False, "NthLastProcessor does not have 'a' series values yet")
    return self.value
def process(self):
    """
    Compute ``percentile`` of the 'a' series for ``self.n`` and store the
    outcome, always as a Series, in ``self.value``.

    A truthy ``self.w`` is capped at the length of the series; otherwise no
    window (``None``) is passed.
    """
    a_data = self.children_data.get('a')

    if not isinstance(a_data, ProcessorResult):
        self.value = ProcessorResult(False, "PercentileProcessor does not have 'a' series yet")
        return
    if not a_data.success:
        self.value = ProcessorResult(False, "PercentileProcessor does not have 'a' series values yet")
        return

    series_length = len(a_data.data)
    window = None
    if self.w:
        fits = self.w <= series_length
        window = self.w if fits else series_length

    result = percentile(a_data.data, self.n, w=window)
    # Wrap any non-Series outcome so the value is always a Series.
    if not isinstance(result, pd.Series):
        result = pd.Series(result)
    self.value = ProcessorResult(True, result)
def process(self):
    """
    Store the trailing one-element slice of the 'a' series as the processor
    value; the value is left unchanged when 'a' is not a successful Series result.

    :return: ``self.value``
    """
    a_data = self.children_data.get('a')
    has_series = (isinstance(a_data, ProcessorResult)
                  and a_data.success
                  and isinstance(a_data.data, Series))
    if has_series:
        tail = a_data.data[-1:]
        self.value = ProcessorResult(True, pd.Series(tail))
    return self.value
def __init__(self):
    """
    Initialise an unattached processor: fresh id, placeholder value, no
    parent, no children and no data cell.
    """
    # Id combines the concrete class name with a random UUID.
    class_name = self.__class__.__name__
    self.id = f'{class_name}-{uuid.uuid4()}'

    # Placeholder until a value is calculated
    self.value: ProcessorResult = ProcessorResult(False, 'Value not set')

    # Link to the parent processor and the attribute this node fills on it
    self.parent: Optional[BaseProcessor] = None
    self.parent_attr: Optional[str] = None

    # Children and their latest results, keyed by attribute name
    self.children: Dict[str, Union[DataCoordinateOrProcessor, DataQueryInfo]] = {}
    self.children_data: Dict[str, ProcessorResult] = {}

    self.data_cell = None
def process(self):
    """
    Divide the 'a' series by ``self.dividend`` when it is truthy, otherwise
    by the 'b' series, and store the outcome in ``self.value``.

    A failed 'a' or 'b' result is propagated as the value; the value is left
    unchanged when the required input is not a ``ProcessorResult``.
    """
    a_data = self.children_data.get('a')
    if not isinstance(a_data, ProcessorResult):
        return

    if not a_data.success:
        self.value = a_data
        return

    # A truthy constant divisor takes precedence over the 'b' child.
    if self.dividend:
        self.value = ProcessorResult(True, a_data.data.div(self.dividend))
        return

    b_data = self.children_data.get('b')
    if not isinstance(b_data, ProcessorResult):
        return

    if b_data.success:
        self.value = ProcessorResult(True, a_data.data.div(b_data.data))
    else:
        self.value = b_data
def process(self):
    """
    Build the marker dictionary (name, start, end, shape) from the 'start'
    and 'end' results and store it in ``self.value``.

    :return: ``self.value``
    """
    start = self.children_data.get('start')
    end = self.children_data.get('end')

    if not (isinstance(start, ProcessorResult) and isinstance(end, ProcessorResult)):
        self.value = ProcessorResult(False, "Processor does not have start and end data yet")
        return self.value

    if not (start.success and end.success):
        self.value = ProcessorResult(False, "Processor does not have start and end values yet")
        return self.value

    marker = {
        'name': self.name,
        'start': start.data.get(0),
        'end': end.data.get(0),
        'shape': self.shape.value
    }
    self.value = ProcessorResult(True, marker)
    return self.value
def process(self):
    """
    Compute ``zscores`` of the 'a' series and store the outcome in ``self.value``.

    ``Window(None, 0)`` is used when ``self.w`` is None.

    :return: ``self.value``
    """
    a_data = self.children_data.get('a')

    if not isinstance(a_data, ProcessorResult):
        self.value = ProcessorResult(
            False, "ZscoresProcessor does not have 'a' series yet")
        return self.value

    if not a_data.success:
        self.value = ProcessorResult(
            False, "ZscoresProcessor does not have 'a' series values yet")
        return self.value

    window = Window(None, 0) if self.w is None else self.w
    self.value = ProcessorResult(True, zscores(a_data.data, w=window))
    return self.value
def process(self):
    """
    Store the last two remaining values of the 'a' series in ``self.value``.

    A failed 'a' result is propagated unchanged; any other shortfall yields
    the "not enough values" failure.

    :return: ``self.value``
    """
    a_data = self.children_data.get('a')
    last_two = None

    if isinstance(a_data, ProcessorResult):
        if not a_data.success:
            self.value = a_data
            return self.value

        series = a_data.data
        if len(series) >= 2:
            # Drop the entry labelled with the calendar date of the last index
            # value; errors='ignore' makes this a no-op when no such label exists.
            trimmed = series.drop(series.index[-1].date(), errors='ignore')
            if len(trimmed) >= 2:
                last_two = trimmed[-2:]

    if last_two is not None:
        self.value = ProcessorResult(True, last_two)
    else:
        self.value = ProcessorResult(
            False, 'Not enough values given to OneDayProcessor.')
    return self.value
def process(self):
    """
    Apply ``change`` to the 'a' series and store the outcome in ``self.value``;
    the value is left unchanged when 'a' is missing or unsuccessful.

    :return: ``self.value``
    """
    a_data = self.children_data.get('a')
    if isinstance(a_data, ProcessorResult) and a_data.success:
        self.value = ProcessorResult(True, change(a_data.data))
    return self.value
def process(self,
            w: Union[Window, int] = Window(None, 0),
            type_: SeriesType = SeriesType.PRICES):
    """
    Compute ``correlation`` between the 'a' and 'benchmark' series and store
    the outcome in ``self.value``.

    NOTE(review): the ``w`` and ``type_`` parameters are never read. The
    calculation uses ``self.w`` and a fixed ``SeriesType.PRICES``. Confirm
    whether the parameters are meant to be honoured.
    """
    a_data = self.children_data.get('a')
    benchmark_data = self.children_data.get('benchmark')

    # Both inputs must be present and successful; either shortfall yields
    # the same failure message.
    inputs_ready = (isinstance(a_data, ProcessorResult)
                    and isinstance(benchmark_data, ProcessorResult)
                    and a_data.success
                    and benchmark_data.success)
    if not inputs_ready:
        self.value = ProcessorResult(
            False, "Processor does not have A and Benchmark data yet")
        return

    result = correlation(a_data.data,
                         benchmark_data.data,
                         w=self.w,
                         type_=SeriesType.PRICES)
    self.value = ProcessorResult(True, result)