def fake_time_group_column(self, topic_id: TopicId, factor_id: FactorId, name: str) -> Optional[SubjectDatasetColumn]:
    """
    Fake a time group column to be added into the subject.

    The column is a computed parameter (year-of or month-of) over the given topic factor,
    selected by the inspection's ``measureOnTime``.

    :param topic_id: topic id of the factor to compute on
    :param factor_id: factor id to compute on
    :param name: prefix of the column alias
    :return: the faked column, or None when ``measureOnTime`` is neither YEAR nor MONTH
    """
    measure_on_time = self.inspection.measureOnTime
    if measure_on_time == MeasureMethod.YEAR:
        compute_type = ParameterComputeType.YEAR_OF
        alias = f'{name}_YEAR_'
    elif measure_on_time == MeasureMethod.MONTH:
        compute_type = ParameterComputeType.MONTH_OF
        alias = f'{name}_MONTH_'
    else:
        # no time grouping requested, nothing to fake
        return None

    source = TopicFactorParameter(kind=ParameterKind.TOPIC, topicId=topic_id, factorId=factor_id)
    return SubjectDatasetColumn(
        columnId=self.FAKE_TIME_GROUP_COLUMN_ID,
        parameter=ComputedParameter(kind=ParameterKind.COMPUTED, type=compute_type, parameters=[source]),
        alias=alias,
        arithmetic=SubjectColumnArithmetic.NONE)
def build_value_criteria_left_on_factor(
        self, topic_id: TopicId, factor_id: FactorId, value: Optional[str],
        ask_factor: Callable[[], Factor]) -> Parameter:
    """
    Build the left side of a value criteria on the given factor.

    When the stripped value has 4 characters (treated as a year) or 1/2 characters (treated as a month),
    and ``self.has_year_or_month`` accepts the factor, the left side is a year-of/month-of computed
    parameter over the factor. Otherwise it is the plain topic factor parameter.

    :param topic_id: topic id of the factor
    :param factor_id: factor id
    :param value: criteria value; None is treated as a value which is neither a year nor a month
    :param ask_factor: supplies the factor definition, only invoked when the value length matches
    :return: parameter to use as left side of the criteria expression
    """
    # value is optional, a missing one never looks like a year or a month
    value_length = 0 if value is None else len(value.strip())
    if value_length not in (1, 2, 4):
        # not month or year
        return self.build_topic_factor_parameter(topic_id, factor_id)

    factor: Factor = ask_factor()
    if not self.has_year_or_month(factor):
        return self.build_topic_factor_parameter(topic_id, factor_id)

    # 4 characters is a year, 1 or 2 characters is a month
    compute_type = ParameterComputeType.YEAR_OF if value_length == 4 else ParameterComputeType.MONTH_OF
    return ComputedParameter(
        kind=ParameterKind.COMPUTED, type=compute_type,
        parameters=[
            TopicFactorParameter(kind=ParameterKind.TOPIC, topicId=topic_id, factorId=factor_id)
        ])
def action(topic_id: TopicId, factor_id: FactorId) -> ParameterExpression:
    """
    Build the expression which matches the enclosing category ``segment`` on the given factor.

    A segment holding only ``OtherCategorySegmentValue`` matches everything which is NOT one of the
    values gathered by ``self.gather_defined_category_values(segments)``; any other segment matches
    its own non-blank values.

    :raises IndicatorKernelException: when no value is available to compare with
    """
    declared = ArrayHelper(segment.value).filter(lambda x: is_not_blank(x)).to_list()
    if len(declared) == 0:
        raise IndicatorKernelException('Value of category segment not declared.')

    is_others = len(declared) == 1 and declared[0] == OtherCategorySegmentValue
    if is_others:
        # other values, exclude everything gathered from the segments
        declared = self.gather_defined_category_values(segments)
        if len(declared) == 0:
            raise IndicatorKernelException('No values rather than others of category segment not declared.')
        operator = ParameterExpressionOperator.NOT_IN
    else:
        operator = ParameterExpressionOperator.IN

    return ParameterExpression(
        left=TopicFactorParameter(kind=ParameterKind.TOPIC, topicId=topic_id, factorId=factor_id),
        operator=operator,
        right=declared)
def fake_time_range_to_dataset_filter(self) -> Optional[ParameterJoint]:
    """
    Fake the inspection time range(s) as a dataset filter.

    :return: None when no time range factor or no valued time range is declared;
             otherwise an AND joint holding a single expression on the time range factor
    :raises IndicatorKernelException: when the factor passes ``self.has_year_or_month``
             but the time range measure is neither YEAR nor MONTH
    """
    time_range_factor_id = self.inspection.timeRangeFactorId
    if is_blank(time_range_factor_id):
        return None
    time_range_factor = self.find_factor(time_range_factor_id, lambda: 'Time range factor not declared.')

    time_ranges = ArrayHelper(self.inspection.timeRanges) \
        .filter(lambda x: x is not None and x.value is not None).to_list()
    if len(time_ranges) == 0:
        # no ranges given
        return None

    # single range compares by equals, multiple ranges by in (values joined by comma)
    if len(time_ranges) == 1:
        operator = ParameterExpressionOperator.EQUALS
        right = time_ranges[0].value
    else:
        operator = ParameterExpressionOperator.IN
        right = ArrayHelper(time_ranges).map(lambda x: x.value).join(',')

    time_range_measure = self.inspection.timeRangeMeasure
    factor_parameter = TopicFactorParameter(
        kind=ParameterKind.TOPIC, topicId=self.topic.topicId, factorId=time_range_factor_id)
    if self.has_year_or_month(time_range_factor):
        if time_range_measure == MeasureMethod.YEAR:
            compute_type = ParameterComputeType.YEAR_OF
        elif time_range_measure == MeasureMethod.MONTH:
            compute_type = ParameterComputeType.MONTH_OF
        else:
            raise IndicatorKernelException(
                f'Measure method[{time_range_measure}] for factor type[{time_range_factor.type}] is not supported.')
        left = ComputedParameter(kind=ParameterKind.COMPUTED, type=compute_type, parameters=[factor_parameter])
    else:
        left = factor_parameter

    return ParameterJoint(
        jointType=ParameterJointType.AND,
        filters=[
            ParameterExpression(
                left=left,
                operator=operator,
                right=ConstantParameter(kind=ParameterKind.CONSTANT, value=str(right))
            )
        ]
    )
def action(topic_id: TopicId, factor_id: FactorId) -> ParameterCondition:
    """
    Build the condition which matches the enclosing numeric range ``segment`` on the given factor.

    With ``include`` equal to INCLUDE_MIN the range is [min, max), otherwise (min, max].
    A blank bound is simply left out; both bounds present are joined by AND.

    :raises IndicatorKernelException: when both bounds are blank
    """
    lower = segment.value.min
    upper = segment.value.max
    if include == RangeBucketValueIncluding.INCLUDE_MIN:
        lower_operator = ParameterExpressionOperator.MORE_EQUALS
        upper_operator = ParameterExpressionOperator.LESS
    else:
        lower_operator = ParameterExpressionOperator.MORE
        upper_operator = ParameterExpressionOperator.LESS_EQUALS

    def compare(operator, bound) -> ParameterExpression:
        # one comparison of the factor against a bound
        return ParameterExpression(
            left=TopicFactorParameter(kind=ParameterKind.TOPIC, topicId=topic_id, factorId=factor_id),
            operator=operator, right=bound)

    has_lower = is_not_blank(lower)
    has_upper = is_not_blank(upper)
    if has_lower and has_upper:
        return ParameterJoint(
            jointType=ParameterJointType.AND,
            filters=[compare(lower_operator, lower), compare(upper_operator, upper)])
    if has_lower:
        return compare(lower_operator, lower)
    if has_upper:
        return compare(upper_operator, upper)
    raise IndicatorKernelException('Neither minimum not maximum value of numeric value segment is declared.')
def translate_topic_factor(param: TopicFactorParameter) -> TopicFactorParameter:
    """
    Translate a topic factor parameter to one which refers to the matching subject column.

    The incoming ``factorId`` is the key looked up in ``subject_column_map``;
    the returned parameter carries the found column's ``columnId`` as its ``factorId``.
    """
    # alias name on subject
    factor_id = param.factorId
    column = subject_column_map.get(factor_id)
    if column is None:
        raise_400(f'Cannot find column[name={factor_id}] from subject.')
    # topic id is not needed here, since subject will be built as a sub query
    translated = TopicFactorParameter(kind=ParameterKind.TOPIC, factorId=column.columnId)
    return translated
def fake_indicator_factor_to_dataset_column(self) -> SubjectDatasetColumn:
    """
    Fake the indicator factor as the dataset column with fixed id '1' and alias 'column_1'.

    The factor is resolved through ``self.find_factor`` with the indicator's factor id.
    """

    def not_declared() -> str:
        return f'Indicator[id={self.indicator.indicatorId}, name={self.indicator.name}] factor not declared.'

    indicator_factor = self.find_factor(self.indicator.factorId, not_declared)
    factor_parameter = TopicFactorParameter(
        kind=ParameterKind.TOPIC, topicId=self.topic.topicId, factorId=indicator_factor.factorId)
    return SubjectDatasetColumn(columnId='1', parameter=factor_parameter, alias='column_1')
def fake_indicator_factor_to_dataset_column(
        self, column_index: int) -> Tuple[SubjectDatasetColumn, Optional[int], int]:
    """
    Fake the indicator factor as a dataset column at the given index.

    :param column_index: index used for the column id (as string) and the alias ``column_<index>``
    :return: tuple of (faked column, index used by this column, next available index)
    """

    def not_declared() -> str:
        return f'Indicator[id={self.indicator.indicatorId}, name={self.indicator.name}] factor not declared.'

    indicator_factor = self.find_factor(self.indicator.factorId, not_declared)
    factor_parameter = TopicFactorParameter(
        kind=ParameterKind.TOPIC, topicId=self.topic.topicId, factorId=indicator_factor.factorId)
    column = SubjectDatasetColumn(
        columnId=str(column_index), parameter=factor_parameter, alias=f'column_{column_index}')
    next_column_index = column_index + 1
    return column, column_index, next_column_index
def parse_variable_to_value(
        name: str, variables: PipelineVariables,
        available_schemas: List[TopicSchema], allow_in_memory_variables: bool,
        parse_from_variables: Callable[[PipelineVariables, PrincipalService, str], Tuple[bool, Any, Any]],
        principal_service: PrincipalService
) -> Tuple[bool, Union[Any, ParsedStorageParameter]]:
    """
    Parse a variable name to a value, or to a topic factor parameter.

    The first element of the returned tuple is whether the variable is found in variables.
    If it is not from variables, the second element is a parsed topic factor parameter.

    :param name: variable name; a leading '&' forces it to be treated as a factor reference
    :param variables: in-memory variables, only passed through to ``parse_from_variables``
    :param available_schemas: topic schemas in which the topic/factor can be looked up
    :param allow_in_memory_variables: True to read from memory variables and to address factors by
           plain factor name on the first schema; False to address factors by ``topic.factor``
    :param parse_from_variables: reader of memory variables, returns (parsed, value, parsed value)
    :param principal_service: passed through to ``parse_from_variables`` and the parsed parameter
    :raises DataKernelException: when the name is not supported, or topic/factor cannot be found
    """
    if name.startswith('&'):
        # not from variables
        available_name = name[1:]
    elif allow_in_memory_variables:
        # try to get from memory variables
        parsed, value, parsed_value = parse_from_variables(variables, principal_service, name)
        if not parsed:
            raise DataKernelException(f'Value[{value}] cannot be parsed to date or datetime.')
        return True, parsed_value
    else:
        # still not from variables
        available_name = name

    if allow_in_memory_variables:
        # in pipeline "find by" use factor name. factor must be found in given available schemas.
        # actually, the only one is the source topic of find by itself
        if len(available_schemas) == 0:
            raise DataKernelException(
                f'Variable name[{name}] is not supported, since no available topic given.')
        topic = available_schemas[0].get_topic()
        factor_name = available_name
    else:
        # in console subject use topic.factor. topic must be in given available schemas
        names = available_name.split('.')
        if len(names) != 2:
            # no dot at all, or more than one dot
            raise DataKernelException(f'Variable name[{name}] is not supported.')
        topic_name, factor_name = names
        topic = ArrayHelper(available_schemas).map(lambda x: x.get_topic()).find(
            lambda x: x.name == topic_name)
        if topic is None:
            raise DataKernelException(f'Topic[{topic_name}] not found in given available topics.')

    # look the factor up on the resolved topic, which is not necessarily the first schema
    factor: Optional[Factor] = ArrayHelper(topic.factors).find(lambda x: x.name == factor_name)
    if factor is None:
        raise DataKernelException(
            f'Factor[{factor_name}] in topic[id={topic.topicId}, name={topic.name}] not found.')
    return False, ParsedStorageTopicFactorParameter(
        TopicFactorParameter(kind=ParameterKind.TOPIC, topicId=topic.topicId, factorId=factor.factorId),
        available_schemas, principal_service, allow_in_memory_variables)
def fake_time_group_to_dataset_column(
        self, column_index: int) -> Tuple[Optional[SubjectDatasetColumn], Optional[int], int]:
    """
    Fake the time group (measure on time) as a dataset column at the given index.

    :param column_index: index used for the column id (as string) and the alias ``column_<index>``
    :return: (None, None, column_index) when ``self.has_time_group()`` reports no time group;
             otherwise (faked column, index used by this column, next available index)
    :raises IndicatorKernelException: when ``self.has_year_or_month`` accepts the factor type
             but the measure method is neither YEAR nor MONTH
    """
    time_group_existing, measure_on_time_factor_id, measure_on_time = self.has_time_group()
    if not time_group_existing:
        return None, None, column_index

    measure_on_time_factor = self.find_factor(
        measure_on_time_factor_id, lambda: 'Measure on time factor not declared.')
    factor_parameter = TopicFactorParameter(
        kind=ParameterKind.TOPIC, topicId=self.topic.topicId, factorId=measure_on_time_factor_id)

    if self.has_year_or_month(measure_on_time_factor.type):
        if measure_on_time == MeasureMethod.YEAR:
            compute_type = ParameterComputeType.YEAR_OF
        elif measure_on_time == MeasureMethod.MONTH:
            compute_type = ParameterComputeType.MONTH_OF
        else:
            raise IndicatorKernelException(
                f'Measure method[{measure_on_time}] for factor type[{measure_on_time_factor.type}] is not supported.')
        parameter = ComputedParameter(
            kind=ParameterKind.COMPUTED, type=compute_type, parameters=[factor_parameter])
    else:
        # group on the factor value itself
        parameter = factor_parameter

    column = SubjectDatasetColumn(
        columnId=str(column_index), parameter=parameter, alias=f'column_{column_index}')
    return column, column_index, column_index + 1
def to_numeric_range_case_route(
        self, segment: NumericValueSegment, include: RangeBucketValueIncluding, factor: Factor
) -> Parameter:
    """
    Build the conditional case route of a numeric range segment.

    The route is a conditional constant whose value is the segment name, and whose condition
    is the AND joint of the declared bounds on the given factor. With ``include`` equal to
    INCLUDE_MIN the range is [min, max), otherwise (min, max]. A bound which is None is left out.

    :param segment: numeric range segment, ``value.min``/``value.max`` are the bounds
    :param include: which bound is inclusive
    :param factor: factor to compare with the bounds
    :return: conditional constant parameter for the segment
    """
    min_value = segment.value.min
    max_value = segment.value.max
    if include == RangeBucketValueIncluding.INCLUDE_MIN:
        min_operator = ParameterExpressionOperator.MORE_EQUALS
        max_operator = ParameterExpressionOperator.LESS
    else:
        min_operator = ParameterExpressionOperator.MORE
        max_operator = ParameterExpressionOperator.LESS_EQUALS

    # only declared bounds take part in the condition
    filters = []
    if min_value is not None:
        filters.append(ParameterExpression(
            left=TopicFactorParameter(
                kind=ParameterKind.TOPIC, topicId=self.topic.topicId, factorId=factor.factorId),
            operator=min_operator, right=min_value
        ))
    if max_value is not None:
        filters.append(ParameterExpression(
            left=TopicFactorParameter(
                kind=ParameterKind.TOPIC, topicId=self.topic.topicId, factorId=factor.factorId),
            operator=max_operator, right=max_value
        ))

    return ConstantParameter(
        kind=ParameterKind.CONSTANT,
        conditional=True,
        on=ParameterJoint(jointType=ParameterJointType.AND, filters=filters),
        value=segment.name
    )
def to_category_case_route(self, segment: CategorySegment, factor: Factor) -> Parameter:
    """
    Build the conditional case route of a category segment.

    The route is a conditional constant whose value is the segment name,
    applied when the given factor is IN the segment values.
    """
    factor_parameter = TopicFactorParameter(
        kind=ParameterKind.TOPIC, topicId=self.topic.topicId, factorId=factor.factorId)
    in_segment = ParameterExpression(
        left=factor_parameter,
        operator=ParameterExpressionOperator.IN,
        right=segment.value
    )
    condition = ParameterJoint(jointType=ParameterJointType.AND, filters=[in_segment])
    return ConstantParameter(
        kind=ParameterKind.CONSTANT, conditional=True, on=condition, value=segment.name)
def fake_measure_on_to_dataset_column(
        self, column_index: int) -> Tuple[Optional[SubjectDatasetColumn], Optional[int], int]:
    """
    Fake the inspection's "measure on" as a dataset column at the given index.

    :param column_index: index used for the column id (as string) and the alias ``column_<index>``
    :return: (None, None, column_index) when there is nothing to measure on;
             otherwise (faked column, index used by this column, next available index)
    :raises IndicatorKernelException: when measuring on VALUE without a bucket,
             when the bucket has no usable segments, or when the measure is not supported
    """
    measure_on = self.inspection.measureOn
    if measure_on is None or measure_on == InspectMeasureOn.NONE:
        return None, None, column_index
    # decide which factor is measured on
    if measure_on == InspectMeasureOn.OTHER:
        measure_on_factor_id = self.inspection.measureOnFactorId
        if is_blank(measure_on_factor_id):
            return None, None, column_index
    elif measure_on == InspectMeasureOn.VALUE:
        # measure on the indicator factor itself
        measure_on_factor_id = self.indicator.factorId
    else:
        return None, None, column_index
    measure_on_factor = self.find_factor(measure_on_factor_id, lambda: 'Measure on factor not declared.')
    measure_on_bucket_id = self.inspection.measureOnBucketId
    if is_blank(measure_on_bucket_id):
        if measure_on == InspectMeasureOn.OTHER:
            # no bucket, use the factor value itself as the classification
            column = SubjectDatasetColumn(
                columnId=str(column_index),
                parameter=TopicFactorParameter(
                    kind=ParameterKind.TOPIC, topicId=self.topic.topicId, factorId=measure_on_factor_id),
                alias=f'column_{column_index}'
            )
        else:
            # measuring on value always needs a bucket
            raise IndicatorKernelException('Measure on bucket not declared.')
    else:
        bucket = ask_bucket(measure_on_bucket_id, self.principalService)
        if measure_on == InspectMeasureOn.VALUE:
            bucket = self.to_numeric_segments_bucket(bucket)
            # INCLUDE_MIN is the default when the bucket declares nothing
            include = RangeBucketValueIncluding.INCLUDE_MIN if bucket.include is None else bucket.include
            # keep segments which declare at least one bound
            segments = ArrayHelper(bucket.segments) \
                .filter(lambda x: x.value is not None) \
                .filter(lambda x: x.value.min is not None or x.value.max is not None) \
                .to_list()
            if len(segments) == 0:
                raise IndicatorKernelException('Numeric range segments not declared.')
            column = SubjectDatasetColumn(
                columnId=str(column_index),
                parameter=ComputedParameter(
                    kind=ParameterKind.COMPUTED, type=ParameterComputeType.CASE_THEN,
                    parameters=[
                        *ArrayHelper(segments).map(
                            lambda x: self.to_numeric_range_case_route(x, include, measure_on_factor)).to_list(),
                        # an additional unconditional route, appended after the segment routes
                        ConstantParameter(kind=ParameterKind.CONSTANT, value='-')
                    ]
                ),
                alias=f'column_{column_index}'
            )
        elif measure_on == InspectMeasureOn.OTHER:
            bucket = self.to_category_segments_bucket(bucket)
            # keep segments which declare at least one value
            segments = ArrayHelper(bucket.segments) \
                .filter(lambda x: x.value is not None and len(x.value) != 0).to_list()
            if len(segments) == 0:
                raise IndicatorKernelException('Category segments not declared.')
            # a segment holding only the "others" value becomes the unconditional route
            anyway_segment: CategorySegment = ArrayHelper(segments) \
                .find(lambda x: len(x.value) == 1 and x.value[0] == OtherCategorySegmentValue)
            if anyway_segment is not None:
                conditional_routes = ArrayHelper(segments).filter(lambda x: x != anyway_segment).to_list()
                anyway_route = ConstantParameter(kind=ParameterKind.CONSTANT, value=anyway_segment.name)
            else:
                conditional_routes = segments
                anyway_route = ConstantParameter(kind=ParameterKind.CONSTANT, value='-')
            column = SubjectDatasetColumn(
                columnId=str(column_index),
                parameter=ComputedParameter(
                    kind=ParameterKind.COMPUTED, type=ParameterComputeType.CASE_THEN,
                    parameters=[
                        *ArrayHelper(conditional_routes).map(
                            lambda x: self.to_category_case_route(x, measure_on_factor)).to_list(),
                        anyway_route
                    ]
                ),
                alias=f'column_{column_index}'
            )
        else:
            raise IndicatorKernelException(f'Measure[{measure_on}] is not supported.')
    return column, column_index, column_index + 1
def find_source_parameter(topic: Topic, factor_name: str) -> TopicFactorParameter:
    """
    Build the topic factor parameter for the factor found by name (via ``find_factor``) on the given topic.
    """
    source_factor = find_factor(topic, factor_name)
    return TopicFactorParameter(
        kind=ParameterKind.TOPIC,
        topicId=topic.topicId,
        factorId=source_factor.factorId)
def build_topic_factor_parameter(self, topic_id: TopicId, factor_id: FactorId) -> Parameter:
    """
    Build a plain topic factor parameter for the given topic id and factor id.
    """
    parameter = TopicFactorParameter(
        kind=ParameterKind.TOPIC,
        topicId=topic_id,
        factorId=factor_id)
    return parameter
def fake_time_range_to_report(self) -> Optional[ParameterJoint]:
    """
    Fake the inspection time range(s) as a report filter.

    When the time range column is a topic factor parameter, its topic and factor are resolved and,
    if ``self.has_year_or_month`` accepts the factor, the filter compares the year-of/month-of the
    column. In any other case the filter compares the column value itself.

    :return: None when no time range factor or no valued time range is declared;
             otherwise an AND joint holding a single expression on the time range column
    :raises IndicatorKernelException: when topic or factor of the column is not declared or not found,
             or when the time range measure is neither YEAR nor MONTH for a year/month capable factor
    """
    time_range_factor_id = self.inspection.timeRangeFactorId
    if is_blank(time_range_factor_id):
        return None
    time_range_column = self.find_column(time_range_factor_id, lambda: 'Time range factor not declared.')

    time_ranges = ArrayHelper(self.inspection.timeRanges) \
        .filter(lambda x: x is not None and x.value is not None).to_list()
    if len(time_ranges) == 0:
        # no ranges given
        return None

    # single range compares by equals, multiple ranges by in (values joined by comma)
    if len(time_ranges) == 1:
        operator = ParameterExpressionOperator.EQUALS
        right = time_ranges[0].value
    else:
        operator = ParameterExpressionOperator.IN
        right = ArrayHelper(time_ranges).map(lambda x: x.value).join(',')
    time_range_measure = self.inspection.timeRangeMeasure

    # stays None when the column value itself should be compared
    compute_type = None
    if isinstance(time_range_column.parameter, TopicFactorParameter):
        topic_id = time_range_column.parameter.topicId
        if is_blank(topic_id):
            raise IndicatorKernelException(f'Topic not declared for time range factor[id={time_range_factor_id}].')
        topic = get_topic_service(self.principalService).find_by_id(topic_id)
        if topic is None:
            raise IndicatorKernelException(f'Topic[id={topic_id}] not found.')
        factor_id = time_range_column.parameter.factorId
        # guard the factor id here, the topic id is already checked above
        if is_blank(factor_id):
            raise IndicatorKernelException(f'Factor not declared for time range factor[id={time_range_factor_id}].')
        factor = ArrayHelper(topic.factors).find(lambda x: x.factorId == factor_id)
        if factor is None:
            raise IndicatorKernelException(f'Factor[id={factor_id}] not found on topic[id={topic_id}].')
        if self.has_year_or_month(factor):
            if time_range_measure == MeasureMethod.YEAR:
                compute_type = ParameterComputeType.YEAR_OF
            elif time_range_measure == MeasureMethod.MONTH:
                compute_type = ParameterComputeType.MONTH_OF
            else:
                raise IndicatorKernelException(
                    f'Measure method[{time_range_measure}] for factor type[{factor.type}] is not supported.')

    # NOTE(review): topic id is the fixed literal '1' in every branch of the original - confirm its meaning
    left = TopicFactorParameter(kind=ParameterKind.TOPIC, topicId='1', factorId=time_range_factor_id)
    if compute_type is not None:
        left = ComputedParameter(kind=ParameterKind.COMPUTED, type=compute_type, parameters=[left])

    return ParameterJoint(
        jointType=ParameterJointType.AND,
        filters=[
            ParameterExpression(
                left=left,
                operator=operator,
                right=ConstantParameter(kind=ParameterKind.CONSTANT, value=str(right))
            )
        ]
    )
def test_single_topic(self):
    """
    Page a subject built on a single topic, then find data of a report on that subject.

    Results are printed only, nothing is asserted.
    """
    self.prepare_data()

    # subject columns: computed, topic factor and constant parameters
    columns = [
        SubjectDatasetColumn(
            columnId='1',
            parameter=ComputedParameter(
                kind=ParameterKind.COMPUTED, type=ParameterComputeType.ADD,
                parameters=[
                    TopicFactorParameter(kind=ParameterKind.TOPIC, topicId='1', factorId='1'),
                    ConstantParameter(kind=ParameterKind.CONSTANT, value='2')
                ]),
            alias='Column1', arithmetic=SubjectColumnArithmetic.SUMMARY),
        SubjectDatasetColumn(
            columnId='2',
            parameter=TopicFactorParameter(kind=ParameterKind.TOPIC, topicId='1', factorId='1'),
            alias='Column2'),
        SubjectDatasetColumn(
            columnId='3',
            parameter=ConstantParameter(kind=ParameterKind.CONSTANT, value='{&now}'),
            alias='Column3'),
        SubjectDatasetColumn(
            columnId='4',
            parameter=ConstantParameter(kind=ParameterKind.CONSTANT, value='HELLO WORLD!'),
            alias='Column4'),
        SubjectDatasetColumn(
            columnId='5',
            parameter=ComputedParameter(
                kind=ParameterKind.COMPUTED, type=ParameterComputeType.ADD,
                parameters=[
                    ConstantParameter(kind=ParameterKind.CONSTANT, value='201'),
                    ConstantParameter(kind=ParameterKind.CONSTANT, value='102')
                ]),
            alias='Column5'),
        SubjectDatasetColumn(
            columnId='6',
            parameter=ComputedParameter(
                kind=ParameterKind.COMPUTED, type=ParameterComputeType.YEAR_OF,
                parameters=[
                    ConstantParameter(kind=ParameterKind.CONSTANT, value='2022/03/03'),
                ]),
            alias='Column6'),
        SubjectDatasetColumn(
            columnId='7',
            parameter=TopicFactorParameter(kind=ParameterKind.TOPIC, topicId='1', factorId='2'),
            alias='Column7'),
    ]
    subject = Subject(dataset=SubjectDataset(columns=columns), connectId='1', userId='1', tenantId='1')

    subject_data_service = SubjectDataService(subject, create_fake_principal_service())
    page = subject_data_service.page(Pageable(pageNumber=1, pageSize=100))
    print(page)

    # report on the subject: one summary indicator, one dimension, one filter
    enabled_filter = ParameterExpression(
        left=TopicFactorParameter(kind=ParameterKind.TOPIC, factorId='7'),
        operator=ParameterExpressionOperator.EQUALS,
        right=ConstantParameter(kind=ParameterKind.CONSTANT, value='true'))
    report = Report(
        indicators=[
            ReportIndicator(columnId='1', name='sum_value', arithmetic=ReportIndicatorArithmetic.SUMMARY)
        ],
        dimensions=[ReportDimension(columnId='7', name='enabled')],
        filters=ParameterJoint(jointType=ParameterJointType.AND, filters=[enabled_filter]))

    report_data_service = ReportDataService(subject, report, create_fake_principal_service(), False)
    data = report_data_service.find()
    print(data)