async def fetch_topic_data_ids(
    topic_id: Optional[TopicId] = None,
    tenant_id: Optional[TenantId] = None,
    criteria: Optional[ParameterJoint] = None,
    principal_service: PrincipalService = Depends(get_any_admin_principal)
) -> List[str]:
    """Fetch the distinct row ids of the given topic, optionally filtered by criteria."""
    if is_blank(topic_id):
        raise_400('Topic id is required.')
    tenant_id = validate_tenant_id(tenant_id, principal_service)
    principal_service = fake_to_tenant(principal_service, tenant_id)

    schema = get_topic_service(principal_service).find_schema_by_id(
        topic_id, tenant_id)
    storage = ask_topic_storage(schema, principal_service)
    service = ask_topic_data_service(schema, storage, principal_service)

    if criteria is None:
        rows = service.find_distinct_values(None,
                                            [TopicDataColumnNames.ID.value],
                                            False)
    else:
        parsed_criteria = parse_condition_for_storage(criteria, [schema],
                                                      principal_service, False)
        empty_variables = PipelineVariables(None, None, None)
        rows = service.find_distinct_values(
            [parsed_criteria.run(empty_variables, principal_service)],
            [TopicDataColumnNames.ID.value], False)

    return ArrayHelper(rows).map(
        lambda x: str(x.get(TopicDataColumnNames.ID.value))).to_list()
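
# A self-contained sketch of the final mapping step above: rows come back as
# dicts keyed by storage column names; only the id column is kept, stringified.
# 'id_' stands in here for TopicDataColumnNames.ID.value.
example_rows = [{'id_': 101}, {'id_': 102}]
example_ids = [str(row.get('id_')) for row in example_rows]
assert example_ids == ['101', '102']
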
async def patch_topic_data(
    topic_name: Optional[str] = None,
    patch_type: Optional[PipelineTriggerType] = PipelineTriggerType.MERGE,
    tenant_id: Optional[TenantId] = None,
    data=Body(...),
    principal_service: PrincipalService = Depends(get_any_admin_principal)
) -> None:
    """
	data patch will not trigger any pipeline
	"""
    if is_blank(topic_name):
        raise_400('Topic name is required.')
    if patch_type is None:
        patch_type = PipelineTriggerType.MERGE
    if patch_type == PipelineTriggerType.INSERT_OR_MERGE:
        raise_400('Patch type can be one of insert/merge/delete.')
    tenant_id = validate_tenant_id(tenant_id, principal_service)
    principal_service = fake_to_tenant(principal_service, tenant_id)

    schema = get_topic_schema(topic_name, tenant_id, principal_service)
    storage = ask_topic_storage(schema, principal_service)
    service = ask_topic_data_service(schema, storage, principal_service)
    if patch_type == PipelineTriggerType.INSERT:
        service.trigger_by_insert(data)
    elif patch_type == PipelineTriggerType.MERGE:
        service.trigger_by_merge(data)
    elif patch_type == PipelineTriggerType.DELETE:
        service.trigger_by_delete(data)
    else:
        raise DataKernelException(
            f'Patch type [{patch_type}] is not supported.')
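
# A runnable sketch of the patch-type validation above, with a stand-in enum
# (PipelineTriggerType itself is assumed to carry these four members):
from enum import Enum

class _TriggerTypeSketch(str, Enum):
    INSERT = 'insert'
    MERGE = 'merge'
    INSERT_OR_MERGE = 'insert-or-merge'
    DELETE = 'delete'

def _validate_patch_type(patch_type):
    # default to MERGE; reject the one trigger type a data patch cannot honor
    if patch_type is None:
        return _TriggerTypeSketch.MERGE
    if patch_type == _TriggerTypeSketch.INSERT_OR_MERGE:
        raise ValueError('Patch type can be one of insert/merge/delete.')
    return patch_type

assert _validate_patch_type(None) is _TriggerTypeSketch.MERGE
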
def find_topic_data_service(
    topic_id: TopicId, tenant_id: TenantId, principal_service: PrincipalService
) -> Tuple[TopicSchema, TopicDataService]:
    topic_schema = get_topic_service(principal_service).find_schema_by_id(
        topic_id, tenant_id)
    topic_storage = ask_topic_storage(topic_schema, principal_service)
    topic_service = ask_topic_data_service(topic_schema, topic_storage,
                                           principal_service)
    return topic_schema, topic_service
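
# Hypothetical call site (names are placeholders): the pair is meant to be
# unpacked together, so the schema stays available alongside its data service.
#     schema, service = find_topic_data_service(topic_id, tenant_id, principal)
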
	def find(self, topic_id: TopicId, start_time: datetime, end_time: datetime) -> Optional[TopicProfile]:
		schema = get_topic_schema(topic_id, self.principalService)
		if is_raw_topic(schema.get_topic()):
			raise DqcException(f'Raw topic[name={schema.get_topic().name}] is not supported for profiling.')
		storage = ask_topic_storage(schema, self.principalService)
		service = ask_topic_data_service(schema, storage, self.principalService)
		criteria = [
			EntityCriteriaExpression(
				left=ColumnNameLiteral(columnName=TopicDataColumnNames.TENANT_ID.value),
				right=self.principalService.get_tenant_id()),
			EntityCriteriaExpression(
				left=ColumnNameLiteral(columnName=TopicDataColumnNames.UPDATE_TIME.value),
				operator=EntityCriteriaOperator.GREATER_THAN_OR_EQUALS,
				right=start_time),
			EntityCriteriaExpression(
				left=ColumnNameLiteral(columnName=TopicDataColumnNames.UPDATE_TIME.value),
				operator=EntityCriteriaOperator.LESS_THAN_OR_EQUALS,
				right=end_time)
		]
		data = service.find(criteria)

		columns = [
			TopicDataColumnNames.ID.value,
			*ArrayHelper(schema.get_topic().factors).map(lambda x: x.name).to_list(),
			TopicDataColumnNames.TENANT_ID.value,
			TopicDataColumnNames.INSERT_TIME.value,
			TopicDataColumnNames.UPDATE_TIME.value
		]

		def row_to_list(row: Dict[str, Any]) -> List[Any]:
			return ArrayHelper(columns).map(lambda x: row.get(x)).to_list()

		data_frame = build_data_frame(ArrayHelper(data).map(row_to_list).to_list(), columns)
		data_frame = convert_data_frame_type_by_topic(data_frame, schema.get_topic())
		data_frame.drop([
			TopicDataColumnNames.TENANT_ID.value,
			TopicDataColumnNames.UPDATE_TIME.value,
			TopicDataColumnNames.INSERT_TIME.value,
			TopicDataColumnNames.AGGREGATE_ASSIST.value,
			TopicDataColumnNames.ID.value,
			TopicDataColumnNames.VERSION.value
		], axis=1, inplace=True, errors='ignore')

		# profiling a single row is not meaningful, so report only on two rows or more
		if data_frame.empty or len(data_frame.index) == 1:
			return None
		else:
			logger.info(f'memory_usage {data_frame.memory_usage(deep=True).sum()} bytes')
			profile = ProfileReport(data_frame, title=f'{schema.get_topic().name} data profile report', minimal=True)
			json_data = profile.to_json()
			json_constants_map = {
				'-Infinity': float('-Infinity'),
				'Infinity': float('Infinity'),
				'NaN': None,
			}
			return loads(json_data, parse_constant=lambda x: json_constants_map[x])
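
# The parse_constant hook above is what turns the report's non-standard JSON
# constants into serializable values; a self-contained demonstration:
from json import loads

_constants = {'-Infinity': float('-inf'), 'Infinity': float('inf'), 'NaN': None}
_parsed = loads('{"a": NaN, "b": Infinity}', parse_constant=lambda c: _constants[c])
assert _parsed == {'a': None, 'b': float('inf')}
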
def exchange_topic_data_service(data_service: TopicDataService, topic_id: TopicId) -> TopicDataService:
	principal_service = data_service.get_principal_service()
	topic_service = get_topic_service(principal_service)
	topic = topic_service.find_by_id(topic_id)
	if topic is None:
		raise DqcException(f'Topic[id={topic_id}] not found.')
	schema = topic_service.find_schema_by_name(topic.name, principal_service.get_tenant_id())
	if schema is None:
		raise DqcException(f'Topic[name={topic.name}] not found.')
	storage = ask_topic_storage(schema, principal_service)
	return ask_topic_data_service(schema, storage, principal_service)
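
# Design note: "exchange" keeps the caller's principal but rebinds storage and
# data service to another topic. The schema is resolved by name within the
# principal's own tenant, which keeps the lookup tenant-scoped.
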
    def ask_topic_storage(self, schema: TopicSchema) -> TopicDataStorageSPI:
        topic = schema.get_topic()
        data_source_id = topic.dataSourceId
        if is_blank(data_source_id):
            raise PipelineKernelException(
                f'Data source is not defined for topic[id={topic.topicId}, name={topic.name}]'
            )
        storage = self.storages.get(data_source_id)
        if storage is not None:
            return storage

        storage = ask_topic_storage(schema, self.principalService)
        self.storages[data_source_id] = storage
        return storage
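
# A minimal, self-contained sketch of the cache-by-data-source pattern above:
# look the storage up by its data source id first; create and remember on a miss.
class _StorageCacheSketch:
    def __init__(self, factory):
        self.factory = factory  # stands in for ask_topic_storage
        self.storages = {}

    def get(self, data_source_id):
        storage = self.storages.get(data_source_id)
        if storage is None:
            storage = self.factory(data_source_id)
            self.storages[data_source_id] = storage
        return storage

_cache = _StorageCacheSketch(lambda ds_id: object())
assert _cache.get('ds-1') is _cache.get('ds-1')  # second call hits the cache
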
async def fetch_topic_data(
    topic_name: Optional[str] = None,
    topic_id: Optional[TopicId] = None,
    tenant_id: Optional[TenantId] = None,
    criteria: Optional[TopicPageable] = None,
    principal_service: PrincipalService = Depends(get_any_admin_principal)
) -> DataPage:
    if is_blank(topic_name) and is_blank(topic_id):
        raise_400('Topic id or name is required.')
    tenant_id = validate_tenant_id(tenant_id, principal_service)
    principal_service = fake_to_tenant(principal_service, tenant_id)

    if is_not_blank(topic_id):
        schema = get_topic_service(principal_service).find_schema_by_id(
            topic_id, tenant_id)
    else:
        schema = get_topic_schema(topic_name, tenant_id, principal_service)

    storage = ask_topic_storage(schema, principal_service)
    service = ask_topic_data_service(schema, storage, principal_service)

    # fall back to page 1 / size 100 when the caller gives no usable paging
    page_number = (criteria.pageNumber if criteria is not None
                   and criteria.pageNumber is not None and criteria.pageNumber > 0 else 1)
    page_size = (criteria.pageSize if criteria is not None
                 and criteria.pageSize is not None and criteria.pageSize > 0 else 100)
    pageable = Pageable(pageNumber=page_number, pageSize=page_size)
    if criteria is None or is_blank(
            criteria.jointType) or criteria.filters is None:
        page = service.page_and_unwrap(None, pageable)
    else:
        parsed_criteria = parse_condition_for_storage(criteria, [schema],
                                                      principal_service, False)
        empty_variables = PipelineVariables(None, None, None)
        page = service.page_and_unwrap(
            [parsed_criteria.run(empty_variables, principal_service)],
            pageable)

    def id_to_str(row: Dict[str, Any]) -> Dict[str, Any]:
        if TopicDataColumnNames.ID.value in row:
            copy = row.copy()
            copy[TopicDataColumnNames.ID.value] = str(
                row[TopicDataColumnNames.ID.value])
            return copy
        else:
            return row

    page.data = ArrayHelper(page.data).map(id_to_str).to_list()
    return page
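
# The paging fallback above, restated as a standalone helper; the defaults of
# 1/100 come from the endpoint, the helper name is hypothetical:
def _normalize_paging(page_number, page_size, default_number=1, default_size=100):
    number = default_number if page_number is None or page_number <= 0 else page_number
    size = default_size if page_size is None or page_size <= 0 else page_size
    return number, size

assert _normalize_paging(None, 0) == (1, 100)
assert _normalize_paging(3, 25) == (3, 25)
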
async def truncate_topic_data(
    topic_name: Optional[str] = None,
    tenant_id: Optional[TenantId] = None,
    principal_service: PrincipalService = Depends(get_any_admin_principal)
) -> None:
    if not ask_truncate_topic_data():
        raise_404('Not Found')
    if is_blank(topic_name):
        raise_400('Topic name is required.')
    tenant_id = validate_tenant_id(tenant_id, principal_service)
    principal_service = fake_to_tenant(principal_service, tenant_id)

    schema = get_topic_schema(topic_name, tenant_id, principal_service)
    storage = ask_topic_storage(schema, principal_service)
    service = ask_topic_data_service(schema, storage, principal_service)
    service.truncate()
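
# Note the guard: when truncation is disabled by configuration
# (ask_truncate_topic_data() is false), the endpoint answers 404 rather than
# 403, hiding that the route exists instead of admitting it is forbidden.
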
    def get_topic_data_service(
            self, topic_id: TopicId,
            rules_count: int) -> Tuple[bool, Optional[TopicDataService]]:
        topic_service = get_topic_service(self.principalService)
        topic = topic_service.find_by_id(topic_id)
        if topic is None:
            # ignore and log
            logger.error(
                f'Topic[id={topic_id}] not found, ignored {rules_count} monitor rule(s).')
            return False, None
        schema = topic_service.find_schema_by_name(
            topic.name, self.principalService.get_tenant_id())
        if schema is None:
            # ignore and log
            logger.error(
                f'Topic[name={topic.name}] not found, ignored {rules_count} monitor rule(s).')
            return False, None
        storage = ask_topic_storage(schema, self.principalService)
        data_service = ask_topic_data_service(schema, storage,
                                              self.principalService)
        return True, data_service
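
# Design note: unlike the helpers above that raise on a missing topic, this
# lookup reports failure as (False, None) and logs it, so one missing topic
# only skips its own monitor rules instead of aborting the whole run.
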
async def fetch_topic_data_count(
    topic_id: Optional[TopicId] = None,
    tenant_id: Optional[TenantId] = None,
    criteria: Optional[ParameterJoint] = None,
    principal_service: PrincipalService = Depends(get_any_admin_principal)
) -> int:
    if is_blank(topic_id):
        raise_400('Topic id is required.')
    tenant_id = validate_tenant_id(tenant_id, principal_service)
    principal_service = fake_to_tenant(principal_service, tenant_id)

    schema = get_topic_service(principal_service).find_schema_by_id(
        topic_id, tenant_id)
    storage = ask_topic_storage(schema, principal_service)
    service = ask_topic_data_service(schema, storage, principal_service)

    if criteria is None:
        return service.count()
    else:
        parsed_criteria = parse_condition_for_storage(criteria, [schema],
                                                      principal_service, False)
        empty_variables = PipelineVariables(None, None, None)
        return service.count_by_criteria(
            [parsed_criteria.run(empty_variables, principal_service)])
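
# Both branches above share one shape; a stand-in sketch where a plain
# predicate plays the role of the parsed storage criteria:
def _count_rows(rows, predicate=None):
    if predicate is None:
        return len(rows)
    return sum(1 for row in rows if predicate(row))

assert _count_rows([{'a': 1}, {'a': 2}]) == 2
assert _count_rows([{'a': 1}, {'a': 2}], lambda row: row['a'] > 1) == 1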