def crate_topic_by_raw_data_schema(schema, topic_list, mapping_list):
    """Walk a raw-data-schema tree, building one Topic per node's data_entity
    (one Factor per attribute) and one Mapping per entity.

    Results are accumulated into ``topic_list`` and ``mapping_list`` in place;
    ``mapping_list`` is also returned (previously the leaf case returned None).

    Fixes over the original: removed the no-op annotations ``node: schema`` and
    ``mapping: {}``; stopped shadowing ``node`` as the child-loop variable;
    made the return value consistent on every path.
    """
    node = schema
    # each node's data_entity corresponds to one topic; each attr to one factor
    entity = node.data_entity
    topic: Topic = Topic(**{
        'topic_id': get_surrogate_key(),
        'name': entity.name,
        'topic_type': 'test',
        'factors': []
    })
    mapping = Mapping(**{
        'mapping_id': get_surrogate_key(),
        'source_entity_id': entity.entity_id,
        'source_entity_name': entity.name,
        'target_topic_id': topic.topicId,
        'target_topic_name': entity.name,
        'mapping_detail_list': []
    })
    factor_list = []
    mapping_detail_list = []
    for attr in entity.attrs:
        factor: Factor = Factor(**{
            'id': get_surrogate_key(),
            'name': attr.name,
            'type': attr.type
        })
        factor_list.append(factor)
        mapping_detail_list.append(
            MappingDetail(**{'source_attr': attr, 'target_factor': factor}))
    topic.factors = factor_list
    topic_list.append(topic)
    mapping.mapping_detail_list = mapping_detail_list
    mapping_list.append(mapping)
    for child in node.childs:
        crate_topic_by_raw_data_schema(child, topic_list, mapping_list)
    return mapping_list
def run_pipeline(pipeline: Pipeline, data):
    """Execute every stage/unit/action of *pipeline* against *data*.

    Builds a PipelineRunStatus marked FINISHED on success or ERROR on any
    exception; unless the pipeline's topic is a SYSTEM topic, the status is
    forwarded to the pipeline monitor. A disabled pipeline does nothing.
    """
    pipeline_status = PipelineRunStatus(pipelineId=pipeline.pipelineId,
                                        uid=get_surrogate_key(),
                                        start_time=datetime.now())
    if pipeline.enabled:
        pipeline_topic = get_topic_by_id(pipeline.topicId)
        # TODO pipeline when condition
        log.info("start run pipeline {0}".format(pipeline.name))
        # context starts with the run uid and accumulates every action's outputs
        context = {PIPELINE_UID: pipeline_status.uid}
        try:
            start = time.time()
            for stage in pipeline.stages:
                stage_run_status = StageRunStatus()
                stage_run_status.name = stage.name
                log.info("stage name {0}".format(stage.name))
                for unit in stage.units:
                    # TODO __check_when_condition
                    # if unit.on is not None:
                    #     result = __check_when_condition(unit.on.children, data)
                    #     if result:
                    #         continue
                    if unit.do is not None:
                        unit_run_status = UnitRunStatus()
                        for action in unit.do:
                            # resolve the handler registered for this action type
                            func = find_action_type_func(
                                convert_action_type(action.type), action,
                                pipeline_topic)
                            # call dynamic action in action folder
                            # TODO [future] custom folder
                            out_result, unit_action_status = func(data, context)
                            log.debug("out_result :{0}".format(out_result))
                            # later actions see earlier actions' outputs
                            context = {**context, **out_result}
                            unit_run_status.actions.append(unit_action_status)
                        stage_run_status.units.append(unit_run_status)
                    else:
                        log.info("action stage unit {0} do is None".format(
                            stage.name))
                elapsed_time = time.time() - start
                pipeline_status.stages.append(stage_run_status)
            pipeline_status.complete_time = elapsed_time
            pipeline_status.status = FINISHED
            log.info("pipeline_status {0} time :{1}".format(
                pipeline.name, elapsed_time))
        except Exception as e:
            log.exception(e)
            pipeline_status.error = traceback.format_exc()
            pipeline_status.status = ERROR
            log.error(pipeline_status)
        finally:
            # log.info("insert_pipeline_monitor")
            # SYSTEM-kind topics are internal: don't feed their runs back into
            # the monitor (that would recurse into monitor pipelines)
            if pipeline_topic.kind is not None and pipeline_topic.kind == pipeline_constants.SYSTEM:
                log.info("pipeline_status is {0}".format(pipeline_status))
            else:
                sync_pipeline_monitor_data(pipeline_status)
def create_console_group_to_storage(group: ConsoleSpaceGroup):
    """Insert a console-space group document, minting a real surrogate id when
    the current one is missing or fake, and return the stored group."""
    with get_client_db() as client:
        needs_real_id = group.groupId is None or check_fake_id(group.groupId)
        if needs_real_id:
            group.groupId = get_surrogate_key()
        collection = client[WATCHMEN].get_collection(GROUP_COLLECTION)
        collection.insert_one(group.dict())
        return ConsoleSpaceGroup.parse_obj(group)
def create_topic_schema(topic):
    """Save *topic* (assigning a surrogate id when its id is missing or fake)
    and return the parsed Topic model."""
    fake_or_missing = topic.topicId is None or check_fake_id(topic.topicId)
    if fake_or_missing:
        topic.topicId = get_surrogate_key()
    if type(topic) is not dict:
        topic = topic.dict()
    save_topic(topic)
    return Topic.parse_obj(topic)
def save_console_space(console_space: ConsoleSpace):
    """Create the console space when its connect id is missing or fake,
    otherwise update the existing one."""
    connect_id = console_space.connectId
    if connect_id is not None and not check_fake_id(connect_id):
        return update_console_space(console_space)
    console_space.connectId = get_surrogate_key()
    return create_console_space(console_space)
def create_model_schema(model_schema_set, name, record, is_root):
    """Create or extend the ModelSchema named *name* from one sample *record*.

    Scalar values become ModelFields on businessFields; list/dict values
    recurse via create_schema into a child schema.
    """
    model_schema = model_schema_set.schemas.get(name)
    if model_schema is not None:
        # schema already known: merge this record's fields into it
        for key, value in record.items():
            if check_model_field_in_schema(key, model_schema):
                # known field: only nested structures need further work
                if check_value_type(
                        value) == ValueType.LIST.value or check_value_type(
                            value) == ValueType.DICT.value:
                    create_schema(model_schema_set, key, value, False)
                # else:
                #     if check_value_duplicate(model_schema.businessFields[key].values, value):
                #         continue
                #     else:
                #         model_schema.businessFields[key].values.append(value)
            else:
                # new field on an existing schema
                model_field = create_model_field(model_schema_set,
                                                 model_schema, key, value)
                model_schema.businessFields[model_field.name] = model_field
    else:
        # first time this schema name is seen: build it from scratch
        model_schema = ModelSchema()
        model_schema.model_id = get_surrogate_key()
        model_schema.name = name
        model_schema.isRoot = is_root
        # print(record)
        for key, value in record.items():
            model_field = create_model_field(model_schema_set, model_schema,
                                             key, value)
            model_schema.businessFields[model_field.name] = model_field
        model_schema_set.schemas[model_schema.name] = model_schema
def process_variable(variable_name):
    """Classify a pipeline variable reference.

    "{snowflake}"   -> (SNOWFLAKE, fresh surrogate key)
    "{name}"        -> (MEMORY, name with braces stripped)
    anything else   -> (CONSTANT, the literal text)
    """
    if "{snowflake}" in variable_name:
        return SNOWFLAKE, get_surrogate_key()
    if variable_name.startswith("{"):
        stripped = variable_name.replace("{", "").replace("}", "")
        return MEMORY, stripped
    return parameter_constants.CONSTANT, variable_name
def create_user_group_storage(user_group: UserGroup):
    """Insert a user group, assigning a surrogate id when the current one is
    missing or fake, and return the inserted record."""
    group_id = user_group.userGroupId
    if group_id is None or check_fake_id(group_id):
        user_group.userGroupId = get_surrogate_key()
    record = user_group.dict() if type(user_group) is not dict else user_group
    user_groups.insert_one(record)
    return record
def build_query_monitor_report(report: Report, query_type: str):
    """Build a QueryMonitor describing a *query_type* query issued for *report*."""
    source = QuerySource()
    source.name = report.name
    source.queryType = query_type
    monitor = QueryMonitor()
    monitor.queryUid = get_surrogate_key()
    monitor.querySource = source
    return monitor
def build_query_monitor(subject: ConsoleSpaceSubject, query_type: str):
    """Build a QueryMonitor describing a *query_type* query issued for *subject*."""
    source = QuerySource()
    source.name = subject.name
    source.queryType = query_type
    monitor = QueryMonitor()
    monitor.queryUid = get_surrogate_key()
    monitor.querySource = source
    return monitor
def create_user_storage(user: User):
    """Insert a new user record: mint an id when absent, hash the plaintext
    password, then store and return the record."""
    if user.userId is None:
        user.userId = get_surrogate_key()
    # the stored document must never contain the plaintext password
    user.password = get_password_hash(user.password)
    record = user.dict() if type(user) is not dict else user
    users.insert_one(record)
    return record
def create_space(space: Space):
    """Persist a Space, assigning a surrogate id when its id is missing or fake."""
    space_id = space.spaceId
    if space_id is None or check_fake_id(space_id):
        space.spaceId = get_surrogate_key()
    record = space.dict() if type(space) is not dict else space
    insert_space_to_storage(record)
    return record
async def save_report(subject_id: str, report: Report, current_user: User = Depends(deps.get_current_user)):
    """Create *report* and register its id on the subject *subject_id*."""
    report.reportId = get_surrogate_key()
    # report.subjectId = subject_id
    created = create_report(report)
    subject = load_console_subject_by_id(subject_id)
    subject.reportIds.append(report.reportId)
    update_console_subject(subject)
    return created
def create_raw_data_model_set(code, data):
    """Build a fresh ModelSchemaSet for *code* and derive its schemas from *data*."""
    schema_set = ModelSchemaSet()
    schema_set.id = get_surrogate_key()
    schema_set.code = code
    schema_set.schemas = {}
    schema_set.relationships = {}
    # populate schemas/relationships from the sample payload, rooted at `code`
    create_schema(schema_set, code, data, True)
    # insert_data_schema(schema_set.dict())
    return schema_set
def build_topic(model_schema_set: ModelSchemaSet):
    """Derive a raw-kind Topic from *model_schema_set* and persist it.

    Returns the stored Topic. (The original discarded create_topic_schema's
    result and returned None; returning it is backward-compatible and lets
    callers use the persisted topic.)
    """
    topic = Topic()
    topic.topicId = get_surrogate_key()
    topic.name = model_schema_set.code
    topic.type = "raw"
    topic.factors = []
    # flatten the root schema's fields into dotted-name factors
    build_factors(topic.factors, "", model_schema_set.schemas[topic.name],
                  model_schema_set)
    return create_topic_schema(topic)
def run_pipeline(pipeline: Pipeline, data):
    """Execute *pipeline* over *data*, gating pipeline/stage/unit execution on
    their `when` conditions.

    Records old/new trigger values plus per-stage/unit/action statuses into a
    PipelineRunStatus; non-SYSTEM topic runs are synced to the monitor.
    """
    pipeline_status = PipelineRunStatus(pipelineId=pipeline.pipelineId,
                                        uid=get_surrogate_key(),
                                        startTime=datetime.now(),
                                        topicId=pipeline.pipelineId)
    pipeline_status.oldValue = data[pipeline_constants.OLD]
    pipeline_status.newValue = data[pipeline_constants.NEW]
    if pipeline.enabled:
        pipeline_topic = get_topic_by_id(pipeline.topicId)
        log.info("start run pipeline {0}".format(pipeline.name))
        # context carries the run uid and accumulates each action's outputs
        context = {PIPELINE_UID: pipeline_status.uid}
        if __check_condition(pipeline, pipeline_topic, data):
            try:
                start = time.time()
                for stage in pipeline.stages:
                    if __check_condition(stage, pipeline_topic, data):
                        stage_run_status = StageRunStatus()
                        stage_run_status.name = stage.name
                        log.info("stage name {0}".format(stage.name))
                        for unit in stage.units:
                            if unit.do is not None and __check_condition(unit, pipeline_topic, data):
                                unit_run_status = UnitRunStatus()
                                for action in unit.do:
                                    # resolve the handler for this action type
                                    func = find_action_type_func(convert_action_type(action.type), action, pipeline_topic)
                                    # call dynamic action in action folder
                                    # TODO [future] custom folder
                                    out_result, unit_action_status = func(data, context)
                                    log.debug("out_result :{0}".format(out_result))
                                    # later actions see earlier actions' outputs
                                    context = {**context, **out_result}
                                    unit_run_status.actions.append(unit_action_status)
                                stage_run_status.units.append(unit_run_status)
                            else:
                                log.info("action stage unit {0} do is None".format(stage.name))
                        pipeline_status.stages.append(stage_run_status)
                elapsed_time = time.time() - start
                pipeline_status.completeTime = elapsed_time
                pipeline_status.status = FINISHED
                log.info("pipeline_status {0} time :{1}".format(pipeline.name, elapsed_time))
            except Exception as e:
                log.exception(e)
                pipeline_status.error = traceback.format_exc()
                pipeline_status.status = ERROR
                log.error(pipeline_status)
            finally:
                # SYSTEM-kind topics are internal: don't feed their runs back
                # into the monitor (that would recurse into monitor pipelines)
                if pipeline_topic.kind is not None and pipeline_topic.kind == pipeline_constants.SYSTEM:
                    log.debug("pipeline_status is {0}".format(pipeline_status))
                else:
                    print("sync pipeline monitor")
                    watchmen.monitor.services.pipeline_monitor_service.sync_pipeline_monitor_data(pipeline_status)
def build_oracle_updates_expression_for_insert(table, updates):
    """Flatten a mongo-style update document into a plain column->value dict
    for an Oracle INSERT, seeding a fresh surrogate ``id_``.

    ``$inc`` and ``$set`` sub-documents are unwrapped with lower-cased column
    names (on an insert, incrementing from nothing is the same as setting).
    Other keys are copied through unchanged. *table* is accepted for signature
    compatibility but not consulted.
    """
    new_row = {"id_": get_surrogate_key()}
    for key, value in updates.items():
        if key in ("$inc", "$set"):
            # non-dict operator payloads are silently ignored, as before
            if isinstance(value, dict):
                for column, column_value in value.items():
                    new_row[column.lower()] = column_value
        else:
            new_row[key] = value
    return new_row
def save_enum_to_storage(enum: Enum):
    """Create or update an enumeration and its items.

    Items are detached from the enum before it is written (they are stored
    separately, stamped with the enum id). A fake enumId means "create" (a
    real surrogate id is minted); otherwise the existing enum is updated.

    Refactor: the detach/stamp/save-items sequence was duplicated in both
    branches of the original; it is now written once.
    """
    # detach items: they are persisted on their own, not embedded in the enum
    items_copy = enum.items.copy()
    enum.items = []
    if check_fake_id(enum.enumId):
        enum.enumId = get_surrogate_key()
        result = template.create(ENUMS, enum, Enum)
        save_enum_items_to_storage(__add_enum_id(items_copy, result.enumId))
        return result
    save_enum_items_to_storage(__add_enum_id(items_copy, enum.enumId))
    return template.update_one(ENUMS, {"enumId": enum.enumId}, enum, Enum)
def raw_topic_data_insert_one(one, topic_name):
    """Insert a single record into a raw topic table as a JSON ``data_`` blob.

    The pipeline-monitor topic is routed to its dedicated writer, which also
    populates flattened columns.
    """
    if topic_name == "raw_pipeline_monitor":
        raw_pipeline_monitor_insert_one(one, topic_name)
        return
    table = get_topic_table_by_name('topic_' + topic_name)
    payload: dict = convert_to_dict(one)
    row = {'id_': get_surrogate_key(), 'data_': dumps(payload)}
    with engine.connect() as conn:
        conn.execute(insert(table), row)
def create_raw_data_model_set(code, data):
    """Create or refresh the persisted ModelSchemaSet for *code* from *data*."""
    model_schema_set = get_model_schema_set_by_code(code)
    if model_schema_set is None:
        # first sighting of this code: build an empty set, fill it, insert it
        model_schema_set = ModelSchemaSet()
        model_schema_set.id = get_surrogate_key()
        model_schema_set.code = code
        model_schema_set.schemas = {}
        model_schema_set.relationships = {}
        create_schema(model_schema_set, code, data, True)
        insert_data_schema(model_schema_set.dict())
    else:
        # merge the new sample into the existing schemas and persist the change
        create_schema(model_schema_set, code, data, True)
        update_data_schema(model_schema_set.id, model_schema_set.dict())
    return model_schema_set
def raw_topic_data_insert_(data, topic_name):
    """Bulk-insert records into a raw topic table, each serialized into the
    JSON ``data_`` column with a fresh surrogate ``id_``."""
    table = get_topic_table_by_name('topic_' + topic_name)
    rows = [
        {'id_': get_surrogate_key(), 'data_': dumps(convert_to_dict(instance))}
        for instance in data
    ]
    with engine.connect() as conn:
        conn.execute(insert(table), rows)
def create_model_field(model_schema_set, model_schema, key, value):
    """Build a ModelField for *key*/*value*.

    Nested (list/dict) values additionally spawn a child schema via
    create_schema and register a parent->child ModelRelationship on the set.
    """
    model_field = ModelField(**{
        'field_id': get_surrogate_key(),
        'name': key,
        'type': check_value_type(value),
        'values': [value]
    })
    if model_field.type in (ValueType.LIST.value, ValueType.DICT.value):
        create_schema(model_schema_set, key, value, False)
        child_schema = model_schema_set.schemas[key]
        relationship = ModelRelationship()
        relationship.parentId = model_schema.model_id
        relationship.parentName = model_schema.name
        relationship.childId = child_schema.model_id
        relationship.childName = child_schema.name
        model_schema_set.relationships[key] = relationship
    return model_field
async def create_console_subject(connect_id, subject: ConsoleSpaceSubject = Body(...), current_user: User = Depends(deps.get_current_user)):
    """Attach a brand-new subject to the console space *connect_id*.

    The incoming subject must carry a fake id (i.e. be unsaved); otherwise
    an Exception is raised.
    """
    if not check_fake_id(subject.subjectId):
        raise Exception("id is not fake ID")
    subject.subjectId = None
    console_space = load_console_space_by_id(connect_id)
    # reports arrive without real ids: mint one per report and register it
    for report in subject.reports:
        report.reportId = get_surrogate_key()
        subject.reportIds.append(report.reportId)
    subject = create_console_subject_to_storage(subject)
    console_space.subjectIds.append(subject.subjectId)
    save_console_space(console_space)
    return subject
def run_pipeline(pipeline: Pipeline, data):
    """Run *pipeline* over *data*: condition-gated stages delegate unit/action
    execution to run_unit, which also collects downstream pipelines to trigger.

    On success the collected pipelines are triggered (non-SYSTEM topics only)
    and the run is marked FINISHED; any exception marks it ERROR. Non-SYSTEM
    runs are always synced to the pipeline monitor in the finally block.
    """
    pipeline_status = PipelineRunStatus(
        pipelineId=pipeline.pipelineId,
        uid=get_surrogate_key(),
        startTime=datetime.now().replace(tzinfo=None),
        topicId=pipeline.pipelineId)
    pipeline_status.oldValue = data[pipeline_constants.OLD]
    pipeline_status.newValue = data[pipeline_constants.NEW]
    if pipeline.enabled:
        pipeline_topic = get_topic_by_id(pipeline.topicId)
        log.info("start run pipeline {0}".format(pipeline.name))
        # context carries the run uid and accumulates unit outputs via run_unit
        context = {PIPELINE_UID: pipeline_status.uid}
        start = time.time()
        if __check_condition(pipeline, pipeline_topic, data, context):
            try:
                pipeline_trigger_merge_list = []
                for stage in pipeline.stages:
                    if __check_condition(stage, pipeline_topic, data, context):
                        stage_run_status = StageRunStatus(name=stage.name)
                        log.info("stage name {0}".format(stage.name))
                        context, pipeline_trigger_merge_list = run_unit(
                            context, data, pipeline_status, pipeline_topic,
                            pipeline_trigger_merge_list, stage,
                            stage_run_status)
                elapsed_time = time.time() - start
                pipeline_status.completeTime = elapsed_time
                pipeline_status.status = FINISHED
                log.debug("pipeline_status {0} time :{1}".format(
                    pipeline.name, elapsed_time))
                # cascade: fire downstream pipelines, but never for internal
                # SYSTEM topics (avoids monitor-triggered recursion)
                if pipeline_topic.kind is None or pipeline_topic.kind != pipeline_constants.SYSTEM:
                    __trigger_all_pipeline(pipeline_trigger_merge_list)
            except Exception as e:
                log.exception(e)
                pipeline_status.error = traceback.format_exc()
                pipeline_status.status = ERROR
                log.error(pipeline_status)
            finally:
                if pipeline_topic.kind is not None and pipeline_topic.kind == pipeline_constants.SYSTEM:
                    log.debug("pipeline_status is {0}".format(pipeline_status))
                else:
                    watchmen.monitor.services.pipeline_monitor_service.sync_pipeline_monitor_data(
                        pipeline_status)
def build_factors(factors: list, parent: str, model_schema: ModelSchema,
                  model_schema_set: ModelSchemaSet):
    """Flatten *model_schema* into dotted-name Factors appended to *factors*.

    Nested (array/dict) fields recurse into their child schema with the
    field's own dotted path; scalar fields become a Factor whose name is the
    full path and whose label is the bare key.

    Bug fix: the original reassigned ``parent`` inside the loop, so every
    sibling field after a nested one inherited that nested field's path
    prefix. Each field now derives its own path from the caller's *parent*.
    """
    for key, value in model_schema.businessFields.items():
        qualified = key if parent == "" else parent + "." + key
        if value.type == "array" or value.type == "dict":
            # nested schema: recurse with this field's own dotted path
            build_factors(factors, qualified, model_schema_set.schemas[key],
                          model_schema_set)
        else:
            factor = Factor()
            factor.name = qualified
            factor.type = value.type
            factor.factorId = get_surrogate_key()
            factor.label = key
            factors.append(factor)
def topic_data_insert_one(one, topic_name):
    """Insert one record into a topic table.

    Raw topics are stored as a JSON blob; regular topics are flattened onto
    the table's columns (lower-cased keys), with a fresh surrogate ``id_``.
    """
    if check_topic_type_is_raw(topic_name):
        raw_topic_data_insert_one(one, topic_name)
        return
    table = get_topic_table_by_name('topic_' + topic_name)
    record: dict = capital_to_lower(convert_to_dict(one))
    row = {
        column: get_surrogate_key() if column == "id_" else record.get(column)
        for column in table.c.keys()
    }
    with engine.connect() as conn:
        conn.execute(insert(table), row)
def raw_pipeline_monitor_insert_one(one, topic_name):
    """Insert one pipeline-monitor record.

    The full payload is serialized into ``data_``; every other table column
    is filled from the lower-cased payload, with CLOB columns JSON-serialized
    and ``id_`` given a fresh surrogate key.
    """
    table = get_topic_table_by_name('topic_' + topic_name)
    raw_dict: dict = convert_to_dict(one)
    lowered = capital_to_lower(raw_dict)
    row = {}
    for column in table.c.keys():
        if column == "id_":
            row[column] = get_surrogate_key()
        elif column == "data_":
            row[column] = dumps(raw_dict)
        elif isinstance(table.c[column].type, CLOB):
            cell = lowered.get(column)
            row[column] = dumps(cell) if cell is not None else None
        else:
            row[column] = lowered.get(column)
    with engine.connect() as conn:
        conn.execute(insert(table), row)
def save_enum_items_to_storage(items: List[EnumItem]):
    """Upsert every enum item, minting an item id for missing/fake ones."""
    for entry in items:
        if entry.itemId is None or check_fake_id(entry.itemId):
            entry.itemId = get_surrogate_key()
        upsert_({"itemId": entry.itemId}, entry, EnumItem, ENUM_ITEMS)
def create_user_storage(user: User):
    """Persist a new user via the storage template: ensure a surrogate id,
    hash the plaintext password, then create the record."""
    if user.userId is None:
        user.userId = get_surrogate_key()
    # never store the plaintext password
    user.password = get_password_hash(user.password)
    return template.create(USERS, user, User)
def create_console_subject_to_storage(subject: ConsoleSpaceSubject):
    """Store a console-space subject, assigning a real id when missing/fake."""
    subject_id = subject.subjectId
    if subject_id is None or check_fake_id(subject_id):
        subject.subjectId = get_surrogate_key()
    return template.create(CONSOLE_SPACE_SUBJECTS, subject, ConsoleSpaceSubject)