Beispiel #1
0
    def merge_or_insert_topic(instance, context):
        """Merge each mapping result into the target topic: insert when no
        matching record exists, update when one does.

        Returns the (unchanged) pipeline ``context`` and a ``UnitStatus``
        carrying insert/update counts, the mapping log and elapsed time.

        Raises:
            ValueError: when the action has no target topicId.
        """
        # NEW holds the triggering data; OLD is the previous value (unused here).
        raw_data, old_value = instance[pipeline_constants.NEW], instance[
            pipeline_constants.OLD]
        unit_action_status = UnitStatus(type=action.type)
        start = time.time()
        pipeline_uid = context[PIPELINE_UID]
        unit_action_status.uid = pipeline_uid

        if action.topicId is None:
            raise ValueError("action.topicId is empty {0}".format(action.name))

        target_topic = get_topic_by_id(action.topicId)
        mapping_results, mapping_logs = run_mapping_rules(
            action.mapping, target_topic, raw_data, pipeline_topic)
        joint_type, where_condition = build_query_conditions(
            action.by, pipeline_topic, raw_data, target_topic, context)
        # Iterate the results directly instead of indexing with range(len(...)).
        for mapping_result in mapping_results:
            # Re-query on every iteration: an insert above can change what
            # the next lookup finds.
            mongo_query = __build_mongo_query(joint_type, where_condition)
            target_data = query_topic_data(mongo_query, target_topic.name)
            if target_data is None:
                insert_topic_data(target_topic.name, mapping_result,
                                  pipeline_uid)
                unit_action_status.insertCount += 1
            else:
                update_topic_data(target_topic.name, mapping_result,
                                  target_data, pipeline_uid)
                unit_action_status.updateCount += 1

        unit_action_status.mapping = mapping_logs
        elapsed_time = time.time() - start
        unit_action_status.complete_time = elapsed_time
        return context, unit_action_status
Beispiel #2
0
    def read_factor(instance, context):
        """Read a single factor value from the topic selected by ``action``
        and store it in the pipeline ``context``.

        Returns ``(context, unit_action_status, [])`` — the empty list means
        this action triggers no downstream pipelines.
        """
        # NEW is the triggering data; OLD is the previous value (unused here).
        raw_data, old_value = instance[pipeline_constants.NEW], instance[pipeline_constants.OLD]
        unit_action_status = ReadFactorAction(type=action.type)
        start = time.time()
        # print("context",context)
        # Resolve the context key the read value will be stored under.
        variable_type, context_target_name = process_variable(action.variableName)
        topic = get_topic_by_id(action.topicId)
        factor = get_factor(action.factorId, topic)
        joint_type, where_condition = build_query_conditions(action.by, pipeline_topic, raw_data, topic, context)
        mongo_query = __build_mongo_query(joint_type, where_condition)
        target_data = query_topic_data(mongo_query, topic.name)
        if target_data is not None:
            if factor.name in target_data:
                read_value = target_data[factor.name]
                # NOTE(review): the collision warning checks factor.name, but
                # the value is stored under context_target_name — confirm the
                # check is on the intended key.
                if factor.name in context:
                    log.warn("factor name {0} is already in context".format(factor.name))

                context[context_target_name] = target_data[factor.name]
                unit_action_status.value = read_value
        else:
            # No matching record: fall back to the factor's default value.
            context[context_target_name] = convert_factor_type(factor.defaultValue, factor.type)
            log.warn("target_data is empty ,conditions {0}".format(mongo_query))

        elapsed_time = time.time() - start
        unit_action_status.complete_time = elapsed_time
        # print("read context",context)
        return context, unit_action_status, []
    def merge_topic():
        """Merge-row action: update the matching record of the target topic,
        using a retry-guarded update for aggregate topics.

        Returns ``(status, trigger_pipeline_data_list)``.

        Raises:
            ValueError: when the action has no target topicId.
            Exception: when no matching target record exists (merge-row never
                inserts).
        """
        # begin time
        start = time.time()

        # create action status monitor
        status = ActionStatus()
        status.type = "merge-row"
        status.uid = action_context.get_pipeline_id()

        previous_data = action_context.previousOfTriggerData
        current_data = action_context.currentOfTriggerData
        action = action_context.action
        if action.topicId is None:
            raise ValueError("action.topicId is empty {0}".format(
                action.topicId))

        pipeline_topic = action_context.unitContext.stageContext.pipelineContext.pipelineTopic
        target_topic = get_topic_by_id(action.topicId)

        variables = get_variables(action_context)

        # if there are aggregate functions, need lock the record to update
        mappings_results, having_aggregate_functions = parse_mappings(
            action.mapping, target_topic, previous_data, current_data,
            variables)
        status.value = mappings_results

        where_ = parse_parameter_joint(action.by, current_data, variables,
                                       pipeline_topic, target_topic)
        status.by = where_

        trigger_pipeline_data_list = []

        target_data = query_topic_data(where_, target_topic,
                                       action_context.get_current_user())
        if target_data is None:
            # merge-row requires an existing record; it never inserts.
            raise Exception("can't insert data in merge row action ")
        else:
            if target_topic.type == "aggregate":
                # Aggregate topics update under a retry template so concurrent
                # writers cannot clobber the aggregation.
                args = [
                    mappings_results, where_, target_topic,
                    action_context.get_current_user()
                ]
                retry_callback = (update_retry_callback, args)
                recovery_callback = (update_recovery_callback, args)
                execute_ = retry_template(retry_callback, recovery_callback,
                                          RetryPolicy())
                result = execute_()
                trigger_pipeline_data_list.append(result)
            else:
                trigger_pipeline_data_list.append(
                    update_topic_data_one(mappings_results, target_data,
                                          action_context.get_pipeline_id(),
                                          target_data[get_id_name()],
                                          target_topic,
                                          action_context.get_current_user()))
        status.updateCount = status.updateCount + 1
        elapsed_time = time.time() - start
        status.completeTime = elapsed_time
        return status, trigger_pipeline_data_list
Beispiel #4
0
def run_pipeline(pipeline: Pipeline, data):
    """Execute every stage/unit/action of an enabled pipeline against *data*.

    Builds a ``PipelineRunStatus`` as it goes: FINISHED on success, ERROR
    (with the captured traceback) on any exception.  The status is finally
    either logged (system topics) or synced to the monitor store.
    """
    pipeline_status = PipelineRunStatus(pipelineId=pipeline.pipelineId,
                                        uid=get_surrogate_key(),
                                        start_time=datetime.now())

    if pipeline.enabled:
        pipeline_topic = get_topic_by_id(pipeline.topicId)
        # TODO pipeline when  condition
        log.info("start run pipeline {0}".format(pipeline.name))
        context = {PIPELINE_UID: pipeline_status.uid}

        try:
            start = time.time()
            for stage in pipeline.stages:
                stage_run_status = StageRunStatus()
                stage_run_status.name = stage.name
                log.info("stage name {0}".format(stage.name))
                for unit in stage.units:
                    # TODO __check_when_condition
                    # if unit.on is not None:
                    #     result = __check_when_condition(unit.on.children, data)
                    #     if result:
                    #         continue

                    if unit.do is not None:
                        unit_run_status = UnitRunStatus()
                        for action in unit.do:
                            func = find_action_type_func(
                                convert_action_type(action.type), action,
                                pipeline_topic)
                            # call dynamic action in action folder
                            # TODO [future] custom folder
                            out_result, unit_action_status = func(
                                data, context)
                            log.debug("out_result :{0}".format(out_result))
                            context = {**context, **out_result}
                            unit_run_status.actions.append(unit_action_status)
                        stage_run_status.units.append(unit_run_status)
                    else:
                        log.info("action stage unit  {0} do is None".format(
                            stage.name))
                # Bug fix: record every stage.  The original appended once
                # after the loop, keeping only the last stage's status and
                # raising NameError for a pipeline with zero stages.
                pipeline_status.stages.append(stage_run_status)

            elapsed_time = time.time() - start
            pipeline_status.complete_time = elapsed_time
            pipeline_status.status = FINISHED
            log.info("pipeline_status {0} time :{1}".format(
                pipeline.name, elapsed_time))

        except Exception as e:
            log.exception(e)
            pipeline_status.error = traceback.format_exc()
            pipeline_status.status = ERROR
            log.error(pipeline_status)
        finally:
            # log.info("insert_pipeline_monitor")
            if pipeline_topic.kind is not None and pipeline_topic.kind == pipeline_constants.SYSTEM:
                log.info("pipeline_status is {0}".format(pipeline_status))
            else:
                sync_pipeline_monitor_data(pipeline_status)
def insert_or_update(action, already_see, pipeline_graph, pipeline_node):
    """Add the action's target topic and its mapped factors to the graph,
    linking each node to *pipeline_node*.

    *already_see* collects node ids so shared topics/factors are added only
    once across repeated calls.
    """
    topic = get_topic_by_id(action.topicId)
    topic_node = buildTopicNode(topic)
    if topic_node.id not in already_see:
        pipeline_graph.nodes.append(topic_node)
        already_see.append(topic_node.id)
    relationship_properties = buildRelationShipProperties({"type": "pipeline_to_topic"})
    relationship = buildRelationShip(pipeline_node, topic_node, relationship_properties)
    pipeline_graph = add_edge(pipeline_graph, relationship)
    for map_ in action.mapping:
        factor = getFactorFromTopic(topic, map_.factorId)
        if factor is None:
            continue
        factor_node = buildFactorNode(factor)
        # changeId is internal bookkeeping; never graph it.
        if factor_node.name == "changeId":
            continue
        if factor_node.id not in already_see:
            pipeline_graph.nodes.append(factor_node)
            # Bug fix: track the node *id*, not the node object, so the
            # membership test above actually dedupes repeated factors.
            already_see.append(factor_node.id)
        relationship_properties = buildRelationShipProperties({"type": "pipeline_to_factor"})
        relationship = buildRelationShip(pipeline_node, factor_node, relationship_properties)
        pipeline_graph = add_edge(pipeline_graph, relationship)
Beispiel #6
0
async def import_topic(topic: Topic):
    """Create the schema for a brand-new topic; update it when it already exists."""
    existing = get_topic_by_id(topic.topicId)
    if existing is not None:
        update_topic_schema(topic.topicId, topic)
    else:
        create_topic_schema(topic)
Beispiel #7
0
    def insert_topic(instance, context):
        """Insert every mapping result into the target topic.

        Returns the pipeline ``context``, an ``InsertAction`` status and the
        list of inserted rows (used to trigger downstream pipelines).

        Raises:
            ValueError: when the action has no target topicId.
        """
        # NEW holds the triggering data; OLD is the previous value (unused here).
        raw_data, old_value = instance[pipeline_constants.NEW], instance[
            pipeline_constants.OLD]
        unit_action_status = InsertAction(type=action.type)
        start = time.time()
        pipeline_uid = context[PIPELINE_UID]
        unit_action_status.uid = pipeline_uid
        if action.topicId is None:
            raise ValueError("action.topicId is empty {0}".format(action.name))

        target_topic = get_topic_by_id(action.topicId)
        log.info("run target_topic {0}".format(target_topic.name))
        mapping_results = run_mapping_rules(action.mapping, target_topic,
                                            raw_data, pipeline_topic, context)
        log.info("mapping_results:{0}".format(mapping_results))
        unit_action_status.mapping = mapping_results
        trigger_pipeline_data_list = []
        # Iterate directly — the original's enumerate() index was unused.
        for item in mapping_results:
            trigger_pipeline_data_list.append(
                insert_topic_data(target_topic.name, item, pipeline_uid))
            unit_action_status.insertCount += 1

        elapsed_time = time.time() - start
        unit_action_status.complete_time = elapsed_time
        return context, unit_action_status, trigger_pipeline_data_list
Beispiel #8
0
    def merge_topic(instance, context):
        """Update the matching target-topic record for every mapping result.

        A missing match is an error: merge-row never inserts.
        """
        raw_data, old_value = instance[pipeline_constants.NEW], instance[pipeline_constants.OLD]
        unit_action_status = MergeRowAction(type=action.type)
        start = time.time()
        pipeline_uid = context[PIPELINE_UID]
        unit_action_status.uid = pipeline_uid

        if action.topicId is None:
            raise ValueError("action.topicId is empty {0}".format(action.name))

        target_topic = get_topic_by_id(action.topicId)
        mapping_results = run_mapping_rules(action.mapping, target_topic, raw_data, pipeline_topic, context)
        joint_type, where_condition = build_query_conditions(action.by, pipeline_topic, raw_data, target_topic, context)
        unit_action_status.whereConditions = where_condition
        unit_action_status.mapping = mapping_results
        trigger_pipeline_data_list = []
        for position, merged_row in enumerate(mapping_results):
            query_ = __build_mongo_query(joint_type, index_conditions(where_condition, position))
            existing_row = query_topic_data(query_, target_topic.name)
            if existing_row is None:
                raise Exception("can't insert data in merge row action ")
            updated_row = update_topic_data(target_topic.name, merged_row, existing_row, pipeline_uid, query_)
            trigger_pipeline_data_list.append(updated_row)
            unit_action_status.updateCount += 1

        elapsed_time = time.time() - start
        unit_action_status.complete_time = elapsed_time
        return context, unit_action_status, trigger_pipeline_data_list
Beispiel #9
0
    def exists():
        """Check whether any target-topic record matches ``action.by`` and
        publish the result ('true'/'false') into the action's variable.
        """
        start = time.time()

        # action status monitor
        status = ActionStatus()
        status.type = "exists"
        status.uid = action_context.get_pipeline_id()

        previous_data = action_context.previousOfTriggerData
        current_data = action_context.currentOfTriggerData
        action = action_context.action
        pipeline_topic = action_context.get_pipeline_context().pipelineTopic
        target_topic = get_topic_by_id(action.topicId)
        variables = get_variables(action_context)

        where_ = parse_parameter_joint(action.by, current_data, variables,
                                       pipeline_topic, target_topic)
        status.by = where_

        matched = query_topic_data(where_, target_topic,
                                   action_context.get_current_user())

        # Pipeline variables are stringly typed: 'true' / 'false'.
        found = 'true' if matched is not None else 'false'
        set_variable(action_context, action.variableName, found)

        status.completeTime = time.time() - start
        return status, []
def get_topic_sub_query_with_space_filter(console_subject, current_user):
    """Build a topicId -> Presto sub-query lookup restricted by the enabled
    space filters of the subject's console space.

    Returns a function ``topic_id -> {"alias": ..., "query": ...} | None``.
    """
    console_space = load_console_space_by_subject_id(console_subject.subjectId,
                                                     current_user)
    filters: List[SpaceFilter] = get_filters_by_id(console_space.spaceId,
                                                   current_user)
    if filters is None:
        filters = []
    topic_sub_query = {}
    # Renamed from `filter` — avoid shadowing the builtin.
    for space_filter in filters:
        if space_filter.enabled:
            topic = get_topic_by_id(space_filter.topicId)
            table = build_table_by_topic_id(space_filter.topicId)
            sub_query = PrestoQuery. \
                from_(table). \
                select('*'). \
                where(build_space_filter_where(space_filter.joint))
            # where(table.tenant_id_ == current_user.tenantId). \
            topic_sub_query[space_filter.topicId] = {
                "alias": topic.name,
                "query": sub_query
            }

    def get_topic_sub_query_by_topic_id(topic_id):
        return topic_sub_query.get(topic_id, None)

    return get_topic_sub_query_by_topic_id
Beispiel #11
0
    def insert_topic(instance, context):
        """Insert every mapping result into the target topic and report the
        outcome through a ``UnitStatus`` carrying an ``InsertAction`` log.
        """
        raw_data, old_value = instance[pipeline_constants.NEW], instance[
            pipeline_constants.OLD]
        unit_action_status = UnitStatus(type=action.type)
        start = time.time()
        pipeline_uid = context[PIPELINE_UID]
        unit_action_status.uid = pipeline_uid

        if action.topicId is None:
            raise ValueError("action.topicId is empty {0}".format(action.name))
        insert_action = InsertAction()
        target_topic = get_topic_by_id(action.topicId)
        mapping_results, mapping_logs = run_mapping_rules(
            action.mapping, target_topic, raw_data, pipeline_topic)

        # One insert per mapping result.
        for row in mapping_results:
            insert_topic_data(target_topic.name, row, pipeline_uid)
            unit_action_status.insertCount += 1

        insert_action.mapping = mapping_logs

        unit_action_status.action = insert_action
        unit_action_status.complete_time = time.time() - start
        return context, unit_action_status
Beispiel #12
0
def parse_parameter(parameter_: Parameter):
    """Render a Parameter as a SQL-ish expression string.

    Topic parameters become the upper-cased factor name, constants pass
    through unchanged, and computed parameters fold their children with the
    matching arithmetic symbol ("case-then" delegates to the Oracle builder).

    Raises:
        Exception: for a computed parameter with an unsupported operator.
    """
    if parameter_.kind == "topic":
        topic = get_topic_by_id(parameter_.topicId)
        # NOTE(review): topic_name is unused — kept in case
        # build_collection_name has side effects; confirm and drop.
        topic_name = build_collection_name(topic.name)
        factor = get_factor(parameter_.factorId, topic)
        factor_name = factor.name
        return f'{factor_name.upper()}'
    elif parameter_.kind == 'constant':
        return parameter_.value
    elif parameter_.kind == 'computed':
        # The five arithmetic branches were identical except for the joining
        # symbol, so fold generically from a symbol table.
        operator_symbols = {
            Operator.add: '+',
            Operator.subtract: '-',
            Operator.multiply: '*',
            Operator.divide: '/',
            Operator.modulus: '%',
        }
        symbol = operator_symbols.get(parameter_.type)
        if symbol is not None:
            result = None
            for item in parameter_.parameters:
                if result:
                    next_ = parse_parameter(item)
                    result = f'{result}{symbol}{next_}'
                else:
                    result = parse_parameter(item)
            return result
        elif parameter_.type == "case-then":
            return parse_oracle_case_then(parameter_.parameters)
        else:
            raise Exception("operator is not supported")
def build_table_by_topic_id(topic_id) -> Table:
    """Resolve a topic id into a Table bound to its data source's schema and catalog."""
    topic = get_topic_by_id(topic_id)
    collection_name = build_collection_name(topic.name)
    source: DataSource = load_data_source_by_id(topic.dataSourceId)
    # Schema name comes from the data source name; catalog from its code.
    target_schema = Schema(source.name, LiteralValue(source.dataSourceCode))
    return Table(collection_name, target_schema)
Beispiel #14
0
    def read_factor():
        """Read one factor value (optionally aggregated) from the target
        topic and publish it into the action's variable.

        Returns ``(status, [])`` — this action triggers no pipelines.

        Raises:
            ValueError: when no matching record/aggregate is found, or when a
                plain read matches more than one record.
        """
        # begin time
        start = time.time()

        # create action status monitor
        status = ActionStatus()
        status.type = "read-factor"
        status.uid = action_context.unitContext.stageContext.pipelineContext.pipeline.pipelineId

        previous_data = action_context.previousOfTriggerData
        current_data = action_context.currentOfTriggerData
        action = action_context.action

        pipeline_topic = action_context.unitContext.stageContext.pipelineContext.pipelineTopic
        target_topic = get_topic_by_id(action.topicId)
        variables = get_variables(action_context)

        where_ = parse_parameter_joint(action.by, current_data, variables, pipeline_topic, target_topic)
        status.by = where_

        target_factor = get_factor(action.factorId, target_topic)

        if action.arithmetic == "none" or action.arithmetic is None:
            # Plain read: exactly one matching record expected.
            target_data = query_topic_data(where_, target_topic, action_context.get_current_user())
            if target_data is not None:
                if isinstance(target_data, list):
                    raise ValueError("read factor action should just get one factor record")
                else:
                    read_value = target_data[target_factor.name]
                    set_variable(action_context, action.variableName, read_value)
                    status.value = read_value
            else:
                raise ValueError("read factor action must match one factor record")
        else:
            # The three aggregate branches differed only in the aggregate
            # name, which matches action.arithmetic — dispatch directly.
            read_value = None
            if action.arithmetic in ("sum", "count", "avg"):
                read_value = query_topic_data_aggregate(
                    where_, {target_factor.name: action.arithmetic},
                    target_topic, action_context.get_current_user())
            if read_value is not None:
                set_variable(action_context, action.variableName, read_value)
            else:
                raise ValueError("read factor action must match one factor record at least")

            status.value = read_value

        elapsed_time = time.time() - start
        status.completeTime = elapsed_time
        return status, []
def __get_factor_name_by_alias(column_name_list, console_subject):
    """Map dataset column aliases to their underlying factor names."""
    def factor_name_of(alias):
        column = __find_column_by_alias(alias,
                                        console_subject.dataset.columns)
        topic = get_topic_by_id(column.parameter.topicId)
        return get_factor(column.parameter.factorId, topic).name

    return [factor_name_of(alias) for alias in column_name_list]
Beispiel #16
0
    def write_factor(instance, context):
        """Run the action's arithmetic over the source values and write the
        result onto one factor of the target topic via find-and-modify.

        Returns ``(context, unit_action_status, trigger_pipeline_data_list)``.
        """
        raw_data, old_value = instance[pipeline_constants.NEW], instance[
            pipeline_constants.OLD]
        unit_action_status = WriteFactorAction(type=action.type)
        start = time.time()
        # Bug fix: bind before the guard.  The original defined this name
        # only inside the `if`, so a None topicId raised NameError at return.
        trigger_pipeline_data_list = []

        if action.topicId is not None:
            target_topic = get_topic_by_id(action.topicId)
            # todo for find factor
            # factor_dict = build_factor_dict(target_topic)
            conditions = action.by
            joint_type, where_condition = build_query_conditions(
                conditions, pipeline_topic, raw_data, target_topic, context)
            target_factor = get_factor(action.factorId, target_topic)
            # if action.source.kind

            source_value_list = __run_arithmetic(
                action.arithmetic,
                get_source_value_list(pipeline_topic, raw_data, action.source,
                                      target_factor, context))

            update_data = {target_factor.name: source_value_list}
            mongo_query = __build_mongo_query(joint_type, where_condition)
            # Also $set the condition factors so the written record carries
            # the keys that located it.
            condition_factors = {
                "$set":
                get_condition_factor_value(raw_data, where_condition,
                                           joint_type)
            }
            target_data = query_topic_data(mongo_query, target_topic.name)
            if old_value is not None:
                # NOTE(review): presumably the old values let the update
                # builder account for the previous contribution — confirm in
                # __build_mongo_update.
                old_value_list = get_source_value_list(pipeline_topic,
                                                       old_value,
                                                       action.source,
                                                       target_factor, context)
                trigger_pipeline_data_list.append(
                    find_and_modify_topic_data(
                        target_topic.name, mongo_query,
                        __merge_condition_factor(
                            __build_mongo_update(update_data,
                                                 action.arithmetic,
                                                 target_factor,
                                                 old_value_list),
                            condition_factors), target_data))
            else:
                trigger_pipeline_data_list.append(
                    find_and_modify_topic_data(
                        target_topic.name, mongo_query,
                        __merge_condition_factor(
                            __build_mongo_update(update_data,
                                                 action.arithmetic,
                                                 target_factor),
                            condition_factors), target_data))

        elapsed_time = time.time() - start
        unit_action_status.complete_time = elapsed_time
        return context, unit_action_status, trigger_pipeline_data_list
Beispiel #17
0
    def write_factor(instance, context):
        """Write the source value into one factor of the target topic —
        inserting a new record when no match exists, otherwise applying an
        arithmetic in-place update via find-and-modify.

        Returns ``(context, unit_action_status)``.
        """
        # NEW is the triggering data; OLD the previous value of the trigger.
        raw_data, old_value = instance[pipeline_constants.NEW], instance[
            pipeline_constants.OLD]
        unit_action_status = UnitStatus(type=action.type)
        start = time.time()
        pipeline_uid = context[PIPELINE_UID]
        # TODO  action_log

        # action_log = WriteFactorAction()

        # Silently does nothing when the action has no target topic.
        if action.topicId is not None:
            target_topic = get_topic_by_id(action.topicId)
            # todo for find factor
            factor_dict = build_factor_dict(target_topic)
            conditions = action.by
            joint_type, where_condition = build_query_conditions(
                conditions, pipeline_topic, raw_data, target_topic, context)
            source_value_list = get_source_value_list(pipeline_topic, raw_data,
                                                      action.source)
            target_factor = get_factor(action.factorId, target_topic)
            update_data = {target_factor.name: source_value_list}
            mongo_query = __build_mongo_query(joint_type, where_condition)
            target_data = query_topic_data(mongo_query, target_topic.name)
            if target_data is None:
                # No match: seed a new record carrying both the written
                # factor and the condition factors that located it.
                condition_factors = get_condition_factor_value(
                    raw_data, where_condition, joint_type)
                insert_data = {
                    **{
                        target_factor.name: source_value_list
                    },
                    **condition_factors
                }
                log.info("Insert data : {0}".format(insert_data))
                insert_topic_data(target_topic.name, insert_data, pipeline_uid)
            else:

                if old_value is not None:
                    # NOTE(review): presumably the old values let the update
                    # builder account for the previous contribution —
                    # confirm in __build_mongo_update.
                    old_value_list = get_source_value_list(
                        pipeline_topic, old_value, action.source)
                    # def_value_list = source_value_list - old_value_list
                    # update_data = {target_factor.name: def_value_list}
                    find_and_modify_topic_data(
                        target_topic.name, mongo_query,
                        __build_mongo_update(update_data, action.arithmetic,
                                             target_factor, old_value_list),
                        target_data)
                else:
                    find_and_modify_topic_data(
                        target_topic.name, mongo_query,
                        __build_mongo_update(update_data, action.arithmetic,
                                             target_factor, None), target_data)

        elapsed_time = time.time() - start
        unit_action_status.complete_time = elapsed_time
        return context, unit_action_status
def parse_parameter(parameter: Parameter, factor=None):
    """Evaluate a Parameter into a table field, a constant, or a computed
    value folded left-to-right with the matching arithmetic operator.

    Raises:
        Exception: for a computed parameter with an unsupported operator.
    """
    if parameter.kind == "topic":
        topic = get_topic_by_id(parameter.topicId)
        topic_col_name = build_collection_name(topic.name)
        factor = get_factor(parameter.factorId, topic)
        return Table(topic_col_name)[factor.name]
    elif parameter.kind == 'constant':
        return parameter.value
    elif parameter.kind == 'computed':
        # The five branches were identical folds; only the binary operator
        # differed, so dispatch from a table instead of duplicating the loop.
        operator_funcs = {
            Operator.add: operator.add,
            Operator.subtract: operator.sub,
            Operator.multiply: operator.mul,
            Operator.divide: operator.truediv,
            Operator.modulus: operator.mod,
        }
        op_func = operator_funcs.get(parameter.type)
        if op_func is None:
            # TODO more operator support
            raise Exception("operator is not supported")
        result = None
        for item in parameter.parameters:
            if result:
                result = op_func(result, parse_parameter(item))
            else:
                result = parse_parameter(item)
        return result
def buildPipelineGraph(pipeline, already_see, pipeline_graph):
    """Add a pipeline node — with its source topic and every action's target
    nodes — to *pipeline_graph*, deduping node ids via *already_see*.

    Raises:
        ValueError: for an action type with no graph handling.
    """
    pipeline_node = buildPipelineNode(pipeline)
    pipeline_graph.nodes.append(pipeline_node)
    if get_topic_by_id(pipeline.topicId) is None:
        return
    source_topic_node = buildTopicNode(get_topic_by_id(pipeline.topicId))
    if source_topic_node.id not in already_see:
        pipeline_graph.nodes.append(source_topic_node)
        already_see.append(source_topic_node.id)
    relationship_properties = buildRelationShipProperties({"type": "topic_to_pipeline"})
    relationship = buildRelationShip(source_topic_node, pipeline_node, relationship_properties)
    pipeline_graph = add_edge(pipeline_graph, relationship)
    # Group action types by how they affect the graph: writes add topic and
    # factor nodes, reads add read edges, the rest contribute nothing.
    write_types = {
        pipeline_constants.INSERT_OR_MERGE_ROW,
        pipeline_constants.INSERT_ROW,
        pipeline_constants.WRITE_FACTOR,
        pipeline_constants.MERGE_ROW,
    }
    read_types = {
        pipeline_constants.READ_ROW,
        pipeline_constants.READ_ROWS,
        pipeline_constants.READ_FACTORS,
        pipeline_constants.READ_FACTOR,
    }
    noop_types = {
        pipeline_constants.COPY_TO_MEMORY,
        pipeline_constants.EXIST,
        pipeline_constants.ALARM,
    }
    for stage in pipeline.stages:
        for unit in stage.units:
            for action in unit.do:
                if action.type in write_types:
                    insert_or_update(action, already_see, pipeline_graph, pipeline_node)
                elif action.type in read_types:
                    read_(action, already_see, pipeline_graph, pipeline_node)
                elif action.type in noop_types:
                    pass
                else:
                    # Bug fix: the original raised a plain string, which is a
                    # TypeError in Python 3 — raise a real exception.
                    raise ValueError("action not support:" + action.type)
Beispiel #20
0
def _filter_criterion(filter: Filter) -> any:
    """Translate one dataset ``Filter`` into a query criterion expression.

    Parses both sides of the filter via ``parse_parameter`` (the right side
    gets the left side's factor for type context), then maps the filter's
    operator string onto the corresponding criterion call on ``left``.

    :param filter: filter definition with ``left``, ``right`` and ``operator``
        (note: parameter name shadows the ``filter`` builtin; kept for
        backward compatibility with keyword callers)
    :return: criterion object understood by the query builder
    :raises Exception: when the operator string is not supported
    """
    left = parse_parameter(filter.left)

    topic = get_topic_by_id(filter.left.topicId)
    factor = get_factor(filter.left.factorId, topic)
    right = parse_parameter(filter.right, factor)

    op = filter.operator
    if op == "equals":
        return operator.eq(left, right)
    elif op == "not-equals":
        # numeric-looking strings are compared as ints — TODO confirm this
        # asymmetry with the "equals" branch (which compares as-is) is intended
        if right.isdigit():
            return left.__ne__(int(right))
        return left.__ne__(right)
    elif op == 'empty':
        return left.isnull()
    elif op == 'not-empty':
        return left.notnull()
    elif op == "more":
        return operator.gt(left, int(right))
    elif op == "more-equals":
        return operator.ge(left, int(right))
    elif op == "less":
        return operator.lt(left, int(right))
    elif op == "less-equals":
        return operator.le(left, int(right))
    elif op == 'in':
        return left.isin(_coerce_in_values(right))
    elif op == 'not-in':
        return left.notin(_coerce_in_values(right))
    else:
        # TODO more operator support
        raise Exception("operator is not supported")


def _coerce_in_values(raw: str) -> List:
    """Split a comma-separated value string, converting digit-only items to int."""
    values: List = []
    for value in raw.split(','):
        if value.isdigit():
            values.append(int(value))
        else:
            values.append(value)
    return values
def _parse_parameter(parameter_: Parameter):
    """Translate a ``Parameter`` into a MongoDB aggregation expression fragment.

    - ``topic`` kind resolves to a ``$<factor-name>`` field reference
    - ``constant`` kind resolves to its literal value
    - ``computed`` kind folds its sub-parameters left-to-right into a nested
      arithmetic expression (``$add``/``$subtract``/``$multiply``/``$divide``/
      ``$mod``) or delegates ``case-then`` to ``parse_mongo_case_then``

    Returns ``None`` for any other kind (matches the original fall-through).

    :raises Exception: when a computed parameter has an unsupported operator
    """
    if parameter_.kind == "topic":
        topic = get_topic_by_id(parameter_.topicId)
        factor = get_factor(parameter_.factorId, topic)
        return f'${factor.name}'
    elif parameter_.kind == 'constant':
        return parameter_.value
    elif parameter_.kind == 'computed':
        # one table instead of five structurally identical branches
        mongo_ops = {
            Operator.add: "$add",
            Operator.subtract: "$subtract",
            Operator.multiply: "$multiply",
            Operator.divide: "$divide",
            Operator.modulus: "$mod",
        }
        if parameter_.type in mongo_ops:
            op_name = mongo_ops[parameter_.type]
            result = None
            for item in parameter_.parameters:
                if result:
                    # left-associative fold: ((a op b) op c) ...
                    result = {op_name: [result, _parse_parameter(item)]}
                else:
                    result = _parse_parameter(item)
            return result
        elif parameter_.type == "case-then":
            return parse_mongo_case_then(parameter_.parameters)
        else:
            raise Exception("operator is not supported")
async def build_pipeline_index_list(pipeline: Pipeline, pipeline_index_dict, current_user):
    """Walk every action of every stage/unit and collect its pipeline index entries.

    Each action is handed to ``__process_by_action_type``; the values of any
    returned dict are accumulated into the result list.
    """
    # NOTE(review): source_topic is never used below — presumably the lookup is
    # kept for its validation side effect; confirm before removing.
    source_topic = get_topic_by_id(pipeline.topicId, current_user)
    temporary_context_dict = {}
    pipeline_index_list = []
    for stage in pipeline.stages:
        for unit in stage.units:
            for action in unit.do:
                result_dict = __process_by_action_type(
                    action, pipeline, stage, unit,
                    pipeline_index_dict, temporary_context_dict, current_user)
                if result_dict:
                    pipeline_index_list.extend(result_dict.values())
    return pipeline_index_list
def run_pipeline(pipeline: Pipeline, data):
    """Execute an enabled pipeline against one trigger payload.

    Records old/new values on a ``PipelineRunStatus``, evaluates the
    pipeline/stage/unit conditions, runs each action via its resolved action
    function (threading ``context`` through), and finally syncs the run status
    to the pipeline monitor unless the topic is a SYSTEM topic.

    :param pipeline: pipeline definition (skipped entirely when not enabled)
    :param data: dict with ``pipeline_constants.OLD`` and ``NEW`` payloads
    """
    # NOTE(review): topicId is populated from pipeline.pipelineId here — confirm
    # this is intentional rather than pipeline.topicId.
    pipeline_status = PipelineRunStatus(pipelineId=pipeline.pipelineId, uid=get_surrogate_key(),
                                        startTime=datetime.now(), topicId=pipeline.pipelineId)
    pipeline_status.oldValue = data[pipeline_constants.OLD]
    pipeline_status.newValue = data[pipeline_constants.NEW]

    if pipeline.enabled:
        pipeline_topic = get_topic_by_id(pipeline.topicId)
        log.info("start run pipeline {0}".format(pipeline.name))
        context = {PIPELINE_UID: pipeline_status.uid}
        if __check_condition(pipeline, pipeline_topic, data):
            try:
                start = time.time()
                for stage in pipeline.stages:
                    if __check_condition(stage, pipeline_topic, data):
                        stage_run_status = StageRunStatus()
                        stage_run_status.name = stage.name
                        log.info("stage name {0}".format(stage.name))
                        for unit in stage.units:
                            if unit.do is not None and __check_condition(unit, pipeline_topic, data):
                                unit_run_status = UnitRunStatus()
                                for action in unit.do:
                                    func = find_action_type_func(convert_action_type(action.type), action,
                                                                 pipeline_topic)
                                    # call dynamic action in action folder
                                    # TODO [future] custom folder
                                    out_result, unit_action_status = func(data, context)
                                    log.debug("out_result :{0}".format(out_result))
                                    # merge action output into the running context
                                    context = {**context, **out_result}
                                    unit_run_status.actions.append(unit_action_status)
                                stage_run_status.units.append(unit_run_status)
                            else:
                                log.info("action stage unit  {0} do is None".format(stage.name))
                        pipeline_status.stages.append(stage_run_status)
                elapsed_time = time.time() - start
                # NOTE: completeTime stores the elapsed duration, not a timestamp
                pipeline_status.completeTime = elapsed_time
                pipeline_status.status = FINISHED
                log.info("pipeline_status {0} time :{1}".format(pipeline.name, elapsed_time))

            except Exception as e:
                log.exception(e)
                pipeline_status.error = traceback.format_exc()
                pipeline_status.status = ERROR
                log.error(pipeline_status)
            finally:
                if pipeline_topic.kind is not None and pipeline_topic.kind == pipeline_constants.SYSTEM:
                    log.debug("pipeline_status is {0}".format(pipeline_status))
                else:
                    # was a stray print(); route through the module logger instead
                    log.info("sync pipeline monitor")
                    watchmen.monitor.services.pipeline_monitor_service.sync_pipeline_monitor_data(pipeline_status)
def _join(q: QueryBuilder, join: Join) -> QueryBuilder:
    """Append an inner/left/right join to the query builder.

    Joins the secondary topic's table to the primary topic's table on
    equality of the two configured factors. Returns ``None`` implicitly for
    any other join type (preserved from the original — TODO consider raising).
    """
    # left side: primary topic table and factor
    topic = get_topic_by_id(join.topicId)
    topic_col_name = build_collection_name(topic.name)
    factor = get_factor(join.factorId, topic)
    left_table = Table(topic_col_name).as_(topic.name)

    # right side: secondary topic table and factor
    sec_topic = get_topic_by_id(join.secondaryTopicId)
    sec_topic_col_name = build_collection_name(sec_topic.name)
    sec_factor = get_factor(join.secondaryFactorId, sec_topic)
    right_table = Table(sec_topic_col_name).as_(sec_topic.name)

    # the three branches were identical except for the JoinType constant,
    # which is exactly join.type itself — pass it straight through
    if join.type in (JoinType.inner, JoinType.left, JoinType.right):
        return q.join(right_table, join.type).on(
            operator.eq(left_table[factor.name], right_table[sec_factor.name]))
def __add_mapping_to_pipeline_index(mapping_factor, pipeline_index, pipeline_index_dict):
    """Register a MAPPING_TO index entry for a mapping factor's source topic.

    Looks up (or creates) the index entry for the source topic/factor, copies
    the identifying fields from *pipeline_index*, and links it back to the
    mapped-to topic/factor. RAW topics and unknown topics are skipped.
    """
    topic: Topic = get_topic_by_id(mapping_factor.source.topicId)
    if topic is None or topic.type == RAW:
        return
    new_pipeline_index = __get_pipeline_index_in_dict(
        mapping_factor.source.topicId, mapping_factor.source.factorId,
        pipeline_index.pipelineId, pipeline_index_dict)
    # copy the identifying fields verbatim from the originating index entry
    for attr_name in ("tenantId", "pipelineName", "stageId", "actionId",
                      "stageName", "unitName", "unitId"):
        setattr(new_pipeline_index, attr_name, getattr(pipeline_index, attr_name))
    new_pipeline_index.refType = MAPPING_TO
    new_pipeline_index.mappingToTopicId = pipeline_index.topicId
    new_pipeline_index.mappingToFactorId = pipeline_index.factorId
    def read_row(instance, context):
        """Query a single row from the action's target topic into the context.

        Builds query conditions from the action's ``by`` clause, runs the
        query, and — only when a document is found — stores it in the context
        under the action's variable name. Returns ``(context, status, [])``.
        """
        raw_data, old_value = instance[pipeline_constants.NEW], instance[pipeline_constants.OLD]
        status = UnitActionStatus(type=action.type)
        started_at = time.time()

        variable_type, target_name = process_variable(action.variableName)
        topic = get_topic_by_id(action.topicId)
        joint_type, where_condition = build_query_conditions(action.by, pipeline_topic, raw_data, topic, context)
        row = query_topic_data(__build_mongo_query(joint_type, where_condition), topic.name)
        if row is not None:
            context[target_name] = row

        # complete_time holds the elapsed duration of this action
        status.complete_time = time.time() - started_at
        return context, status, []
 def topic_handler_in_dataset(
         self) -> Tuple[Union[Field, CustomFunction], ParameterValueType]:
     """Resolve this parameter's topic/factor into a query field.

     When a topic-space filter matches the topic, the field is built against
     that filter's aliased sub-query; otherwise against the topic's own table.
     Returns the field together with the factor's value type.
     """
     param = self.param
     topic = get_topic_by_id(param.topicId)
     factor = get_factor(param.factorId, topic)
     filter_entry = self.topic_space_filter(param.topicId) if self.topic_space_filter else None
     if filter_entry:
         table = AliasedQuery(filter_entry["alias"])
     else:
         table = build_table_by_topic_id(param.topicId)
     return Field(factor.name, None, table), factor.type
Beispiel #28
0
def run_pipeline(pipeline: Pipeline, data):
    """Execute an enabled pipeline against one trigger payload.

    Records old/new values on a ``PipelineRunStatus``, checks the pipeline and
    stage conditions, delegates each stage to ``run_unit`` (which threads the
    context and accumulates follow-up triggers), then fires those follow-up
    pipelines and syncs the run status to the monitor for non-SYSTEM topics.

    :param pipeline: pipeline definition; skipped entirely when not enabled
    :param data: dict holding ``pipeline_constants.OLD`` and ``NEW`` payloads
    """
    # NOTE(review): topicId is populated from pipeline.pipelineId — confirm this
    # is intentional rather than pipeline.topicId.
    pipeline_status = PipelineRunStatus(
        pipelineId=pipeline.pipelineId,
        uid=get_surrogate_key(),
        startTime=datetime.now().replace(tzinfo=None),
        topicId=pipeline.pipelineId)
    pipeline_status.oldValue = data[pipeline_constants.OLD]
    pipeline_status.newValue = data[pipeline_constants.NEW]

    if pipeline.enabled:
        pipeline_topic = get_topic_by_id(pipeline.topicId)
        log.info("start run pipeline {0}".format(pipeline.name))
        context = {PIPELINE_UID: pipeline_status.uid}
        start = time.time()
        if __check_condition(pipeline, pipeline_topic, data, context):
            try:
                # rows produced by actions that should trigger downstream pipelines
                pipeline_trigger_merge_list = []
                for stage in pipeline.stages:
                    if __check_condition(stage, pipeline_topic, data, context):
                        stage_run_status = StageRunStatus(name=stage.name)
                        log.info("stage name {0}".format(stage.name))
                        context, pipeline_trigger_merge_list = run_unit(
                            context, data, pipeline_status, pipeline_topic,
                            pipeline_trigger_merge_list, stage,
                            stage_run_status)

                # NOTE: completeTime stores the elapsed duration, not a timestamp
                elapsed_time = time.time() - start
                pipeline_status.completeTime = elapsed_time
                pipeline_status.status = FINISHED
                log.debug("pipeline_status {0} time :{1}".format(
                    pipeline.name, elapsed_time))
                # SYSTEM topics must not cascade further pipeline triggers
                if pipeline_topic.kind is None or pipeline_topic.kind != pipeline_constants.SYSTEM:
                    __trigger_all_pipeline(pipeline_trigger_merge_list)

            except Exception as e:
                log.exception(e)
                pipeline_status.error = traceback.format_exc()
                pipeline_status.status = ERROR
                log.error(pipeline_status)
            finally:
                # SYSTEM-topic runs are only logged; everything else is synced
                # to the pipeline monitor
                if pipeline_topic.kind is not None and pipeline_topic.kind == pipeline_constants.SYSTEM:
                    log.debug("pipeline_status is {0}".format(pipeline_status))
                else:
                    watchmen.monitor.services.pipeline_monitor_service.sync_pipeline_monitor_data(
                        pipeline_status)
def parse_parameter(parameter: Parameter, factor=None):
    """Translate a ``Parameter`` into a typed query-builder expression.

    - ``topic`` kind: returns ``{'type': factor.type, 'value': <table column>}``
    - ``constant`` kind: recognizes ``{&monthDiff(...)}``, ``{&dayDiff(...)}``
      and ``{&yearDiff(...)}`` date-diff macros, producing a number expression;
      any other constant is returned as text
    - ``computed`` kind: folds the first two sub-parameters through
      ``_arithmetic_process`` (preserved behavior: remaining operands are
      ignored, and a single operand yields ``None`` — TODO confirm intended)

    :param parameter: parameter definition to translate
    :param factor: accepted for caller compatibility; only the topic branch
        assigns it locally
    """
    if parameter.kind == "topic":
        topic = get_topic_by_id(parameter.topicId)
        topic_col_name = build_collection_name(topic.name)
        factor = get_factor(parameter.factorId, topic)
        return {
            'type': factor.type,
            'value': Table(topic_col_name).as_(topic.name)[factor.name]
        }
    elif parameter.kind == 'constant':
        value_ = parameter.value.strip()
        # the three {&xDiff(...)} macro branches were identical except for the unit
        for unit in ("month", "day", "year"):
            marker = "{&" + unit + "Diff"
            if value_.startswith(marker):
                args_str = value_.replace(marker + "(", "").replace(")}", "")
                return {"type": "number", "value": _date_diff(unit, args_str)}
        return {'type': "text", 'value': parameter.value}
    elif parameter.kind == 'computed':
        left = None
        for item in parameter.parameters:
            if left:
                right = parse_parameter(item)
                return _arithmetic_process(parameter.type, left, right)
            left = parse_parameter(item)
        return None
    def read_factor(instance, context):
        """Read one factor's value from the action's target topic into the context.

        Builds query conditions from the action's ``by`` clause, queries the
        topic, and — when a matching document contains the factor — stores the
        value under the action's variable name and logs the read.
        Returns ``(context, unit_action_status)``.
        """
        raw_data, old_value = instance[pipeline_constants.NEW], instance[pipeline_constants.OLD]
        unit_action_status = UnitActionStatus(type=action.type)
        start = time.time()

        variable_type, context_target_name = process_variable(action.variableName)
        topic = get_topic_by_id(action.topicId)
        factor = get_factor(action.factorId, topic)
        joint_type, where_condition = build_query_conditions(action.by, pipeline_topic, raw_data, topic, context)
        mongo_query = __build_mongo_query(joint_type, where_condition)
        target_data = query_topic_data(mongo_query, topic.name)

        # query_topic_data can return None when no document matches (see the
        # sibling read_row action); guard so `in None` cannot raise TypeError
        if target_data is not None and factor.name in target_data:
            read_value = target_data[factor.name]
            context[context_target_name] = read_value
            build_action_log(factor, read_value, topic, unit_action_status)
        # complete_time holds the elapsed duration of this action
        elapsed_time = time.time() - start
        unit_action_status.complete_time = elapsed_time
        return context, unit_action_status