def get_data_sources(tenant_id, case_id, column_name, master=False): """Helper to get all the required table data for the businesss rules to apply """ get_datasources_query = "SELECT * from `data_sources`" db_config['tenant_id'] = tenant_id business_rules_db = DB('business_rules', **db_config) data_sources = business_rules_db.execute(get_datasources_query) # sources sources = json.loads(list(data_sources[column_name])[0]) data = {} for database, tables in sources.items(): db = DB(database, **db_config) for table in tables: if master: query = f"SELECT * from `{table}`" try: df = db.execute(query) except: df = db.execute_(query) data[table] = df.to_dict(orient='records') else: query = f"SELECT * from `{table}` WHERE case_id = %s" params = [case_id] df = db.execute(query, params=params) if not df.empty: data[table] = df.to_dict(orient='records')[0] else: data[table] = {} case_id_based_sources = json.loads(list(data_sources['case_id_based'])[0]) return data
def get_data(tenant_id, database, table, case_id, case_id_based=True, view='records'): """give the data from database Args: Returns: result (dict) result is a dict having keys flag, data.Depending on the flag the data changes. if flag is True: data contains the key value and value is the data. if flag is False: data contains the error_msg say why its failed. Example: x = get_data('invesco.acelive.ai','extraction','ocr','INV4D15EFC')""" result = {} db = DB(database, tenant_id=tenant_id, **db_config) logging.info(f"case_id based: {case_id_based}") try: if case_id_based: query = f"SELECT * from `{table}` WHERE `case_id` = '{case_id}'" try: df = db.execute(query) except: df = db.execute_(query) table_data = df.to_dict(orient=view) result['flag'] = True result['data'] = {"value": table_data} else: query = f"SELECT * from `{table}`" df = db.execute(query) if not df.empty: table_data = df.to_dict(orient=view) else: table_data = {} result['flag'] = True result['data'] = {"value": table_data} except Exception as e: logging.error(f"Failed in getting tables data from database") logging.error(e) result['flag'] = 'False' result['data'] = { 'reason': 'Failed in getting tables data from database', 'error_msg': str(e) } return result
def get_rules(tenant_id, group): """Get the rules based on the stage, tenant_id""" db_config['tenant_id'] = tenant_id business_rules_db = DB('business_rules', **db_config) get_rules_query = "SELECT * from `sequence_rule_data` where `group` = %s" params = [group] rules = business_rules_db.execute(get_rules_query, params=params) return rules
def run_chained_rules(case_id, tenant_id, chain_rules, start_rule_id=None, updated_tables=False, trace_exec=None, rule_params=None): """Execute the chained rules""" # get the mapping of the rules...basically a rule_id maps to a rule rule_id_mapping = {} for ind, rule in chain_rules.iterrows(): rule_id_mapping[rule['rule_id']] = [ rule['rule_string'], rule['next_if_sucess'], rule['next_if_failure'], rule['stage'], rule['description'], rule['data_source'] ] logging.info(f"\n rule id mapping is \n{rule_id_mapping}\n") # evaluate the rules one by one as chained # start_rule_id = None if start_rule_id is None: if rule_id_mapping.keys(): start_rule_id = list(rule_id_mapping.keys())[0] trace_exec = [] rule_params = {} # if start_rule_id then. coming from other service # get the existing trace and rule params data db_config['tenant_id'] = tenant_id business_rules_db = DB('business_rules', **db_config) rule_data_query = "SELECT * from `rule_data` where `case_id`=%s" params = [case_id] df = business_rules_db.execute(rule_data_query, params=params) try: trace_exec = json.loads(list(df['trace_data'])[0]) logging.info(f"\nexistig trace exec is \n{trace_exec}\n") except Exception as e: logging.info(f"no existing trace data") logging.info(f"{str(e)}") trace_exec = [] try: rule_params = json.loads(list(df['rule_params'])[0]) logging.info(f"\nexistig rule_params is \n{rule_params}\n") except Exception as e: logging.info(f"no existing rule params data") logging.info(f"{str(e)}") rule_params = {} # usually master data will not get updated...for every rule master_data_tables = get_data_sources(tenant_id, case_id, 'master', master=True) logging.info(f"\nStart rule id got is {start_rule_id}\n ") while start_rule_id != "END": # get the rules, next rule id to be evaluated rule_to_evaluate, next_if_sucess, next_if_failure, stage, description, data_source = rule_id_mapping[ str(start_rule_id)] logging.info( f"\nInside the loop \n rule_to_evaluate {rule_to_evaluate}\n \ \nnext_if_sucess {next_if_sucess}\n \ \nnext_if_failure {next_if_failure}\n ") # update the data_table if there is any change case_id_data_tables = get_data_sources(tenant_id, case_id, 'case_id_based') master_updated_tables = {} if updated_tables: master_updated_tables = get_data_sources(tenant_id, case_id, 'updated_tables') # consolidate the data into data_tables data_tables = { **case_id_data_tables, **master_data_tables, **master_updated_tables } # evaluate the rule rules = [json.loads(rule_to_evaluate)] BR = BusinessRules(case_id, rules, data_tables) BR.tenant_id = tenant_id decision = BR.evaluate_rule(rules[0]) logging.info( f"\n got the decision {decision} for the rule id {start_rule_id}") logging.info(f"\n updates got are {BR.changed_fields}") updates = {} # update the updates if any if BR.changed_fields: updates = BR.changed_fields update_tables(case_id, tenant_id, updates) # update the trace_data trace_exec.append(start_rule_id) logging.info( f"\n params data used from the rules are \n {BR.params_data}\n") # update the rule_params trace_dict = { str(start_rule_id): { 'description': description if description else 'No description available in the database', 'output': "", 'input': to_DT_data(BR.params_data['input']) } } rule_params.update(trace_dict) # update the start_rule_id based on the decision if decision: start_rule_id = next_if_sucess else: start_rule_id = next_if_failure logging.info(f"\n next rule id to execute is {start_rule_id}\n") # off by one updates... trace_exec.append(start_rule_id) # store the trace_exec and rule_params in the database update_rule_params_query = f"INSERT INTO `rule_data`(`id`, `case_id`, `rule_params`) VALUES ('NULL',%s,%s) ON DUPLICATE KEY UPDATE `rule_params`=%s" params = [ case_id, json.dumps(rule_params, default=str), json.dumps(rule_params, default=str) ] business_rules_db.execute(update_rule_params_query, params=params) update_trace_exec_query = f"INSERT INTO `rule_data` (`id`, `case_id`, `trace_data`) VALUES ('NULL',%s,%s) ON DUPLICATE KEY UPDATE `trace_data`=%s" params = [case_id, json.dumps(trace_exec), json.dumps(trace_exec)] business_rules_db.execute(update_trace_exec_query, params=params) logging.info("\n Applied chained rules successfully") return 'Applied chained rules successfully'
def consume(broker_url='broker:9092'): try: route = 'run_business_rule' logging.info(f'Listening to topic: {route}') consumer = create_consumer(route) logging.debug('Consumer object created.') parts = consumer.partitions_for_topic(route) if parts is None: logging.warning(f'No partitions for topic `{route}`') logging.debug(f'Creating Topic: {route}') produce(route, {}) print(f'Listening to topic `{route}`...') while parts is None: consumer = create_consumer(route) parts = consumer.partitions_for_topic(route) logging.warning("No partition. In while loop. Make it stop") partitions = [TopicPartition(route, p) for p in parts] consumer.assign(partitions) for message in consumer: data = message.value logging.info(f'Message: {data}') try: case_id = data['case_id'] functions = data.pop('functions', None) tenant_id = data['tenant_id'] start_rule_id = data.pop('start_rule_id', None) stage = data.get('stage', None) workflow = data.get('workflow', None) file_name = data.get('file_name', None) files = data.get('files', None) source = data.get('source', None) file_path = data.get('file_path', None) original_file_name = data.get('original_file_name', None) except Exception as e: logging.warning(f'Recieved unknown data. [{data}] [{e}]') consumer.commit() continue db_config['tenant_id'] = tenant_id queue_db = DB('queues', **db_config) kafka_db = DB('kafka', **db_config) query = 'SELECT * FROM `button_functions` WHERE `route`=%s' function_info = queue_db.execute(query, params=[route]) in_progress_message = list(function_info['in_progress_message'])[0] failure_message = list(function_info['failure_message'])[0] success_message = list(function_info['success_message'])[0] message_flow = kafka_db.get_all('grouped_message_flow') # Get which button (group in kafka table) this function was called from group = data.pop('group', None) if group: # Get message group functions group_messages = message_flow.loc[message_flow['message_group'] == group] # If its the first function the update the progress count first_flow = group_messages.head(1) first_topic = first_flow.loc[first_flow['listen_to_topic'] == route] query = 'UPDATE `process_queue` SET `status`=%s, `total_processes`=%s, `case_lock`=1 WHERE `case_id`=%s' if not first_topic.empty: logging.debug( f'`{route}` is the first topic in the group `{group}`.' ) logging.debug( f'Number of topics in group `{group}` is {len(group_messages)}' ) if list(first_flow['send_to_topic'])[0] is None: queue_db.execute(query, params=[ in_progress_message, len(group_messages), case_id ]) else: queue_db.execute(query, params=[ in_progress_message, len(group_messages) + 1, case_id ]) # Getting the correct data for the functions. This data will be passed through # rest of the chained functions. function_params = {} if functions: for function in functions: if function['route'] == route: function_params = function['parameters'] break else: function_params['stage'] = [stage] # Call the function try: logging.debug(f'Calling function `run_business_rule`') result = apply_business_rule(case_id, function_params, tenant_id, start_rule_id) logging.debug(f'Ran business rules with stage {stage}') logging.debug(f"result flag is {result.get('flag', '')}") except: # Unlock the case. logging.exception( f'Something went wrong while saving changes. Check trace.') query = 'UPDATE `process_queue` SET `status`=%s, `case_lock`=0, `failure_status`=1 WHERE `case_id`=%s' queue_db.execute(query, params=[failure_message, case_id]) consumer.commit() continue # Check if function was succesfully executed if group: if result['flag']: # If there is only function for the group, unlock case. if not first_topic.empty: if list(first_flow['send_to_topic'])[0] is None: # It is the last message. So update file status to completed. query = 'UPDATE `process_queue` SET `status`=%s, `case_lock`=0, `completed_processes`=`completed_processes`+1 WHERE `case_id`=%s' queue_db.execute(query, params=[success_message, case_id]) consumer.commit() continue last_topic = group_messages.tail(1).loc[ group_messages['send_to_topic'] == route] # If it is not the last message, then produce to next function else just unlock case. if last_topic.empty: # Get next function name next_topic = list(group_messages.loc[ group_messages['listen_to_topic'] == route] ['send_to_topic'])[0] if next_topic is not None: logging.debug('Not the last topic of the group.') produce(next_topic, data) # Update the progress count by 1 query = 'UPDATE `process_queue` SET `status`=%s, `completed_processes`=`completed_processes`+1 WHERE `case_id`=%s' queue_db.execute(query, params=[success_message, case_id]) consumer.commit() else: # It is the last message. So update file status to completed. logging.debug('Last topic of the group.') query = 'UPDATE `process_queue` SET `status`=%s, `case_lock`=0, `completed_processes`=`completed_processes`+1 WHERE `case_id`=%s' queue_db.execute(query, params=[success_message, case_id]) consumer.commit() else: # Unlock the case. logging.debug( 'Flag false. Unlocking case with failure status 1.') query = 'UPDATE `process_queue` SET `status`=%s, `case_lock`=0, `failure_status`=1 WHERE `case_id`=%s' queue_db.execute(query, params=[failure_message, case_id]) consumer.commit() else: data = result['send_data'] if 'send_data' in result else {} data['case_id'] = case_id data['tenant_id'] = tenant_id data['workflow'] = workflow data['type'] = 'file_ingestion' data['file_name'] = file_name data['files'] = files data['source'] = source data['file_path'] = file_path data['original_file_name'] = original_file_name query = 'SELECT * FROM `message_flow` WHERE `listen_to_topic`=%s AND `workflow`=%s' logging.debug(f'topic - {route} , workflow - {workflow}') message_flow = kafka_db.execute(query, params=[route, workflow]) if message_flow.empty: logging.error( '`kafka table` is not configured correctly in message flow table.' ) send_to_topic = None else: send_to_topic = list(message_flow.send_to_topic)[0] if send_to_topic is not None: logging.info(f'Producing to topic {send_to_topic}') produce(send_to_topic, data) else: logging.info(f'There is no topic to send.') if 'send_to_topic' in result: send_to_topic_bypassed = result['send_to_topic'] produce(send_to_topic_bypassed, {}) else: logging.error( 'Message not consumed. Some error must have occured. Will try again!' ) consumer.commit() except: logging.exception('Something went wrong in consumer. Check trace.')