def get_rules(tenant_id, group):
    """Fetch the sequence rules configured for a rule group (stage).

    Args:
        tenant_id (str): tenant whose `business_rules` database is queried.
        group (str): value matched against the `group` column of
            `sequence_rule_data`.

    Returns:
        The result of the parameterized SELECT over `sequence_rule_data`.
    """
    # The shared db_config carries the tenant context for the DB wrapper.
    db_config['tenant_id'] = tenant_id
    rules_db = DB('business_rules', **db_config)
    query = "SELECT * from `sequence_rule_data` where `group` = %s"
    return rules_db.execute(query, params=[group])
def get_data_sources(tenant_id, case_id, column_name, master=False):
    """Helper to get all the required table data for the business rules to apply.

    Reads the JSON mapping stored in the `data_sources` table column
    `column_name` (shape: {database: [table, ...]}) and loads the listed
    tables.

    Args:
        tenant_id (str): tenant whose databases are read.
        case_id (str): case identifier used to filter case-based tables.
        column_name (str): column of `data_sources` holding the JSON mapping.
        master (bool): when True load every row of each table (master data);
            when False load only the row for `case_id`.

    Returns:
        dict: table name -> list of row dicts when `master` is True,
        otherwise a single row dict (or {} when no row matches).
    """
    get_datasources_query = "SELECT * from `data_sources`"
    db_config['tenant_id'] = tenant_id
    business_rules_db = DB('business_rules', **db_config)
    data_sources = business_rules_db.execute(get_datasources_query)

    # sources: configured as JSON {database: [table, ...]}
    sources = json.loads(list(data_sources[column_name])[0])
    data = {}
    for database, tables in sources.items():
        db = DB(database, **db_config)
        for table in tables:
            if master:
                query = f"SELECT * from `{table}`"
                try:
                    df = db.execute(query)
                except Exception:
                    # Fall back to the raw-execute variant of the DB wrapper.
                    df = db.execute_(query)
                data[table] = df.to_dict(orient='records')
            else:
                query = f"SELECT * from `{table}` WHERE case_id = %s"
                df = db.execute(query, params=[case_id])
                # One row is expected per case; default to {} when absent.
                data[table] = df.to_dict(orient='records')[0] if not df.empty else {}
    # NOTE: the original computed `case_id_based_sources` here and never used
    # it — that dead read has been removed.
    return data
def get_data(tenant_id, database, table, case_id, case_id_based=True, view='records'):
    """Give the data from database.

    Args:
        tenant_id (str): tenant name, e.g. 'invesco.acelive.ai'.
        database (str): database name, e.g. 'extraction'.
        table (str): table name, e.g. 'ocr'.
        case_id (str): case to filter on when `case_id_based` is True.
        case_id_based (bool): True to fetch only the rows for `case_id`,
            False to fetch the whole table.
        view (str): pandas `to_dict` orientation used for the result.

    Returns:
        result (dict): has keys flag, data. Depending on the flag the data
        changes. If flag is True: data contains the key value and value is
        the data. If flag is False: data contains the error_msg saying why
        it failed.

    Example:
        x = get_data('invesco.acelive.ai', 'extraction', 'ocr', 'INV4D15EFC')
    """
    result = {}
    db = DB(database, tenant_id=tenant_id, **db_config)
    logging.info(f"case_id based: {case_id_based}")
    try:
        if case_id_based:
            # NOTE(review): case_id is interpolated directly into the SQL
            # string — safe only for trusted/internal case ids. Parameterize
            # once the execute_/execute signatures allow it.
            query = f"SELECT * from `{table}` WHERE `case_id` = '{case_id}'"
            try:
                df = db.execute(query)
            except Exception:
                # Fall back to the raw-execute variant of the DB wrapper.
                df = db.execute_(query)
            table_data = df.to_dict(orient=view)
            result['flag'] = True
            result['data'] = {"value": table_data}
        else:
            query = f"SELECT * from `{table}`"
            df = db.execute(query)
            table_data = df.to_dict(orient=view) if not df.empty else {}
            result['flag'] = True
            result['data'] = {"value": table_data}
    except Exception as e:
        logging.error(f"Failed in getting tables data from database")
        logging.error(e)
        # BUG FIX: was the string 'False', which is truthy — callers testing
        # result['flag'] would treat failures as successes.
        result['flag'] = False
        result['data'] = {
            'reason': 'Failed in getting tables data from database',
            'error_msg': str(e)
        }
    return result
def __init__(self, db_name=None):
    """Open the backing database and initialise the rolling windows.

    :param: db_name optional database file name; defaults to 'rrd.db'.
    """
    self.db = DB(db_name if db_name is not None else 'rrd.db')
    # One slot per hour of the last day / per minute of the last hour.
    self.last_day = [None for _ in range(24)]
    self.last_hour = [None for _ in range(60)]
    now = time.time()
    # (start, end) epoch boundaries of the 1-hour and 24-hour windows.
    self.start_h, self.end_h = wnd_1(now)
    self.start_d, self.end_d = wnd_24(now)
def update_tables(case_id, tenant_id, updates):
    """Update the values in the database.

    Only two tables are recognised: 'ocr' (extraction DB) and
    'process_queue' (queues DB); any other key in `updates` is ignored.

    Args:
        case_id (str): case whose rows are updated.
        tenant_id (str): tenant whose databases are used.
        updates (dict): {table: {column: value, ...}} updates to apply.

    Returns:
        str: a fixed success message.
    """
    db_config['tenant_id'] = tenant_id
    # only in ocr or process_queue we are updating
    extraction_db = DB('extraction', **db_config)
    queue_db = DB('queues', **db_config)
    where_clause = {'case_id': case_id}
    for table_name, column_values in updates.items():
        if table_name == 'ocr':
            extraction_db.update(table_name, update=column_values, where=where_clause)
        if table_name == 'process_queue':
            queue_db.update(table_name, update=column_values, where=where_clause)
    return "UPDATED IN THE DATABASE SUCCESSFULLY"
def save_data(tenant_id, database, table, data, case_id, case_id_based=True, view='records'):
    """Util for saving the data into database.

    Args:
        tenant_id (str): tenant name whose database is used, e.g.
            'invesco.acelive.ai'.
        database (str): database name, e.g. 'extraction'.
        table (str): table name, e.g. 'ocr' (the tables actually updated
            come from the keys of `data`).
        data (dict): {table: {column: value, ...}} — a column_value map per
            table to update.
        case_id (str): case whose row is updated.
        case_id_based (bool): True to update the row for `case_id`; the
            master (False) path is not implemented yet.
        view (str): unused; kept for interface compatibility.

    Returns:
        result (dict): {'flag': True, 'data': data} on success, or
        {'flag': False, 'data': {'reason': ..., 'error_msg': ...}} on failure.

    Example:
        data1 = {'ocr': {'comments': 'testing', 'assessable_value': 1000}}
        save_data(tenant_id='deloitte.acelive.ai', database='extraction',
                  table='None', data=data1, case_id='DEL754C18D_test',
                  case_id_based=True, view='records')
    """
    logging.info(f"tenant_id got is : {tenant_id}")
    logging.info(f"database got is : {database}")
    logging.info(f"table name got is : {table}")
    logging.info(f"data got is : {data}")
    logging.info(f"case_id got is : {case_id}")
    result = {}
    if case_id_based:
        logging.info(f"data to save is case_id based data.")
        try:
            db_config['tenant_id'] = tenant_id
            if database == 'extraction':
                # only in ocr or process_queue we are updating
                extraction_db = DB('extraction', **db_config)
            elif database == 'queues':
                # only in ocr or process_queue we are updating
                queue_db = DB('queues', **db_config)
            for table, colum_values in data.items():
                if table == 'ocr':
                    extraction_db.update(table, update=colum_values, where={'case_id': case_id})
                if table == 'process_queue':
                    queue_db.update(table, update=colum_values, where={'case_id': case_id})
        except Exception as e:
            logging.error(f"Cannot update the database")
            logging.error(e)
            # BUG FIX: was `result["flag"] = False,` — the trailing comma made
            # the value a (truthy) one-element tuple, so callers testing
            # result['flag'] saw failures as successes.
            result['flag'] = False
            result['data'] = {
                'reason': f'Cannot update the database',
                'error_msg': str(e)
            }
            return result
        result['flag'] = True
        result['data'] = data
        return result
    else:
        logging.info(f"data to save is master based data.")
        try:
            db_config['tenant_id'] = tenant_id
            # only in ocr or process_queue we are updating
            extraction_db = DB('extraction', **db_config)
            queue_db = DB('queues', **db_config)
            for table, colum_values in data.items():
                logging.info(
                    '************** have to develop due to where clause condition not getting from data *******'
                )
        except Exception as e:
            logging.error(f"Cannot update the database")
            logging.error(e)
            result['flag'] = False
            result['data'] = {
                'reason': f'Cannot update the database',
                'error_msg': str(e)
            }
            # BUG FIX: return the failure result here instead of falling
            # through and overwriting the flag with True below.
            return result
        result['flag'] = True
        result['data'] = data
        return result
from flask import Flask
from flask_restful import Api
from server.resources import SimilarUserList
from db_utils import DB
from DEFAULTS import SERVER_PORT

# Flask application exposing the similar-users REST API.
app = Flask(__name__)
api = Api(app)
# All requests to /similar-users/ are handled by the SimilarUserList resource.
api.add_resource(SimilarUserList, "/similar-users/")

if __name__ == "__main__":
    # Initialise the database layer before serving requests.
    db = DB()
    db.init_app()
    # debug=True: development server only — do not use in production.
    app.run(port=SERVER_PORT, debug=True)
class RRD:
    """Round-robin-style store of per-minute samples (Python 2 code).

    Keeps rolling 1-hour and 24-hour window boundaries and persists samples
    through the DB wrapper. The window helpers `wnd_1`/`wnd_24` and
    `trunc_to_min` are defined elsewhere in the module.
    """

    def __init__(self, db_name=None):
        # Default database file when none is given.
        if db_name is None:
            db_name = 'rrd.db'
        self.db = DB(db_name)
        # Placeholder slots: one per hour of the last day / minute of the last hour.
        self.last_day = [None]*24
        self.last_hour = [None]*60
        t = time.time()
        # (start, end) epoch boundaries for the 1-hour and 24-hour windows.
        self.start_h, self.end_h = wnd_1(t)
        self.start_d, self.end_d = wnd_24(t)

    def update_time(func):
        # Decorator: refresh the rolling-window boundaries before each call.
        def inner(self, *args, **kwargs):
            t = time.time()
            self.start_h, self.end_h = wnd_1(t)
            self.start_d, self.end_d = wnd_24(t)
            return func(self, *args, **kwargs)
        return inner

    @update_time
    def save(self, epoch, value):
        """Save new data received to database.

        :param: epoch timestamp of data to be saved
        :param: value float value to be saved

        timestamp older than 24 hours or in future date will be discarded.
        """
        # Truncate to minute resolution; insert only when no sample exists for
        # that minute and the timestamp falls inside one of the windows.
        m = self.db.fetch_from_epoch('minutes', trunc_to_min(epoch))
        if m is None and (self._last_hour(epoch) or self._last_day(epoch)):
            self.db.insert('minutes', data=(trunc_to_min(epoch), value))

    @update_time
    def query(self, *args):
        """Query informations stored on the database.

        Valid params are 'hours' or 'minutes'. Parameter 'hours' returns
        values stored in the last 24 hours, together with min value stored,
        max value stored and average of the values for the last 24 hours.
        'minutes' returns values stored in the last 60 minutes, together
        with min, max and average for the last 60 minutes.
        """
        if args[0] == 'minutes':
            data = self.db.fetch_all('minutes', self.start_h, self.end_h)
        elif args[0] == 'hours':
            data = self.db.fetch_all('hours', self.start_d, self.end_d)
        self.print_time_epoch(args[0], data)
        # Second element of each row is the stored float value.
        data_float = [e[1] for e in data]
        if len(data_float) > 0:
            print "min:{} max:{} avg:{} ".format(min(data_float), max(data_float), sum(data_float)/len(data_float))
        else:
            # NOTE(review): in the original source this literal was split
            # across physical lines (invalid for a plain string); rejoined
            # here with a space — confirm intended text.
            print "Not enough informations to evaluate min, max, avg. (Empty sequence)"

    def _last_hour(self, epoch):
        # True when the minute-truncated epoch lies within the 1-hour window.
        epoch = trunc_to_min(epoch)
        return int(self.end_h) <= int(epoch) <= int(self.start_h)

    def _last_day(self, epoch):
        # True when the minute-truncated epoch lies within the 24-hour window.
        epoch = trunc_to_min(epoch)
        return int(self.end_d) <= int(epoch) <= int(self.start_d)

    def print_time_epoch(self, table, data):
        # NOTE(review): "".format(...) discards its arguments and prints an
        # empty line; the header was presumably meant to be '{}{}{}'.format(...).
        # Left unchanged — confirm intent before fixing.
        print "".format('-'*5, str(table.upper()), '-'*5)
        for epoch, value in data:
            print '({}): <{}> ==> {}'.format(epoch, time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(epoch)), value)
def run_chained_rules(case_id, tenant_id, chain_rules, start_rule_id=None, updated_tables=False, trace_exec=None, rule_params=None):
    """Execute the chained rules.

    Walks the rule chain starting at `start_rule_id` (or the first rule in
    `chain_rules` when None), evaluating each rule with BusinessRules,
    applying any resulting field updates via update_tables, and recording
    the execution trace and rule parameters into the `rule_data` table.

    Args:
        case_id (str): case the rules are applied to.
        tenant_id (str): tenant whose databases are used.
        chain_rules: DataFrame-like rows with rule_id, rule_string,
            next_if_sucess, next_if_failure, stage, description, data_source.
        start_rule_id: rule id to resume from; None starts a fresh run.
        updated_tables (bool): when True, also pull the 'updated_tables'
            data sources on each iteration.
        trace_exec, rule_params: seed values; overwritten from the database
            (or reset) below, so callers normally leave them as None.

    Returns:
        str: 'Applied chained rules successfully'.
    """
    # get the mapping of the rules...basically a rule_id maps to a rule
    rule_id_mapping = {}
    for ind, rule in chain_rules.iterrows():
        rule_id_mapping[rule['rule_id']] = [
            rule['rule_string'], rule['next_if_sucess'], rule['next_if_failure'],
            rule['stage'], rule['description'], rule['data_source']
        ]
    logging.info(f"\n rule id mapping is \n{rule_id_mapping}\n")
    # evaluate the rules one by one as chained
    # start_rule_id = None
    if start_rule_id is None:
        # Fresh run: begin at the first configured rule with an empty trace.
        if rule_id_mapping.keys():
            start_rule_id = list(rule_id_mapping.keys())[0]
        trace_exec = []
        rule_params = {}
    # if start_rule_id then. coming from other service
    # get the existing trace and rule params data
    db_config['tenant_id'] = tenant_id
    business_rules_db = DB('business_rules', **db_config)
    rule_data_query = "SELECT * from `rule_data` where `case_id`=%s"
    params = [case_id]
    df = business_rules_db.execute(rule_data_query, params=params)
    try:
        trace_exec = json.loads(list(df['trace_data'])[0])
        logging.info(f"\nexistig trace exec is \n{trace_exec}\n")
    except Exception as e:
        logging.info(f"no existing trace data")
        logging.info(f"{str(e)}")
        trace_exec = []
    try:
        rule_params = json.loads(list(df['rule_params'])[0])
        logging.info(f"\nexistig rule_params is \n{rule_params}\n")
    except Exception as e:
        logging.info(f"no existing rule params data")
        logging.info(f"{str(e)}")
        rule_params = {}
    # usually master data will not get updated...for every rule
    master_data_tables = get_data_sources(tenant_id, case_id, 'master', master=True)
    logging.info(f"\nStart rule id got is {start_rule_id}\n ")
    # Chain terminates when a rule's next pointer is the sentinel "END".
    while start_rule_id != "END":
        # get the rules, next rule id to be evaluated
        rule_to_evaluate, next_if_sucess, next_if_failure, stage, description, data_source = rule_id_mapping[
            str(start_rule_id)]
        logging.info(
            f"\nInside the loop \n rule_to_evaluate {rule_to_evaluate}\n \
            \nnext_if_sucess {next_if_sucess}\n \
            \nnext_if_failure {next_if_failure}\n ")
        # update the data_table if there is any change
        case_id_data_tables = get_data_sources(tenant_id, case_id, 'case_id_based')
        master_updated_tables = {}
        if updated_tables:
            master_updated_tables = get_data_sources(tenant_id, case_id, 'updated_tables')
        # consolidate the data into data_tables
        data_tables = {
            **case_id_data_tables,
            **master_data_tables,
            **master_updated_tables
        }
        # evaluate the rule (rule_string is stored as JSON)
        rules = [json.loads(rule_to_evaluate)]
        BR = BusinessRules(case_id, rules, data_tables)
        BR.tenant_id = tenant_id
        decision = BR.evaluate_rule(rules[0])
        logging.info(
            f"\n got the decision {decision} for the rule id {start_rule_id}")
        logging.info(f"\n updates got are {BR.changed_fields}")
        updates = {}
        # update the updates if any
        if BR.changed_fields:
            updates = BR.changed_fields
            update_tables(case_id, tenant_id, updates)
        # update the trace_data
        trace_exec.append(start_rule_id)
        logging.info(
            f"\n params data used from the rules are \n {BR.params_data}\n")
        # update the rule_params
        trace_dict = {
            str(start_rule_id): {
                'description': description if description else 'No description available in the database',
                'output': "",
                'input': to_DT_data(BR.params_data['input'])
            }
        }
        rule_params.update(trace_dict)
        # update the start_rule_id based on the decision
        if decision:
            start_rule_id = next_if_sucess
        else:
            start_rule_id = next_if_failure
        logging.info(f"\n next rule id to execute is {start_rule_id}\n")
    # off by one updates...
    # (records the terminating "END" sentinel in the trace as well)
    trace_exec.append(start_rule_id)
    # store the trace_exec and rule_params in the database (upsert per case)
    update_rule_params_query = f"INSERT INTO `rule_data`(`id`, `case_id`, `rule_params`) VALUES ('NULL',%s,%s) ON DUPLICATE KEY UPDATE `rule_params`=%s"
    params = [
        case_id,
        json.dumps(rule_params, default=str),
        json.dumps(rule_params, default=str)
    ]
    business_rules_db.execute(update_rule_params_query, params=params)
    update_trace_exec_query = f"INSERT INTO `rule_data` (`id`, `case_id`, `trace_data`) VALUES ('NULL',%s,%s) ON DUPLICATE KEY UPDATE `trace_data`=%s"
    params = [case_id, json.dumps(trace_exec), json.dumps(trace_exec)]
    business_rules_db.execute(update_trace_exec_query, params=params)
    logging.info("\n Applied chained rules successfully")
    return 'Applied chained rules successfully'
def consume(broker_url='broker:9092'):
    """Kafka consumer loop for the `run_business_rule` topic.

    Waits for the topic to exist, assigns all its partitions, and for each
    message runs the business rules for a case, then updates the
    process_queue status and (depending on the message-flow configuration)
    produces to the next topic in the chain.

    Args:
        broker_url (str): broker address; currently unused in the body —
            presumably consumed by create_consumer's defaults (TODO confirm).
    """
    try:
        route = 'run_business_rule'
        logging.info(f'Listening to topic: {route}')
        consumer = create_consumer(route)
        logging.debug('Consumer object created.')
        parts = consumer.partitions_for_topic(route)
        if parts is None:
            # Topic does not exist yet: produce an empty message to create it,
            # then poll until partitions appear.
            logging.warning(f'No partitions for topic `{route}`')
            logging.debug(f'Creating Topic: {route}')
            produce(route, {})
            print(f'Listening to topic `{route}`...')
            while parts is None:
                consumer = create_consumer(route)
                parts = consumer.partitions_for_topic(route)
                logging.warning("No partition. In while loop. Make it stop")
        partitions = [TopicPartition(route, p) for p in parts]
        consumer.assign(partitions)
        for message in consumer:
            data = message.value
            logging.info(f'Message: {data}')
            try:
                # case_id and tenant_id are mandatory; everything else optional.
                case_id = data['case_id']
                functions = data.pop('functions', None)
                tenant_id = data['tenant_id']
                start_rule_id = data.pop('start_rule_id', None)
                stage = data.get('stage', None)
                workflow = data.get('workflow', None)
                file_name = data.get('file_name', None)
                files = data.get('files', None)
                source = data.get('source', None)
                file_path = data.get('file_path', None)
                original_file_name = data.get('original_file_name', None)
            except Exception as e:
                # Malformed message: acknowledge and skip.
                logging.warning(f'Recieved unknown data. [{data}] [{e}]')
                consumer.commit()
                continue
            db_config['tenant_id'] = tenant_id
            queue_db = DB('queues', **db_config)
            kafka_db = DB('kafka', **db_config)
            # Status messages configured per route in button_functions.
            query = 'SELECT * FROM `button_functions` WHERE `route`=%s'
            function_info = queue_db.execute(query, params=[route])
            in_progress_message = list(function_info['in_progress_message'])[0]
            failure_message = list(function_info['failure_message'])[0]
            success_message = list(function_info['success_message'])[0]
            message_flow = kafka_db.get_all('grouped_message_flow')
            # Get which button (group in kafka table) this function was called from
            group = data.pop('group', None)
            if group:
                # Get message group functions
                group_messages = message_flow.loc[message_flow['message_group'] == group]
                # If its the first function the update the progress count
                first_flow = group_messages.head(1)
                first_topic = first_flow.loc[first_flow['listen_to_topic'] == route]
                query = 'UPDATE `process_queue` SET `status`=%s, `total_processes`=%s, `case_lock`=1 WHERE `case_id`=%s'
                if not first_topic.empty:
                    logging.debug(
                        f'`{route}` is the first topic in the group `{group}`.'
                    )
                    logging.debug(
                        f'Number of topics in group `{group}` is {len(group_messages)}'
                    )
                    # Single-topic groups count len(group_messages); otherwise +1.
                    if list(first_flow['send_to_topic'])[0] is None:
                        queue_db.execute(query, params=[
                            in_progress_message, len(group_messages), case_id
                        ])
                    else:
                        queue_db.execute(query, params=[
                            in_progress_message, len(group_messages) + 1, case_id
                        ])
            # Getting the correct data for the functions. This data will be passed through
            # rest of the chained functions.
            function_params = {}
            if functions:
                for function in functions:
                    if function['route'] == route:
                        function_params = function['parameters']
                        break
            else:
                function_params['stage'] = [stage]
            # Call the function
            try:
                logging.debug(f'Calling function `run_business_rule`')
                result = apply_business_rule(case_id, function_params, tenant_id, start_rule_id)
                logging.debug(f'Ran business rules with stage {stage}')
                logging.debug(f"result flag is {result.get('flag', '')}")
            except:
                # Unlock the case.
                logging.exception(
                    f'Something went wrong while saving changes. Check trace.')
                query = 'UPDATE `process_queue` SET `status`=%s, `case_lock`=0, `failure_status`=1 WHERE `case_id`=%s'
                queue_db.execute(query, params=[failure_message, case_id])
                consumer.commit()
                continue
            # Check if function was succesfully executed
            if group:
                if result['flag']:
                    # If there is only function for the group, unlock case.
                    if not first_topic.empty:
                        if list(first_flow['send_to_topic'])[0] is None:
                            # It is the last message. So update file status to completed.
                            query = 'UPDATE `process_queue` SET `status`=%s, `case_lock`=0, `completed_processes`=`completed_processes`+1 WHERE `case_id`=%s'
                            queue_db.execute(query, params=[success_message, case_id])
                            consumer.commit()
                            continue
                    last_topic = group_messages.tail(1).loc[
                        group_messages['send_to_topic'] == route]
                    # If it is not the last message, then produce to next function else just unlock case.
                    if last_topic.empty:
                        # Get next function name
                        next_topic = list(group_messages.loc[
                            group_messages['listen_to_topic'] == route]
                            ['send_to_topic'])[0]
                        if next_topic is not None:
                            logging.debug('Not the last topic of the group.')
                            produce(next_topic, data)
                        # Update the progress count by 1
                        query = 'UPDATE `process_queue` SET `status`=%s, `completed_processes`=`completed_processes`+1 WHERE `case_id`=%s'
                        queue_db.execute(query, params=[success_message, case_id])
                        consumer.commit()
                    else:
                        # It is the last message. So update file status to completed.
                        logging.debug('Last topic of the group.')
                        query = 'UPDATE `process_queue` SET `status`=%s, `case_lock`=0, `completed_processes`=`completed_processes`+1 WHERE `case_id`=%s'
                        queue_db.execute(query, params=[success_message, case_id])
                        consumer.commit()
                else:
                    # Unlock the case.
                    logging.debug(
                        'Flag false. Unlocking case with failure status 1.')
                    query = 'UPDATE `process_queue` SET `status`=%s, `case_lock`=0, `failure_status`=1 WHERE `case_id`=%s'
                    queue_db.execute(query, params=[failure_message, case_id])
                    consumer.commit()
            else:
                # No group: forward through the workflow's message_flow table.
                data = result['send_data'] if 'send_data' in result else {}
                data['case_id'] = case_id
                data['tenant_id'] = tenant_id
                data['workflow'] = workflow
                data['type'] = 'file_ingestion'
                data['file_name'] = file_name
                data['files'] = files
                data['source'] = source
                data['file_path'] = file_path
                data['original_file_name'] = original_file_name
                query = 'SELECT * FROM `message_flow` WHERE `listen_to_topic`=%s AND `workflow`=%s'
                logging.debug(f'topic - {route} , workflow - {workflow}')
                message_flow = kafka_db.execute(query, params=[route, workflow])
                if message_flow.empty:
                    logging.error(
                        '`kafka table` is not configured correctly in message flow table.'
                    )
                    send_to_topic = None
                else:
                    send_to_topic = list(message_flow.send_to_topic)[0]
                if send_to_topic is not None:
                    logging.info(f'Producing to topic {send_to_topic}')
                    produce(send_to_topic, data)
                else:
                    logging.info(f'There is no topic to send.')
                    # The business-rule result may name a bypass topic itself.
                    if 'send_to_topic' in result:
                        send_to_topic_bypassed = result['send_to_topic']
                        produce(send_to_topic_bypassed, {})
                    else:
                        logging.error(
                            'Message not consumed. Some error must have occured. Will try again!'
                        )
                consumer.commit()
    except:
        # NOTE(review): bare except at the top level — any error (including
        # KeyboardInterrupt) ends the consumer loop after logging.
        logging.exception('Something went wrong in consumer. Check trace.')