Example #1
def get_rules(tenant_id, group):
    """Get the rules based on the stage, tenant_id"""
    db_config['tenant_id'] = tenant_id
    business_rules_db = DB('business_rules', **db_config)
    get_rules_query = "SELECT * from `sequence_rule_data` where `group` = %s"
    params = [group]
    rules = business_rules_db.execute(get_rules_query, params=params)
    return rules
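A minimal usage sketch (assuming the module-level db_config and the DB helper from db_utils used throughout these examples; the tenant and group values are illustrative):

# Hypothetical call: fetch the rules configured for one tenant and group.
# DB.execute returns a pandas DataFrame in these examples.
rules = get_rules('deloitte.acelive.ai', 'validation')
for _, rule in rules.iterrows():
    print(rule['rule_id'], rule['rule_string'])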
Example #2
def get_data_sources(tenant_id, case_id, column_name, master=False):
    """Helper to get all the required table data for the businesss rules to apply
    """
    get_datasources_query = "SELECT * from `data_sources`"
    db_config['tenant_id'] = tenant_id
    business_rules_db = DB('business_rules', **db_config)
    data_sources = business_rules_db.execute(get_datasources_query)

    # parse the JSON map of database -> table list from the requested column
    sources = json.loads(list(data_sources[column_name])[0])

    data = {}
    for database, tables in sources.items():
        db = DB(database, **db_config)
        for table in tables:
            if master:
                query = f"SELECT * from `{table}`"
                try:
                    df = db.execute(query)
                except Exception:
                    # fall back to the alternate execute helper if plain execute fails
                    df = db.execute_(query)

                data[table] = df.to_dict(orient='records')
            else:
                query = f"SELECT * from `{table}` WHERE case_id = %s"
                params = [case_id]
                df = db.execute(query, params=params)
                if not df.empty:
                    data[table] = df.to_dict(orient='records')[0]
                else:
                    data[table] = {}

    # NOTE: parsed but unused in this helper
    case_id_based_sources = json.loads(list(data_sources['case_id_based'])[0])

    return data
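For reference, a sketch of how get_data_sources is driven by the data_sources table: each column (here 'master') holds a JSON map of database name to table list, and the helper returns a dict keyed by table name (all names below are illustrative):

# Hypothetical contents of the `master` column in `data_sources`:
#   {"extraction": ["ocr"], "queues": ["process_queue"]}
data = get_data_sources('deloitte.acelive.ai', 'DEL754C18D_test', 'master', master=True)
# data == {'ocr': [...all records...], 'process_queue': [...all records...]}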
def get_data(tenant_id,
             database,
             table,
             case_id,
             case_id_based=True,
             view='records'):
    """give the data from database
    Args:
        
    Returns:
        result (dict)
        result is a dict having keys flag, data.Depending on the flag the data changes.
        if flag is True: data contains the key value and value is the data.
        if flag is False: data contains the error_msg say why its failed.
    Example:
            x = get_data('invesco.acelive.ai','extraction','ocr','INV4D15EFC')"""
    result = {}
    db = DB(database, tenant_id=tenant_id, **db_config)
    logging.info(f"case_id based: {case_id_based}")
    try:
        if case_id_based:
            query = f"SELECT * from `{table}` WHERE `case_id` = '{case_id}'"
            try:
                df = db.execute(query)
            except Exception:
                # fall back to the alternate execute helper if plain execute fails
                df = db.execute_(query)
            table_data = df.to_dict(orient=view)
            result['flag'] = True
            result['data'] = {"value": table_data}

        else:
            query = f"SELECT * from `{table}`"
            df = db.execute(query)
            if not df.empty:
                table_data = df.to_dict(orient=view)
            else:
                table_data = {}
            result['flag'] = True
            result['data'] = {"value": table_data}
    except Exception as e:
        logging.error(f"Failed in getting tables data from database")
        logging.error(e)
        result['flag'] = 'False'
        result['data'] = {
            'reason': 'Failed in getting tables data from database',
            'error_msg': str(e)
        }
    return result
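A sketch of consuming get_data's result dict, following the flag convention described in the docstring:

result = get_data('invesco.acelive.ai', 'extraction', 'ocr', 'INV4D15EFC')
if result['flag']:
    records = result['data']['value']   # list of row dicts when view='records'
else:
    logging.error(result['data']['error_msg'])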
Example #5
def update_tables(case_id, tenant_id, updates):
    """Update the values in the database"""
    db_config['tenant_id'] = tenant_id
    # only the ocr and process_queue tables get updated here
    extraction_db = DB('extraction', **db_config)
    queue_db = DB('queues', **db_config)

    for table, column_values in updates.items():
        if table == 'ocr':
            extraction_db.update(table,
                                 update=column_values,
                                 where={'case_id': case_id})
        if table == 'process_queue':
            queue_db.update(table,
                            update=column_values,
                            where={'case_id': case_id})
    return "UPDATED IN THE DATABASE SUCCESSFULLY"
def save_data(tenant_id,
              database,
              table,
              data,
              case_id,
              case_id_based=True,
              view='records'):
    """Util for saving the data into database
    
    Args:
        tenant_id (str) -> the tenant name for which we have to take the database from. ex.invesco.acelive.ai
        database (str) -> database name. ex.extraction
        table (str) -> table name. ex.ocr
        case_id_based (bool) -> says whether we have to bring in all the data or only the data for a case_id.
        case_id (str) -> case_id for which we have to bring the data from the table.
        data (dict) -> column_value map or a record in the database.
    Returns:
        result (dict)
    Example:
        data1 = {'ocr':{'comments':'testing','assessable_value':1000}}
        save_data(tenant_id='deloitte.acelive.ai', database='extraction', table='None', data=data1, case_id='DEL754C18D_test', case_id_based = True, view='records')"""
    logging.info(f"tenant_id got is : {tenant_id}")
    logging.info(f"database got is : {database}")
    logging.info(f"table name got is : {table}")
    logging.info(f"data got is : {data}")
    logging.info(f"case_id got is : {case_id}")
    result = {}

    if case_id_based:
        logging.info("data to save is case_id based data.")
        try:
            db_config['tenant_id'] = tenant_id
            # create both connections: the loop below may update either
            # the ocr or the process_queue table regardless of `database`
            extraction_db = DB('extraction', **db_config)
            queue_db = DB('queues', **db_config)

            for table, column_values in data.items():
                if table == 'ocr':
                    extraction_db.update(table,
                                         update=column_values,
                                         where={'case_id': case_id})
                if table == 'process_queue':
                    queue_db.update(table,
                                    update=column_values,
                                    where={'case_id': case_id})
        except Exception as e:
            logging.error("Cannot update the database")
            logging.error(e)
            result['flag'] = False
            result['data'] = {
                'reason': 'Cannot update the database',
                'error_msg': str(e)
            }
            return result
        result['flag'] = True
        result['data'] = data
        return result

    else:
        logging.info("data to save is master based data.")
        try:
            db_config['tenant_id'] = tenant_id
            # only the ocr and process_queue tables get updated here
            extraction_db = DB('extraction', **db_config)
            queue_db = DB('queues', **db_config)

            for table, column_values in data.items():
                logging.info(
                    'TODO: master data save is not implemented yet; the where '
                    'clause cannot be derived from data')
        except Exception as e:
            logging.error("Cannot update the database")
            logging.error(e)
            result['flag'] = False
            result['data'] = {
                'reason': 'Cannot update the database',
                'error_msg': str(e)
            }
            return result

        result['flag'] = True
        result['data'] = data
        return result
Example #7
from flask import Flask
from flask_restful import Api
from server.resources import SimilarUserList
from db_utils import DB
from DEFAULTS import SERVER_PORT

app = Flask(__name__)
api = Api(app)
api.add_resource(SimilarUserList, "/similar-users/")

if __name__ == "__main__":
    db = DB()
    db.init_app()
    app.run(port=SERVER_PORT, debug=True)
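A minimal client sketch against the endpoint registered above (assuming the server is running locally on SERVER_PORT and that SimilarUserList returns JSON, as flask_restful resources usually do):

import requests

from DEFAULTS import SERVER_PORT

# Hypothetical request; any query parameters SimilarUserList accepts are omitted.
resp = requests.get(f'http://localhost:{SERVER_PORT}/similar-users/')
print(resp.status_code, resp.json())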
Example #8
File: rrd.py Project: marcoazza/rrd
class RRD:
  def __init__(self,db_name=None):
    if db_name is None:
      db_name = 'rrd.db'

    self.db = DB(db_name)
    self.last_day = [None]*24
    self.last_hour = [None]*60

    t = time.time()
    self.start_h, self.end_h = wnd_1(t)
    self.start_d, self.end_d = wnd_24(t)

  def update_time(func):
    def inner(self,*args, **kwargs):
      t = time.time()
      self.start_h, self.end_h = wnd_1(t)
      self.start_d, self.end_d = wnd_24(t)
      return func(self,*args, **kwargs)
    return inner

  @update_time
  def save(self,epoch,value):
    """Save new data received to database.

    :param: epoch   timestamp of data to be saved
    :param: value   float value to be saved

    timestamp older than 24 hours or in future date
    will be discarded.
    """
    m = self.db.fetch_from_epoch('minutes',trunc_to_min(epoch))
    if m is None and ( self._last_hour(epoch) or self._last_day(epoch)) :
      self.db.insert('minutes',data=(trunc_to_min(epoch), value))


  @update_time
  def query(self,*args):
    """Query informations stored on the database.

    Valid params are 'hours' or 'minutes'.
    Parameter 'hours' return values store in the
    last 24 hours, together with min value stored,
    max value stored and average of the values for the
    last 24 hours.
    'minutes' parameter will return values store in the
    last 60 minutes, together with min value stored,
    max value stored and average of the values for the
    last 60 minutes.
    """

    if args[0] == 'minutes':
      data = self.db.fetch_all('minutes',self.start_h,self.end_h)
    elif args[0] == 'hours':
      data = self.db.fetch_all('hours',self.start_d,self.end_d)

    self.print_time_epoch(args[0], data)

    data_float = [e[1] for e in data]
    if len(data_float) > 0:
      print("min:{}  max:{}  avg:{}".format(min(data_float), max(data_float), sum(data_float) / len(data_float)))
    else:
      print("Not enough information to evaluate min, max, avg. (Empty sequence)")

  def _last_hour(self,epoch):
    epoch = trunc_to_min(epoch)
    return int(self.end_h) <= int(epoch) <= int(self.start_h)


  def _last_day(self,epoch):
    epoch = trunc_to_min(epoch)
    return int(self.end_d) <= int(epoch) <= int(self.start_d)


  def print_time_epoch(self, table, data):
    print('{} {} {}'.format('-' * 5, table.upper(), '-' * 5))
    for epoch, value in data:
      print('({}): <{}>  ==> {}'.format(epoch, time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(epoch)), value))
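A minimal usage sketch for the class above (assuming wnd_1, wnd_24 and trunc_to_min are defined elsewhere in rrd.py):

rrd = RRD()                   # opens (or creates) rrd.db
rrd.save(time.time(), 42.0)   # samples older than 24 hours are discarded
rrd.query('minutes')          # prints per-minute values plus min/max/avg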
Example #9
def run_chained_rules(case_id,
                      tenant_id,
                      chain_rules,
                      start_rule_id=None,
                      updated_tables=False,
                      trace_exec=None,
                      rule_params=None):
    """Execute the chained rules"""

    # get the mapping of the rules...basically a rule_id maps to a rule
    rule_id_mapping = {}
    for ind, rule in chain_rules.iterrows():
        rule_id_mapping[rule['rule_id']] = [
            rule['rule_string'], rule['next_if_sucess'],
            rule['next_if_failure'], rule['stage'], rule['description'],
            rule['data_source']
        ]
    logging.info(f"\n rule id mapping is \n{rule_id_mapping}\n")

    # evaluate the rules one by one as chained
    if start_rule_id is None:
        if rule_id_mapping.keys():
            start_rule_id = list(rule_id_mapping.keys())[0]
            trace_exec = []
            rule_params = {}

    # if start_rule_id was given, the call is coming from another service;
    # get the existing trace and rule params data
    db_config['tenant_id'] = tenant_id
    business_rules_db = DB('business_rules', **db_config)
    rule_data_query = "SELECT * from `rule_data` where `case_id`=%s"
    params = [case_id]
    df = business_rules_db.execute(rule_data_query, params=params)
    try:
        trace_exec = json.loads(list(df['trace_data'])[0])
        logging.info(f"\nexisting trace exec is \n{trace_exec}\n")
    except Exception as e:
        logging.info("no existing trace data")
        logging.info(f"{str(e)}")
        trace_exec = []

    try:
        rule_params = json.loads(list(df['rule_params'])[0])
        logging.info(f"\nexisting rule_params is \n{rule_params}\n")
    except Exception as e:
        logging.info("no existing rule params data")
        logging.info(f"{str(e)}")
        rule_params = {}

    # master data usually does not change, so fetch it once instead of per rule
    master_data_tables = get_data_sources(tenant_id,
                                          case_id,
                                          'master',
                                          master=True)

    logging.info(f"\nStart rule id got is {start_rule_id}\n ")
    while start_rule_id != "END":
        # get the rules, next rule id to be evaluated
        rule_to_evaluate, next_if_sucess, next_if_failure, stage, description, data_source = rule_id_mapping[
            str(start_rule_id)]

        logging.info(
            f"\nInside the loop \n rule_to_evaluate  {rule_to_evaluate}\n \
                      \nnext_if_sucess {next_if_sucess}\n \
                      \nnext_if_failure {next_if_failure}\n ")

        # update the data_table if there is any change
        case_id_data_tables = get_data_sources(tenant_id, case_id,
                                               'case_id_based')
        master_updated_tables = {}
        if updated_tables:
            master_updated_tables = get_data_sources(tenant_id, case_id,
                                                     'updated_tables')
        # consolidate the data into data_tables
        data_tables = {
            **case_id_data_tables,
            **master_data_tables,
            **master_updated_tables
        }

        # evaluate the rule
        rules = [json.loads(rule_to_evaluate)]
        BR = BusinessRules(case_id, rules, data_tables)
        BR.tenant_id = tenant_id
        decision = BR.evaluate_rule(rules[0])

        logging.info(
            f"\n got the decision {decision} for the rule id {start_rule_id}")
        logging.info(f"\n updates got are {BR.changed_fields}")

        updates = {}
        # update the updates if any
        if BR.changed_fields:
            updates = BR.changed_fields
            update_tables(case_id, tenant_id, updates)

        # update the trace_data
        trace_exec.append(start_rule_id)

        logging.info(
            f"\n params data used from the rules are \n {BR.params_data}\n")
        # update the rule_params
        trace_dict = {
            str(start_rule_id): {
                'description': description
                if description else 'No description available in the database',
                'output': "",
                'input': to_DT_data(BR.params_data['input'])
            }
        }
        rule_params.update(trace_dict)
        # update the start_rule_id based on the decision
        if decision:
            start_rule_id = next_if_sucess
        else:
            start_rule_id = next_if_failure
        logging.info(f"\n next rule id to execute is {start_rule_id}\n")

    # the loop exits before recording the terminal rule id, so append it here
    trace_exec.append(start_rule_id)

    # store the trace_exec and rule_params in the database
    update_rule_params_query = "INSERT INTO `rule_data` (`id`, `case_id`, `rule_params`) VALUES (NULL,%s,%s) ON DUPLICATE KEY UPDATE `rule_params`=%s"
    params = [
        case_id,
        json.dumps(rule_params, default=str),
        json.dumps(rule_params, default=str)
    ]
    business_rules_db.execute(update_rule_params_query, params=params)

    update_trace_exec_query = "INSERT INTO `rule_data` (`id`, `case_id`, `trace_data`) VALUES (NULL,%s,%s) ON DUPLICATE KEY UPDATE `trace_data`=%s"
    params = [case_id, json.dumps(trace_exec), json.dumps(trace_exec)]
    business_rules_db.execute(update_trace_exec_query, params=params)

    logging.info("\n Applied chained rules successfully")
    return 'Applied chained rules successfully'
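For reference, run_chained_rules expects chain_rules as a DataFrame with the columns read above; a minimal illustrative frame (the ids, stages and descriptions are made up, and rule_string would hold the JSON-encoded rule consumed by BusinessRules):

import pandas as pd

# Hypothetical two-rule chain: rule '1' goes to '2' on success, ends on failure.
chain_rules = pd.DataFrame([
    {'rule_id': '1', 'rule_string': '{}', 'next_if_sucess': '2',
     'next_if_failure': 'END', 'stage': 'validation',
     'description': 'first rule', 'data_source': 'master'},
    {'rule_id': '2', 'rule_string': '{}', 'next_if_sucess': 'END',
     'next_if_failure': 'END', 'stage': 'validation',
     'description': 'second rule', 'data_source': 'case_id_based'},
])
run_chained_rules('DEL754C18D_test', 'deloitte.acelive.ai', chain_rules)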
def consume(broker_url='broker:9092'):
    try:
        route = 'run_business_rule'
        logging.info(f'Listening to topic: {route}')

        consumer = create_consumer(route)
        logging.debug('Consumer object created.')

        parts = consumer.partitions_for_topic(route)
        if parts is None:
            logging.warning(f'No partitions for topic `{route}`')
            logging.debug(f'Creating Topic: {route}')
            produce(route, {})
            print(f'Listening to topic `{route}`...')
            while parts is None:
                consumer = create_consumer(route)
                parts = consumer.partitions_for_topic(route)
                logging.warning('No partitions yet. Retrying.')

        partitions = [TopicPartition(route, p) for p in parts]
        consumer.assign(partitions)

        for message in consumer:
            data = message.value
            logging.info(f'Message: {data}')

            try:
                case_id = data['case_id']
                functions = data.pop('functions', None)
                tenant_id = data['tenant_id']
                start_rule_id = data.pop('start_rule_id', None)
                stage = data.get('stage', None)
                workflow = data.get('workflow', None)
                file_name = data.get('file_name', None)
                files = data.get('files', None)
                source = data.get('source', None)
                file_path = data.get('file_path', None)
                original_file_name = data.get('original_file_name', None)
            except Exception as e:
                logging.warning(f'Received unknown data. [{data}] [{e}]')
                consumer.commit()
                continue

            db_config['tenant_id'] = tenant_id
            queue_db = DB('queues', **db_config)
            kafka_db = DB('kafka', **db_config)

            query = 'SELECT * FROM `button_functions` WHERE `route`=%s'
            function_info = queue_db.execute(query, params=[route])
            in_progress_message = list(function_info['in_progress_message'])[0]
            failure_message = list(function_info['failure_message'])[0]
            success_message = list(function_info['success_message'])[0]

            message_flow = kafka_db.get_all('grouped_message_flow')

            # Get which button (group in kafka table) this function was called from
            group = data.pop('group', None)
            if group:
                # Get message group functions
                group_messages = message_flow.loc[message_flow['message_group']
                                                  == group]

                # If it's the first function, update the progress count
                first_flow = group_messages.head(1)
                first_topic = first_flow.loc[first_flow['listen_to_topic'] ==
                                             route]

                query = 'UPDATE `process_queue` SET `status`=%s, `total_processes`=%s, `case_lock`=1 WHERE `case_id`=%s'

                if not first_topic.empty:
                    logging.debug(
                        f'`{route}` is the first topic in the group `{group}`.'
                    )
                    logging.debug(
                        f'Number of topics in group `{group}` is {len(group_messages)}'
                    )
                    if list(first_flow['send_to_topic'])[0] is None:
                        queue_db.execute(query,
                                         params=[
                                             in_progress_message,
                                             len(group_messages), case_id
                                         ])
                    else:
                        queue_db.execute(query,
                                         params=[
                                             in_progress_message,
                                             len(group_messages) + 1, case_id
                                         ])

            # Getting the correct data for the functions. This data will be passed through
            # rest of the chained functions.
            function_params = {}
            if functions:
                for function in functions:
                    if function['route'] == route:
                        function_params = function['parameters']
                        break
            else:
                function_params['stage'] = [stage]

            # Call the function
            try:
                logging.debug('Calling function `run_business_rule`')
                result = apply_business_rule(case_id, function_params,
                                             tenant_id, start_rule_id)
                logging.debug(f'Ran business rules with stage {stage}')
                logging.debug(f"result flag is {result.get('flag', '')}")
            except Exception:
                # Unlock the case.
                logging.exception(
                    'Something went wrong while applying business rules. Check trace.')
                query = 'UPDATE `process_queue` SET `status`=%s, `case_lock`=0, `failure_status`=1 WHERE `case_id`=%s'
                queue_db.execute(query, params=[failure_message, case_id])
                consumer.commit()
                continue

            # Check if the function was successfully executed
            if group:
                if result['flag']:
                    # If there is only one function in the group, unlock the case.
                    if not first_topic.empty:
                        if list(first_flow['send_to_topic'])[0] is None:
                            # It is the last message. So update file status to completed.
                            query = 'UPDATE `process_queue` SET `status`=%s, `case_lock`=0, `completed_processes`=`completed_processes`+1 WHERE `case_id`=%s'
                            queue_db.execute(query,
                                             params=[success_message, case_id])
                            consumer.commit()
                            continue

                    last_topic = group_messages.tail(1).loc[
                        group_messages['send_to_topic'] == route]

                    # If it is not the last message, then produce to next function else just unlock case.
                    if last_topic.empty:
                        # Get next function name
                        next_topic = list(group_messages.loc[
                            group_messages['listen_to_topic'] == route]
                                          ['send_to_topic'])[0]

                        if next_topic is not None:
                            logging.debug('Not the last topic of the group.')
                            produce(next_topic, data)

                        # Update the progress count by 1
                        query = 'UPDATE `process_queue` SET `status`=%s, `completed_processes`=`completed_processes`+1 WHERE `case_id`=%s'
                        queue_db.execute(query,
                                         params=[success_message, case_id])
                        consumer.commit()
                    else:
                        # It is the last message. So update file status to completed.
                        logging.debug('Last topic of the group.')
                        query = 'UPDATE `process_queue` SET `status`=%s, `case_lock`=0, `completed_processes`=`completed_processes`+1 WHERE `case_id`=%s'
                        queue_db.execute(query,
                                         params=[success_message, case_id])
                        consumer.commit()
                else:
                    # Unlock the case.
                    logging.debug(
                        'Flag false. Unlocking case with failure status 1.')
                    query = 'UPDATE `process_queue` SET `status`=%s, `case_lock`=0, `failure_status`=1 WHERE `case_id`=%s'
                    queue_db.execute(query, params=[failure_message, case_id])
                    consumer.commit()
            else:
                data = result['send_data'] if 'send_data' in result else {}
                data['case_id'] = case_id
                data['tenant_id'] = tenant_id
                data['workflow'] = workflow
                data['type'] = 'file_ingestion'
                data['file_name'] = file_name
                data['files'] = files
                data['source'] = source
                data['file_path'] = file_path
                data['original_file_name'] = original_file_name

                query = 'SELECT * FROM `message_flow` WHERE `listen_to_topic`=%s AND `workflow`=%s'
                logging.debug(f'topic - {route} , workflow - {workflow}')
                message_flow = kafka_db.execute(query,
                                                params=[route, workflow])

                if message_flow.empty:
                    logging.error(
                        '`kafka table` is not configured correctly in message flow table.'
                    )
                    send_to_topic = None
                else:
                    send_to_topic = list(message_flow.send_to_topic)[0]

                if send_to_topic is not None:
                    logging.info(f'Producing to topic {send_to_topic}')
                    produce(send_to_topic, data)
                else:
                    logging.info('There is no topic to send to.')
                if 'send_to_topic' in result:
                    send_to_topic_bypassed = result['send_to_topic']
                    produce(send_to_topic_bypassed, {})
                else:
                    logging.error(
                        'Message not consumed. Some error must have occurred. Will try again!'
                    )
                consumer.commit()
    except Exception:
        logging.exception('Something went wrong in consumer. Check trace.')
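For reference, a minimal message the consumer above can handle (field values are illustrative; only case_id and tenant_id are strictly required by the parsing logic, the rest default to None):

# Hypothetical payload for the `run_business_rule` topic, sent with the same
# produce helper used above.
message = {
    'case_id': 'DEL754C18D_test',
    'tenant_id': 'deloitte.acelive.ai',
    'stage': 'validation',
    'workflow': 'invoice',   # hypothetical workflow name
}
produce('run_business_rule', message)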