def mutate(self, info, input):
    """Create an indicator parameter type from the mutation input.

    The input is converted to a dictionary with
    ``api_utils.input_to_dictionary`` and passed as keyword arguments to
    ``Operation('ModelIndicatorParameterType').create``. The created record
    is returned wrapped in ``CreateIndicatorParameterType``.
    """
    fields = api_utils.input_to_dictionary(input)
    created = Operation('ModelIndicatorParameterType').create(**fields)
    return CreateIndicatorParameterType(parameter_type=created)
Ejemplo n.º 2
0
    def execute(self, indicator_id=None):
        """Run the indicators of the batch owner inside a new batch.

        A batch is opened with ``self.start()``, every matching indicator is
        executed with the batch Id, then the batch is closed with
        ``self.stop()``.

        Args:
            indicator_id: Optional indicator Id. When given, only the
                indicator with this Id (and this batch owner) is read.

        Returns:
            The batch object returned by ``self.start()``.
        """
        batch = self.start()

        # Filter on the indicator Id only when one was supplied
        filters = {}
        if indicator_id is not None:
            filters['id'] = indicator_id
        filters['batchOwnerId'] = self.batch_owner_id
        indicators = Operation('ModelIndicator').read(**filters)

        for indicator in indicators:
            MethodIndicator(indicator.id).execute(batch.id)

        self.stop(batch.id)

        message = 'Batch with Id {} completed successfully'.format(batch.id)
        self.error_message['message'] = message
        log.info(message)
        return batch
Ejemplo n.º 3
0
 def start(self):
     """
     Open a new batch for the batch owner and return it.

     * Logs the batch owner Id
     * Inserts a batch with status Running (Id: 1)
     * Returns the object produced by the insert
     """
     owner_id = self.batch_owner_id
     log.info('Starting batch for batch owner Id: {}'.format(owner_id))
     return Operation('ModelBatch').create(batchOwnerId=owner_id, statusId=1)
Ejemplo n.º 4
0
    def fail(self, batch_id):
        """
        Mark a running batch as failed.

        * Looks for the batch with the given Id and status Running (Id: 1)
        * If found, updates its status to Failed (Id: 3) and returns the
          object produced by the update
        * Otherwise logs an error and returns the error message dictionary
        """
        log.info('Failing batch for batch owner Id: {}'.format(
            self.batch_owner_id))

        running_batches = Operation('ModelBatch').read(id=batch_id, statusId=1)
        if running_batches:
            # The batch is running: flag it as failed
            return Operation('ModelBatch').update(id=batch_id, statusId=3)

        message = 'Cannot fail batch because batch with Id {} is not running'.format(
            batch_id)
        self.error_message['message'] = message
        log.error(message)
        return self.error_message
    def get_database_connection(self):
        """Open a connection to the database data source and return it.

        The connection string stored on the data source is completed with a
        ``uid=`` entry when a login is set and a ``pwd=`` entry when a
        password is set (the stored password is decrypted first).

        Supported data source type Ids:
        1 Hive, 2 Impala, 3 Microsoft SQL Server, 4 MySQL, 5 PostgreSQL,
        6 SQLite, 7 Teradata. SQLite is opened with ``sqlite3``; every other
        type is opened with ``pyodbc``.

        Returns:
            The open connection object.

        Raises:
            ValueError: If the data source type Id is not supported. The
                previous implementation failed with an UnboundLocalError
                in that case.
        """
        data_source = self.data_source
        type_id = data_source.dataSourceTypeId

        # Reject unsupported types before touching credentials
        if type_id not in (1, 2, 3, 4, 5, 6, 7):
            raise ValueError(
                'Unsupported database data source type Id: {}'.format(type_id))

        connection_string = data_source.connectionString

        # Add login to connection string if it is not empty
        if data_source.login:
            connection_string = connection_string + 'uid={};'.format(
                data_source.login)

        # Add password to connection string if it is not empty
        if data_source.password:
            password = Operation.encryption('decrypt', data_source.password)
            connection_string = connection_string + 'pwd={};'.format(password)

        # SQLite is the only type that does not go through ODBC
        if type_id == 6:
            return sqlite3.connect(connection_string)

        connection = pyodbc.connect(connection_string)

        if type_id in (1, 2):
            # Hive and Impala
            connection.setencoding(encoding='utf-8')
        elif type_id == 7:
            # Teradata
            connection.setdecoding(pyodbc.SQL_CHAR, encoding='utf-8')
            connection.setdecoding(pyodbc.SQL_WCHAR, encoding='utf-8')
            connection.setdecoding(pyodbc.SQL_WMETADATA, encoding='utf-8')
            connection.setencoding(encoding='utf-8')

        # Microsoft SQL Server, MySQL and PostgreSQL need no extra setup
        return connection
Ejemplo n.º 6
0
    def __init__(self, indicator_id):
        """Load the indicator with the given Id.

        Args:
            indicator_id: Id used to read the indicator.

        Raises:
            ValueError: If no indicator with this Id exists. The message is
                also stored in ``self.error_message`` and logged.
        """
        # Initialize dictionary for error message
        self.error_message = {}

        # Verify indicator exists
        indicator_list = Operation('ModelIndicator').read(id=indicator_id)
        if not indicator_list:
            self.error_message[
                'message'] = 'Indicator with Id {} does not exist'.format(
                    indicator_id)
            log.error(self.error_message['message'])
            # __init__ must return None: returning the error dictionary
            # raised an unrelated TypeError, so fail explicitly instead.
            raise ValueError(self.error_message['message'])

        self.indicator = indicator_list[0]
Ejemplo n.º 7
0
    def __init__(self, batch_owner_id):
        """Load the batch owner with the given Id.

        Args:
            batch_owner_id: Id used to read the batch owner.

        Raises:
            ValueError: If no batch owner with this Id exists. The message
                is also stored in ``self.error_message`` and logged.
        """
        # Initialize dictionary for error message
        self.error_message = {}

        # Verify batch owner exists
        batch_owner_list = Operation('ModelBatchOwner').read(id=batch_owner_id)
        if not batch_owner_list:
            self.error_message[
                'message'] = 'Batch owner with Id {} does not exist'.format(
                    batch_owner_id)
            log.error(self.error_message['message'])
            # __init__ must return None: returning the error dictionary
            # raised an unrelated TypeError, so fail explicitly instead.
            raise ValueError(self.error_message['message'])

        self.batch_owner_id = batch_owner_list[0].id
Ejemplo n.º 8
0
    def __init__(self, event_type):
        """Load the event type with the given name.

        Args:
            event_type: Name used to read the event type.

        Raises:
            ValueError: If no event type with this name exists. The message
                is also stored in ``self.error_message`` and logged.
        """
        # Initialize dictionary for error message
        self.error_message = {}

        # Verify event type exists
        event_type_list = Operation('ModelEventType').read(name=event_type)
        if not event_type_list:
            self.error_message[
                'message'] = 'Cannot log event because event type {} does not exist'.format(
                    event_type)
            log.error(self.error_message['message'])
            # __init__ must return None: returning the error dictionary
            # raised an unrelated TypeError, so fail explicitly instead.
            raise ValueError(self.error_message['message'])

        self.event_type_id = event_type_list[0].id
        self.event_type = event_type_list[0].name
    def __init__(self, data_source_name):
        """Load the data source with the given name.

        Args:
            data_source_name: Name used to read the data source.

        Raises:
            ValueError: If no data source with this name exists. The message
                is also stored in ``self.error_message`` and logged.
        """
        # Initialize dictionary for error message
        self.error_message = {}

        # Verify data source exists
        data_source_list = Operation('ModelDataSource').read(
            name=data_source_name)

        if not data_source_list:
            self.error_message[
                'message'] = 'No {} found with values: {}'.format(
                    'DataSource', {'name': data_source_name})
            log.error(self.error_message['message'])
            # __init__ must return None: returning the error dictionary
            # raised an unrelated TypeError, so fail explicitly instead.
            raise ValueError(self.error_message['message'])

        self.data_source = data_source_list[0]
Ejemplo n.º 10
0
    def compute_indicator_result(self, session_id, parameters,
                                 result_data_frame):
        """Store the result summary of the indicator and return the alert count.

        Counts the rows of ``result_data_frame``, the rows whose ``Alert``
        column equals True and the rows whose ``Alert`` column equals False,
        then inserts these counts with the alert operator (``parameters[1]``)
        and alert threshold (``parameters[2]``) as an indicator result.

        Returns:
            The number of rows whose ``Alert`` column equals True.
        """
        operator, threshold = parameters[1], parameters[2]

        total_count = len(result_data_frame)
        alert_rows = result_data_frame.loc[result_data_frame['Alert'] == True]
        alert_count = len(alert_rows)
        no_alert_rows = result_data_frame.loc[
            result_data_frame['Alert'] == False]
        no_alert_count = len(no_alert_rows)

        # Persist the summary for this session
        summary = dict(
            indicatorId=self.indicator.id,
            sessionId=session_id,
            alertOperator=operator,
            alertThreshold=threshold,
            nbRecords=total_count,
            nbRecordsAlert=alert_count,
            nbRecordsNoAlert=no_alert_count)
        Operation('ModelIndicatorResult').create(**summary)
        return alert_count
    def get_data_frame(self, request):
        """Execute a request on the data source and return a pandas data frame.

        The data source type is read from its Id and its ``parentType``
        decides how the request is executed. Only 'Database' is implemented:
        a connection is opened, the request is run with ``pandas.read_sql``
        and the connection is closed afterwards.

        Args:
            request: Request passed to ``pandas.read_sql``.

        Returns:
            The resulting data frame, or the error message dictionary when
            the data source type cannot be found or its parent type is
            unknown.

        Raises:
            NotImplementedError: If the parent type is 'File' or 'API'. These
                branches previously fell through to an UnboundLocalError.
        """
        # Identify the type of data source
        data_source_type_list = Operation('ModelDataSourceType').read(
            id=self.data_source.dataSourceTypeId)

        if not data_source_type_list:
            self.error_message[
                'message'] = 'No {} found with values: {}'.format(
                    'DataSourceType',
                    {'id': self.data_source.dataSourceTypeId})
            log.error(self.error_message['message'])
            return self.error_message

        parent_type = data_source_type_list[0].parentType

        # Database
        if parent_type == 'Database':
            connection = self.get_database_connection()
            try:
                return pandas.read_sql(request, connection)
            finally:
                # The data frame is fully loaded, release the connection
                connection.close()

        # File and API are not implemented yet
        if parent_type in ('File', 'API'):
            self.error_message[
                'message'] = 'Data source type {} is not implemented yet'.format(
                    parent_type)
            log.error(self.error_message['message'])
            raise NotImplementedError(self.error_message['message'])

        self.error_message[
            'message'] = 'Unknown data source type: {}'.format(parent_type)
        log.error(self.error_message['message'])
        return self.error_message
Ejemplo n.º 12
0
 def mutate(self, info, input):
     """Update a session from the mutation input.

     The input is converted to a dictionary and passed as keyword
     arguments to ``Operation('ModelSession').update``; the result is
     returned wrapped in ``UpdateSession``.
     """
     fields = api_utils.input_to_dictionary(input)
     updated = Operation('ModelSession').update(**fields)
     return UpdateSession(session=updated)
 def mutate(self, info, input):
     """Create an indicator result summary from the mutation input.

     The input is converted to a dictionary and passed as keyword
     arguments to ``Operation('ModelIndicatorResult').create``; the result
     is returned wrapped in ``CreateIndicatorResult``.
     """
     fields = api_utils.input_to_dictionary(input)
     created = Operation('ModelIndicatorResult').create(**fields)
     return CreateIndicatorResult(indicator_result=created)
Ejemplo n.º 14
0
    def log_event(self, indicator_id, batch_id, data_set=None):
        """
        Manage session status and event logging for the corresponding data quality indicator. Return event object.

        If event is Start (event type Id: 1):
        * Insert a new session
        * New session status is set to Running (Id: 1)
        * Insert a new Start event
        * Returns the corresponding event object

        If event is Stop (event type Id: 2):
        * Terminate an existing running session
        * Existing session status is set to Succeeded (Id: 2)
        * Insert a new Stop event
        * Returns the corresponding event object

        If event is Error (event type Id: 3):
        * Terminate an existing running session
        * Existing session status is set to Failed (Id: 3)
        * Insert a new Error event
        * Returns the corresponding event object

        On failure (no running session for a Stop or Error event, or an
        event type Id other than 1, 2 or 3) the error is logged and the
        error message dictionary is returned instead of an event.
        """
        if not data_set:
            data_set = {}

        # Log start event, insert new running session
        if self.event_type_id == 1:
            log.info(
                'Starting session for indicator Id: {}'.format(indicator_id))

            # Insert new running session and start event
            session = Operation('ModelSession').create(
                indicatorId=indicator_id, batchId=batch_id, statusId=1)
            return Operation('ModelEvent').create(
                eventTypeId=self.event_type_id,
                sessionId=session.id,
                content=data_set)

        # Stop and Error events share one flow; they only differ by the log
        # message and by the final status of the session.
        # event type Id -> (log message template, final session status Id)
        closing_events = {
            2: ('Stoping session for indicator Id: {}', 2),  # Succeeded
            3: ('Failing session for indicator Id: {}', 3),  # Failed
        }

        if self.event_type_id not in closing_events:
            # Previously this case crashed on an unbound local variable
            self.error_message[
                'message'] = 'Cannot log {} event because event type Id {} is not supported'.format(
                    self.event_type, self.event_type_id)
            log.error(self.error_message['message'])
            return self.error_message

        log_template, final_status_id = closing_events[self.event_type_id]
        log.info(log_template.format(indicator_id))

        # Verify current indicator is running
        session_list = Operation('ModelSession').read(
            indicatorId=indicator_id, batchId=batch_id, statusId=1)
        if not session_list:
            self.error_message[
                'message'] = '''Cannot log {} event because indicator with Id {}
                 does not have a running session with batch Id {}'''.format(
                    self.event_type, indicator_id, batch_id)
            log.error(self.error_message['message'])
            return self.error_message

        # Insert event and terminate running session
        event = Operation('ModelEvent').create(
            eventTypeId=self.event_type_id,
            sessionId=session_list[0].id,
            content=data_set)
        Operation('ModelSession').update(
            id=session_list[0].id, statusId=final_status_id)

        return event
 def mutate(self, info, input):
     """Create an indicator from the mutation input.

     The input is converted to a dictionary and passed as keyword
     arguments to ``Operation('ModelIndicator').create``; the result is
     returned wrapped in ``CreateIndicator``.
     """
     fields = api_utils.input_to_dictionary(input)
     created = Operation('ModelIndicator').create(**fields)
     return CreateIndicator(indicator=created)
Ejemplo n.º 16
0
 def mutate(self, info, input):
     """Update a data source from the mutation input.

     The input is converted to a dictionary and passed as keyword
     arguments to ``Operation('ModelDataSource').update``; the result is
     returned wrapped in ``UpdateDataSource``.
     """
     fields = api_utils.input_to_dictionary(input)
     updated = Operation('ModelDataSource').update(**fields)
     return UpdateDataSource(data_source=updated)
Ejemplo n.º 17
0
 def mutate(self, info, input):
     """Create a batch owner from the mutation input.

     The input is converted to a dictionary and passed as keyword
     arguments to ``Operation('ModelBatchOwner').create``; the result is
     returned wrapped in ``CreateBatchOwner``.
     """
     fields = api_utils.input_to_dictionary(input)
     created = Operation('ModelBatchOwner').create(**fields)
     return CreateBatchOwner(batch_owner=created)
Ejemplo n.º 18
0
 def mutate(self, info, input):
     """Update a batch owner from the mutation input.

     The input is converted to a dictionary and passed as keyword
     arguments to ``Operation('ModelBatchOwner').update``; the result is
     returned wrapped in ``UpdateBatchOwner``.
     """
     fields = api_utils.input_to_dictionary(input)
     updated = Operation('ModelBatchOwner').update(**fields)
     return UpdateBatchOwner(batch_owner=updated)
Ejemplo n.º 19
0
    def execute(self, batch_id):
        """Execute a data quality indicator.

        Steps:
        * Log a Start event and keep the session Id of that event
        * Read the indicator parameters into a dictionary keyed by
          parameter type Id (1 alert operator, 2 alert threshold,
          3 measures, 4 dimensions, 5 target, 6 target request, 7 source,
          8 source request, 9 distribution list)
        * Load the source and/or target data frames and rename their
          columns to dimensions followed by measures
        * Call the indicator type function, named in the database, on the
          data frames
        * Store the result summary
        * When a distribution list exists and at least one row is in alert,
          send the results by e-mail as a csv attachment
        * Log a Stop event

        Args:
            batch_id: Id of the batch the session belongs to.

        Returns:
            The error message dictionary. It holds a success message when
            the indicator completed, or the error when the indicator has no
            measures parameter.
        """
        start_event = MethodEvent('Start').log_event(self.indicator.id,
                                                     batch_id)
        session_id = start_event.sessionId

        # Get indicator parameters
        indicator_parameter_list = Operation('ModelIndicatorParameter').read(
            indicatorId=self.indicator.id)

        # Create dictionary from indicator parameters
        parameters = {
            indicator_parameter.parameterTypeId: indicator_parameter.value
            for indicator_parameter in indicator_parameter_list
        }

        # Verify parameters exist and convert them to list objects
        # Dimension parameter
        if 4 in parameters:
            parameters[4] = ['indicator_id'] + literal_eval(parameters[4])
        else:
            parameters[4] = ['indicator_id']

        # Measure parameter
        if 3 in parameters:
            parameters[3] = literal_eval(parameters[3])
        else:
            self.error_message[
                'message'] = 'Indicator with Id {} does not have any measures parameter'.format(
                    self.indicator.id)
            log.error(self.error_message['message'])
            return self.error_message

        # Get source and target data frames
        data_sets = {}
        for parameter in parameters:
            if parameter == 7:  # Source
                log.info(
                    'Getting data set from parameter Id {}, data source: {}'.
                    format(parameter, parameters[parameter]))
                data_source_name = parameters[parameter]
                source_data_frame = MethodDataSource(
                    data_source_name).get_data_frame(
                        parameters[8])  # Source request
                source_data_frame.insert(loc=0,
                                         column='indicator_id',
                                         value=self.indicator.id)
                data_sets['Source data frame'] = source_data_frame

            elif parameter == 5:  # Target
                log.info(
                    'Getting data set from parameter Id {}, data source: {}'.
                    format(parameter, parameters[parameter]))
                data_source_name = parameters[parameter]
                target_data_frame = MethodDataSource(
                    data_source_name).get_data_frame(
                        parameters[6])  # Target request
                target_data_frame.insert(loc=0,
                                         column='indicator_id',
                                         value=self.indicator.id)
                data_sets['Target data frame'] = target_data_frame

        # Verify data frames are not empty (only logged, execution continues)
        for data_frame_name in data_sets:
            if data_sets[data_frame_name].empty:
                log.error('{} is empty'.format(data_frame_name))

        # Format source and target data set with dimensions and measures parameters
        for data_frame_name in data_sets:
            log.info('Formatting {}'.format(data_frame_name))
            data_frame = data_sets[data_frame_name]
            column_name_list = parameters[4] + parameters[3]
            data_frame.columns = column_name_list
            for column in parameters[4]:
                data_frame[column] = data_frame[column].astype(str)
            data_sets[data_frame_name] = data_frame

        # Get indicator function and execute it
        indicator_type_list = Operation('ModelIndicatorType').read(
            id=self.indicator.indicatorTypeId)
        indicator_function = indicator_type_list[0].function
        result_data_frame = getattr(self, indicator_function)(data_sets,
                                                              parameters)

        # Compute indicator result summary
        log.info('Computing result summary for indicator Id: {}'.format(
            self.indicator.id))
        nb_records_alert = self.compute_indicator_result(
            session_id, parameters, result_data_frame)

        # Send e-mail alert
        if 9 in parameters and not result_data_frame.loc[
                result_data_frame['Alert'] == True].empty:
            # Create csv file to send in attachment
            file_name = 'indicator_{}_session_{}.csv'.format(
                self.indicator.id, session_id)
            # os.path.join keeps the path relative when dirname is empty
            file_path = os.path.join(os.path.dirname(__file__), file_name)
            result_data_frame.to_csv(file_path, header=True, index=False)

            try:
                # Convert distribution list parameter to python list
                parameters[9] = literal_eval(parameters[9])

                # Prepare e-mail body
                body = {}
                body['indicator_name'] = self.indicator.name
                body['alert_threshold'] = parameters[1] + parameters[
                    2]  # 'Alert operator' + Alert threshold
                body['nb_records_alert'] = nb_records_alert
                body['log_url'] = 'http://'  # To be updated

                # Send e-mail
                log.info(
                    'Sending e-mail alert for indicator Id {} and session Id {}'.
                    format(self.indicator.id, session_id))
                api_utils.send_mail(template='indicator',
                                    distribution_list=parameters[9],
                                    attachment=file_path,
                                    **body)
            finally:
                # Delete csv file even when sending the e-mail fails
                os.remove(file_path)

        MethodEvent('Stop').log_event(self.indicator.id, batch_id)
        self.error_message[
            'message'] = 'Indicator with Id {} completed successfully'.format(
                self.indicator.id)
        log.info(self.error_message['message'])
        return self.error_message
Ejemplo n.º 20
0
 def mutate(self, info, input):
     """Update an event type from the mutation input.

     The input is converted to a dictionary and passed as keyword
     arguments to ``Operation('ModelEventType').update``; the result is
     returned wrapped in ``UpdateEventType``.
     """
     fields = api_utils.input_to_dictionary(input)
     updated = Operation('ModelEventType').update(**fields)
     return UpdateEventType(event_type=updated)
Ejemplo n.º 21
0
 def mutate(self, info, input):
     """Update an event from the mutation input.

     The input is converted to a dictionary and passed as keyword
     arguments to ``Operation('ModelEvent').update``; the result is
     returned wrapped in ``UpdateEvent``.
     """
     fields = api_utils.input_to_dictionary(input)
     updated = Operation('ModelEvent').update(**fields)
     return UpdateEvent(event=updated)
Ejemplo n.º 22
0
 def mutate(self, info, input):
     """Update a batch from the mutation input.

     The input is converted to a dictionary and passed as keyword
     arguments to ``Operation('ModelBatch').update``; the result is
     returned wrapped in ``UpdateBatch``.
     """
     fields = api_utils.input_to_dictionary(input)
     updated = Operation('ModelBatch').update(**fields)
     return UpdateBatch(batch=updated)
Ejemplo n.º 23
0
 def mutate(self, info, input):
     """Update a status from the mutation input.

     The input is converted to a dictionary and passed as keyword
     arguments to ``Operation('ModelStatus').update``; the result is
     returned wrapped in ``UpdateStatus``.
     """
     fields = api_utils.input_to_dictionary(input)
     updated = Operation('ModelStatus').update(**fields)
     return UpdateStatus(status=updated)
 def mutate(self, info, input):
     """Update an indicator parameter from the mutation input.

     The input is converted to a dictionary and passed as keyword
     arguments to ``Operation('ModelIndicatorParameter').update``; the
     result is returned wrapped in ``UpdateIndicatorParameter``.
     """
     fields = api_utils.input_to_dictionary(input)
     updated = Operation('ModelIndicatorParameter').update(**fields)
     return UpdateIndicatorParameter(parameter=updated)
 def mutate(self, info, input):
     """Update an indicator result summary from the mutation input.

     The input is converted to a dictionary and passed as keyword
     arguments to ``Operation('ModelIndicatorResult').update``; the result
     is returned wrapped in ``UpdateIndicatorResult``.
     """
     fields = api_utils.input_to_dictionary(input)
     updated = Operation('ModelIndicatorResult').update(**fields)
     return UpdateIndicatorResult(indicator_result=updated)
Ejemplo n.º 26
0
 def mutate(self, info, input):
     """Create a data source type from the mutation input.

     The input is converted to a dictionary and passed as keyword
     arguments to ``Operation('ModelDataSourceType').create``; the result
     is returned wrapped in ``CreateDataSourceType``.
     """
     fields = api_utils.input_to_dictionary(input)
     created = Operation('ModelDataSourceType').create(**fields)
     return CreateDataSourceType(data_source_type=created)
 def mutate(self, info, input):
     """Update an indicator type from the mutation input.

     The input is converted to a dictionary and passed as keyword
     arguments to ``Operation('ModelIndicatorType').update``; the result is
     returned wrapped in ``UpdateIndicatorType``.
     """
     fields = api_utils.input_to_dictionary(input)
     updated = Operation('ModelIndicatorType').update(**fields)
     return UpdateIndicatorType(indicator_type=updated)
Ejemplo n.º 28
0
 def mutate(self, info, input):
     """Update a data source type from the mutation input.

     The input is converted to a dictionary and passed as keyword
     arguments to ``Operation('ModelDataSourceType').update``; the result
     is returned wrapped in ``UpdateDataSourceType``.
     """
     fields = api_utils.input_to_dictionary(input)
     updated = Operation('ModelDataSourceType').update(**fields)
     return UpdateDataSourceType(data_source_type=updated)
Ejemplo n.º 29
0
 def mutate(self, info, input):
     """Create an event type from the mutation input.

     The input is converted to a dictionary and passed as keyword
     arguments to ``Operation('ModelEventType').create``; the result is
     returned wrapped in ``CreateEventType``.
     """
     fields = api_utils.input_to_dictionary(input)
     created = Operation('ModelEventType').create(**fields)
     return CreateEventType(event_type=created)