def mutate(self, info, input):
    """Create an indicator parameter type from the mutation input.

    The input is turned into a dictionary by
    ``api_utils.input_to_dictionary`` and its items are passed as keyword
    arguments to ``Operation('ModelIndicatorParameterType').create``. The
    created record is returned wrapped in ``CreateIndicatorParameterType``.
    ``info`` is not used.
    """
    payload = api_utils.input_to_dictionary(input)
    created = Operation('ModelIndicatorParameterType').create(**payload)
    return CreateIndicatorParameterType(parameter_type=created)
def execute(self, indicator_id=None):
    """Run a batch for the batch owner and return the batch record.

    A batch is started with ``self.start()``. The ``ModelIndicator`` records
    of ``self.batch_owner_id`` are read (restricted to ``indicator_id`` when
    it is not None), each one is executed through ``MethodIndicator`` and
    ``self.stop`` is then called with the batch Id. A completion message is
    stored in ``self.error_message['message']`` and logged.
    """
    batch = self.start()

    # Filter on the batch owner, and on the indicator Id only when one is given
    filters = {}
    if indicator_id is not None:
        filters['id'] = indicator_id
    filters['batchOwnerId'] = self.batch_owner_id
    indicators = Operation('ModelIndicator').read(**filters)

    for indicator in indicators:
        MethodIndicator(indicator.id).execute(batch.id)

    self.stop(batch.id)

    message = 'Batch with Id {} completed successfully'.format(batch.id)
    self.error_message['message'] = message
    log.info(message)
    return batch
def start(self):
    """
    Start a new batch. Return batch object.

    * Log the batch owner Id the batch is started for
    * Insert a new batch for ``self.batch_owner_id``
    * New batch status is set to Running (Id: 1)
    """
    owner_id = self.batch_owner_id
    log.info('Starting batch for batch owner Id: {}'.format(owner_id))
    return Operation('ModelBatch').create(batchOwnerId=owner_id, statusId=1)
def fail(self, batch_id):
    """
    Fail a running batch. Return batch object.

    * Look up the batch with ``batch_id`` and status Running (Id: 1)
    * If found, set its status to Failed (Id: 3) and return the updated batch
    * Otherwise log an error and return the ``self.error_message`` dictionary
    """
    log.info('Failing batch for batch owner Id: {}'.format(
        self.batch_owner_id))

    running_batches = Operation('ModelBatch').read(id=batch_id, statusId=1)
    if running_batches:
        # Update running batch
        return Operation('ModelBatch').update(id=batch_id, statusId=3)

    message = (
        'Cannot fail batch because batch with Id {} is not running'.format(
            batch_id))
    self.error_message['message'] = message
    log.error(message)
    return self.error_message
def get_database_connection(self):
    """Connect to a data source of type database. Return a connection object.

    The connection string is read from ``self.data_source.connectionString``.
    ``uid=...;`` is appended when the data source has a login and ``pwd=...;``
    when it has a password (the stored password goes through
    ``Operation.encryption('decrypt', ...)`` first).

    Data source type Ids handled:

    * 1 Hive, 2 Impala: ``pyodbc`` connection with UTF-8 encoding
    * 3 Microsoft SQL Server, 4 MySQL, 5 PostgreSQL: plain ``pyodbc``
      connection
    * 6 SQLite: ``sqlite3`` connection
    * 7 Teradata: ``pyodbc`` connection with UTF-8 decoding and encoding

    Raises:
        ValueError: if the data source type Id is none of the above.
            Previously this case failed with an UnboundLocalError on the
            return statement.
    """
    data_source = self.data_source
    connection_string = data_source.connectionString

    # Add login to connection string if it is not empty
    if data_source.login:
        connection_string += 'uid={};'.format(data_source.login)

    # Add password to connection string if it is not empty
    if data_source.password:
        password = Operation.encryption('decrypt', data_source.password)
        connection_string += 'pwd={};'.format(password)

    type_id = data_source.dataSourceTypeId

    # SQLite is the only type that does not go through ODBC
    if type_id == 6:
        return sqlite3.connect(connection_string)

    if type_id not in (1, 2, 3, 4, 5, 7):
        raise ValueError(
            'Unsupported data source type Id: {}'.format(type_id))

    connection = pyodbc.connect(connection_string)

    # Teradata additionally needs explicit decoding settings
    if type_id == 7:
        connection.setdecoding(pyodbc.SQL_CHAR, encoding='utf-8')
        connection.setdecoding(pyodbc.SQL_WCHAR, encoding='utf-8')
        connection.setdecoding(pyodbc.SQL_WMETADATA, encoding='utf-8')

    # Hive, Impala and Teradata set the encoding explicitly
    if type_id in (1, 2, 7):
        connection.setencoding(encoding='utf-8')

    return connection
def __init__(self, indicator_id):
    """Initialize the object for an existing indicator.

    Reads ``ModelIndicator`` by Id and stores the first match in
    ``self.indicator``.

    Raises:
        ValueError: if no indicator with ``indicator_id`` exists. The same
            message is logged and stored in ``self.error_message['message']``.
    """
    # Initialize dictionary for error message
    self.error_message = {}

    # Verify indicator exists
    indicator_list = Operation('ModelIndicator').read(id=indicator_id)
    if not indicator_list:
        message = 'Indicator with Id {} does not exist'.format(indicator_id)
        self.error_message['message'] = message
        log.error(message)
        # __init__ must return None: returning the error dictionary made
        # Python raise an unrelated TypeError, so fail explicitly instead.
        raise ValueError(message)

    self.indicator = indicator_list[0]
def __init__(self, batch_owner_id):
    """Initialize the object for an existing batch owner.

    Reads ``ModelBatchOwner`` by Id and stores the Id of the first match in
    ``self.batch_owner_id``.

    Raises:
        ValueError: if no batch owner with ``batch_owner_id`` exists. The same
            message is logged and stored in ``self.error_message['message']``.
    """
    # Initialize dictionary for error message
    self.error_message = {}

    # Verify batch owner exists
    batch_owner_list = Operation('ModelBatchOwner').read(id=batch_owner_id)
    if not batch_owner_list:
        message = 'Batch owner with Id {} does not exist'.format(
            batch_owner_id)
        self.error_message['message'] = message
        log.error(message)
        # __init__ must return None: returning the error dictionary made
        # Python raise an unrelated TypeError, so fail explicitly instead.
        raise ValueError(message)

    self.batch_owner_id = batch_owner_list[0].id
def __init__(self, event_type):
    """Initialize the object for an existing event type.

    Reads ``ModelEventType`` by name and stores the Id and name of the first
    match in ``self.event_type_id`` and ``self.event_type``.

    Raises:
        ValueError: if no event type named ``event_type`` exists. The same
            message is logged and stored in ``self.error_message['message']``.
    """
    # Initialize dictionary for error message
    self.error_message = {}

    # Verify event type exists
    event_type_list = Operation('ModelEventType').read(name=event_type)
    if not event_type_list:
        message = (
            'Cannot log event because event type {} does not exist'.format(
                event_type))
        self.error_message['message'] = message
        log.error(message)
        # __init__ must return None: returning the error dictionary made
        # Python raise an unrelated TypeError, so fail explicitly instead.
        raise ValueError(message)

    self.event_type_id = event_type_list[0].id
    self.event_type = event_type_list[0].name
def __init__(self, data_source_name):
    """Initialize the object for an existing data source.

    Reads ``ModelDataSource`` by name and stores the first match in
    ``self.data_source``.

    Raises:
        ValueError: if no data source named ``data_source_name`` exists. The
            same message is logged and stored in
            ``self.error_message['message']``.
    """
    # Initialize dictionary for error message
    self.error_message = {}

    # Verify data source exists
    data_source_list = Operation('ModelDataSource').read(
        name=data_source_name)
    if not data_source_list:
        message = 'No {} found with values: {}'.format(
            'DataSource', {'name': data_source_name})
        self.error_message['message'] = message
        log.error(message)
        # __init__ must return None: returning the error dictionary made
        # Python raise an unrelated TypeError, so fail explicitly instead.
        raise ValueError(message)

    self.data_source = data_source_list[0]
def compute_indicator_result(self, session_id, parameters,
                             result_data_frame):
    """Store the aggregated result of an indicator and return the alert count.

    Counts the rows of ``result_data_frame`` in total, with ``Alert`` equal to
    True and with ``Alert`` equal to False, then inserts a
    ``ModelIndicatorResult`` record for ``self.indicator.id`` and
    ``session_id`` holding these counts together with the alert operator
    (``parameters[1]``) and the alert threshold (``parameters[2]``).

    Returns the number of rows in alert.
    """
    operator = parameters[1]  # Alert operator
    threshold = parameters[2]  # Alert threshold

    total = len(result_data_frame)
    # Explicit comparison with True/False is kept on purpose: it is evaluated
    # element-wise on the column (presumably a pandas Series).
    in_alert = len(
        result_data_frame.loc[result_data_frame['Alert'] == True])  # noqa: E712
    not_in_alert = len(
        result_data_frame.loc[result_data_frame['Alert'] == False])  # noqa: E712

    # Insert result to database
    summary = {
        'indicatorId': self.indicator.id,
        'sessionId': session_id,
        'alertOperator': operator,
        'alertThreshold': threshold,
        'nbRecords': total,
        'nbRecordsAlert': in_alert,
        'nbRecordsNoAlert': not_in_alert,
    }
    Operation('ModelIndicatorResult').create(**summary)
    return in_alert
def get_data_frame(self, request):
    """Execute a request on the data source. Return a pandas data frame.

    The data source type is read from ``ModelDataSourceType`` using
    ``self.data_source.dataSourceTypeId`` and its ``parentType`` decides how
    the data is fetched:

    * ``Database``: a connection is opened with
      ``self.get_database_connection()``, ``request`` is run through
      ``pandas.read_sql`` and the connection is closed afterwards
    * ``File`` and ``API``: not implemented yet
    * anything else: unknown data source type

    Returns the data frame on success. In every other case (type not found,
    not implemented, unknown) the error is logged and the
    ``self.error_message`` dictionary is returned instead. The
    not-implemented types used to fail with an UnboundLocalError on the
    return statement.
    """
    # Identify the type of data source
    type_id = self.data_source.dataSourceTypeId
    data_source_type_list = Operation('ModelDataSourceType').read(id=type_id)
    if not data_source_type_list:
        self.error_message['message'] = 'No {} found with values: {}'.format(
            'DataSourceType', {'id': type_id})
        log.error(self.error_message['message'])
        return self.error_message

    parent_type = data_source_type_list[0].parentType

    # Database
    if parent_type == 'Database':
        connection = self.get_database_connection()
        try:
            return pandas.read_sql(request, connection)
        finally:
            # The data frame is fully loaded at this point, so release the
            # connection instead of leaking one per request.
            connection.close()

    # File and API are known types but are not implemented yet
    if parent_type in ('File', 'API'):
        message = 'Data source type {} is not implemented yet'.format(
            parent_type)
    else:
        message = 'Unknown data source type: {}'.format(parent_type)

    self.error_message['message'] = message
    log.error(message)
    return self.error_message
def mutate(self, info, input):
    """Update a session from the mutation input.

    The input is turned into a dictionary by
    ``api_utils.input_to_dictionary`` and its items are passed as keyword
    arguments to ``Operation('ModelSession').update``. The result is returned
    wrapped in ``UpdateSession``. ``info`` is not used.
    """
    fields = api_utils.input_to_dictionary(input)
    updated = Operation('ModelSession').update(**fields)
    return UpdateSession(session=updated)
def mutate(self, info, input):
    """Create an indicator result summary from the mutation input.

    The input is turned into a dictionary by
    ``api_utils.input_to_dictionary`` and its items are passed as keyword
    arguments to ``Operation('ModelIndicatorResult').create``. The created
    record is returned wrapped in ``CreateIndicatorResult``. ``info`` is not
    used.
    """
    payload = api_utils.input_to_dictionary(input)
    created = Operation('ModelIndicatorResult').create(**payload)
    return CreateIndicatorResult(indicator_result=created)
def log_event(self, indicator_id, batch_id, data_set=None):
    """
    Manage session status and event logging for the corresponding data
    quality indicator. Return event object.

    If event is Start (event type Id: 1):
    * Insert a new session
    * New session status is set to Running (Id: 1)
    * Insert a new Start event
    * Returns the corresponding event object

    If event is Stop (event type Id: 2):
    * Terminate an existing running session
    * Existing session status is set to Succeeded (Id: 2)
    * Insert a new Stop event
    * Returns the corresponding event object

    If event is Error (event type Id: 3):
    * Terminate an existing running session
    * Existing session status is set to Failed (Id: 3)
    * Insert a new Error event
    * Returns the corresponding event object

    When a Stop or Error event is requested but the indicator has no running
    session for the batch, or when the event type Id is none of the above,
    the error is logged and the ``self.error_message`` dictionary is returned
    instead of an event. The unsupported case used to fail with an
    UnboundLocalError on the return statement.
    """
    if not data_set:
        data_set = {}

    # Log start event, insert new running session
    if self.event_type_id == 1:
        log.info(
            'Starting session for indicator Id: {}'.format(indicator_id))
        session = Operation('ModelSession').create(
            indicatorId=indicator_id, batchId=batch_id, statusId=1)
        return Operation('ModelEvent').create(
            eventTypeId=self.event_type_id,
            sessionId=session.id,
            content=data_set)

    # Stop and Error events share one code path: they only differ by the log
    # message and by the status the running session ends in.
    # event type Id -> (log message template, final session status Id)
    closing_events = {
        2: ('Stoping session for indicator Id: {}', 2),
        3: ('Failing session for indicator Id: {}', 3),
    }
    if self.event_type_id not in closing_events:
        self.error_message['message'] = (
            'Cannot log event because event type {} is not supported'.format(
                self.event_type))
        log.error(self.error_message['message'])
        return self.error_message

    log_template, final_status_id = closing_events[self.event_type_id]
    log.info(log_template.format(indicator_id))

    # Verify current indicator is running
    session_list = Operation('ModelSession').read(
        indicatorId=indicator_id, batchId=batch_id, statusId=1)
    if not session_list:
        self.error_message['message'] = (
            'Cannot log {} event because indicator with Id {} does not '
            'have a running session with batch Id {}').format(
                self.event_type, indicator_id, batch_id)
        log.error(self.error_message['message'])
        return self.error_message

    # Insert event and terminate running session
    running_session_id = session_list[0].id
    event = Operation('ModelEvent').create(
        eventTypeId=self.event_type_id,
        sessionId=running_session_id,
        content=data_set)
    Operation('ModelSession').update(
        id=running_session_id, statusId=final_status_id)
    return event
def mutate(self, info, input):
    """Create an indicator from the mutation input.

    The input is turned into a dictionary by
    ``api_utils.input_to_dictionary`` and its items are passed as keyword
    arguments to ``Operation('ModelIndicator').create``. The created record
    is returned wrapped in ``CreateIndicator``. ``info`` is not used.
    """
    payload = api_utils.input_to_dictionary(input)
    created = Operation('ModelIndicator').create(**payload)
    return CreateIndicator(indicator=created)
def mutate(self, info, input):
    """Update a data source from the mutation input.

    The input is turned into a dictionary by
    ``api_utils.input_to_dictionary`` and its items are passed as keyword
    arguments to ``Operation('ModelDataSource').update``. The result is
    returned wrapped in ``UpdateDataSource``. ``info`` is not used.
    """
    fields = api_utils.input_to_dictionary(input)
    updated = Operation('ModelDataSource').update(**fields)
    return UpdateDataSource(data_source=updated)
def mutate(self, info, input):
    """Create a batch owner from the mutation input.

    The input is turned into a dictionary by
    ``api_utils.input_to_dictionary`` and its items are passed as keyword
    arguments to ``Operation('ModelBatchOwner').create``. The created record
    is returned wrapped in ``CreateBatchOwner``. ``info`` is not used.
    """
    payload = api_utils.input_to_dictionary(input)
    created = Operation('ModelBatchOwner').create(**payload)
    return CreateBatchOwner(batch_owner=created)
def mutate(self, info, input):
    """Update a batch owner from the mutation input.

    The input is turned into a dictionary by
    ``api_utils.input_to_dictionary`` and its items are passed as keyword
    arguments to ``Operation('ModelBatchOwner').update``. The result is
    returned wrapped in ``UpdateBatchOwner``. ``info`` is not used.
    """
    fields = api_utils.input_to_dictionary(input)
    updated = Operation('ModelBatchOwner').update(**fields)
    return UpdateBatchOwner(batch_owner=updated)
def execute(self, batch_id):
    """Execute a data quality indicator.

    Steps:

    * Log a Start event for the indicator and keep its session Id
    * Read the indicator parameters into a dictionary keyed by parameter
      type Id. Ids used here: 1 alert operator, 2 alert threshold,
      3 measures, 4 dimensions, 5 target data source, 6 target request,
      7 source data source, 8 source request, 9 e-mail distribution list
    * Fetch the source and target data frames and label their columns with
      the dimensions and measures
    * Call the indicator type's function (looked up by name on ``self``)
      with the data sets and the parameters
    * Store the result summary with ``self.compute_indicator_result``
    * If a distribution list is set and at least one row is in alert, send
      the result as a csv attachment by e-mail
    * Log a Stop event

    The csv attachment is written next to this module and is removed even
    when preparing or sending the e-mail fails.

    Returns the ``self.error_message`` dictionary: it holds a completion
    message on success, or the error when the measures parameter is missing.
    """
    start_event = MethodEvent('Start').log_event(self.indicator.id, batch_id)
    session_id = start_event.sessionId

    # Get indicator parameters
    indicator_parameter_list = Operation('ModelIndicatorParameter').read(
        indicatorId=self.indicator.id)

    # Create dictionary from indicator parameters
    parameters = {}
    for indicator_parameter in indicator_parameter_list:
        parameters[
            indicator_parameter.parameterTypeId] = indicator_parameter.value

    # Verify parameters exist and convert them to list objects
    # Dimension parameter
    if 4 in parameters:
        parameters[4] = ['indicator_id'] + literal_eval(parameters[4])
    else:
        parameters[4] = ['indicator_id']

    # Measure parameter
    if 3 in parameters:
        parameters[3] = literal_eval(parameters[3])
    else:
        # NOTE(review): the session started above is left in running status
        # on this early return - confirm whether an Error event should be
        # logged here.
        self.error_message[
            'message'] = 'Indicator with Id {} does not have any measures parameter'.format(
                self.indicator.id)
        log.error(self.error_message['message'])
        return self.error_message

    # Get source and target data frames
    # data source parameter Id -> (data set name, request parameter Id)
    data_set_parameters = {
        7: ('Source data frame', 8),
        5: ('Target data frame', 6),
    }
    data_sets = {}
    for parameter in parameters:
        if parameter not in data_set_parameters:
            continue
        data_set_name, request_parameter = data_set_parameters[parameter]
        log.info(
            'Getting data set from parameter Id {}, data source: {}'.format(
                parameter, parameters[parameter]))
        data_source_name = parameters[parameter]
        data_frame = MethodDataSource(data_source_name).get_data_frame(
            parameters[request_parameter])
        data_frame.insert(
            loc=0, column='indicator_id', value=self.indicator.id)
        data_sets[data_set_name] = data_frame

    # Verify data frames are not empty
    for data_frame_name in data_sets:
        if data_sets[data_frame_name].empty:
            log.error('{} is empty'.format(data_frame_name))

    # Format source and target data set with dimensions and measures parameters
    for data_frame_name, data_frame in data_sets.items():
        log.info('Formatting {}'.format(data_frame_name))
        data_frame.columns = parameters[4] + parameters[3]
        for column in parameters[4]:
            data_frame[column] = data_frame[column].astype(str)

    # Get indicator function and execute it
    indicator_type_list = Operation('ModelIndicatorType').read(
        id=self.indicator.indicatorTypeId)
    indicator_function = indicator_type_list[0].function
    result_data_frame = getattr(self, indicator_function)(data_sets,
                                                          parameters)

    # Compute indicator result summary
    log.info('Computing result summary for indicator Id: {}'.format(
        self.indicator.id))
    nb_records_alert = self.compute_indicator_result(
        session_id, parameters, result_data_frame)

    # Send e-mail alert
    if 9 in parameters and not result_data_frame.loc[
            result_data_frame['Alert'] == True].empty:  # noqa: E712
        # Create csv file to send in attachment. os.path.join keeps the path
        # relative when the module directory is empty, where plain
        # concatenation with "/" pointed at the filesystem root.
        file_name = 'indicator_{}_session_{}.csv'.format(
            self.indicator.id, session_id)
        file_path = os.path.join(os.path.dirname(__file__), file_name)
        result_data_frame.to_csv(file_path, header=True, index=False)

        try:
            # Convert distribution list parameter to python list
            parameters[9] = literal_eval(parameters[9])

            # Prepare e-mail body
            body = {
                'indicator_name': self.indicator.name,
                # 'Alert operator' + Alert threshold
                'alert_threshold': parameters[1] + parameters[2],
                'nb_records_alert': nb_records_alert,
                'log_url': 'http://',  # To be updated
            }

            # Send e-mail
            log.info(
                'Sending e-mail alert for indicator Id {} and session Id {}'.
                format(self.indicator.id, session_id))
            api_utils.send_mail(template='indicator',
                                distribution_list=parameters[9],
                                attachment=file_path,
                                **body)
        finally:
            # Delete csv file, also when the e-mail could not be sent
            os.remove(file_path)

    MethodEvent('Stop').log_event(self.indicator.id, batch_id)
    self.error_message[
        'message'] = 'Indicator with Id {} completed successfully'.format(
            self.indicator.id)
    log.info(self.error_message['message'])
    return self.error_message
def mutate(self, info, input):
    """Update an event type from the mutation input.

    The input is turned into a dictionary by
    ``api_utils.input_to_dictionary`` and its items are passed as keyword
    arguments to ``Operation('ModelEventType').update``. The result is
    returned wrapped in ``UpdateEventType``. ``info`` is not used.
    """
    fields = api_utils.input_to_dictionary(input)
    updated = Operation('ModelEventType').update(**fields)
    return UpdateEventType(event_type=updated)
def mutate(self, info, input):
    """Update an event from the mutation input.

    The input is turned into a dictionary by
    ``api_utils.input_to_dictionary`` and its items are passed as keyword
    arguments to ``Operation('ModelEvent').update``. The result is returned
    wrapped in ``UpdateEvent``. ``info`` is not used.
    """
    fields = api_utils.input_to_dictionary(input)
    updated = Operation('ModelEvent').update(**fields)
    return UpdateEvent(event=updated)
def mutate(self, info, input):
    """Update a batch from the mutation input.

    The input is turned into a dictionary by
    ``api_utils.input_to_dictionary`` and its items are passed as keyword
    arguments to ``Operation('ModelBatch').update``. The result is returned
    wrapped in ``UpdateBatch``. ``info`` is not used.
    """
    fields = api_utils.input_to_dictionary(input)
    updated = Operation('ModelBatch').update(**fields)
    return UpdateBatch(batch=updated)
def mutate(self, info, input):
    """Update a status from the mutation input.

    The input is turned into a dictionary by
    ``api_utils.input_to_dictionary`` and its items are passed as keyword
    arguments to ``Operation('ModelStatus').update``. The result is returned
    wrapped in ``UpdateStatus``. ``info`` is not used.
    """
    fields = api_utils.input_to_dictionary(input)
    updated = Operation('ModelStatus').update(**fields)
    return UpdateStatus(status=updated)
def mutate(self, info, input):
    """Update an indicator parameter from the mutation input.

    The input is turned into a dictionary by
    ``api_utils.input_to_dictionary`` and its items are passed as keyword
    arguments to ``Operation('ModelIndicatorParameter').update``. The result
    is returned wrapped in ``UpdateIndicatorParameter``. ``info`` is not
    used.
    """
    fields = api_utils.input_to_dictionary(input)
    updated = Operation('ModelIndicatorParameter').update(**fields)
    return UpdateIndicatorParameter(parameter=updated)
def mutate(self, info, input):
    """Update an indicator result from the mutation input.

    The input is turned into a dictionary by
    ``api_utils.input_to_dictionary`` and its items are passed as keyword
    arguments to ``Operation('ModelIndicatorResult').update``. The result is
    returned wrapped in ``UpdateIndicatorResult``. ``info`` is not used.
    """
    fields = api_utils.input_to_dictionary(input)
    updated = Operation('ModelIndicatorResult').update(**fields)
    return UpdateIndicatorResult(indicator_result=updated)
def mutate(self, info, input):
    """Create a data source type from the mutation input.

    The input is turned into a dictionary by
    ``api_utils.input_to_dictionary`` and its items are passed as keyword
    arguments to ``Operation('ModelDataSourceType').create``. The created
    record is returned wrapped in ``CreateDataSourceType``. ``info`` is not
    used.
    """
    payload = api_utils.input_to_dictionary(input)
    created = Operation('ModelDataSourceType').create(**payload)
    return CreateDataSourceType(data_source_type=created)
def mutate(self, info, input):
    """Update an indicator type from the mutation input.

    The input is turned into a dictionary by
    ``api_utils.input_to_dictionary`` and its items are passed as keyword
    arguments to ``Operation('ModelIndicatorType').update``. The result is
    returned wrapped in ``UpdateIndicatorType``. ``info`` is not used.
    """
    fields = api_utils.input_to_dictionary(input)
    updated = Operation('ModelIndicatorType').update(**fields)
    return UpdateIndicatorType(indicator_type=updated)
def mutate(self, info, input):
    """Update a data source type from the mutation input.

    The input is turned into a dictionary by
    ``api_utils.input_to_dictionary`` and its items are passed as keyword
    arguments to ``Operation('ModelDataSourceType').update``. The result is
    returned wrapped in ``UpdateDataSourceType``. ``info`` is not used.
    """
    fields = api_utils.input_to_dictionary(input)
    updated = Operation('ModelDataSourceType').update(**fields)
    return UpdateDataSourceType(data_source_type=updated)
def mutate(self, info, input):
    """Create an event type from the mutation input.

    The input is turned into a dictionary by
    ``api_utils.input_to_dictionary`` and its items are passed as keyword
    arguments to ``Operation('ModelEventType').create``. The created record
    is returned wrapped in ``CreateEventType``. ``info`` is not used.
    """
    payload = api_utils.input_to_dictionary(input)
    created = Operation('ModelEventType').create(**payload)
    return CreateEventType(event_type=created)