def run_record_validation(self, schema_name, table_name,
                          validation_rules=None, fetch_size=500000):
    """Run the record-level validation rules against one table.

    Args:
        schema_name: schema that holds the table.
        table_name: table whose rows are validated.
        validation_rules: rules handed unchanged to DbProfilerValidator.
        fetch_size: forwarded to ``_query_record_validation`` as its
            ``fetch_size`` argument.

    Returns:
        An empty dict when the validator has no record validators;
        otherwise the value of the validator's
        ``get_validation_results()``.

    Raises:
        DbProfilerException.InternalError: if no column names are found
            for the table.
    """
    log.trace('run_record_validation: start. %s.%s' %
              (schema_name, table_name))

    validator = DbProfilerValidator.DbProfilerValidator(
        schema_name, table_name, validation_rules=validation_rules)

    # Nothing to check: skip the table scan entirely.
    if not validator.record_validators:
        log.info(_("Skipping record validation since no validation rule."))
        return {}

    columns = self.get_column_names(schema_name, table_name)
    if not columns:
        raise DbProfilerException.InternalError(
            'No column found on the table `%s\'.' % table_name)

    # Column names are joined with '","' and wrapped by the quotes in the
    # template, so every identifier ends up double-quoted.
    select_list = '","'.join(columns)
    sql = u'SELECT %s "%s" FROM "%s"."%s"' % (
        self.parallel_hint, select_list, schema_name, table_name)

    total, invalid = self._query_record_validation(
        sql, validator, fetch_size=fetch_size)

    log.trace(("run_record_validation: end. "
               "row count %d invalid record %d" % (total, invalid)))
    return validator.get_validation_results()
def query_to_resultset(self, query, max_rows=10000):
    """Build a QueryResult object from the query

    Args:
        query (str): a query string to be executed.
        max_rows (int): max rows which can be kept in a QueryResult object.

    Returns:
        QueryResult: an object holding query, column names and result set.

    Raises:
        DbProfilerException.InternalError: if the query yields more than
            ``max_rows`` rows.
        DbProfilerException.DriverError: propagated unchanged when raised
            inside the try block.
        DbProfilerException.QueryError: for any other exception raised
            while connecting, executing or fetching; the original
            exception is passed along as ``source``.
    """
    assert query
    assert isinstance(query, unicode)

    log.trace('query_to_resultset: start query=%s' % query)

    res = QueryResult(query)
    cur = None
    try:
        if self.conn is None:
            self.connect()

        cur = self.conn.cursor()
        cur.execute(res.query)

        # The first item of each description entry is the column name.
        res.column_names = tuple(d[0] for d in cur.description)

        # Fetch at most one row beyond the limit: enough to detect an
        # overflow without materializing an arbitrarily large result.
        fetch_limit = max(max_rows, 0) + 1
        for i, r in enumerate(cur.fetchmany(fetch_limit)):
            # i is zero-based, so i == max_rows is the (max_rows + 1)-th row.
            if i >= max_rows:
                raise DbProfilerException.InternalError(
                    u'Exceeded the record limit (%d) for QueryResult.' %
                    max_rows, query=query)
            # Decimal values are stored as float in the result set.
            res.resultset.append(
                deepcopy([
                    float(x) if isinstance(x, Decimal) else x for x in r
                ]))

        cur.close()
        cur = None  # closed normally; nothing left for the finally clause
    except (DbProfilerException.InternalError,
            DbProfilerException.DriverError):
        # Bare raise keeps the original traceback.
        raise
    except Exception as e:
        # Use the second argument as the message when the exception has
        # one; otherwise fall back to the exception text, so that building
        # the message can never hide the real failure behind an IndexError.
        detail = e.args[1] if len(e.args) > 1 else e
        if not hasattr(detail, 'split'):
            detail = str(detail)
        raise DbProfilerException.QueryError(
            "Could not execute a query: %s" % detail.split('\n')[0],
            query=query, source=e)
    finally:
        if cur is not None:
            # Error path: release the cursor on a best-effort basis so a
            # failure here does not replace the exception being raised.
            try:
                cur.close()
            except Exception as close_error:
                log.trace('query_to_resultset: cursor close failed: %s' %
                          close_error)
        if self.conn:
            self.conn.rollback()

    log.trace('query_to_resultset: end')
    return res
def get_sample_rows(self, schema_name, table_name, rows_limit=10):
    """Fetch a limited number of sample rows from a table.

    Args:
        schema_name: schema that holds the table.
        table_name: table to sample.
        rows_limit: value placed in the LIMIT clause of the query.

    Returns:
        Whatever ``_query_sample_rows`` returns for the generated query.

    Raises:
        DbProfilerException.InternalError: if no column names are found
            for the table.
    """
    names = self.get_column_names(schema_name, table_name)
    if len(names) == 0:
        message = ("Could not get column names of the table: %s.%s" %
                   (schema_name, table_name))
        raise DbProfilerException.InternalError(message)

    select_list = ','.join(names)
    assert select_list

    template = u'SELECT {0} FROM {1}.{2} LIMIT {3}'
    sql = template.format(select_list, schema_name, table_name, rows_limit)
    return self._query_sample_rows(sql)