def __get_column_profile_phase1(self, schema_name, table_name):
    """Collect row count, null counts and min/max values in one query.

    Builds a single SELECT over the given table that returns the total
    number of rows followed by, for each column, the number of NULLs and
    the MIN/MAX values (or NULL placeholders when
    ``OraProfiler.has_minmax()`` rejects the column's data type). The
    outcome of ``_query_column_profile()`` is stored in
    ``self.column_cache`` under the ``(schema_name, table_name)`` key.

    Args:
        schema_name: schema name of the table.
        table_name: table name.

    Returns:
        True once the cache entry has been stored, or None when
        ``get_column_names()`` returned None.
    """
    columns = self.get_column_names(schema_name, table_name)
    if columns is None:
        return None
    types_by_column = self.get_column_datatypes(schema_name, table_name)

    # The leading expression is the total row count; every column then
    # contributes exactly three expressions: null count, min and max.
    expressions = ['COUNT(*)']
    for column in columns:
        log.trace("__get_column_profile_phase1: %s" % column)
        expressions.append(
            'COUNT(CASE WHEN "%s" IS NULL THEN 1 ELSE NULL END)' % column)
        if OraProfiler.has_minmax(types_by_column[column]):
            expressions.extend(
                [u'MIN("%s")' % column, u'MAX("%s")' % column])
        else:
            # keep the select list aligned for columns without min/max
            expressions.extend(['NULL', 'NULL'])

    query = u'SELECT %s %s FROM "%s"."%s"' % (
        self.parallel_hint, ','.join(expressions), schema_name, table_name)
    log.trace(query)

    row_count, minmax, nulls = self._query_column_profile(columns, query)

    # remember the figures for this table
    cache_key = (schema_name, table_name)
    self.column_cache[cache_key] = (row_count, minmax, nulls)
    return True
def run_record_validation(self, schema_name, table_name,
                          validation_rules=None, fetch_size=500000):
    """Run the record validation rules against every row of a table.

    Args:
        schema_name: schema name of the table.
        table_name: table name.
        validation_rules: rules handed to ``DbProfilerValidator``.
        fetch_size: passed through to ``_query_record_validation()``.

    Returns:
        An empty dict when the validator has no record validators,
        otherwise whatever ``get_validation_results()`` of the validator
        returns.

    Raises:
        DbProfilerException.InternalError: when no column name is found
            for the table.
    """
    log.trace('run_record_validation: start. %s.%s' %
              (schema_name, table_name))

    validator = DbProfilerValidator.DbProfilerValidator(
        schema_name, table_name, validation_rules=validation_rules)
    if not validator.record_validators:
        log.info(_("Skipping record validation since no validation rule."))
        return {}

    columns = self.get_column_names(schema_name, table_name)
    if not columns:
        raise DbProfilerException.InternalError(
            'No column found on the table `%s\'.' % table_name)

    # The select list is every column, each wrapped in double quotes.
    hint = self.parallel_hint
    column_list = '","'.join(columns)
    query = u'SELECT %s "%s" FROM "%s"."%s"' % (
        hint, column_list, schema_name, table_name)

    row_count, invalid_count = self._query_record_validation(
        query, validator, fetch_size=fetch_size)

    log.trace(("run_record_validation: end. "
               "row count %d invalid record %d" %
               (row_count, invalid_count)))
    return validator.get_validation_results()
def __get_column_profile_phase1(self, schema_name, table_name):
    """Collect row count, null counts and min/max values in one query.

    Builds a single SELECT over the given table that returns the total
    number of rows followed by, for each column, the number of NULLs and
    the MIN/MAX values (or NULL placeholders when
    ``MyProfiler.has_minmax()`` rejects the column's data type). The
    outcome of ``_query_column_profile()`` is stored in
    ``self.column_cache`` under the ``(schema_name, table_name)`` key.

    Args:
        schema_name: schema name of the table.
        table_name: table name.

    Returns:
        True once the cache entry has been stored, or None when
        ``get_column_names()`` returned None.
    """
    column_names = self.get_column_names(schema_name, table_name)
    if column_names is None:
        # Nothing to profile; avoid iterating over None below.
        return None
    data_types = self.get_column_datatypes(schema_name, table_name)

    # num of rows
    select_list = ['COUNT(*)']
    for c in column_names:
        log.trace("__get_column_profile_phase1: %s" % c)
        # nulls
        # The column must be quoted with backticks, like MIN/MAX below.
        # Unless the ANSI_QUOTES sql mode is enabled, MySQL reads a
        # double-quoted token as a string literal, which is never NULL,
        # so the null count would always come back as zero.
        select_list.append(
            'COUNT(CASE WHEN `%s` IS NULL THEN 1 ELSE NULL END)' % c)
        # min,max
        if MyProfiler.has_minmax(data_types[c]):
            select_list.append(u'MIN(`%s`)' % c)
            select_list.append(u'MAX(`%s`)' % c)
        else:
            # keep three expressions per column so positions stay aligned
            select_list.append('NULL')
            select_list.append('NULL')

    q = u'SELECT %s FROM %s.%s' % (','.join(select_list),
                                   schema_name, table_name)
    log.trace(q)

    (num_rows, _minmax, _nulls) = self._query_column_profile(column_names, q)

    # cache the results
    self.column_cache[(schema_name, table_name)] = (num_rows, _minmax,
                                                    _nulls)
    return True
def query_to_resultset(self, query, max_rows=10000):
    """Build a QueryResult object from the query

    Args:
        query (unicode): a query string to be executed.
        max_rows (int): max rows which can be kept in a QueryResult object.

    Returns:
        QueryResult: an object holding query, column names and result set.

    Raises:
        DbProfilerException.InternalError: when the query returns more
            than ``max_rows`` rows.
        DbProfilerException.DriverError: re-raised unchanged when raised
            inside the try block.
        DbProfilerException.QueryError: for any other failure while
            executing the query or reading its result.
    """
    assert query
    assert isinstance(query, unicode)

    log.trace('query_to_resultset: start query=%s' % query)
    res = QueryResult(query)
    cur = None
    try:
        if self.conn is None:
            self.connect()
        cur = self.conn.cursor()
        cur.execute(res.query)

        # The first item of each description entry is the column name.
        res.column_names = tuple(d[0] for d in cur.description)

        for i, r in enumerate(cur.fetchall()):
            # let's consider the memory size.
            # ``i`` is zero-based, so reaching ``max_rows`` means this row
            # would be number ``max_rows + 1``.
            if i >= max_rows:
                raise DbProfilerException.InternalError(
                    u'Exceeded the record limit (%d) for QueryResult.' %
                    max_rows, query=query)
            # Decimal values are stored as float in the result set.
            res.resultset.append(
                deepcopy([
                    float(x) if isinstance(x, Decimal) else x for x in r
                ]))
        cur.close()
        cur = None
    except (DbProfilerException.InternalError,
            DbProfilerException.DriverError):
        # Already meaningful to the caller; keep the original traceback.
        raise
    except Exception as e:
        # Prefer the first line of the second exception argument as the
        # message. Fall back to the exception text when there is no such
        # argument or it is not a string, so that the real failure is
        # reported rather than an IndexError/AttributeError raised here.
        try:
            detail = e.args[1].split('\n')[0]
        except (IndexError, AttributeError):
            detail = unicode(e).split('\n')[0]
        raise DbProfilerException.QueryError(
            "Could not execute a query: %s" % detail,
            query=query, source=e)
    finally:
        if cur is not None:
            # An error occurred before the cursor was closed. Release it
            # on a best-effort basis without hiding the original error.
            try:
                cur.close()
            except Exception:
                log.trace('query_to_resultset: could not close the cursor')
        if self.conn:
            self.conn.rollback()
    log.trace('query_to_resultset: end')
    return res
def connect(self):
    """Open a database connection and keep it in ``self.conn``.

    When both ``self.host`` and ``self.port`` are set, the DSN is built
    with ``self.driver.makedsn()``; otherwise ``self.dbname`` is used as
    the TNS name.

    Returns:
        bool: True once the connection has been established.

    Raises:
        DbProfilerException.DriverError: when building the DSN or
            connecting fails for any reason.
    """
    try:
        use_tns_name = self.host is None or self.port is None
        if use_tns_name:
            dsn_tns = self.dbname
        else:
            # host name and port number are both available
            dsn_tns = self.driver.makedsn(self.host, self.port,
                                          self.dbname)
        log.trace("dsn_tns: %s" % dsn_tns)
        self.conn = self.driver.connect(self.dbuser, self.dbpass, dsn_tns)
    except Exception as e:
        # only the first line of the driver's message is reported
        first_line = unicode(e).split('\n')[0]
        raise DbProfilerException.DriverError(
            u"Could not connect to the server: %s" % first_line, source=e)
    return True
def test_trace_001(self):
    """Call log.trace() with unicode, byte string and dict payloads.

    One message is sent before ``log.trace_enabled`` is switched on and
    the rest afterwards. The test makes no assertions; it passes as long
    as none of the calls raises.
    """
    # sent while trace_enabled still holds whatever value it had before
    log.trace(u"trace")

    log.trace_enabled = True
    payloads = (
        u"trace",
        u"日本語unicode",
        "日本語str",
        {"foo": "bar"},
        {"日本語foo": "日本語bar"},
    )
    for payload in payloads:
        log.trace(payload)
def has_minmax(data_type):
    """Tell whether a column of the given data type gets MIN/MAX values.

    Args:
        data_type (list): data type description whose first element is
            the type name.

    Returns:
        bool: False when the upper-cased type name is BINARY or
        VARBINARY, True for everything else.
    """
    assert isinstance(data_type, list)
    log.trace("has_minmax: " + unicode(data_type))
    type_name = data_type[0].upper()
    return type_name not in ('BINARY', 'VARBINARY')