Exemple #1
0
    def __get_column_profile_phase1(self, schema_name, table_name):
        """Gather the row count, null counts and min/max values in one query.

        A single SELECT is assembled whose first expression is COUNT(*),
        followed by three expressions per column: the number of NULLs, the
        MIN and the MAX. Columns whose data type is rejected by
        OraProfiler.has_minmax() get literal NULL placeholders for MIN/MAX so
        every column still contributes exactly three expressions.

        The values returned by _query_column_profile() are stored in
        self.column_cache under the key (schema_name, table_name).

        Args:
            schema_name: schema name, embedded double-quoted in the query.
            table_name: table name, embedded double-quoted in the query.

        Returns:
            True once the cache entry has been written, or None when
            get_column_names() returns None.
        """
        column_names = self.get_column_names(schema_name, table_name)
        if column_names is None:
            return None
        data_types = self.get_column_datatypes(schema_name, table_name)

        # The total number of rows always comes first.
        expressions = ['COUNT(*)']

        for col in column_names:
            log.trace("__get_column_profile_phase1: %s" % col)
            # Number of NULLs in this column.
            expressions.append(
                'COUNT(CASE WHEN "%s" IS NULL THEN 1 ELSE NULL END)' % col)
            # MIN/MAX, or placeholders to keep the expression count fixed.
            if OraProfiler.has_minmax(data_types[col]):
                expressions.extend([u'MIN("%s")' % col, u'MAX("%s")' % col])
            else:
                expressions.extend(['NULL', 'NULL'])

        sql = u'SELECT %s %s FROM "%s"."%s"' % (
            self.parallel_hint, ','.join(expressions), schema_name,
            table_name)
        log.trace(sql)

        profile = self._query_column_profile(column_names, sql)
        (num_rows, _minmax, _nulls) = profile

        # Remember the results for later lookups of the same table.
        cache_key = (schema_name, table_name)
        self.column_cache[cache_key] = (num_rows, _minmax, _nulls)
        return True
Exemple #2
0
    def run_record_validation(self,
                              schema_name,
                              table_name,
                              validation_rules=None,
                              fetch_size=500000):
        """Run the record validators against every row of a table.

        A DbProfilerValidator is created for the table; when it has no
        record validators the table is not queried at all. Otherwise a
        SELECT over all columns is built and passed, together with the
        validator, to _query_record_validation().

        Args:
            schema_name: schema name, embedded double-quoted in the query.
            table_name: table name, embedded double-quoted in the query.
            validation_rules: forwarded to DbProfilerValidator as is.
            fetch_size: forwarded to _query_record_validation() as is.

        Returns:
            An empty dict when there are no record validators, otherwise
            whatever the validator's get_validation_results() returns.

        Raises:
            DbProfilerException.InternalError: when get_column_names()
                yields no columns for the table.
        """
        log.trace('run_record_validation: start. %s.%s' %
                  (schema_name, table_name))

        validator = DbProfilerValidator.DbProfilerValidator(
            schema_name, table_name, validation_rules=validation_rules)
        if not validator.record_validators:
            log.info(_("Skipping record validation since no validation rule."))
            return {}

        column_names = self.get_column_names(schema_name, table_name)
        if not column_names:
            raise DbProfilerException.InternalError(
                'No column found on the table `%s\'.' % table_name)

        # Joining with '","' and wrapping in quotes below yields a
        # double-quoted, comma separated column list.
        hint = self.parallel_hint
        column_list = '","'.join(column_names)
        query = u'SELECT %s "%s" FROM "%s"."%s"' % (
            hint, column_list, schema_name, table_name)

        count, failed = self._query_record_validation(
            query, validator, fetch_size=fetch_size)

        log.trace("run_record_validation: end. row count %d "
                  "invalid record %d" % (count, failed))
        return validator.get_validation_results()
Exemple #3
0
    def __get_column_profile_phase1(self, schema_name, table_name):
        """Gather the row count, null counts and min/max values in one query.

        A single SELECT is assembled whose first expression is COUNT(*),
        followed by three expressions per column: the number of NULLs, the
        MIN and the MAX. Columns whose data type is rejected by
        MyProfiler.has_minmax() get literal NULL placeholders for MIN/MAX so
        every column still contributes exactly three expressions.

        The values returned by _query_column_profile() are stored in
        self.column_cache under the key (schema_name, table_name).

        Args:
            schema_name: schema name, embedded unquoted in the FROM clause.
            table_name: table name, embedded unquoted in the FROM clause.

        Returns:
            True once the cache entry has been written, or None when
            get_column_names() returns None.
        """
        column_names = self.get_column_names(schema_name, table_name)
        if column_names is None:
            # Nothing to profile; avoids iterating over None below.
            return None
        data_types = self.get_column_datatypes(schema_name, table_name)

        # num of rows
        select_list = ['COUNT(*)']

        for c in column_names:
            log.trace("__get_column_profile_phase1: %s" % c)
            # nulls
            # The column must be quoted with backticks: unless the server
            # runs with ANSI_QUOTES, MySQL reads "name" as a string literal,
            # which is never NULL, so the count would always be zero.
            select_list.append(
                'COUNT(CASE WHEN `%s` IS NULL THEN 1 ELSE NULL END)' % c)
            # min,max
            if MyProfiler.has_minmax(data_types[c]):
                select_list.append(u'MIN(`%s`)' % c)
                select_list.append(u'MAX(`%s`)' % c)
            else:
                select_list.append('NULL')
                select_list.append('NULL')
        q = u'SELECT %s FROM %s.%s' % (','.join(select_list), schema_name,
                                       table_name)
        log.trace(q)

        (num_rows, _minmax,
         _nulls) = self._query_column_profile(column_names, q)

        # cache the results
        self.column_cache[(schema_name, table_name)] = (num_rows, _minmax,
                                                        _nulls)
        return True
Exemple #4
0
    def query_to_resultset(self, query, max_rows=10000):
        """Build a QueryResult object from the query

        Connects first when no connection is open, executes the query,
        copies the column names and rows into a QueryResult, and always
        rolls the connection back afterwards. Decimal values in the rows are
        converted to float.

        Args:
            query (unicode): a query string to be executed.
            max_rows (int): max rows which can be kept in a QueryResult object.

        Returns:
            QueryResult: an object holding query, column names and result set.

        Raises:
            DbProfilerException.InternalError: when the query returns more
                than max_rows rows.
            DbProfilerException.DriverError: re-raised unchanged when raised
                inside this method (e.g. by connect()).
            DbProfilerException.QueryError: for any other failure while
                executing the query or reading its results.
        """
        assert query
        assert isinstance(query, unicode)
        log.trace('query_to_resultset: start query=%s' % query)

        res = QueryResult(query)
        try:
            if self.conn is None:
                self.connect()

            cur = self.conn.cursor()
            try:
                cur.execute(res.query)

                res.column_names = deepcopy(
                    tuple(d[0] for d in cur.description))

                for i, r in enumerate(cur.fetchall()):
                    # let's consider the memory size.
                    # i is zero-based, so reaching max_rows means this row
                    # would be the (max_rows + 1)-th one kept.
                    if i >= max_rows:
                        raise DbProfilerException.InternalError(
                            u'Exceeded the record limit (%d) for QueryResult.'
                            % max_rows,
                            query=query)
                    res.resultset.append(
                        deepcopy([
                            float(x) if isinstance(x, Decimal) else x
                            for x in r
                        ]))
            finally:
                # Release the cursor on failure as well as on success.
                cur.close()
        except (DbProfilerException.InternalError,
                DbProfilerException.DriverError):
            # Bare raise keeps the original traceback.
            raise
        except Exception as e:
            # Use args[1] as the message when it is present and string-like;
            # otherwise fall back to the exception text instead of failing
            # with IndexError/AttributeError here and hiding the real error.
            if len(e.args) > 1 and hasattr(e.args[1], 'split'):
                detail = e.args[1]
            else:
                detail = unicode(e)
            raise DbProfilerException.QueryError(
                "Could not execute a query: %s" % detail.split('\n')[0],
                query=query,
                source=e)
        finally:
            if self.conn:
                self.conn.rollback()
        log.trace('query_to_resultset: end')
        return res
Exemple #5
0
    def connect(self):
        """Open a connection through self.driver and store it in self.conn.

        When both self.host and self.port are set, the DSN is built with
        driver.makedsn(host, port, dbname); otherwise self.dbname is passed
        to the driver as is.

        Returns:
            True when the connection has been established.

        Raises:
            DbProfilerException.DriverError: on any failure while building
                the DSN or connecting; the message carries the first line
                of the underlying error text.
        """
        try:
            if self.host is None or self.port is None:
                # use tns name
                dsn = self.dbname
            else:
                # use host name and port number
                dsn = self.driver.makedsn(self.host, self.port, self.dbname)
            log.trace("dsn_tns: %s" % dsn)
            self.conn = self.driver.connect(self.dbuser, self.dbpass, dsn)
        except Exception as e:
            first_line = unicode(e).split('\n')[0]
            raise DbProfilerException.DriverError(
                u"Could not connect to the server: %s" % first_line,
                source=e)

        return True
Exemple #6
0
    def test_trace_001(self):
        """Call log.trace() before and after setting log.trace_enabled.

        After enabling, unicode text, a non-ASCII plain string literal and
        dicts are passed in turn; the test makes no assertions beyond the
        calls completing.
        """
        # First call happens before trace_enabled is set by this test.
        log.trace(u"trace")

        log.trace_enabled = True
        payloads = [
            u"trace",
            u"日本語unicode",
            "日本語str",
            {"foo": "bar"},
            {"日本語foo": "日本語bar"},
        ]
        for payload in payloads:
            log.trace(payload)
Exemple #7
0
 def has_minmax(data_type):
     """Tell whether MIN/MAX should be computed for a column data type.

     Args:
         data_type (list): data type description; only its first element,
             upper-cased, is inspected.

     Returns:
         False when the first element is BINARY or VARBINARY (compared
         case-insensitively), True otherwise.
     """
     assert isinstance(data_type, list)
     log.trace("has_minmax: " + unicode(data_type))
     type_name = data_type[0].upper()
     return type_name not in ('BINARY', 'VARBINARY')