Esempio n. 1
0
    def run_record_validation(self,
                              schema_name,
                              table_name,
                              validation_rules=None,
                              fetch_size=500000):
        """Validate the records of a table and return the validation results.

        Builds a DbProfilerValidator for the table, selects every column of
        ``schema_name.table_name`` and hands the query together with the
        validator to ``self._query_record_validation``.

        Args:
            schema_name: schema of the table to validate.
            table_name: name of the table to validate.
            validation_rules: rules passed through to DbProfilerValidator.
            fetch_size: passed through to ``_query_record_validation``.

        Returns:
            An empty dict when the validator holds no record validators;
            otherwise the value of ``get_validation_results()`` on the
            validator.

        Raises:
            DbProfilerException.InternalError: if no column name is found
                for the table.
        """
        start_args = (schema_name, table_name)
        log.trace('run_record_validation: start. %s.%s' % start_args)

        validator = DbProfilerValidator.DbProfilerValidator(
            schema_name, table_name, validation_rules=validation_rules)
        # Nothing to check at record level: skip the table scan entirely.
        if not validator.record_validators:
            log.info(_("Skipping record validation since no validation rule."))
            return {}

        column_names = self.get_column_names(schema_name, table_name)
        if not column_names:
            raise DbProfilerException.InternalError(
                'No column found on the table `%s\'.' % table_name)

        # Column, schema and table names are all wrapped in double quotes in
        # the generated statement; the hint goes right after SELECT.
        hint = self.parallel_hint
        select_list = '","'.join(column_names)
        query = u'SELECT %s "%s" FROM "%s"."%s"' % (
            hint, select_list, schema_name, table_name)

        count, failed = self._query_record_validation(
            query, validator, fetch_size=fetch_size)

        log.trace(("run_record_validation: end. "
                   "row count %d invalid record %d" % (count, failed)))
        return validator.get_validation_results()
Esempio n. 2
0
    def query_to_resultset(self, query, max_rows=10000):
        """Build a QueryResult object from the query

        Executes ``query`` on ``self.conn`` (connecting first when there is
        no connection yet), copies the column names and the rows into a
        QueryResult, and always rolls the connection back afterwards.
        Decimal values in the rows are converted to float.

        Args:
            query (unicode): a query string to be executed.
            max_rows (int): max rows which can be kept in a QueryResult object.

        Returns:
            QueryResult: an object holding query, column names and result set.

        Raises:
            DbProfilerException.InternalError: if the query returns more
                than ``max_rows`` rows.
            DbProfilerException.DriverError: re-raised unchanged when raised
                inside the try block.
            DbProfilerException.QueryError: for any other failure while
                executing the query or fetching its result.
        """
        assert query
        assert isinstance(query, unicode)
        log.trace('query_to_resultset: start query=%s' % query)

        res = QueryResult(query)
        try:
            if self.conn is None:
                self.connect()

            cur = self.conn.cursor()
            try:
                cur.execute(res.query)
                res.column_names = tuple(d[0] for d in cur.description)

                # NOTE(review): fetchall() materializes the whole result
                # before the limit below is checked, so the limit bounds the
                # QueryResult size, not the driver-side memory.
                for i, r in enumerate(cur.fetchall()):
                    # i is zero-based: i == max_rows is already row number
                    # max_rows + 1, i.e. the first row over the limit.
                    if i >= max_rows:
                        raise DbProfilerException.InternalError(
                            u'Exceeded the record limit (%d) for QueryResult.'
                            % max_rows,
                            query=query)
                    res.resultset.append(
                        deepcopy([
                            float(x) if isinstance(x, Decimal) else x
                            for x in r
                        ]))
            finally:
                # Release the cursor on the failure paths as well.
                cur.close()
        except (DbProfilerException.InternalError,
                DbProfilerException.DriverError):
            # Bare raise keeps the original traceback.
            raise
        except Exception as e:
            # Use the second exception argument when it is a string (same
            # as before); not every exception carries one, so fall back to
            # the exception text instead of failing with
            # IndexError/AttributeError and hiding the real error.
            try:
                summary = e.args[1].split('\n')[0]
            except (IndexError, AttributeError):
                summary = str(e).split('\n')[0]
            raise DbProfilerException.QueryError(
                "Could not execute a query: %s" % summary,
                query=query,
                source=e)
        finally:
            if self.conn:
                self.conn.rollback()
        log.trace('query_to_resultset: end')
        return res
Esempio n. 3
0
    def get_sample_rows(self, schema_name, table_name, rows_limit=10):
        """Query sample rows of a table.

        Builds ``SELECT <columns> FROM <schema>.<table> LIMIT <rows_limit>``
        from the column names reported by ``get_column_names`` and returns
        whatever ``self._query_sample_rows`` returns for that statement.

        Args:
            schema_name: schema of the table to sample.
            table_name: name of the table to sample.
            rows_limit: value placed in the LIMIT clause.

        Raises:
            DbProfilerException.InternalError: if the table has no column
                names.
        """
        columns = self.get_column_names(schema_name, table_name)
        if len(columns) == 0:
            msg = ("Could not get column names of the table: %s.%s" %
                   (schema_name, table_name))
            raise DbProfilerException.InternalError(msg)

        select_list = ','.join(columns)
        assert select_list

        return self._query_sample_rows(
            u'SELECT {0} FROM {1}.{2} LIMIT {3}'.format(
                select_list, schema_name, table_name, rows_limit))