Ejemplo n.º 1
0
def validate_eval(kv, p):
    """Fill a rule template with column values and evaluate the result.

    Args:
      kv (dict): column name as key, and the value substituted for it
      p (str): rule template using str.format() named fields

    Returns:
      Whatever the filled-in expression evaluates to (a bool for a
      comparison rule).

    Raises:
      DbProfilerException.ValidationError: the template refers to a key
        missing from `kv`, or the filled-in text is not valid syntax.
    """
    try:
        expression = p.format(**kv)
    except KeyError:
        prefix = _("Parameter error: ")
        detail = "`%s' %s" % (p, kv)
        raise DbProfilerException.ValidationError(prefix + detail,
                                                  rule=p,
                                                  params=kv)

    # NOTE: eval() executes the rule text as Python code, so rules must
    # come from a trusted source.
    try:
        outcome = eval(expression)
    except SyntaxError:
        prefix = _("Syntax error: ")
        detail = "`%s'" % expression
        raise DbProfilerException.ValidationError(prefix + detail,
                                                  rule=p,
                                                  params=kv)
    return outcome
Ejemplo n.º 2
0
    def validate(self, stats):
        """Check a column-statistics rule against one table's statistics.

        The rule expression may use the fields {rows}, {nulls}, {min},
        {max} and {cardinality}. self.statistics[0] is incremented for
        every evaluation attempt and self.statistics[1] for every failed
        one (including rules that raise ValidationError).

        Args:
            stats (dict): table statistics; must contain 'row_count' and
                          'columns', a list of per-column dicts.

        Returns:
            False if the expression evaluates to exactly False,
            otherwise True.

        Raises:
            DbProfilerException.ValidationError: the column named by the
                rule is not in stats, the expression uses an unknown
                field, or the expression is not valid syntax.
        """
        # rule layout: [ column_name, expression ]
        assert len(self.rule) == 2
        assert 'columns' in stats

        # first column whose name matches the rule, or None
        column = next((candidate for candidate in stats['columns']
                       if candidate['column_name'] == self.rule[0]), None)

        if column is None:
            raise DbProfilerException.ValidationError(
                _("Column `%s' not found. Check your validation rule again.") %
                self.rule[0], self.rule)
        assert 'row_count' in stats
        assert all(key in column
                   for key in ('nulls', 'min', 'max', 'cardinality'))

        params = {'rows': stats['row_count']}
        for key in ('nulls', 'min', 'max', 'cardinality'):
            params[key] = column[key]

        self.statistics[0] += 1

        try:
            expression = self.rule[1].format(**params)
        except KeyError:
            self.statistics[1] += 1
            raise DbProfilerException.ValidationError(
                _("Parameter error: ") + "`%s'" % params, self.rule)

        # NOTE: eval() executes the rule text as Python code, so rules
        # must come from a trusted source.
        try:
            outcome = eval(expression)
        except SyntaxError:
            self.statistics[1] += 1
            raise DbProfilerException.ValidationError(
                _("Syntax error: ") + "`%s'" % expression, self.rule)

        # only an exact False counts as a failed validation
        if outcome is False:
            self.statistics[1] += 1
            return False
        return True
Ejemplo n.º 3
0
    def query_to_resultset(self, query, max_rows=10000):
        """Build a QueryResult object from the query

        Connects first if self.conn is None, executes the query, copies
        the column names and rows into a QueryResult (Decimal fields are
        converted to float), and always rolls the connection back
        afterwards.

        Args:
            query (unicode): a non-empty query string to be executed.
            max_rows (int): max rows which can be kept in a QueryResult
                            object.

        Returns:
            QueryResult: an object holding query, column names and result set.

        Raises:
            DbProfilerException.InternalError: the query returned more
                than max_rows rows.
            DbProfilerException.DriverError: re-raised unchanged when
                raised inside the query block.
            DbProfilerException.QueryError: any other failure while
                connecting, executing or fetching.
        """
        assert query
        assert isinstance(query, unicode)
        log.trace('query_to_resultset: start query=%s' % query)

        res = QueryResult(query)
        try:
            if self.conn is None:
                self.connect()

            cur = self.conn.cursor()
            cur.execute(res.query)

            res.column_names = deepcopy(tuple(d[0] for d in cur.description))

            for i, r in enumerate(cur.fetchall()):
                # let's consider the memory size.
                # `i` is zero-based, so row number i + 1 exceeds the limit
                # as soon as i reaches max_rows.
                if i >= max_rows:
                    raise DbProfilerException.InternalError(
                        u'Exceeded the record limit (%d) for QueryResult.' %
                        max_rows,
                        query=query)
                res.resultset.append(
                    deepcopy([
                        float(x) if isinstance(x, Decimal) else x for x in r
                    ]))
            cur.close()
        except (DbProfilerException.InternalError,
                DbProfilerException.DriverError):
            # already one of our own errors: pass it through untouched
            raise
        except Exception as e:
            # The original code assumed args == (code, message). Fall back
            # to the exception itself when it has another shape, so the
            # real error is reported instead of an IndexError.
            try:
                reason = e.args[1].split('\n')[0]
            except (IndexError, AttributeError, TypeError):
                reason = e
            raise DbProfilerException.QueryError(
                "Could not execute a query: %s" % reason,
                query=query,
                source=e)
        finally:
            if self.conn:
                self.conn.rollback()
        log.trace('query_to_resultset: end')
        return res
Ejemplo n.º 4
0
    def run_record_validation(self,
                              schema_name,
                              table_name,
                              validation_rules=None,
                              fetch_size=500000):
        """Run the record-level validation rules against a table.

        Args:
            schema_name: schema of the table to scan.
            table_name: name of the table to scan.
            validation_rules: rules handed to DbProfilerValidator.
            fetch_size (int): passed through to _query_record_validation.

        Returns:
            An empty dict when the validator has no record validators,
            otherwise the validator's get_validation_results().

        Raises:
            DbProfilerException.InternalError: no column names were
                found for the table.
        """
        log.trace('run_record_validation: start. %s.%s' %
                  (schema_name, table_name))

        validator = DbProfilerValidator.DbProfilerValidator(
            schema_name, table_name, validation_rules=validation_rules)
        if not validator.record_validators:
            log.info(_("Skipping record validation since no validation rule."))
            return {}

        column_names = self.get_column_names(schema_name, table_name)
        if not column_names:
            raise DbProfilerException.InternalError(
                'No column found on the table `%s\'.' % table_name)

        # every column is selected, each identifier wrapped in double quotes
        hint = self.parallel_hint
        select_list = '","'.join(column_names)
        query = u'SELECT %s "%s" FROM "%s"."%s"' % (hint, select_list,
                                                    schema_name, table_name)

        count, failed = self._query_record_validation(query,
                                                      validator,
                                                      fetch_size=fetch_size)

        log.trace(("run_record_validation: end. "
                   "row count %d invalid record %d" % (count, failed)))
        return validator.get_validation_results()
Ejemplo n.º 5
0
    def connect(self):
        # NOTE(review): the statement below is garbled. The `******` runs
        # look like masked credentials, and the text jumps from building the
        # connection string straight into the tail of an error-raising call,
        # so the driver connect call and the try/except around it appear to
        # be missing. Restore this method from the original source before
        # relying on it.
        s = self.connstr + " user="******" password="******"Could not connect to the server: %s" %
                e.args[0].split('\n')[0],
                source=e)

        return True
Ejemplo n.º 6
0
    def __init__(self, connstr, dbuser, dbpass):
        """Keep the connection settings and load the psycopg2 module.

        Args:
            connstr: connection string, stored as self.connstr.
            dbuser: database user, stored as self.dbuser.
            dbpass: database password, stored as self.dbpass.

        Raises:
            DbProfilerException.DriverError: the driver module could not
                be imported.
        """
        self.connstr, self.dbuser, self.dbpass = connstr, dbuser, dbpass

        name = "psycopg2"
        try:
            driver_module = __import__(name, fromlist=[''])
        except Exception as e:
            message = u"Could not load the driver module: %s" % name
            raise DbProfilerException.DriverError(message, source=e)
        self.driver = driver_module
Ejemplo n.º 7
0
    def validate(self, column_names, record):
        """Validate one record

        Each field is turned into text that can be embedded in the rule
        expression, the expression is filled in and evaluated.
        self.statistics[0] is incremented for every call and
        self.statistics[1] for every failed one (including rules that
        raise ValidationError).

        Args:
            column_names (list): a list of column names associated with
                each field.
            record (list): a record, consisting of list of fields.

        Returns:
            False if the expression evaluates to exactly False,
            otherwise True.

        Raises:
            DbProfilerException.ValidationError: the rule refers to an
                unknown column, or the filled-in expression is not valid
                syntax.
        """
        # self.rule: [ column name, format ]
        assert len(self.rule) == 2
        assert len(column_names) == len(record)

        self.statistics[0] += 1

        kv = {}
        for k, v in zip(column_names, record):
            # Numbers are embedded as they are; everything else has to
            # become a quoted literal.
            try:
                float(v)
                kv[k] = v
            except ValueError:
                # Non-numeric text. repr() yields a correctly escaped
                # literal, so quotes, backslashes and newlines inside the
                # value no longer break the expression.
                kv[k] = repr(v)
            except TypeError:
                # Neither text nor a number. None stays None so a rule can
                # test for it; any other object is embedded as the quoted
                # form of its text.
                kv[k] = v if v is None else repr(str(v))
        try:
            s = self.rule[1].format(**kv)
        except KeyError:
            self.statistics[1] += 1
            raise DbProfilerException.ValidationError(
                _("Parameter error: ") + "`%s'" % kv, self.rule)

        # NOTE: eval() executes the rule text as Python code, so rules
        # must come from a trusted source.
        try:
            if eval(s) is False:
                self.statistics[1] += 1
                return False
        except SyntaxError:
            self.statistics[1] += 1
            raise DbProfilerException.ValidationError(
                _("Syntax error: ") + "`%s'" % s, self.rule)

        return True
Ejemplo n.º 8
0
 def connect(self):
     """Open a connection with the loaded driver and keep it in self.conn.

     Returns:
         bool: True once the connection has been established.

     Raises:
         DbProfilerException.DriverError: the driver failed to connect.
     """
     try:
         self.conn = self.driver.connect(host=self.host,
                                         db=self.dbname,
                                         user=self.dbuser,
                                         passwd=self.dbpass)
     except Exception as e:
         # The original code assumed args == (code, message). Fall back to
         # the exception itself when it has another shape, so the real
         # error is reported instead of an IndexError.
         try:
             reason = e.args[1].split('\n')[0]
         except (IndexError, AttributeError, TypeError):
             reason = e
         raise DbProfilerException.DriverError(
             u"Could not connect to the server: %s" % reason,
             source=e)
     return True
Ejemplo n.º 9
0
    def connect(self):
        """Open a connection with the loaded driver and keep it in self.conn.

        Returns:
            bool: True once the connection has been established.

        Raises:
            DbProfilerException.DriverError: the driver failed to connect.
        """
        try:
            self.conn = self.driver.connect(self.host, self.dbuser,
                                            self.dbpass, self.dbname)
        except Exception as e:
            # assuming OperationalError, whose first argument is a
            # (code, message) pair. Exceptions are not indexable on
            # Python 3, so go through e.args; for any other shape use the
            # first argument (or the exception itself) rather than
            # indexing into a string.
            detail = e.args[0] if e.args else e
            if isinstance(detail, tuple) and len(detail) > 1:
                detail = detail[1]
            msg = to_unicode(detail).replace('\n', ' ')
            msg = re.sub(r'DB-Lib.*', '', msg)
            raise DbProfilerException.DriverError(
                u"Could not connect to the server: %s" % msg, source=e)

        return True
Ejemplo n.º 10
0
    def __init__(self, host, dbname, dbuser, dbpass):
        """Keep the connection settings and load the pymssql module.

        Args:
            host: server host, stored as self.host.
            dbname: database name, stored as self.dbname.
            dbuser: database user, stored as self.dbuser.
            dbpass: database password, stored as self.dbpass.

        Raises:
            DbProfilerException.DriverError: the driver module could not
                be imported.
        """
        self.host, self.dbname = host, dbname
        self.dbuser, self.dbpass = dbuser, dbpass

        name = "pymssql"
        try:
            driver_module = __import__(name, fromlist=[''])
        except Exception as e:
            message = u"Could not load the driver module: %s" % name
            raise DbProfilerException.DriverError(message, source=e)
        self.driver = driver_module
Ejemplo n.º 11
0
    def disconnect(self):
        """Close the current connection, if there is one.

        Returns:
            bool: False when self.conn is None, True once the connection
            has been closed and self.conn reset to None.

        Raises:
            DbProfilerException.DriverError: closing the connection failed
                (self.conn is left as it was).
        """
        if self.conn is not None:
            try:
                self.conn.close()
            except Exception as e:
                message = (u"Could not disconnect from the server: %s" %
                           to_unicode(e))
                raise DbProfilerException.DriverError(message, source=e)
            self.conn = None
            return True
        return False
Ejemplo n.º 12
0
    def disconnect(self):
        """Close the current connection, if there is one.

        Returns:
            bool: False when self.conn is None, True once the connection
            has been closed and self.conn reset to None.

        Raises:
            DbProfilerException.DriverError: closing the connection failed
                (self.conn is left as it was); the message carries the
                first line of the error text.
        """
        if self.conn is None:
            return False

        try:
            self.conn.close()
        except Exception as e:
            first_line = unicode(e).split('\n')[0]
            raise DbProfilerException.DriverError(
                u"Could not disconnect from the server: %s" % first_line,
                source=e)
        else:
            self.conn = None
        return True
Ejemplo n.º 13
0
    def validate(self, dbdriver):
        """Run self.query and evaluate the rule against its single row.

        Args:
            dbdriver: driver object providing q2rs(); must not be None.

        Returns:
            The result of validate_eval() for the row, using
            self.rule[2] as the expression.

        Raises:
            DbProfilerException.DriverError: dbdriver is None.
            DbProfilerException.ValidationError: the query failed with a
                QueryError.
        """
        if dbdriver is None:
            raise DbProfilerException.DriverError(
                _("Database driver not found."))

        try:
            res = dbdriver.q2rs(self.query)
        except DbProfilerException.QueryError as e:
            message = _("SQL error: ") + "`%s'" % self.query
            raise DbProfilerException.ValidationError(message,
                                                      self.label,
                                                      source=e)

        # the query is expected to return exactly one row
        assert res
        assert len(res.column_names) == len(res.resultset[0])
        assert len(res.resultset) == 1

        # column name -> value of the single row
        row = dict(zip(res.column_names, res.resultset[0]))

        return validate_eval(row, self.rule[2])
Ejemplo n.º 14
0
    def get_sample_rows(self, schema_name, table_name, rows_limit=10):
        """Fetch a limited number of rows from a table.

        Args:
            schema_name: schema of the table.
            table_name: name of the table.
            rows_limit: value placed in the LIMIT clause.

        Returns:
            Whatever _query_sample_rows() returns for the built query.

        Raises:
            DbProfilerException.InternalError: no column names were
                returned for the table.
        """
        columns = self.get_column_names(schema_name, table_name)
        if len(columns) == 0:
            message = ("Could not get column names of the table: %s.%s" %
                       (schema_name, table_name))
            raise DbProfilerException.InternalError(message)

        select_list = ','.join(columns)
        assert select_list

        template = u'SELECT {0} FROM {1}.{2} LIMIT {3}'
        query = template.format(select_list, schema_name, table_name,
                                rows_limit)
        return self._query_sample_rows(query)
Ejemplo n.º 15
0
    def __init__(self, host, port, dbname, dbuser, dbpass):
        """Keep the connection settings and load the cx_Oracle module.

        Args:
            host: server host, stored as self.host.
            port: server port, stored as self.port.
            dbname: database name, stored as self.dbname.
            dbuser: database user, stored as self.dbuser.
            dbpass: database password, stored as self.dbpass.

        Raises:
            DbProfilerException.DriverError: the driver module could not
                be imported.
        """
        self.host, self.port = host, port
        self.dbname = dbname
        self.dbuser, self.dbpass = dbuser, dbpass

        name = "cx_Oracle"
        try:
            driver_module = __import__(name, fromlist=[''])
        except Exception as e:
            raise DbProfilerException.DriverError(
                u"Could not load the driver module: %s" % name, source=e)
        self.driver = driver_module
Ejemplo n.º 16
0
    def connect(self):
        """Open a connection with the loaded driver and keep it in self.conn.

        When both self.host and self.port are set, the DSN is built with
        the driver's makedsn(); otherwise self.dbname is used as the DSN.

        Returns:
            bool: True once the connection has been established.

        Raises:
            DbProfilerException.DriverError: building the DSN or
                connecting failed; the message carries the first line of
                the error text.
        """
        try:
            has_host_and_port = (self.host is not None
                                 and self.port is not None)
            if not has_host_and_port:
                # use tns name
                dsn_tns = self.dbname
            else:
                # use host name and port number
                dsn_tns = self.driver.makedsn(self.host, self.port,
                                              self.dbname)
            log.trace("dsn_tns: %s" % dsn_tns)
            self.conn = self.driver.connect(self.dbuser, self.dbpass, dsn_tns)
        except Exception as e:
            first_line = unicode(e).split('\n')[0]
            raise DbProfilerException.DriverError(
                u"Could not connect to the server: %s" % first_line,
                source=e)

        return True