Example #1
0
    def get(self, column_name, label=None):
        """Get the counter data by column name (and label)

        Args:
            column_name (str): column name
            label (str): label string of validation rule

        Returns:
            int: when both column name & label are specified.
            dict: when only column name specified: {'label': count, ...}
        """
        if column_name not in self._column_counter and label is None:
            return None

        if column_name not in self._column_counter:
            m = "ColumnValidationCounter#get() key error: %s" % column_name
            raise InternalError(m)

        if label is None:
            return self._column_counter[column_name]

        if label not in self._column_counter[column_name]:
            m = ("ColumnValidationCounter#get() key error: %s, %s" %
                 (column_name, label))
            raise InternalError(m)

        return self._column_counter[column_name][label]
    def get_validation_rule(self, id):
        """
        Args:
            id(integer):

        Returns:
            tuple: (id,database_name,schema_name,table_name,column_name,
                    description,rule,param,param2) or None
        """
        query = (u"SELECT id,database_name,schema_name,table_name,column_name,"
                 u"description,rule,param,param2 FROM validation_rule "
                 u"WHERE id = %d" % id)

        log.trace("get_validation_rule: %s" % query.replace('\n', ''))
        tup = None
        try:
            cursor = self._conn.cursor()
            cursor.execute(query)
            r = cursor.fetchone()
            if r:
                tup = tuple(r)
        except Exception as e:
            raise InternalError(_("Could not get validation rule: "),
                                query=query, source=e)
        return tup
    def get_tags(self):
        """Get a list of tag names and number of tags associated with tables.

        Returns:
            list: a list of lists: [[tag,num of tables], ...]
        """
        log.trace("get_tags: start")

        query = """
SELECT tag_label,
       COUNT(*)
  FROM tags
 WHERE tag_label <> ''
 GROUP BY
       tag_label
 ORDER BY
       COUNT(*) DESC
"""

        tags = []
        try:
            cursor = self._conn.cursor()
            log.debug("get_tags: query = %s" % query)
            for r in cursor.execute(query):
                tags.append([r[0], r[1]])
        except Exception as e:
            log.trace("get_tags: " + unicode(e))
            raise InternalError(_("Could not get tag info: "),
                                query=query, source=e)

        log.trace("get_tags: end")
        return tags
Example #4
0
    def add_rule(self, id_, database_name, schema_name, table_name,
                 column_name, description, rule, param, param2=None):
        assert isinstance(id_, int)

        if self.schema_name != schema_name or self.table_name != table_name:
            return False

        label = id_
        log.debug("add_rule: label = %s" % label)

        assert param
        if rule == 'regexp':
            self.add_rule_regexp(label, column_name, param)
        elif rule == 'eval':
            self.add_rule_eval(label, column_name, param)
        elif rule == 'columnstat':
            self.add_rule_columnstat(label, column_name, param)
        elif rule == 'sql':
            assert param2
            self.add_rule_sql(label, column_name, param, param2)
        else:
            raise InternalError(_("Unsupported validation rule: %s") % rule)
        self.descriptions[label] = description

        return True
    def get_datamap_items(self, database_name, schema_name, table_name,
                          column_name=None):
        """Get one or more datamap entries from the repository

        Args:
          database_name (str):
          schema_name_name (str):
          table_name (str):
          column_name (str):

        Returns:
          list: a list which consists of one or more datamap entries.
        """
        assert database_name and schema_name and table_name

        query = u"""
SELECT data FROM datamapping
WHERE database_name = '%s' AND schema_name = '%s' AND table_name = '%s'
""" % (database_name, schema_name, table_name)
        if column_name:
            query = query + u" AND column_name = '%s'" % column_name
        query = query + u"ORDER BY lineno"

        datamap = []
        try:
            cursor = self._conn.cursor()
            for r in cursor.execute(query):
                datamap.append(json.loads(r[0]))
        except Exception as e:
            raise InternalError(_("Could not get data."), query=query)

        return datamap
    def append_table(self, tab):
        """
        Update a table record if the same record (with same timestamp)
        already exist.
        Otherwise, append the table record to the repository.

        Args:
            tab: a dictionary of table record.

        Returns:
            True on success, otherwise False.
        """
        assert (tab['database_name'] and tab['schema_name'] and
                tab['table_name'] and tab['timestamp'])

        query = None

        log.trace("append_table: start %s.%s.%s" %
                  (tab['database_name'], tab['schema_name'],
                   tab['table_name']))

        try:
            if self.has_table_record(tab):
                query = """
UPDATE repo
   SET data = '%s'
 WHERE database_name = '{database_name}'
   AND schema_name = '{schema_name}'
   AND table_name = '{table_name}'
   AND created_at = datetime('{timestamp}')
""".format(**tab) % DbProfilerFormatter.jsonize(tab).replace("'", "''")
            else:
                query = """
INSERT INTO repo VALUES ('{database_name}','{schema_name}','{table_name}',
                         datetime('{timestamp}'), '%s')
""".format(**tab) % DbProfilerFormatter.jsonize(tab).replace("'", "''")
                log.trace("append_table: INSERT")

            log.debug("append_table: query = %s" % query)

            assert self._conn
            cursor = self._conn.cursor()

            assert cursor
            cursor.execute(query)
            self._conn.commit()
        except Exception as e:
            raise InternalError(_("Could not register table data: "),
                                query=query, source=e)

        # Remove all tag id/label pairs to replace with new ones.
        tagid = "%s.%s.%s" % (tab['database_name'], tab['schema_name'],
                              tab['table_name'])
        self.delete_tag_id(tagid)
        if tab.get('tags'):
            for label in tab['tags']:
                self.put_tag(tagid, label)

        log.trace("append_table: end")
        return True
    def open(self):
        if self._conn:
            log.info(_("The repository file `%s' has already been opened.") %
                     self.filename)
            return

        repo_found = False
        try:
            repo_found = os.path.exists(self.filename)
        except Exception as e:
            raise DbProfilerException(
                _("Could not access to the repository file `%s'.") %
                self.filename)

        if repo_found is False:
            raise InternalError(_("The repository file `%s' not found.") %
                                self.filename)

        try:
            self._conn = sqlite3.connect(self.filename)
        except Exception as e:
            raise DbProfilerException(
                _("Could not read the repository file `%s'.") % self.filename)

        assert self._conn
        log.info(_("The repository file `%s' has been opened.") %
                 self.filename)
        return
 def get_tag_labels(self, tag_id):
     labels = []
     try:
         cursor = self._conn.cursor()
         query = u"SELECT tag_label FROM tags WHERE tag_id = '%s'" % tag_id
         for r in cursor.execute(query):
             labels.append(r[0])
     except Exception as e:
         raise InternalError(_("Could not get tag labels: "),
                             query=query, source=e)
     return labels
Example #9
0
    def incr(self, column_name, label):
        cols = column_name.replace(' ', '').split(",")
        if len(cols) >= 2:
            for col in cols:
                self.incr(col, label)
            return

        if column_name not in self._column_counter:
            m = ("ColumnValidationCounter#incr() key error: %s" % column_name)
            raise InternalError(m)
        if label not in self._column_counter[column_name]:
            m = ("ColumnValidationCounter#incr() key error: %s, %s" %
                 (column_name, label))
            raise InternalError(m)
        self._column_counter[column_name][label] += 1

        m = ("ColumnValidationCounter#incr(): %s,%s done." %
             (column_name, label))
        log.trace(m)
        return self._column_counter[column_name][label]
 def delete_tag_id(self, tag_id):
     log.trace('delete_tag_id: start %s' % tag_id)
     try:
         cursor = self._conn.cursor()
         query = u"DELETE FROM tags WHERE tag_id = '%s'" % tag_id
         cursor.execute(query)
         self._conn.commit()
     except Exception as e:
         raise InternalError(_("Could not delete tag id: "),
                             query=query, source=e)
     log.trace('delete_tag_id: end')
     return True
 def delete_textelement(self, id_):
     log.trace('delete_textelement: start')
     try:
         cursor = self._conn.cursor()
         query = u"DELETE FROM textelement WHERE id_= '%s'" % id_
         cursor.execute(query)
         self._conn.commit()
     except Exception as e:
         raise InternalError(_("Could not delete text element: "),
                             query=query, source=e)
     log.trace('delete_textelement: end')
     return True
    def create_validation_rule(self, database_name, schema_name, table_name,
                               column_name, description, rule,
                               param='', param2=''):
        """
        Args:
            database_name(str):
            schema_name(str):
            table_name(str):
            column_name(str):
            description(str):
            rule(str):
            param(str):
            param2(str):

        Returns:
            integer when the rule successfully gets created. None when already
            exists.
        """

        r = self.get_validation_rules(database_name, schema_name, table_name,
                                      column_name, description, rule,
                                      param, param2)
        assert len(r) <= 1
        if r:
            log.warning((_("The same validation rule already exists: ") +
                         u"{0},{1},{2},{3},{4},{5},{6},{7}"
                         .format(database_name, schema_name, table_name,
                                 column_name, description, rule,
                                 param, param2)))
            return None

        query = u"""
INSERT INTO validation_rule (id,database_name,schema_name,table_name,
                             column_name,description,rule,param,param2)
  VALUES ((SELECT coalesce(max(id),0)+1 FROM validation_rule),
          '{0}','{1}','{2}','{3}','{4}','{5}','{6}','{7}');
""".format(database_name, schema_name, table_name, column_name, description,
           rule,
           '' if param is None else "%s" % param.replace("'", "''"),
           '' if param2 is None else "%s" % param2.replace("'", "''"))

        log.trace("create_validation_rule: %s" % query.replace('\n', ''))
        id = None
        try:
            cursor = self._conn.cursor()
            cursor.execute(query)
            cursor.execute("SELECT max(id) FROM validation_rule")
            id = cursor.fetchone()[0]
            self._conn.commit()
        except Exception as e:
            raise InternalError(_("Could not register validation rule: "),
                                query=query, source=e)
        return id
 def put_textelement(self, id_, text):
     log.trace('put_textelement: start')
     try:
         cursor = self._conn.cursor()
         query = (u"INSERT INTO textelement VALUES ('%s', '%s')" %
                  (id_, text if text else ''))
         cursor.execute(query)
         self._conn.commit()
     except Exception as e:
         raise InternalError(_("Could not register text element: "),
                             query=query, source=e)
     log.trace('put_textelement: end')
     return True
 def get_textelements(self, id_):
     log.trace('get_textelements: start')
     texts = []
     try:
         cursor = self._conn.cursor()
         query = u"SELECT text_ FROM textelement WHERE id_= '%s'" % id_
         for r in cursor.execute(query):
             texts.append(r[0])
     except Exception as e:
         raise InternalError(_("Could not get text element: "),
                             query=query, source=e)
     log.trace('get_textelements: end')
     return texts
    def delete_files(self, objtype, objid):
        """Remove file names associated to the object.

        Args:
            objtype(str): object type ['tag','schema','table']
            objid(str): object identifier

        Returns:
            bool: True if succeeded.
        """
        if objtype not in ['tag', 'schema', 'table']:
            raise InternalError(_('invalid object type: %s') % objtype)

        id_ = u'%s:%s' % (objtype, objid)
        return self.delete_textelement(id_)
    def get_files(self, objtype, objid):
        """Get file names assigned to the object.

        Args:
            objtype(str): object type ['tag','schema','table']
            objid(str): object identifier

        Returns:
            list: a list of file names.
        """
        if objtype not in ['tag', 'schema', 'table']:
            raise InternalError(_('invalid object type: %s') % objtype)

        id_ = u'%s:%s' % (objtype, objid)
        return self.get_textelements(id_)
    def update_validation_rule(self, id, database_name, schema_name,
                               table_name, column_name, description,
                               rule, param=None, param2=None):
        """
        Args:
            id(integer):
            database_name(str):
            schema_name(str):
            table_name(str):
            column_name(str):
            description(str):
            rule(str):
            param(str):
            param2(str):

        Returns:
            True when the rule successfully gets updated, otherwise False.
        """

        query = u"""
UPDATE validation_rule
   SET database_name = '{0}',
       schema_name = '{1}',
       table_name = '{2}',
       column_name = '{3}',
       description = '{4}',
       rule = '{5}',
       param = '{6}',
       param2 = '{7}'
 WHERE id = {8}
""".format(database_name, schema_name, table_name, column_name, description,
           rule,
           '' if param is None else "%s" % param,
           '' if param2 is None else "%s" % param2, id)

        log.trace("update_validation_rule: %s" % query.replace('\n', ''))
        rowcount = 0
        try:
            cursor = self._conn.cursor()
            cursor.execute(query)
            rowcount = cursor.rowcount
            self._conn.commit()
        except Exception as e:
            raise InternalError(_("Could not update validation rule: "),
                                query=query, source=e)
        if rowcount == 0:
            return False
        return True
 def get_bg_terms_all(self):
     """
     Returns:
         list: a list of terms in the business glossary.
     """
     query = (u"SELECT term FROM business_glossary WHERE is_latest = 1 "
              u"ORDER BY length(term) desc,term")
     try:
         cursor = self._conn.cursor()
         cursor.execute(query)
         data = []
         for r in cursor.fetchall():
             data.append(r[0])
     except Exception as e:
         raise InternalError(_("Could not get a list of business terms: "),
                             query=query, source=e)
     return data
    def add_file(self, objtype, objid, filename):
        """Assign a file name to the object.

        Args:
            objtype(str): object type ['tag','schema','table']
            objid(str): object identifier
            filename(str): a file name or a file path.

        Returns:
            bool: True if succeeded or the file already exists.
        """
        if objtype not in ['tag', 'schema', 'table']:
            raise InternalError(_('invalid object type: %s') % objtype)

        if filename in self.get_files(objtype, objid):
            return True
        id_ = u'%s:%s' % (objtype, objid)
        return self.put_textelement(id_, filename)
    def get_bg_term(self, term):
        """
        Returns:
            list: the term and its attributes if the term exists.
                  Otherwise, None.
        """
        data = None

        query = u"""
SELECT
  id,
  term,
  description_short,
  description_long,
  created_at,
  owned_by,
  categories,
  synonyms,
  related_terms,
  assigned_assets
FROM
  business_glossary
WHERE
  term = '%s'
AND
  is_latest = 1
""" % term
        try:
            cursor = self._conn.cursor()
            cursor.execute(query)
            r = cursor.fetchone()
            if r:
                data = {}
                for i, c in enumerate(r):
                    if cursor.description[i][0] in ['categories', 'synonyms',
                                                    'related_terms',
                                                    'assigned_assets']:
                        data[cursor.description[i][0]] = json.loads(c)
                    else:
                        data[cursor.description[i][0]] = c
        except Exception as e:
            raise InternalError(_("Could not get a business term: "),
                                query=query, source=e)
        return data
 def put_tag(self, tag_id, tag_label):
     log.trace('put_tag: start %s %s' % (tag_id, tag_label))
     if not tag_label:
         return False
     try:
         cursor = self._conn.cursor()
         query = (u"DELETE FROM tags WHERE tag_id = '%s' "
                  u"AND tag_label = '%s'" %
                  (tag_id, tag_label))
         cursor.execute(query)
         query = (u"INSERT INTO tags VALUES ('%s', '%s')" %
                  (tag_id, tag_label))
         cursor.execute(query)
         self._conn.commit()
     except Exception as e:
         raise InternalError(_("Could not register tag: "),
                             query=query, source=e)
     log.trace('put_tag: end')
     return True
    def get_table(self, database_name, schema_name, table_name):
        """
        Get a table record from the repository by object name.

        Args:
            database_name(str): database name
            schema_name(str):   schema name
            table_name(str):    table name

        Returns:
            a dictionary of table record. {table_record}
        """
        assert database_name and schema_name and table_name

        table = None
        log.trace('get_table: start %s.%s.%s' %
                  (database_name, schema_name, table_name))

        query = """
SELECT data
  FROM repo
 WHERE database_name = '{0}'
   AND schema_name = '{1}'
   AND table_name = '{2}'
 ORDER BY created_at DESC
 LIMIT 1
""".format(database_name, schema_name, table_name)

        log.debug("get_table: query = %s" % query)

        try:
            cursor = self._conn.cursor()
            cursor.execute(query)
            r = cursor.fetchone()
            if r:
                table = json.loads(unicode(r[0]))
        except Exception as e:
            raise InternalError(_("Could not get table data: "),
                                query=query, source=e)

        log.trace('get_table: end')
        return table
    def has_table_record(self, tab):
        """
        Check if the table record exist in the repository.

        Args:
            tab: a dictionary of table record.

        Returns:
            True if the table record exists, otherwise False.
        """
        assert (tab['database_name'] and tab['schema_name'] and
                tab['table_name'])

        log.trace("has_table_record: start %s.%s.%s" %
                  (tab['database_name'], tab['schema_name'],
                   tab['table_name']))

        query = """
SELECT COUNT(*)
  FROM repo
 WHERE database_name = '{database_name}'
   AND schema_name = '{schema_name}'
   AND table_name = '{table_name}'
   AND created_at = datetime('{timestamp}')
""".format(**tab)

        try:
            cursor = self._conn.cursor()
            log.debug("has_table_record: query = %s" % query)
            cursor.execute(query)
            r = cursor.fetchone()
            log.debug("has_table_record: r = %s" % unicode(r))
            if r[0] > 0:
                return True
        except Exception as e:
            log.trace("has_table_record: " + unicode(e))
            raise InternalError(_("Could not get table info: "),
                                query=query, source=e)

        log.trace("has_table_record: end")
        return False
    def get_validation_rules(self, database_name=None, schema_name=None,
                             table_name=None, column_name=None,
                             description=None, rule=None,
                             param=None, param2=None):
        """
        Returns:
            list: a list of tuples containing  validation rules.
        """
        query = u"SELECT id FROM validation_rule"
        cond = []
        if database_name:
            cond.append("database_name = '%s'" % database_name)
        if schema_name:
            cond.append("schema_name = '%s'" % schema_name)
        if table_name:
            cond.append("table_name = '%s'" % table_name)
        if column_name:
            cond.append("column_name = '%s'" % column_name)
        if rule:
            cond.append("rule = '%s'" % rule)
        if param is not None:
            cond.append("param = '%s'" % param.replace("'", "''"))
        if param2 is not None:
            cond.append("param2 = '%s'" % param2.replace("'", "''"))
        if len(cond) > 0:
            query = query + " WHERE (%s)" % ' AND '.join(cond)

        ids = []
        try:
            cursor = self._conn.cursor()
            cursor.execute(query)
            for r in cursor.fetchall():
                ids.append(r[0])
        except Exception as e:
            raise InternalError(_("Could not get validation rules: "),
                                query=query, source=e)

        rules = []
        for i in ids:
            rules.append(self.get_validation_rule(i))
        return rules
    def get_table_history(self, database_name, schema_name, table_name):
        """
        Get a table record history from the repository by object name,
        newest one coming first.

        Args:
            database_name(str): database name
            schema_name(str):   schema name
            table_name(str):    table name

        Returns:
            a list of dictionaries of table records. [{table record}, ...]
        """
        assert database_name and schema_name and table_name

        table_history = []

        query = """
SELECT data
  FROM repo
 WHERE database_name = '{0}'
   AND schema_name = '{1}'
   AND table_name = '{2}'
 ORDER BY created_at DESC
""".format(database_name, schema_name, table_name)

        log.trace("get_table_history: query = %s" % query)

        try:
            cursor = self._conn.cursor()
            for r in cursor.execute(query):
                table_history.append(json.loads(unicode(r[0])))
        except Exception as e:
            raise InternalError(
                _("Could not get table data with its history: "),
                query=query, source=e)

        return table_history
    def delete_validation_rule(self, id):
        """
        Args:
            id(integer):

        Returns:
            True when the rule successfully gets deleted, otherwise False.
        """

        query = u"DELETE FROM validation_rule WHERE id = %s" % id

        log.trace("delete_validation_rule: %s" % query.replace('\n', ''))
        rowcount = 0
        try:
            cursor = self._conn.cursor()
            cursor.execute(query)
            rowcount = cursor.rowcount
            self._conn.commit()
        except Exception as e:
            raise InternalError(_("Could not delete validation rule: "),
                                query=query, source=e)
        if rowcount == 0:
            return False
        return True
    def get_schemas(self):
        """Get a list of database name and number of tables in each schema.

        Returns:
            list: a list of lists: [[dbname,schemaname,num of tables], ...]
        """
        log.trace("get_schemas: start")

        query = """
SELECT database_name,
       schema_name,
       COUNT(DISTINCT table_name)
  FROM repo
 GROUP BY
       database_name,
       schema_name
 ORDER BY
       database_name,
       schema_name
"""

        schemas = []
        try:
            cursor = self._conn.cursor()
            log.debug("get_schemas: query = %s" % query)
            cursor.execute(query)
            for r in cursor.fetchall():
                r2 = [_s2u(x) for x in r]
                schemas.append(r2)
        except Exception as e:
            log.trace("get_schemas: " + unicode(e))
            raise InternalError(_("Could not get schema names: "),
                                query=query, source=e)

        log.trace("get_schemas: end")
        return schemas
Example #28
0
    def run(self, schema_name=None, table_name=None,
            skip_record_validation=False, validation_rules=None):
        assert schema_name and table_name

        # column profiling requires table profiling
        if self.skip_table_profiling:
            self.skip_column_profiling = True

        log.info(_("Profiling %s.%s: start") % (schema_name, table_name))
        if isinstance(validation_rules, list):
            log.info(_("%d validation rule(s)") % len(validation_rules))

        # table meta
        tablemeta = TableMeta(self.dbname, schema_name, table_name)
        tablemeta.timestamp = datetime.now()
        tablemeta.column_names = self.get_column_names(schema_name, table_name)
        # column meta
        columnmeta = {}
        for col in tablemeta.column_names:
            columnmeta[col] = TableColumnMeta(schema_name, table_name, col)

        log.info(_("Data types: start"))
        data_types = self.get_column_datatypes(schema_name, table_name)
        if len(data_types) == 0:
            log.error(_("Could not get data types."))
            raise InternalError(_("Could not get column data types at all."))
        for col in tablemeta.column_names:
            columnmeta[col].datatype = data_types[col]
        log.info(_("Data types: end"))

        if self.skip_table_profiling:
            log.info(_("Skipping table profiling."))
        elif self.profile_row_count_enabled is True:
            log.info(_("Row count: start"))
            tablemeta.row_count = self.get_row_count(schema_name, table_name)
            log.info(_("Row count: end (%s)") %
                     "{:,d}".format(tablemeta.row_count))

        if self.profile_sample_rows:
            log.info(_("Sample rows: start"))
            tablemeta.sample_rows = self.get_sample_rows(schema_name,
                                                         table_name)
            log.info(_("Sample rows: end"))
        else:
            log.info(_("Sample rows: skipping"))

        if self.skip_column_profiling:
            log.info(_("Skipping column profiling."))
        elif tablemeta.row_count > self.column_profiling_threshold:
            log.info((_("Skipping column profiling because "
                        "the table has more than %s rows") %
                      ("{:,d}".format(self.column_profiling_threshold))))
        else:
            self.run_column_profiling(schema_name, table_name, tablemeta,
                                      columnmeta)
            self._run_record_validation(schema_name, table_name, tablemeta,
                                        columnmeta, validation_rules,
                                        skip_record_validation)

        # Add all column meta data to the table meta.
        for col in tablemeta.column_names:
            tablemeta.columns.append(columnmeta[col])

        table_data = tablemeta.makedic()
        if (self.skip_column_profiling or
                tablemeta.row_count > self.column_profiling_threshold):
            log.info(_("Skipping column statistics validation "
                       "and SQL validation."))
        else:
            table_data = self.run_postscan_validation(schema_name, table_name,
                                                      tablemeta, columnmeta,
                                                      table_data,
                                                      validation_rules)
        log.info(_("Profiling %s.%s: end") % (schema_name, table_name))

        return table_data
    def put_bg_term(self, term, desc_short, desc_long, owner, categories,
                    synonyms, related_terms, assigned_assets):
        """
        Returns:
            True when the term get registered successfully. Otherwise, False.
        """
        assert isinstance(categories, list) or categories is None
        assert isinstance(synonyms, list) or synonyms is None
        assert isinstance(related_terms, list) or related_terms is None
        assert isinstance(assigned_assets, list) or assigned_assets is None

        log.trace("put_bg_term: start")

        query1 = (u"UPDATE business_glossary SET is_latest = 0 "
                  u"WHERE term = '%s' AND is_latest = 1" % term)

        query2 = u"""
INSERT INTO business_glossary (
  id,
  term,
  description_short,
  description_long,
  created_at,
  owned_by,
  categories,
  synonyms,
  related_terms,
  assigned_assets,
  is_latest
) VALUES (
  {0},
  '{1}',
  '{2}',
  '{3}',
  datetime(),
  '{4}',
  '{5}',
  '{6}',
  '{7}',
  '{8}',
  1
)
""".format(0, term, desc_short, desc_long, owner,
           json.dumps(categories),
           json.dumps(synonyms),
           json.dumps(related_terms),
           json.dumps(assigned_assets))

        log.trace(query1)
        log.trace(query2)

        query = None
        try:
            cursor = self._conn.cursor()
            query = query1
            cursor.execute(query1)

            cursor = self._conn.cursor()
            query = query2
            cursor.execute(query2)

            self._conn.commit()
        except Exception as e:
            raise InternalError(_("Could not register a business term: "),
                                query=query, source=e)

        log.trace("put_bg_term: end")
        return True
    def put_datamap_item(self, datamap):
        """Put a datamap entry into the repository

        Args:
          datamap (dict): a dictionary holding a datamap entry.

        Returns:
          bool: True on success, otherwise False.
        """
        assert (datamap.get('database_name') and datamap.get('schema_name') and
                datamap.get('table_name') and datamap.get('record_id'))

        for k in datamap:
            if isinstance(datamap[k], unicode):
                datamap[k] = datamap[k].replace("'", "''")

        query = u"""
DELETE FROM datamapping
 WHERE database_name = '%s'
   AND schema_name = '%s'
   AND table_name = '%s'
   AND column_name = '%s'
   AND record_id = '%s'
""" % (datamap['database_name'], datamap['schema_name'],
            datamap['table_name'], datamap.get('column_name', ''),
            datamap.get('record_id', ''))

        try:
            cursor = self._conn.cursor()
            cursor.execute(query)
            self._conn.commit()
            log.trace(u'Successfully removed the previous datamap entry: %s' %
                      datamap)
        except Exception as e:
            raise InternalError(_("Could not register data mapping."),
                                query=query)

        query = u"""
INSERT INTO datamapping (
  lineno,
  database_name,schema_name,table_name,column_name,
  record_id,
  source_database_name,source_schema_name,source_table_name,source_column_name,
  created_at,
  data)
 VALUES (
  %d,
  '%s', '%s', '%s', '%s',
  '%s',
  '%s', '%s', '%s', '%s',
  datetime('%s'), '%s')
""" % (datamap['lineno'], datamap['database_name'], datamap['schema_name'],
            datamap['table_name'], datamap.get('column_name', ''),
            datamap.get('record_id', ''),
            datamap.get('source_database_name', ''),
            datamap.get('source_schema_name', ''),
            datamap.get('source_table_name', ''),
            datamap.get('source_column_name', ''),
            datetime.now().isoformat(),
            json.dumps(datamap))

        try:
            cursor = self._conn.cursor()
            cursor.execute(query)
            self._conn.commit()
            log.trace(u'Successfully stored the datamap entry: %s' % datamap)
        except Exception as e:
            raise InternalError(_("Could not register data mapping."),
                                query=query)

        return True