Ejemplo n.º 1
0
    def create_filter_list(self, filter_list):
        """
        Create a list of filter expressions.

        Every filter is rendered with ``create_on_string``.  When the field a
        filter is based on is declared with mode ``nullable`` in the schema
        returned by ``fields_for(self.tablename)``, the filter's qualifier is
        upper-cased, the expression is rendered again and the result is
        wrapped in an ``exists (...)`` sub-query.

        :param filter_list: a list of dictionaries representing the filter
            expression to be used as part of an SQL WHERE clause.  NOTE: the
            ``qualifier`` value of a dictionary based on a nullable field is
            upper-cased in place.

        :return: a list of strings representing the expressions, in the same
            order as ``filter_list``.  Wrapped expressions contain the literal
            tokens ``:idataset`` and ``:join_tablename``; this method does not
            substitute them.
        """
        # index the table's field definitions by field name
        field_definitions = {
            field_def.get('name'): field_def
            for field_def in fields_for(self.tablename)
        }

        # loop-invariant template used for filters on nullable fields
        nullable_template = (
            ' exists (SELECT * FROM `:idataset.observation` AS record2 '
            'WHERE :join_tablename.observation_id = record2.observation_id '
            'AND {conditional})')

        string_list = []
        for item in filter_list:
            string, field = create_on_string(item)

            # A field that is missing from the schema, or is declared without
            # a mode, is handled like a non-nullable field instead of failing
            # with AttributeError on None.
            field_definition = field_definitions.get(field) or {}
            field_mode = (field_definition.get('mode') or '').lower()

            # if based on a nullable field, make sure to use the exists function
            if field_mode == 'nullable':
                # the expression is re-rendered so it reflects the upper-cased
                # qualifier
                item['qualifier'] = item['qualifier'].upper()
                string, _ = create_on_string(item)
                string = nullable_template.format(conditional=string)

            string_list.append(string)

        return string_list
Ejemplo n.º 2
0
    def do(self):
        """
        Run de-identification for this table.

        The rules are refreshed with ``update_rules`` and applied through a
        ``Deid`` instance, which yields a list of column specifications.  The
        specifications are turned into SQL statements:

        * if no specification contains an ``'on'`` key, a single statement is
          built from all of them;
        * otherwise (a "meta" table) the specifications carrying an ``'on'``
          filter are grouped by their rendered filter string, one statement is
          built per group, a statement from ``gather_unfiltered_records`` is
          appended, and DML statements are collected from
          ``gather_dml_queries``.  The ``:idataset`` and ``:join_tablename``
          tokens in these statements are replaced with ``self.idataset`` and
          ``self.tablename``.

        What happens next depends on ``self.action``: with ``'debug'`` only
        ``self.debug`` is called; otherwise the SQL is written to
        ``<logpath>/<idataset>/<tablename>.sql`` and, when requested, the
        statements are submitted (``'submit'``) and/or simulated
        (``'simulate'``).
        """
        self.update_rules()
        d = Deid(pipeline=self.pipeline, rules=self.deid_rules, parent=self)

        p = d.apply(self.table_info, self.store, self.get_tablename())

        # a table is a meta table as soon as one specification has an 'on' filter
        is_meta = any('on' in _item for _item in p)
        LOGGER.info('table:\t%s\t\tis a meta table:\t%s', self.get_tablename(),
                    is_meta)

        dml_sql = []
        if not is_meta:
            sql = [self.to_sql(p)]
        else:
            #
            # Processing meta tables
            relational_cols = [col for col in p if 'on' not in col]
            meta_cols = [col for col in p if 'on' in col]

            # group the meta specifications by their rendered filter string;
            # the last 'on' definition seen for a filter string is kept
            _map = {}
            for col in meta_cols:
                on_string, _ = create_on_string(col['on'])
                entry = _map.setdefault(on_string, {
                    'specification': [],
                    'on': {}
                })
                entry['specification'].append(col)
                entry['on'] = col['on']

            # one statement per filter, restricted to the rows matching it
            sql = []
            filters = []
            for filter_id, entry in _map.items():
                filters.append(entry['on'])
                sql.append(
                    self.to_sql(entry['specification'] + relational_cols) +
                    ' AND ' + filter_id)

            # final statement built from the relational columns and every filter
            sql.append(self.gather_unfiltered_records(relational_cols, filters))

            # create additional SQL cleaning statements
            dml_sql = self.gather_dml_queries(p)

            # fill in the dataset and table name tokens of the statements
            sql = [
                segment.replace(':idataset',
                                self.idataset).replace(':join_tablename',
                                                       self.tablename)
                for segment in sql
            ]

        if 'debug' in self.action:
            self.debug(p)
        else:
            # write SQL to file
            sql_filepath = os.path.join(self.logpath, self.idataset,
                                        self.tablename + '.sql')
            with open(sql_filepath, 'w') as sql_file:
                sql_file.write(
                    "\n\nAppend these results to previous results\n\n".join(
                        sql))

                if dml_sql:
                    sql_file.write(
                        '\n\nDML SQL statements to execute on de-identified table data\n\n'
                    )
                    sql_file.write(
                        '\n\n  ----------------------------------\n\n'.join(
                            dml_sql))

            if 'submit' in self.action:
                # the second argument is True for the first statement only
                # NOTE(review): presumably the first statement creates the
                # table and the following ones append to it - confirm against
                # submit()
                for index, statement in enumerate(sql):
                    self.submit(statement, not index)

                for statement in dml_sql:
                    self.submit(statement, False, dml=True)

            if 'simulate' in self.action:
                #
                # Make this threaded if there is a submit action that is associated with it
                self.simulate(p)

        LOGGER.info('FINISHED de-identification on table:\t%s', self.tablename)