Ejemplo n.º 1
0
    def is_alive(self):
        """Report whether a PostgreSQL connection can currently be opened.

        A connection is opened with ``self._connection_string`` and closed
        again immediately; no statement is executed.

        Returns:
            bool: True when connecting succeeded, False when it raised an
            exception (the failure is logged before returning).
        """
        try:
            # Close explicitly: using a psycopg2 connection as a context
            # manager only ends the transaction, it does not close the session.
            pg.connect(self._connection_string).close()
            return True

        except Exception:
            # ``Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt/SystemExit are not reported as a dead database.
            # ``module_name`` matches the keyword used by the other
            # LoggerManager call sites.
            logger = logger_manager.LoggerManager(logger_name='opendata-anonymizer', module_name='opendata')
            # NOTE(review): the logged message contains the full connection
            # string, which may include credentials.
            logger.log_error('postgres_connection_failed',
                             "Failed to connect to postgres with connection string {0}. ERROR: {1}".format(
                                 self._connection_string, traceback.format_exc().replace('\n', '')))
            return False
Ejemplo n.º 2
0
    def __init__(self, opendata_config_module_path):
        """Load the configuration module and copy its required settings onto ``self``.

        The path is made absolute, loaded through ``self._load_module`` and
        then each required attribute is copied from the loaded module to an
        attribute of the same name on this object.

        Args:
            opendata_config_module_path: path to the configuration module.

        Raises:
            Exception: whatever reading a required attribute raises; the
                failure is logged and the exception is re-raised unchanged.
        """
        logger = logger_manager.LoggerManager(logger_name='opendata-anonymizer', module_name='opendata')

        absolute_config_path = os.path.abspath(opendata_config_module_path)
        config = self._load_module(absolute_config_path, logger)

        # Copied in this order; the first attribute that cannot be read aborts construction.
        required_attributes = (
            'anonymizer',
            'mongo_db',
            'postgres',
            'hiding_rules',
            'substitution_rules',
            'field_data_file',
        )

        for attribute_name in required_attributes:
            try:
                setattr(self, attribute_name, getattr(config, attribute_name))
            except Exception:
                logger.log_error('anonymizer_configuration_reading_failed',
                                 "Config file at {0} doesn't have attribute `{1}`.".format(
                                     absolute_config_path, attribute_name))
                raise
Ejemplo n.º 3
0
    def __init__(self, config, previous_run_manager=None):
        """Set up the MongoDB client and determine the last processed timestamp.

        Args:
            config: configuration object; its ``mongo_db`` mapping supplies
                ``user``, ``password``, ``host_address``, ``port`` and
                ``auth_db`` for the connection string.
            previous_run_manager: optional collaborator; when it is falsy a
                ``PreviousRunManager(config)`` is created instead.
        """
        global ATEXIT_SINGLETON

        self._logger = logger_manager.LoggerManager(logger_name='opendata-anonymizer', module_name='opendata')

        # Publish this instance through the module-level ATEXIT_SINGLETON.
        ATEXIT_SINGLETON = self

        self._config = config

        # NOTE(review): user and password are inserted verbatim; values with
        # reserved URI characters (e.g. '@', ':', '/') would need
        # percent-escaping - confirm how the settings are expected to be written.
        mongo_settings = config.mongo_db
        self.mongo_connection_string = "mongodb://{user}:{password}@{host}:{port}/{database}".format(
            user=mongo_settings['user'],
            password=mongo_settings['password'],
            host=mongo_settings['host_address'],
            port=mongo_settings['port'],
            database=mongo_settings['auth_db'],
        )
        self._mongo_client = MongoClient(self.mongo_connection_string)

        self._previous_run_manager = previous_run_manager or PreviousRunManager(config)
        self.last_processed_timestamp = self._get_last_processed_timestamp()
Ejemplo n.º 4
0
    def _ensure_table(self, schema):
        """Try to create ``self._table_name``, tolerating a database error from CREATE TABLE.

        The table always gets an ``id SERIAL PRIMARY KEY`` column, followed
        by one column per entry of ``schema``.

        Args:
            schema: iterable of string sequences (e.g. ``(name, type)``);
                each entry is space-joined into one column definition.

        Raises:
            Exception: any failure other than a database error raised by the
                CREATE TABLE statement itself is logged and re-raised.
        """
        try:
            column_schema = ', '.join(' '.join(column_name_and_type) for column_name_and_type in schema)
            if column_schema:
                column_schema = ', ' + column_schema

            connection = pg.connect(self._connection_string)
            try:
                # ``with connection`` commits or rolls back the transaction;
                # the ``finally`` below closes the session, which the
                # context manager alone does not do.
                with connection:
                    cursor = connection.cursor()
                    try:
                        cursor.execute("CREATE TABLE {table_name} (id SERIAL PRIMARY KEY{column_schema})".format(
                            **{'table_name': self._table_name, 'column_schema': column_schema}))
                    except pg.Error:
                        # Best effort: most likely the table already exists.
                        # Only database errors are ignored, so programming
                        # errors and interrupts still surface.
                        pass
            finally:
                connection.close()
        except Exception:
            # ``module_name`` matches the keyword used by the other
            # LoggerManager call sites.
            logger = logger_manager.LoggerManager(logger_name='opendata-anonymizer', module_name='opendata')
            # NOTE(review): the logged message contains the full connection
            # string, which may include credentials.
            logger.log_error('failed_ensuring_postgres_table',
                             "Failed to ensure postgres table {0} existence with connection {1}. ERROR: {2}".format(
                                 self._table_name, self._connection_string, traceback.format_exc().replace('\n', '')
                             ))
            raise
Ejemplo n.º 5
0
    def add_data(self, data):
        """Insert a batch of records into ``self._table_name`` with one INSERT statement.

        Each record gets a ``requestInDate`` key (``YYYY-MM-DD``) derived
        from its ``requestInTs`` value before insertion; the dicts passed in
        are modified in place. Nothing happens when ``data`` is empty.

        Args:
            data: list of dicts, each containing ``requestInTs`` and every
                key listed in ``self._field_order``.

        Raises:
            Exception: any failure is logged and re-raised.
        """
        if not data:
            return

        try:
            # Inject requestInDate for fast daily queries.
            # NOTE(review): ``requestInTs`` is divided by 1000, so it is
            # presumably in milliseconds; ``fromtimestamp`` converts using
            # the local timezone of the host - confirm that is intended.
            for datum in data:
                datum['requestInDate'] = datetime.fromtimestamp(datum['requestInTs'] / 1000).strftime('%Y-%m-%d')

            # Flatten each record into a row ordered like the column list.
            rows = [[record[field_name] for field_name in self._field_order] for record in data]

            connection = pg.connect(self._connection_string)
            try:
                # ``with connection`` commits or rolls back the transaction;
                # the ``finally`` below closes the session, which the
                # context manager alone does not do.
                with connection:
                    cursor = connection.cursor()
                    # ``mogrify`` binds and escapes the values of each row,
                    # so the VALUES list can be sent as a single statement.
                    insertion_str = ','.join(
                        cursor.mogrify("({0})".format(','.join(['%s'] * len(row))), row).decode('utf8')
                        for row in rows)
                    cursor.execute('INSERT INTO {table_name} ({fields}) VALUES '.format(
                        **{'table_name': self._table_name, 'fields': ','.join(self._field_order)}) + insertion_str)
            finally:
                connection.close()
        except Exception:
            # ``module_name`` matches the keyword used by the other
            # LoggerManager call sites.
            logger = logger_manager.LoggerManager(logger_name='opendata-anonymizer', module_name='opendata')
            logger.log_error('log_insertion_failed',
                             "Failed to insert logs to postgres. ERROR: {0}".format(
                                 traceback.format_exc().replace('\n', '')
                             ))
            raise
Ejemplo n.º 6
0
    def _ensure_privileges(self):
        """Grant read access on ``self._table_name`` to every user in ``self._readonly_users``.

        For each user, ``USAGE`` on schema ``public`` and ``SELECT`` on the
        table are granted in a transaction of their own. A database error
        for one user is ignored (best effort) and does not affect the grants
        of the other users.

        Raises:
            Exception: any failure other than a database error from the
                GRANT statements is logged and re-raised.
        """
        try:
            connection = pg.connect(self._connection_string)
            try:
                cursor = connection.cursor()

                for readonly_user in self._readonly_users:
                    try:
                        # One transaction per user: a failed GRANT aborts the
                        # current transaction, so sharing one would silently
                        # discard the grants of every other user as well.
                        with connection:
                            cursor.execute("GRANT USAGE ON SCHEMA public TO {readonly_user};".format(**{
                                'readonly_user': readonly_user
                            }))
                            cursor.execute("GRANT SELECT ON {table_name} TO {readonly_user};".format(**{
                                'table_name': self._table_name,
                                'readonly_user': readonly_user
                            }))
                    except pg.Error:
                        # Best effort: skip users whose grants fail. Only
                        # database errors are ignored.
                        pass
            finally:
                # The connection context manager does not close the session.
                connection.close()

        except Exception:
            # ``module_name`` matches the keyword used by the other
            # LoggerManager call sites.
            logger = logger_manager.LoggerManager(logger_name='opendata-anonymizer', module_name='opendata')
            # NOTE(review): 'readolny' in the event key looks like a typo of
            # 'readonly'; left unchanged because log consumers may match on it.
            # The message now includes the traceback, which was passed to
            # ``format`` before but had no placeholder.
            logger.log_error('ensuring_readolny_users_permissions_failed',
                             "Failed to ensure readonly users' permissions for postgres table {0} existence with connection {1}. ERROR: {2}".format(
                                 self._table_name, self._connection_string, traceback.format_exc().replace('\n', '')
                             ))
            raise
Ejemplo n.º 7
0
from anonymizer.utils import logger_manager
import anonymizer.settings as settings

# The first command-line argument, when present, is parsed as an integer
# limit; otherwise the limit is 0.
anonymization_limit = int(argv[1]) if len(argv) > 1 else 0

# Directory containing this script.
ROOT_DIR = os.path.abspath(os.path.dirname(__file__))

# Configure the shared logger from the ``log`` section of the settings module.
logger_manager.setup_logger(
    logger_name='opendata-anonymizer',
    log_level=settings.log['level'],
    log_path=settings.log['path'],
    max_file_size=settings.log['max_file_size'],
    backup_count=settings.log['backup_count'],
)

logger = logger_manager.LoggerManager(
    logger_name='opendata-anonymizer',
    module_name='opendata',
    heartbeat_dir=settings.heartbeat['dir'],
)

start_time = datetime.now()

# Announce the session start in the log and in the heartbeat.
logger.log_info('anonymization_session_started', 'Started anonymization session')
logger.log_heartbeat('Started anonymization session', 'opendata-anonymizer', 'SUCCEEDED')

# The configuration is read from settings.py next to this script.
opendata_config_path = os.path.join(ROOT_DIR, 'settings.py')

try:
    try:
        config = AnonymizerConfig(opendata_config_path)
    except Exception:
        logger.log_heartbeat('Failed reading settings.py', 'opendata-anonymizer', 'FAILED')
        raise