예제 #1
0
    def test_new_credentials_have_pgaudit_configuration(self):
        """New private database credentials should create a role that has
        pgaudit logging enabled for all statement classes, with catalog
        logging turned off.
        """
        ensure_databases_configured().handle()

        user = UserFactory(email="*****@*****.**")
        st = SourceTableFactory(dataset=MasterDataSetFactory.create(
            user_access_type=UserAccessType.REQUIRES_AUTHENTICATION))

        source_tables = source_tables_for_user(user)
        db_role_schema_suffix = db_role_schema_suffix_for_user(user)
        user_creds_to_drop = new_private_database_credentials(
            db_role_schema_suffix,
            source_tables,
            postgres_user(user.email),
            user,
            valid_for=datetime.timedelta(days=1),
        )

        connections[st.database.memorable_name].cursor().execute("COMMIT")

        rolename = user_creds_to_drop[0]["db_user"]

        with connections[st.database.memorable_name].cursor() as cursor:
            # Parameterised query instead of f-string interpolation: the role
            # name is generated by our own code, but interpolating identifiers
            # into SQL by hand is an injection-shaped habit worth avoiding.
            cursor.execute(
                "SELECT rolname, rolconfig FROM pg_roles WHERE rolname = %s",
                [rolename],
            )
            results = cursor.fetchall()
            assert "pgaudit.log=ALL" in results[0][1]
            assert "pgaudit.log_catalog=off" in results[0][1]
예제 #2
0
    def test_deletes_expired_and_unused_users(self):
        """Database users whose credentials have expired (or are unused)
        are deleted, while still-valid credentials survive the cleanup.
        """
        ensure_databases_configured().handle()

        owner = UserFactory(email='*****@*****.**')
        source_table = SourceTableFactory(
            dataset=MasterDataSetFactory.create(
                user_access_type='REQUIRES_AUTHENTICATION'
            )
        )

        tables = source_tables_for_user(owner)
        schema_suffix = db_role_schema_suffix_for_user(owner)

        def make_creds(db_user, lifetime):
            # All three credential sets share the same role/schema suffix
            # and source tables; only the db user and lifetime vary.
            return new_private_database_credentials(
                schema_suffix,
                tables,
                db_user,
                owner,
                valid_for=lifetime,
            )

        user_creds_to_drop = make_creds(
            postgres_user(owner.email), datetime.timedelta(days=31)
        )
        qs_creds_to_drop = make_creds(
            postgres_user(owner.email, suffix='qs'), datetime.timedelta(seconds=0)
        )
        qs_creds_to_keep = make_creds(
            postgres_user(owner.email, suffix='qs'), datetime.timedelta(minutes=1)
        )

        connections[source_table.database.memorable_name].cursor().execute('COMMIT')

        # Make sure that `qs_creds_to_drop` has definitely expired
        time.sleep(1)

        with mock.patch('dataworkspace.apps.applications.utils.gevent.sleep'):
            delete_unused_datasets_users()

        with connections[source_table.database.memorable_name].cursor() as cursor:
            cursor.execute(
                "SELECT usename FROM pg_catalog.pg_user WHERE usename IN %s",
                [
                    (
                        user_creds_to_drop[0]['db_user'],
                        qs_creds_to_drop[0]['db_user'],
                        qs_creds_to_keep[0]['db_user'],
                    )
                ],
            )
            assert cursor.fetchall() == [(qs_creds_to_keep[0]['db_user'],)]
def migrate_existing_application_instance_db_users(apps, _):
    """Backfill ``db_persistent_role`` on every ApplicationInstanceDbUsers
    row.

    The role/schema suffix is derived from the instance owner for TOOL
    applications and from the application template for everything else.
    """
    ApplicationInstanceDbUsers = apps.get_model("applications",
                                                "ApplicationInstanceDbUsers")
    for record in ApplicationInstanceDbUsers.objects.all():
        instance = record.application_instance
        is_tool = instance.application_template.application_type == "TOOL"
        suffix = (
            db_role_schema_suffix_for_user(instance.owner)
            if is_tool
            else db_role_schema_suffix_for_app(instance.application_template)
        )
        record.db_persistent_role = f"{USER_SCHEMA_STEM}{suffix}"
        record.save()
예제 #4
0
    def test_db_user_record(self):
        """Creating new private database credentials should also create
        exactly one new DatabaseUser record.
        """
        user_count = DatabaseUser.objects.count()

        user = factories.UserFactory()
        source_tables = source_tables_for_user(user)
        db_role_schema_suffix = db_role_schema_suffix_for_user(user)
        # NOTE(review): the raw email is passed as the db username here,
        # whereas other call sites wrap it in postgres_user(...) — confirm
        # this is intentional rather than a missed sanitisation step.
        new_private_database_credentials(
            db_role_schema_suffix,
            source_tables,
            user.email,
            user,
            valid_for=datetime.timedelta(days=31),
        )
        assert DatabaseUser.objects.count() == user_count + 1
예제 #5
0
def spawn(
    name,
    user_id,
    tag,
    application_instance_id,
    spawner_options,
):
    """Create fresh private database credentials for an application
    instance, then hand off to the spawner registered under ``name``.

    TOOL instances derive their credentials from the owning user; all
    other application types derive them from the application template.
    """
    user = get_user_model().objects.get(pk=user_id)
    application_instance = ApplicationInstance.objects.get(
        id=application_instance_id)

    template = application_instance.application_template
    if template.application_type == 'TOOL':
        source_tables = source_tables_for_user(user)
        db_role_schema_suffix = db_role_schema_suffix_for_user(user)
        db_user = postgres_user(user.email)
    else:
        source_tables = source_tables_for_app(template)
        db_role_schema_suffix = db_role_schema_suffix_for_app(template)
        db_user = postgres_user(template.host_basename)

    credentials = new_private_database_credentials(
        db_role_schema_suffix,
        source_tables,
        db_user,
        user,
        valid_for=datetime.timedelta(days=31),
    )

    if template.application_type == 'TOOL':
        # For AppStream to access credentials
        write_credentials_to_bucket(user, credentials)

    app_schema = f'{USER_SCHEMA_STEM}{db_role_schema_suffix}'

    get_spawner(name).spawn(
        user,
        tag,
        application_instance,
        spawner_options,
        credentials,
        app_schema,
    )
    def handle(self, *args, **options):
        """Create fresh private database credentials for every user and
        upload them to the notebooks S3 bucket.
        """
        self.stdout.write('store_db_creds_in_s3 started')

        bucket = settings.NOTEBOOKS_BUCKET
        self.stdout.write('Will store credentials in bucket {}'.format(bucket))

        ordered_users = get_user_model().objects.order_by('last_name', 'first_name', 'id')
        for account in ordered_users:
            self.stdout.write(f'Creating credentials for {account.email}')

            tables = source_tables_for_user(account)
            suffix = db_role_schema_suffix_for_user(account)
            creds = new_private_database_credentials(
                suffix, tables, postgres_user(account.email)
            )
            write_credentials_to_bucket(account, creds)
            self.stdout.write(str(creds))

        self.stdout.write(self.style.SUCCESS('store_db_creds_in_s3 finished'))
예제 #7
0
    def handle(self, *args, **options):
        """Create fresh 31-day private database credentials for every user
        and upload them to the notebooks S3 bucket.
        """
        self.stdout.write("store_db_creds_in_s3 started")

        bucket = settings.NOTEBOOKS_BUCKET
        self.stdout.write("Will store credentials in bucket {}".format(bucket))

        ordered_users = get_user_model().objects.order_by(
            "last_name", "first_name", "id"
        )
        for account in ordered_users:
            self.stdout.write(f"Creating credentials for {account.email}")

            tables = source_tables_for_user(account)
            suffix = db_role_schema_suffix_for_user(account)
            creds = new_private_database_credentials(
                suffix,
                tables,
                postgres_user(account.email),
                account,
                valid_for=datetime.timedelta(days=31),
            )
            write_credentials_to_bucket(account, creds)
            self.stdout.write(str(creds))

        self.stdout.write(self.style.SUCCESS("store_db_creds_in_s3 finished"))
예제 #8
0
def application_api_PUT(request, public_host):
    """Create and start spawning an application instance for ``public_host``.

    Returns 409 if a visible instance already exists for the host, 400 if
    the host does not map to an application template, otherwise 200 with
    the (possibly concurrently-created) instance's API representation.
    """
    # A transaction is unnecessary: the single_running_or_spawning_integrity
    # key prevents duplicate spawning/running applications at the same
    # public host
    try:
        application_instance = get_api_visible_application_instance_by_public_host(
            public_host)
    except ApplicationInstance.DoesNotExist:
        pass
    else:
        return JsonResponse({'message': 'Application instance already exists'},
                            status=409)

    try:
        (
            application_template,
            tag,
            _,
            commit_id,
        ) = application_template_tag_user_commit_from_host(public_host)
    except ApplicationTemplate.DoesNotExist:
        return JsonResponse({'message': 'Application template does not exist'},
                            status=400)

    app_type = application_template.application_type

    # TOOL instances get credentials derived from the requesting user;
    # other application types get credentials derived from the template.
    (source_tables, db_role_schema_suffix, db_user) = ((
        source_tables_for_user(request.user),
        db_role_schema_suffix_for_user(request.user),
        postgres_user(request.user.email),
    ) if app_type == 'TOOL' else (
        source_tables_for_app(application_template),
        db_role_schema_suffix_for_app(application_template),
        postgres_user(application_template.host_basename),
    ))

    credentials = new_private_database_credentials(db_role_schema_suffix,
                                                   source_tables, db_user)

    if app_type == 'TOOL':
        # For AppStream to access credentials
        write_credentials_to_bucket(request.user, credentials)

    # Optional memory/cpu override passed as e.g. ?__memory_cpu=1024_256;
    # absence means the spawner's defaults are used.
    try:
        memory, cpu = request.GET['__memory_cpu'].split('_')
    except KeyError:
        memory = None
        cpu = None

    spawner_options = json.dumps(application_options(application_template))

    try:
        application_instance = ApplicationInstance.objects.create(
            owner=request.user,
            application_template=application_template,
            spawner=application_template.spawner,
            spawner_application_template_options=spawner_options,
            spawner_application_instance_id=json.dumps({}),
            public_host=public_host,
            state='SPAWNING',
            single_running_or_spawning_integrity=public_host,
            cpu=cpu,
            memory=memory,
            commit_id=commit_id,
        )
    except IntegrityError:
        # A concurrent request created the instance between our existence
        # check above and this create: return that instance instead.
        application_instance = get_api_visible_application_instance_by_public_host(
            public_host)
    else:
        # The database users are stored so when the database users are cleaned up,
        # we know _not_ to delete any users used by running or spawning apps
        for creds in credentials:
            ApplicationInstanceDbUsers.objects.create(
                application_instance=application_instance,
                db_id=creds['db_id'],
                db_username=creds['db_user'],
            )

        app_schema = f'{USER_SCHEMA_STEM}{db_role_schema_suffix}'

        # Spawning is slow, so it happens asynchronously via a task queue.
        spawn.delay(
            application_template.spawner,
            request.user.email,
            str(request.user.profile.sso_id),
            tag,
            application_instance.id,
            spawner_options,
            credentials,
            app_schema,
        )

    return JsonResponse(api_application_dict(application_instance), status=200)
예제 #9
0
def get_user_explorer_connection_settings(user, alias):
    """Return connection settings for ``user`` against the Explorer
    connection named ``alias`` (or the default connection if falsy),
    creating and caching fresh database credentials when needed.

    Raises InvalidExplorerConnectionException for an unknown alias, and
    RuntimeError if the created credentials do not cover ``alias``.
    """
    from dataworkspace.apps.explorer.connections import (  # pylint: disable=import-outside-toplevel
        connections,
    )

    if not alias:
        alias = settings.EXPLORER_DEFAULT_CONNECTION

    if alias not in connections:
        raise InvalidExplorerConnectionException(
            'Attempted to access connection %s, but that is not a registered Explorer connection.'
            % alias
        )

    def get_available_user_connections(_user_credentials):
        # Index the per-database credential dicts by their memorable name.
        return {data['memorable_name']: data for data in _user_credentials}

    # Lock per-user so concurrent requests don't both create credentials.
    with cache.lock(
        f'get-explorer-connection-{user.profile.sso_id}',
        blocking_timeout=30,
        timeout=180,
    ):
        cache_key = user_cached_credentials_key(user)
        user_credentials = cache.get(cache_key, None)

        # Make sure that the connection settings are still valid
        if user_credentials:
            db_aliases_to_credentials = get_available_user_connections(user_credentials)
            try:
                with user_explorer_connection(db_aliases_to_credentials[alias]):
                    pass
            except psycopg2.OperationalError:
                # Cached credentials no longer work (e.g. expired db user):
                # fall through to creating fresh ones.
                logger.exception(
                    "Unable to connect using existing cached explorer credentials for %s",
                    user,
                )
                user_credentials = None

        if not user_credentials:
            db_role_schema_suffix = db_role_schema_suffix_for_user(user)
            source_tables = source_tables_for_user(user)
            db_user = postgres_user(user.email, suffix='explorer')
            duration = timedelta(hours=24)
            # Cache for slightly less than the credential lifetime so cached
            # entries expire before the underlying database user does.
            cache_duration = (duration - timedelta(minutes=15)).total_seconds()

            user_credentials = new_private_database_credentials(
                db_role_schema_suffix,
                source_tables,
                db_user,
                valid_for=duration,
                force_create_for_databases=Database.objects.filter(
                    memorable_name__in=connections.keys()
                ).all(),
            )
            cache.set(cache_key, user_credentials, timeout=cache_duration)

    db_aliases_to_credentials = get_available_user_connections(user_credentials)
    if alias not in db_aliases_to_credentials:
        raise RuntimeError(
            f"The credentials for {user.email} did not include any for the `{alias}` database."
        )

    return db_aliases_to_credentials[alias]
예제 #10
0
    def form_valid(self, form):
        """Trigger a data-flow DAG that imports the uploaded CSV into the
        chosen schema/table, first creating database credentials when the
        target schema is new for this user.

        Redirects to the validating page on success, or to the failure
        page (with the filename) if triggering the DAG fails.
        """
        # call new_private_database_credentials to make sure everything is set
        config = settings.DATAFLOW_API_CONFIG
        user = self.request.user
        source_tables = source_tables_for_user(user)
        db_role_schema_suffix = db_role_schema_suffix_for_user(user)
        db_user = postgres_user(user.email)
        duration = timedelta(hours=24)

        cleaned = form.cleaned_data
        # Computed once: this check was previously duplicated below when
        # deciding whether to set conf["db_role"].
        schema_is_new = cleaned["schema"] not in self.all_schemas
        if schema_is_new:
            new_private_database_credentials(
                db_role_schema_suffix,
                source_tables,
                db_user,
                user,
                duration,
            )

        file_info = get_s3_csv_file_info(cleaned["path"])

        logger.info(file_info)

        # Map each user-chosen column type onto its Postgres type,
        # defaulting to TEXT for anything unmapped.
        for field in file_info["column_definitions"]:
            field["data_type"] = SCHEMA_POSTGRES_DATA_TYPE_MAP.get(
                cleaned[field["column_name"]], PostgresDataTypes.TEXT
            )

        import_path = settings.DATAFLOW_IMPORTS_BUCKET_ROOT + "/" + cleaned["path"]
        logger.debug("import_path %s", import_path)

        copy_file_to_uploads_bucket(cleaned["path"], import_path)

        filename = cleaned["path"].split("/")[-1]
        logger.debug(filename)
        conf = {
            "file_path": import_path,
            "schema_name": cleaned["schema"],
            "table_name": cleaned["table_name"],
            "column_definitions": file_info["column_definitions"],
            "encoding": file_info["encoding"],
        }
        logger.debug(conf)
        if schema_is_new:
            conf["db_role"] = cleaned["schema"]

        try:
            response = trigger_dataflow_dag(
                conf,
                config["DATAFLOW_S3_IMPORT_DAG"],
                f'{cleaned["schema"]}-{cleaned["table_name"]}-{datetime.now().isoformat()}',
            )
        except HTTPError:
            # Fixed: the failure redirect previously contained a garbled
            # literal placeholder instead of the uploaded file's name.
            return HttpResponseRedirect(
                f'{reverse("your-files:create-table-failed")}?'
                f'{urlencode({"filename": filename})}'
            )

        params = {
            "filename": filename,
            "schema": cleaned["schema"],
            "table_name": cleaned["table_name"],
            "execution_date": response["execution_date"],
        }
        return HttpResponseRedirect(
            f'{reverse("your-files:create-table-validating")}?{urlencode(params)}'
        )