class TestAsyncIOGridFSBucket(AsyncIOTestCase):
    """Tests for AsyncIOMotorGridFSBucket run against the test case's database."""

    @asyncio.coroutine
    def _reset(self):
        """Drop the "fs" and "alt" files/chunks collections."""
        for name in ("fs.files", "fs.chunks", "alt.files", "alt.chunks"):
            yield from self.db.drop_collection(name)

    def setUp(self):
        """Run the base setup, drop the GridFS collections and create the bucket."""
        super(TestAsyncIOGridFSBucket, self).setUp()
        run = self.loop.run_until_complete
        run(self._reset())
        self.bucket = AsyncIOMotorGridFSBucket(self.db)

    def tearDown(self):
        """Drop the GridFS collections again, then run the base teardown."""
        run = self.loop.run_until_complete
        run(self._reset())
        super(TestAsyncIOGridFSBucket, self).tearDown()

    @asyncio_test
    def test_basic(self):
        """Upload a file, read it back through both download APIs, then delete it."""
        payload = b"hello world"
        bucket = self.bucket

        file_id = yield from bucket.upload_from_stream("test_filename",
                                                       payload)
        grid_out = yield from bucket.open_download_stream(file_id)
        content = yield from grid_out.read()
        self.assertEqual(payload, content)
        # One files document and one chunk are expected for this upload
        files_count = yield from self.db.fs.files.count()
        self.assertEqual(1, files_count)
        chunks_count = yield from self.db.fs.chunks.count()
        self.assertEqual(1, chunks_count)

        sink = BytesIO()
        yield from bucket.download_to_stream(grid_out._id, sink)
        self.assertEqual(payload, sink.getvalue())

        yield from bucket.delete(file_id)
        # Opening a deleted file must fail, and both collections must be empty
        with self.assertRaises(NoFile):
            yield from bucket.open_download_stream(file_id)
        files_count = yield from self.db.fs.files.count()
        self.assertEqual(0, files_count)
        chunks_count = yield from self.db.fs.chunks.count()
        self.assertEqual(0, chunks_count)
Example #2
0
    async def test_iter_gridfs(self):
        """Check ``async for`` over GridFS find cursors and over a file's chunks."""
        bucket = AsyncIOMotorGridFSBucket(self.db)

        async def wipe():
            # Empty both backing collections so every round starts from scratch.
            for collection in (self.db.fs.files, self.db.fs.chunks):
                await collection.delete_many({})

        await wipe()

        # A query that matches nothing must never enter the loop body.
        async for _ in bucket.find({'_id': 1}):
            self.fail()

        payload = b'data'

        for expected in (1, 2, 10):
            for _ in range(expected):
                async with bucket.open_upload_stream(
                        filename='filename') as stream:
                    await stream.write(payload)

            # A small batch size forces extra batches, exercising iteration
            # across them.
            seen = 0
            matches = bucket.find({'filename': 'filename'}).batch_size(3)
            async for _ in matches:
                seen += 1

            self.assertEqual(seen, expected)
            await wipe()

        # With one-byte chunks the file must yield one chunk per payload byte.
        await bucket.upload_from_stream_with_id(
            1, 'filename', source=payload, chunk_size_bytes=1)
        cursor = bucket.find({'_id': 1})
        await cursor.fetch_next
        grid_out = cursor.next_object()
        pieces = [piece async for piece in grid_out]

        self.assertEqual(len(pieces), len(payload))
        self.assertEqual(b''.join(pieces), payload)
 def setUp(self):
     """Run the base setup, run ``self._reset()`` on the loop and create the bucket."""
     super(TestAsyncIOGridFSBucket, self).setUp()
     run = self.loop.run_until_complete
     run(self._reset())
     self.bucket = AsyncIOMotorGridFSBucket(self.db)
Example #4
0
    def __init__(self, host: str, port: int):
        """
        Set up encryption, database handles, document classes and the scheduler.

        The Fernet key is taken from LOCKBOX_CREDENTIAL_KEY, or failing that from
        the file named by LOCKBOX_CREDENTIAL_KEY_FILE (its bytes are base64-encoded
        before use). ValueError is raised if neither is set or the file cannot be
        read. LOCKBOX_SCHOOL may supply an integer school code; if it is unset or
        not an integer the school code is None.
        """
        env = os.environ

        # Resolve the Fernet key: inline value first, key file second
        inline_key = env.get("LOCKBOX_CREDENTIAL_KEY")
        key_path = env.get("LOCKBOX_CREDENTIAL_KEY_FILE")
        if inline_key:
            key = inline_key
        elif key_path:
            try:
                with open(key_path, "rb") as key_file:
                    key = base64.b64encode(key_file.read())
            except IOError as e:
                raise ValueError(
                    "Cannot read password encryption key file") from e
        else:
            raise ValueError(
                "Encryption key for passwords must be provided! Set LOCKBOX_CREDENTIAL_KEY or LOCKBOX_CREDENTIAL_KEY_FILE."
            )
        # Should raise ValueError if key is invalid
        self.fernet = Fernet(key)

        # School code stays None unless the env var holds a valid integer
        self.school_code = None
        raw_school = env.get("LOCKBOX_SCHOOL")
        if raw_school:
            try:
                self.school_code = int(raw_school)
            except ValueError as e:
                logger.error(f"Invalid school code: {e}")

        # Database handles: one private database, one shared database
        client = AsyncIOMotorClient(host, port)
        private_db = client["lockbox"]
        shared_db = client["shared"]
        self.client = client
        self._private_db = private_db
        self._shared_db = shared_db
        self._private_instance = MotorAsyncIOInstance(private_db)
        self._shared_instance = MotorAsyncIOInstance(shared_db)
        self._shared_gridfs = AsyncIOMotorGridFSBucket(shared_db)

        # Document classes bound to the private database
        in_private = self._private_instance.register
        self.LockboxFailureImpl = in_private(documents.LockboxFailure)
        self.FillFormResultImpl = in_private(documents.FillFormResult)
        self.UserImpl = in_private(documents.User)
        self.FormGeometryEntryImpl = in_private(documents.FormGeometryEntry)
        self.CachedFormGeometryImpl = in_private(documents.CachedFormGeometry)
        self.TaskImpl = in_private(documents.Task)

        # Document classes bound to the shared database
        in_shared = self._shared_instance.register
        self.FormFieldImpl = in_shared(documents.FormField)
        self.FormImpl = in_shared(documents.Form)
        self.CourseImpl = in_shared(documents.Course)
        self.FormFillingTestImpl = in_shared(documents.FormFillingTest)
        self.LockboxFailureImplShared = in_shared(documents.LockboxFailure)
        self.FillFormResultImplShared = in_shared(documents.FillFormResult)

        self._scheduler = scheduler.Scheduler(self)
        tasks.set_task_handlers(self._scheduler)
        # Current school day, set by the check day task
        # Doubles as a fallback and as the marker for "day already checked";
        # it is None until the day has been checked
        self.current_day = None
Example #5
0
class LockboxDB:
    """
    Holds databases for lockbox.
    """
    def __init__(self, host: str, port: int):
        """
        Set up encryption, database handles, document classes and the scheduler.

        The Fernet key is taken from LOCKBOX_CREDENTIAL_KEY, or failing that from
        the file named by LOCKBOX_CREDENTIAL_KEY_FILE (its bytes are base64-encoded
        before use). ValueError is raised if neither is set or the file cannot be
        read. LOCKBOX_SCHOOL may supply an integer school code; if it is unset or
        not an integer the school code is None.
        """
        env = os.environ

        # Resolve the Fernet key: inline value first, key file second
        inline_key = env.get("LOCKBOX_CREDENTIAL_KEY")
        key_path = env.get("LOCKBOX_CREDENTIAL_KEY_FILE")
        if inline_key:
            key = inline_key
        elif key_path:
            try:
                with open(key_path, "rb") as key_file:
                    key = base64.b64encode(key_file.read())
            except IOError as e:
                raise ValueError(
                    "Cannot read password encryption key file") from e
        else:
            raise ValueError(
                "Encryption key for passwords must be provided! Set LOCKBOX_CREDENTIAL_KEY or LOCKBOX_CREDENTIAL_KEY_FILE."
            )
        # Should raise ValueError if key is invalid
        self.fernet = Fernet(key)

        # School code stays None unless the env var holds a valid integer
        self.school_code = None
        raw_school = env.get("LOCKBOX_SCHOOL")
        if raw_school:
            try:
                self.school_code = int(raw_school)
            except ValueError as e:
                logger.error(f"Invalid school code: {e}")

        # Database handles: one private database, one shared database
        client = AsyncIOMotorClient(host, port)
        private_db = client["lockbox"]
        shared_db = client["shared"]
        self.client = client
        self._private_db = private_db
        self._shared_db = shared_db
        self._private_instance = MotorAsyncIOInstance(private_db)
        self._shared_instance = MotorAsyncIOInstance(shared_db)
        self._shared_gridfs = AsyncIOMotorGridFSBucket(shared_db)

        # Document classes bound to the private database
        in_private = self._private_instance.register
        self.LockboxFailureImpl = in_private(documents.LockboxFailure)
        self.FillFormResultImpl = in_private(documents.FillFormResult)
        self.UserImpl = in_private(documents.User)
        self.FormGeometryEntryImpl = in_private(documents.FormGeometryEntry)
        self.CachedFormGeometryImpl = in_private(documents.CachedFormGeometry)
        self.TaskImpl = in_private(documents.Task)

        # Document classes bound to the shared database
        in_shared = self._shared_instance.register
        self.FormFieldImpl = in_shared(documents.FormField)
        self.FormImpl = in_shared(documents.Form)
        self.CourseImpl = in_shared(documents.Course)
        self.FormFillingTestImpl = in_shared(documents.FormFillingTest)
        self.LockboxFailureImplShared = in_shared(documents.LockboxFailure)
        self.FillFormResultImplShared = in_shared(documents.FillFormResult)

        self._scheduler = scheduler.Scheduler(self)
        tasks.set_task_handlers(self._scheduler)
        # Current school day, set by the check day task
        # Doubles as a fallback and as the marker for "day already checked";
        # it is None until the day has been checked
        self.current_day = None

    async def init(self):
        """
        Prepare indexes, reset the form geometry cache and start the scheduler.

        If no school day has been recorded yet (current_day is None), the check
        day task is rescheduled afterwards.
        """
        for impl in (self.UserImpl, self.CourseImpl):
            await impl.ensure_indexes()
        # The cached geometry collection is dropped and re-indexed on every start
        geometry_cache = self.CachedFormGeometryImpl
        await geometry_cache.collection.drop()
        await geometry_cache.ensure_indexes()
        await self._scheduler.start()

        if self.current_day is not None:
            return
        # Day not checked yet, so make sure the check day task gets to run
        await self._reschedule_check_day()

    def private_db(self) -> AsyncIOMotorDatabase:
        """
        Return the handle to the private database.
        """
        database = self._private_db
        return database

    def shared_db(self) -> AsyncIOMotorDatabase:
        """
        Return the handle to the shared database.
        """
        database = self._shared_db
        return database

    def shared_gridfs(self) -> AsyncIOMotorGridFSBucket:
        """
        Return the GridFS bucket of the shared database.
        """
        bucket = self._shared_gridfs
        return bucket

    async def _reschedule_check_day(self) -> None:
        """
        Reschedule the check day task.

        If no check day task exists, one is created.
        If the existing task's next run falls on a later date than today (both
        dates taken in tasks.LOCAL_TZ), it is changed to run now and the
        scheduler is notified.
        Otherwise (next run is today or earlier) no action is taken.
        """
        check_task = await self.TaskImpl.find_one(
            {"kind": documents.TaskType.CHECK_DAY.value})
        if check_task is None:
            # Create check task if it does not exist
            await self._scheduler.create_task(
                kind=documents.TaskType.CHECK_DAY)
            return

        # next_run_at is interpreted as UTC and converted to the local timezone
        next_run_date = check_task.next_run_at.replace(
            tzinfo=datetime.timezone.utc).astimezone(tasks.LOCAL_TZ).date()
        # "Today" must be taken in the same timezone as next_run_date; using the
        # system clock's timezone would give the wrong date around midnight
        # whenever the host timezone differs from tasks.LOCAL_TZ
        today = datetime.datetime.now(tasks.LOCAL_TZ).date()
        # If the check task is set to run on a later date then make it run now
        if next_run_date > today:
            check_task.next_run_at = datetime.datetime.utcnow()
            await check_task.commit()
            self._scheduler.update()

    async def populate_user_courses(self,
                                    user,
                                    courses: typing.List[TimetableItem],
                                    clear_previous: bool = True) -> None:
        """
        Attach the given timetable courses to a user, creating Course documents as needed.

        With clear_previous set, the user's course list is emptied first; otherwise
        new courses are added to the existing list. Existing Course documents in the
        shared database are never removed here, since other users may also refer
        to them.
        """
        user.courses = [] if clear_previous else (user.courses or [])

        for item in courses:
            code = item.course_code
            record = await self.CourseImpl.find_one({"course_code": code})
            if record is None:
                record = self.CourseImpl(
                    course_code=code,
                    teacher_name=item.course_teacher_name)
                # Give every new course its own list; otherwise known_slots of
                # different courses would all point to the same list instance
                record.known_slots = []
            elif not record.teacher_name:
                # Existing course without a teacher name: fill it in
                record.teacher_name = item.course_teacher_name

            # Record the cycle day/period slot if it has not been seen before
            slot = f"{item.course_cycle_day}-{item.course_period}"
            if slot not in record.known_slots:
                record.known_slots.append(slot)
            await record.commit()

            course_pk = record.pk
            if course_pk not in user.courses:
                user.courses.append(course_pk)

        await user.commit()

    async def create_user(self) -> str:
        """
        Create and store a new user identified by a fresh random token.

        Returns the token (64 hex characters).
        """
        new_token = secrets.token_hex(32)
        new_user = self.UserImpl(token=new_token)
        await new_user.commit()
        return new_token

    async def modify_user(
            self,
            token: str,
            login: str = None,
            password: str = None,
            active: bool = None,
            grade: int = None,
            first_name: str = None,
            last_name: str = None,
            **kwargs) -> None:
        """
        Modify user data.

        Only the fields passed as non-None are changed; extra keyword arguments
        are accepted and ignored. The password is stored encrypted.

        If, after applying the changes, the user has both a login and a password
        and at least one of the two is being modified, the credentials are
        verified by logging into TDSB Connects before anything is saved. The
        login uses the new value of whichever field is being changed and the
        stored value of the other one. On success the email, and where available
        the grade and name, reported by TDSB Connects are stored, and a populate
        courses task is created.

        Afterwards, if the user is active and has both credentials, a fill form
        task is created when none exists (and the check day task is
        rescheduled). Otherwise, unless active was passed as True, any existing
        fill form task of the user is removed.

        Raises LockboxDBError:
        - BAD_TOKEN if no user has the given token
        - INVALID_FIELD if a field fails validation or TDSB rejects the
          credentials (HTTP 401)
        - OTHER if the user's schools do not match what this instance supports
        - INTERNAL_ERROR if the stored password cannot be decrypted
        - with no explicit code for any other HTTP error from TDSB Connects
        """
        user = await self.UserImpl.find_one({"token": token})
        if user is None:
            raise LockboxDBError("Bad token", LockboxDBError.BAD_TOKEN)
        try:
            if login is not None:
                user.login = login
            if password is not None:
                user.password = self.fernet.encrypt(password.encode("utf-8"))
            if active is not None:
                user.active = active
            if grade is not None:
                user.grade = grade
            if first_name is not None:
                user.first_name = first_name
            if last_name is not None:
                user.last_name = last_name
            # Verify user credentials if username and password are both present
            # and at least one is being modified
            if user.login is not None and user.password is not None and (
                    login is not None or password is not None):
                logger.info(f"Verifying credentials for login {user.login}")
                # The caller may change only one of login/password, in which
                # case the other argument is None. Verify the pair the user
                # will actually end up with, falling back to the stored value.
                effective_login = login if login is not None else user.login
                if password is not None:
                    effective_password = password
                else:
                    try:
                        effective_password = self.fernet.decrypt(
                            user.password).decode("utf-8")
                    except InvalidToken as e:
                        logger.critical(
                            f"User {user.pk}'s password cannot be decrypted")
                        raise LockboxDBError(
                            "Internal server error: Cannot decrypt password",
                            LockboxDBError.INTERNAL_ERROR) from e
                try:
                    async with TDSBConnects() as session:
                        await session.login(effective_login,
                                            effective_password)
                        info = await session.get_user_info()
                        schools = info.schools
                        if self.school_code is None:
                            # No school configured: only accounts with exactly
                            # one school can be handled
                            if len(schools) != 1:
                                logger.info(
                                    f"Login {user.login} has an invalid number of schools."
                                )
                                raise LockboxDBError(
                                    f"TDSB Connects reported {len(schools)} schools; nffu can only handle 1 school",
                                    LockboxDBError.OTHER)
                            school = schools[0]
                        else:
                            # A school is configured: the account must be in it
                            for s in schools:
                                if s.code == self.school_code:
                                    school = s
                                    break
                            else:
                                logger.info(
                                    f"Login {user.login} is not in the configured school"
                                )
                                raise LockboxDBError(
                                    f"You do not appear to be in the school nffu was set up for (#{self.school_code}); nffu can only handle 1 school",
                                    LockboxDBError.OTHER)
                        user.email = info.email
                        # Try to get user grade, first name, and last name
                        # Failures here are deliberately ignored (best effort)
                        try:
                            user.grade = int(
                                info._data["SchoolCodeList"][0]["StudentInfo"]
                                ["CurrentGradeLevel"])
                            # CurrentGradeLevel increments once per *calendar* year
                            # So the value is off-by-one during the first half of the school year
                            # School year is in the form XXXXYYYY, e.g. 20202021
                            if not school.school_year.endswith(
                                    str(datetime.datetime.now().year)):
                                user.grade += 1
                        except (ValueError, KeyError, IndexError):
                            pass
                        try:
                            user.first_name = info._data["SchoolCodeList"][0][
                                "StudentInfo"]["FirstName"]
                            user.last_name = info._data["SchoolCodeList"][0][
                                "StudentInfo"]["LastName"]
                        except (ValidationError, KeyError, IndexError):
                            pass
                except aiohttp.ClientResponseError as e:
                    logger.info(f"TDSB login error for login {user.login}")
                    # Invalid credentials, clean up and raise
                    if e.code == 401:
                        raise LockboxDBError(
                            "Incorrect TDSB credentials",
                            LockboxDBError.INVALID_FIELD) from e
                    raise LockboxDBError(
                        f"HTTP error while logging into TDSB Connects: {str(e)}"
                    ) from e
                # Now we know credentials are valid
                await user.commit()
                logger.info(f"Credentials good for login {user.login}")
                await self._scheduler.create_task(
                    kind=documents.TaskType.POPULATE_COURSES, owner=user)
            else:
                await user.commit()

            # If user is active and has complete set of credentials, make a fill form task for them
            if user.active and user.login is not None and user.password is not None:
                task = await self.TaskImpl.find_one({
                    "kind":
                    documents.TaskType.FILL_FORM.value,
                    "owner":
                    user
                })
                if task is None:
                    logger.info(
                        f"Creating new fill form task for user {user.pk}")
                    # Calculate next run time
                    # This time will always be in the next day, so check if it's possible to do it today
                    run_at = tasks.next_run_time(tasks.FILL_FORM_RUN_TIME)
                    if (run_at - datetime.timedelta(days=1)).replace(
                            tzinfo=None) >= datetime.datetime.utcnow():
                        run_at -= datetime.timedelta(days=1)
                    task = await self._scheduler.create_task(
                        kind=documents.TaskType.FILL_FORM,
                        run_at=run_at,
                        owner=user)
                    # Reschedule the check day task as well
                    # The task might not exist if this is the first user
                    await self._reschedule_check_day()
            # Otherwise remove the user's fill form task, unless active was
            # explicitly passed as True (active=None also takes this branch)
            elif not active:
                task = await self.TaskImpl.find_one({
                    "kind":
                    documents.TaskType.FILL_FORM.value,
                    "owner":
                    user
                })
                if task is not None:
                    logger.info(f"Deleting fill form task for user {user.pk}")
                    await task.remove()
                    self._scheduler.update()
        except ValidationError as e:
            raise LockboxDBError(f"Invalid field: {e}",
                                 LockboxDBError.INVALID_FIELD) from e

    async def get_user(self, token: str) -> typing.Dict[str, typing.Any]:
        """
        Look up a user by token and return the result of its dump().

        Raises LockboxDBError (BAD_TOKEN) if no user has the given token.
        """
        found = await self.UserImpl.find_one({"token": token})
        if found is not None:
            return found.dump()
        raise LockboxDBError("Bad token", LockboxDBError.BAD_TOKEN)

    async def delete_user(self, token: str) -> None:
        """
        Remove a user together with their stored screenshots and fill form task.

        Raises LockboxDBError (BAD_TOKEN) if no user has the given token.
        """
        user = await self.UserImpl.find_one({"token": token})
        if user is None:
            raise LockboxDBError("Bad token", LockboxDBError.BAD_TOKEN)

        # Remove the screenshots of the last fill form result from GridFS;
        # a file that is already gone is only logged
        last_result = user.last_fill_form_result
        if last_result is not None:
            form_shot_id = last_result.form_screenshot_id
            if form_shot_id is not None:
                try:
                    await self._shared_gridfs.delete(form_shot_id)
                except gridfs.NoFile:
                    logger.warning(
                        f"Fill form: Failed to delete previous result form screenshot for user {user.pk}: No file"
                    )
            confirmation_shot_id = last_result.confirmation_screenshot_id
            if confirmation_shot_id is not None:
                try:
                    await self._shared_gridfs.delete(confirmation_shot_id)
                except gridfs.NoFile:
                    logger.warning(
                        f"Fill form: Failed to delete previous result conformation page screenshot for user {user.pk}: No file"
                    )

        # Remove the user's fill form task, if there is one
        task_query = {
            "kind": documents.TaskType.FILL_FORM.value,
            "owner": user,
        }
        fill_task = await self.TaskImpl.find_one(task_query)
        if fill_task is not None:
            logger.info(f"Deleting fill form task for user {user.pk}")
            await fill_task.remove()
            self._scheduler.update()

        await user.remove()

    async def delete_user_error(self, token: str, eid: str) -> None:
        """
        Pull the error with the given id out of a user's "errors" list.

        Raises LockboxDBError with BAD_TOKEN if no user matches the token, and
        with no explicit code if the id is malformed or nothing was removed.
        """
        try:
            pull_error = {"$pull": {"errors": {"_id": bson.ObjectId(eid)}}}
            outcome = await self.UserImpl.collection.update_one(
                {"token": token}, pull_error)
        except bson.errors.InvalidId as e:
            raise LockboxDBError("Bad error id") from e
        # No document matched: the token is unknown
        if outcome.matched_count == 0:
            raise LockboxDBError("Bad token", LockboxDBError.BAD_TOKEN)
        # Matched but unchanged: the user has no error with that id
        if outcome.modified_count == 0:
            raise LockboxDBError("Bad error id")

    async def update_user_courses(self, token: str) -> None:
        """
        Queue a populate courses task for the user with the given token.

        Raises LockboxDBError with BAD_TOKEN for an unknown token, STATE_CONFLICT
        when the user lacks a login or password, and INTERNAL_ERROR when the
        stored password cannot be decrypted.
        """
        user = await self.UserImpl.find_one({"token": token})
        if user is None:
            raise LockboxDBError("Bad token", LockboxDBError.BAD_TOKEN)

        has_credentials = (user.login is not None
                           and user.password is not None)
        if not has_credentials:
            raise LockboxDBError("Cannot update courses: Missing credentials",
                                 LockboxDBError.STATE_CONFLICT)

        # Decrypt once as a check only; the plaintext itself is discarded
        try:
            self.fernet.decrypt(user.password).decode("utf-8")
        except InvalidToken as e:
            logger.critical(f"User {user.pk}'s password cannot be decrypted")
            raise LockboxDBError(
                "Internal server error: Cannot decrypt password",
                LockboxDBError.INTERNAL_ERROR) from e

        await self._scheduler.create_task(
            kind=documents.TaskType.POPULATE_COURSES, owner=user)

    async def update_all_courses(self) -> None:
        """
        Queue populate courses tasks for every user that has a login and a password.

        Tasks are created in batches: each batch shares one run time, and every
        following batch runs one interval later. Batch size (default 3, minimum 1)
        comes from LOCKBOX_UPDATE_COURSES_BATCH_SIZE and the interval in seconds
        (default 60, minimum 0) from LOCKBOX_UPDATE_COURSES_INTERVAL; invalid
        values are logged and the default is kept.
        """
        batch_size = 3
        raw_batch_size = os.environ.get("LOCKBOX_UPDATE_COURSES_BATCH_SIZE")
        if raw_batch_size:
            try:
                parsed_size = int(raw_batch_size)
                if parsed_size < 1:
                    raise ValueError("Batch size cannot be less than 1")
            except ValueError as e:
                logger.error(
                    f"Update all courses: Invalid batch size specified by env var (defaulted to {batch_size}): {e}"
                )
            else:
                batch_size = parsed_size

        interval = 60
        raw_interval = os.environ.get("LOCKBOX_UPDATE_COURSES_INTERVAL")
        if raw_interval:
            try:
                parsed_interval = int(raw_interval)
                if parsed_interval < 0:
                    raise ValueError("Interval cannot be less than 0")
            except ValueError as e:
                logger.error(
                    f"Update all courses: Invalid interval specified by env var (defaulted to {interval}s): {e}"
                )
            else:
                interval = parsed_interval

        run_at = datetime.datetime.utcnow()
        step = datetime.timedelta(seconds=interval)
        in_current_batch = 0
        with_credentials = {
            "login": {"$ne": None},
            "password": {"$ne": None},
        }
        async for user in self.UserImpl.find(with_credentials):
            await self._scheduler.create_task(
                documents.TaskType.POPULATE_COURSES, run_at, user)
            in_current_batch += 1
            # Batch full: start a new one and push its run time back
            if in_current_batch >= batch_size:
                in_current_batch = 0
                run_at += step

    async def get_form_geometry(self, token: str, url: str,
                                grab_screenshot: bool) -> dict:
        """
        Get the form geometry for a given form URL.

        Results are cached per URL. If there is no cached entry, or a screenshot
        is requested but the finished entry's screenshot is missing from GridFS,
        a new entry is stored and two tasks are created: one to get the form
        geometry, and one to remove the entry 15 minutes later.

        Returns a dict with keys "geometry", "auth_required" and
        "screenshot_id". All three are None while the result is pending. If the
        cached entry has a response status, "error" and "status" are included
        too, and "geometry" is None when no geometry was stored.

        Raises LockboxDBError with BAD_TOKEN for an unknown token,
        STATE_CONFLICT when the user lacks a login or password, and
        INVALID_FIELD when the new cache entry fails validation.
        """
        user = await self.UserImpl.find_one({"token": token})
        if user is None:
            raise LockboxDBError("Bad token", LockboxDBError.BAD_TOKEN)
        if user.login is None or user.password is None:
            raise LockboxDBError("Cannot sign into form: Missing credentials",
                                 LockboxDBError.STATE_CONFLICT)
        geom = await self.CachedFormGeometryImpl.find_one({"url": url})
        # Check if screenshot requirement is satisfied
        if geom is not None and grab_screenshot:
            screenshot_valid = False
            # If screenshot ID exists, check the GridFS bucket to make sure it's actually valid
            # the screenshot data may have been deleted by fenetre
            if geom.screenshot_file_id is not None:
                async for _ in self._shared_gridfs.find(
                    {"_id": geom.screenshot_file_id}):
                    screenshot_valid = True
                    break
        else:
            screenshot_valid = True
        # If this form was never requested before,
        # or the screenshot requirement is not satisfied AND the operation is not already pending
        if geom is None or (not screenshot_valid
                            and geom.geometry is not None):
            # If this is a re-run, clear the old result
            if geom is not None:
                await geom.remove()
            # Re-make the geometry
            try:
                geom = self.CachedFormGeometryImpl(
                    url=url,
                    requested_by=token,
                    geometry=None,
                    grab_screenshot=grab_screenshot)
            except ValidationError as e:
                raise LockboxDBError(f"Invalid field: {e}",
                                     LockboxDBError.INVALID_FIELD) from e
            await geom.commit()
            # Create tasks to get form geometry and clean up
            await self._scheduler.create_task(
                documents.TaskType.GET_FORM_GEOMETRY,
                owner=user,
                argument=str(geom.pk))
            await self._scheduler.create_task(
                documents.TaskType.REMOVE_OLD_FORM_GEOMETRY,
                datetime.datetime.utcnow() + datetime.timedelta(minutes=15),
                owner=user,
                argument=str(geom.pk))
            return {
                "geometry": None,
                "auth_required": None,
                "screenshot_id": None
            }
        # Result pending
        if geom.geometry is None and geom.response_status is None:
            return {
                "geometry": None,
                "auth_required": None,
                "screenshot_id": None
            }
        # NOTE(review): str() turns a missing screenshot_file_id into the string
        # "None" in both returns below - confirm callers expect that
        # Result exists (geometry cannot be None here, see the pending check)
        if geom.response_status is None:
            return {
                "geometry": [e.dump() for e in geom.geometry],
                "auth_required": geom.auth_required,
                "screenshot_id": str(geom.screenshot_file_id)
            }
        # A response status was recorded. The geometry may never have been
        # stored in that case, so guard against iterating None.
        if geom.geometry is None:
            dumped_geometry = None
        else:
            dumped_geometry = [e.dump() for e in geom.geometry]
        return {
            "geometry": dumped_geometry,
            "screenshot_id": str(geom.screenshot_file_id),
            "auth_required": geom.auth_required,
            "error": geom.error,
            "status": geom.response_status
        }

    async def get_tasks(self) -> typing.List[dict]:
        """
        Get a list of serialized tasks.

        Tasks are ordered by "is_running" (descending), then "retry_count"
        (descending), then "next_run_at" (ascending).
        """
        # A PyMongo/Motor cursor keeps only the most recent sort() call, so a
        # chain of single-key sorts would order by "is_running" alone. Passing
        # all keys in one call keeps that primary key and makes the other two
        # act as tie-breakers.
        ordering = [
            ("is_running", -1),
            ("retry_count", -1),
            ("next_run_at", 1),
        ]
        return [
            task.dump() async for task in self.TaskImpl.find().sort(ordering)
        ]

    async def find_form_test_context(self, oid: str):
        """
        Look up a form filling test document by its id string.

        Returns whatever find_one yields for the matching "_id".
        """
        query = {"_id": bson.ObjectId(oid)}
        return await self.FormFillingTestImpl.find_one(query)

    async def start_form_test(self, oid: str, token: str):
        """
        Queue a test form fill for the given test id on behalf of a user.

        A second task to remove old test results is queued to run six hours
        later. Raises LockboxDBError (BAD_TOKEN) if no user has the given token.
        """
        owner = await self.UserImpl.find_one({"token": token})
        if owner is None:
            raise LockboxDBError("Bad token", LockboxDBError.BAD_TOKEN)

        create_task = self._scheduler.create_task
        await create_task(kind=documents.TaskType.TEST_FILL_FORM,
                          owner=owner,
                          argument=oid)
        # The cleanup time is computed only after the first task was created
        cleanup_at = datetime.datetime.utcnow() + datetime.timedelta(hours=6)
        await create_task(run_at=cleanup_at,
                          kind=documents.TaskType.REMOVE_OLD_TEST_RESULTS,
                          argument=oid)