Example #1
0
    def _handle_description(self, section_name: str, module_name: str,
                            module_modname: str, module_id: str,
                            module_description: str) -> [File]:
        """
        Builds the file entry for a module description together with
        one entry for every url found inside that description.
        @param section_name: The name of the course section.
        @param module_name: The name of the module; it is also used as
                            the filename of the description entry.
        @param module_modname: The type of the module. Types starting
                               with 'url' or 'index_mod' are stored as
                               'url_description'.
        @param module_id: The id of the module.
        @param module_description: The description of the module.
        @return: The url entries found in the description, followed by
                 the description entry itself.
        """
        root_path = '/'
        description_size = len(module_description)

        # The hash is taken over the description after it went through
        # ResultsHandler._filter_changing_attributes, not over the raw text.
        stable_text = ResultsHandler._filter_changing_attributes(
            module_description)
        digest = hashlib.sha1(stable_text.encode('utf-8')).hexdigest()

        if module_modname.startswith(('url', 'index_mod')):
            module_modname = 'url_description'

        description_file = File(
            module_id=module_id,
            section_name=section_name,
            module_name=module_name,
            content_filepath=root_path,
            content_filename=module_name,
            content_fileurl='',
            content_filesize=description_size,
            content_timemodified=0,
            module_modname=module_modname,
            content_type='description',
            content_isexternalfile=False,
            hash=digest,
        )
        description_file.text_content = module_description

        linked_files = self._find_all_urls_in_description(
            section_name, module_name, module_modname, module_id,
            root_path, module_description)

        return [*linked_files, description_file]
Example #2
0
    def changes_to_notify(self) -> [Course]:
        """Collect all files that still have to be notified, grouped by course.

        Every row of the ``files`` table with ``notified = 0`` is converted
        with ``File.fromRow``. For a file flagged as modified or moved, the
        row whose ``old_file_id`` points at that file is looked up and, if one
        exists, attached to the file as ``new_file``.

        Returns:
            [Course]: One Course per course id that has unnotified rows, with
                ``files`` set to the list of those files.
        """
        courses_query = """SELECT course_id, course_fullname
            FROM files WHERE notified = 0 GROUP BY course_id;"""
        unnotified_files_query = """SELECT *
                FROM files WHERE notified = 0 AND course_id = ?;"""
        successor_query = """SELECT *
                        FROM files
                        WHERE old_file_id = ?;"""

        changed_courses = []

        conn = sqlite3.connect(self.db_file)
        try:
            conn.row_factory = sqlite3.Row
            cursor = conn.cursor()

            cursor.execute(courses_query)
            course_rows = cursor.fetchall()

            for course_row in course_rows:
                course = Course(course_row['course_id'], course_row['course_fullname'])

                cursor.execute(unnotified_files_query, (course.id,))
                file_rows = cursor.fetchall()

                course.files = []

                for file_row in file_rows:
                    notify_file = File.fromRow(file_row)
                    if notify_file.modified or notify_file.moved:
                        # add reference to new file
                        cursor.execute(successor_query, (notify_file.file_id,))
                        successor_row = cursor.fetchone()
                        if successor_row is not None:
                            notify_file.new_file = File.fromRow(successor_row)

                    course.files.append(notify_file)

                changed_courses.append(course)
        finally:
            # Close the connection even if a query or a row conversion raises.
            conn.close()

        return changed_courses
Example #3
0
    def _handle_cookie_mod(self, section_name: str, module_name: str,
                           module_modname: str, module_id: str,
                           module_url: str) -> [File]:
        """
        Wraps a cookie module into a single file entry
        @param section_name: The name of the course section.
        @param module_name: The name of the module; it is also used as
                            the filename of the entry.
        @param module_modname: The type of the module.
        @param module_id: The id of the module.
        @param module_url: The url to the cookie module.
        @return: A list that contains exactly the one created file.
        """
        # Size, modification time and hash are not known here, so the entry
        # carries 0 / 0 / None for them.
        attributes = {
            'module_id': module_id,
            'section_name': section_name,
            'module_name': module_name,
            'content_filepath': '/',
            'content_filename': module_name,
            'content_fileurl': module_url,
            'content_filesize': 0,
            'content_timemodified': 0,
            'module_modname': module_modname,
            'content_type': 'cookie_mod',
            'content_isexternalfile': True,
            'hash': None,
        }
        return [File(**attributes)]
Example #4
0
    def move_file(self, file: File, course_id: int, course_fullname: str):
        """Store a moved file in the ``files`` table.

        When ``file.old_file`` is set, the new location is inserted as an
        already notified row that references the old row through
        ``old_file_id``, and the old row is flagged as moved and not yet
        notified. Without an old file the new row is inserted as a regular,
        not yet notified file.

        Args:
            file (File): The file at its new location.
            course_id (int): Id of the course the file belongs to.
            course_fullname (str): Full name of that course.
        """
        mark_moved_query = """UPDATE files
            SET notified = 0, moved = 1
            WHERE file_id = :file_id;
            """

        conn = sqlite3.connect(self.db_file)
        try:
            cursor = conn.cursor()

            data_new = {'course_id': course_id, 'course_fullname': course_fullname}
            data_new.update(file.getMap())

            if file.old_file is not None:
                # insert a new file,
                # but it is already notified because the same file already
                # exists as moved
                data_new.update(
                    {'old_file_id': file.old_file.file_id, 'modified': 0, 'moved': 0, 'deleted': 0, 'notified': 1}
                )
                cursor.execute(File.INSERT, data_new)

                data_old = {'course_id': course_id, 'course_fullname': course_fullname}
                data_old.update(file.old_file.getMap())

                cursor.execute(mark_moved_query, data_old)
            else:
                # this should never happen, but the old file is not saved in
                # the file descriptor, so we need to inform about the new
                # file: notified = 0
                data_new.update({'modified': 0, 'deleted': 0, 'moved': 0, 'notified': 0})
                cursor.execute(File.INSERT, data_new)

            conn.commit()
        finally:
            # Close the connection even if building the data or a statement
            # raises; uncommitted changes are discarded in that case.
            conn.close()
Example #5
0
    def _find_all_urls_in_description(
        self,
        section_name: str,
        module_name: str,
        module_modname: str,
        module_id: str,
        content_filepath: str,
        description: str,
    ) -> [File]:
        """Parses a description to find all urls in it. Then it creates for every url a file entry.

        Urls are taken from ``href`` and ``src`` attributes. Each distinct url
        produces exactly one entry, even if it occurs several times or in both
        kinds of attribute. Entries are ordered by first occurrence, hrefs
        before srcs.

        Args:
            section_name (str): The name of the course section
            module_name (str): Name of the Module
            module_modname (str): Type of the Module
            module_id (str): Module Id
            content_filepath (str): The file path stored on every created entry
            description (str): The description string

        Returns:
            [File]: A list of created file entries.
        """

        link_targets = re.findall(r'href=[\'"]?([^\'" >]+)', description)
        link_targets += re.findall(r'src=[\'"]?([^\'" >]+)', description)
        # dict.fromkeys drops duplicates (also across href and src) while
        # keeping the order in which the urls were found.
        urls = list(dict.fromkeys(link_targets))

        result = []

        for url in urls:
            if url == '':
                continue

            url_parts = urlparse.urlparse(url)
            on_moodle = url_parts.hostname == self.moodle_domain

            # The prefix of the module type records how the url is classified:
            # a moodle webservice url, any other moodle url, or an external url.
            if on_moodle and url_parts.path.find('/webservice/') >= 0:
                url_modname = 'index_mod-description-' + module_modname
            elif on_moodle:
                url_modname = 'cookie_mod-description-' + module_modname
            else:
                url_modname = 'url-description-' + module_modname

            new_file = File(
                module_id=module_id,
                section_name=section_name,
                module_name=module_name,
                content_filepath=content_filepath,
                content_filename=url,
                content_fileurl=url,
                content_filesize=0,
                content_timemodified=0,
                module_modname=url_modname,
                content_type='description-url',
                content_isexternalfile=True,
                hash=None,
            )
            result.append(new_file)
        return result
    def new_file(self, file: File, course_id: int, course_fullname: str):
        """Save a file to the index as a new, not yet notified entry.

        Args:
            file (File): The file to store; its columns come from ``file.getMap()``.
            course_id (int): Id of the course the file belongs to.
            course_fullname (str): Full name of that course.
        """
        conn = sqlite3.connect(self.db_file)
        try:
            cursor = conn.cursor()

            data = {'course_id': course_id, 'course_fullname': course_fullname}
            data.update(file.getMap())

            data.update({'modified': 0, 'deleted': 0, 'moved': 0, 'notified': 0})

            cursor.execute(File.INSERT, data)

            conn.commit()
        finally:
            # Close the connection even if building the data or the insert
            # raises.
            conn.close()
    def delete_file(self, file: File, course_id: int, course_fullname: str):
        """Flag a stored file as deleted and not yet notified.

        The row is selected by ``file_id`` and its ``time_stamp`` is updated;
        both values are read from the statement parameters
        (presumably supplied by ``file.getMap()`` - verify against File).

        Args:
            file (File): The file that was deleted.
            course_id (int): Id of the course the file belongs to.
            course_fullname (str): Full name of that course.
        """
        mark_deleted_query = """UPDATE files
            SET notified = 0, deleted = 1, time_stamp = :time_stamp
            WHERE file_id = :file_id;
            """

        conn = sqlite3.connect(self.db_file)
        try:
            cursor = conn.cursor()

            data = {'course_id': course_id, 'course_fullname': course_fullname}
            data.update(file.getMap())

            cursor.execute(mark_deleted_query, data)

            conn.commit()
        finally:
            # Close the connection even if building the data or the update
            # raises.
            conn.close()
Example #8
0
    def get_old_files(self) -> [Course]:
        """Return, per course, the old versions of files that were replaced.

        A row with a non-NULL ``old_file_id`` references the row it replaced.
        For every such row the referenced row is loaded and converted with
        ``File.fromRow``. A reference whose target row no longer exists is
        skipped.

        Returns:
            [Course]: One Course per course that has rows with an
                ``old_file_id``, with ``files`` set to the referenced old files.
        """
        courses_query = """SELECT DISTINCT course_id, course_fullname
            FROM files WHERE old_file_id IS NOT NULL"""
        updated_files_query = """SELECT *
                FROM files
                WHERE course_id = ?
                AND old_file_id IS NOT NULL"""
        old_file_query = """SELECT *
                    FROM files
                    WHERE file_id = ?"""

        stored_courses = []

        conn = sqlite3.connect(self.db_file)
        try:
            conn.row_factory = sqlite3.Row
            cursor = conn.cursor()

            cursor.execute(courses_query)

            course_rows = cursor.fetchall()
            for course_row in course_rows:
                course = Course(course_row['course_id'],
                                course_row['course_fullname'])

                cursor.execute(updated_files_query, (course.id, ))
                updated_files = cursor.fetchall()

                course.files = []

                for updated_file in updated_files:
                    cursor.execute(old_file_query,
                                   (updated_file['old_file_id'], ))
                    old_file = cursor.fetchone()

                    # fetchone() yields None when the referenced row is gone;
                    # same guard as in changes_to_notify before File.fromRow.
                    if old_file is None:
                        continue

                    course.files.append(File.fromRow(old_file))

                stored_courses.append(course)
        finally:
            # Close the connection even if a query or a row conversion raises.
            conn.close()

        return stored_courses
Example #9
0
    def get_stored_files(self) -> [Course]:
        """Return all current files of the index, grouped by course.

        Only rows with ``deleted = 0``, ``modified = 0`` and ``moved = 0`` are
        considered; each one is converted with ``File.fromRow``.

        Returns:
            [Course]: One Course per course id that has such rows, with
                ``files`` set to the list of those files.
        """
        courses_query = """SELECT course_id, course_fullname
            FROM files WHERE deleted = 0 AND modified = 0 AND moved = 0
            GROUP BY course_id;"""
        files_query = """SELECT *
                FROM files
                WHERE deleted = 0
                AND modified = 0
                AND moved = 0
                AND course_id = ?;"""

        stored_courses = []

        conn = sqlite3.connect(self.db_file)
        try:
            conn.row_factory = sqlite3.Row
            cursor = conn.cursor()

            cursor.execute(courses_query)
            course_rows = cursor.fetchall()

            for course_row in course_rows:
                course = Course(course_row['course_id'], course_row['course_fullname'])

                cursor.execute(files_query, (course.id,))
                file_rows = cursor.fetchall()

                course.files = []

                for file_row in file_rows:
                    course.files.append(File.fromRow(file_row))

                stored_courses.append(course)
        finally:
            # Close the connection even if a query or a row conversion raises.
            conn.close()

        return stored_courses
Example #10
0
    def _handle_files(self, section_name: str, module_name: str,
                      module_modname: str, module_id: str,
                      module_contents: []) -> [File]:
        """
        Turns every content entry of a module or assignment into a file
        and returns all of them as one list
        @param section_name: The name of the course section.
        @param module_name: The name of the module.
        @param module_modname: The type of the module.
        @param module_id: The id of the module.
        @param module_contents: The list of content of the module
                                or assignment.
        @return: A list of files that exist in a module. For a content
                 of type 'description' the urls found in it come right
                 before the description file itself.
        """
        collected = []
        for entry in module_contents:
            kind = entry.get('type', '')
            file_name = entry.get('filename', '')
            file_path = entry.get('filepath', '/')
            file_path = '/' if file_path is None else file_path
            file_size = entry.get('filesize', 0)
            file_url = entry.get('fileurl', '')
            time_modified = entry.get('timemodified', 0)
            is_external = entry.get('isexternalfile', False)

            # link-like modules are useless without an url
            if file_url == '' and module_modname.startswith(
                    ('url', 'index_mod', 'cookie_mod')):
                continue

            if module_modname.startswith('index_mod'):
                file_name = module_name

            is_description = kind == 'description'

            if is_description:
                description_text = entry.get('description', '')
                stable_text = ResultsHandler._filter_changing_attributes(
                    description_text)
                digest = hashlib.sha1(stable_text.encode('utf-8')).hexdigest()
            else:
                digest = None

            entry_file = File(
                module_id=module_id,
                section_name=section_name,
                module_name=module_name,
                content_filepath=file_path,
                content_filename=file_name,
                content_fileurl=file_url,
                content_filesize=file_size,
                content_timemodified=time_modified,
                module_modname=module_modname,
                content_type=kind,
                content_isexternalfile=is_external,
                hash=digest,
            )

            if is_description:
                entry_file.text_content = description_text
                collected.extend(
                    self._find_all_urls_in_description(
                        section_name, module_name, module_modname, module_id,
                        file_path, description_text))

            collected.append(entry_file)
        return collected
    def _handle_files(
        self,
        section_name: str,
        section_id: int,
        module_name: str,
        module_modname: str,
        module_id: str,
        module_contents: [],
    ) -> [File]:
        """
        Turns every content entry of a module or assignment into a file
        and returns all of them as one list
        @param section_name: The name of the course section.
        @param section_id: The id of the course section.
        @param module_name: The name of the module.
        @param module_modname: The type of the module.
        @param module_id: The id of the module.
        @param module_contents: The list of content of the module
                                or assignment.
        @return: A list of files that exist in a module. For contents
                 of type 'description' or 'html' the urls found in them
                 come right before the file itself, unless the content
                 sets 'no_search_for_urls'.
        """
        collected = []
        for entry in module_contents:
            kind = entry.get('type', '')
            file_name = entry.get('filename', '')
            file_path = entry.get('filepath', '/')
            file_path = '/' if file_path is None else file_path
            file_size = entry.get('filesize', 0)
            file_url = entry.get('fileurl', '')
            time_modified = entry.get('timemodified', 0)
            is_external = entry.get('isexternalfile', False)

            # options that steer hashing and the url search
            description_text = entry.get('description', '')
            skip_url_search = entry.get('no_search_for_urls', False)
            skip_moodle_urls = entry.get('no_search_for_moodle_urls', False)
            url_filters = entry.get('filter_urls_during_search_containing', [])
            skip_hash = entry.get('no_hash', False)

            html_text = entry.get('html', '')

            # link-like modules are useless without an url
            if file_url == '' and module_modname.startswith(('url', 'index_mod', 'cookie_mod')):
                continue

            # Only '.html' files are renamed, so pdf files or other content
            # downloaded from moodle pages keep their own filename.
            if module_modname.startswith('index_mod') and file_name.endswith('.html'):
                file_name = module_name

            is_description = kind == 'description'
            is_html = kind == 'html'

            if is_description and not skip_hash:
                stable_text = ResultsHandler._filter_changing_attributes(description_text)
                digest = hashlib.sha1(stable_text.encode('utf-8')).hexdigest()
            else:
                digest = None

            entry_file = File(
                module_id=module_id,
                section_name=section_name,
                section_id=section_id,
                module_name=module_name,
                content_filepath=file_path,
                content_filename=file_name,
                content_fileurl=file_url,
                content_filesize=file_size,
                content_timemodified=time_modified,
                module_modname=module_modname,
                content_type=kind,
                content_isexternalfile=is_external,
                hash=digest,
            )

            # A description is searched for urls through its own text, a html
            # content through its html.
            if is_description:
                entry_file.text_content = description_text
                html_text = description_text
            elif is_html:
                entry_file.html_content = html_text

            if (is_description or is_html) and not skip_url_search:
                collected.extend(
                    self._find_all_urls(
                        section_name,
                        section_id,
                        module_name,
                        module_modname,
                        module_id,
                        file_path,
                        html_text,
                        skip_moodle_urls,
                        url_filters,
                    )
                )

            collected.append(entry_file)
        return collected
    def _find_all_urls(
        self,
        section_name: str,
        section_id: int,
        module_name: str,
        module_modname: str,
        module_id: str,
        content_filepath: str,
        content_html: str,
        no_search_for_moodle_urls: bool,
        filter_urls_containing: [str],
    ) -> [File]:
        """Parses a html string to find all urls in it. Then it creates for every url a file entry.

        Urls are taken from ``href`` attributes, from anchor texts starting
        with ``http`` and from ``src`` attributes. Entries are ordered by
        first occurrence in that sequence.

        Args:
            section_name (str): The name of the course section
            section_id (int): The id of the course section
            module_name (str): Name of the Module
            module_modname (str): Type of the Module
            module_id (str): Module Id
            content_filepath (str): The file path stored on every created entry
            content_html (str): The html string
            no_search_for_moodle_urls (bool): If set, urls that point to the
                moodle domain are skipped
            filter_urls_containing ([str]): Urls that contain one of these
                strings are skipped

        Returns:
            [File]: A list of created file entries.
        """

        candidates = re.findall(r'href=[\'"]?([^\'" >]+)', content_html)
        candidates += re.findall(r'<a[^>]*>(http[^<]*)<\/a>', content_html)
        candidates += re.findall(r'src=[\'"]?([^\'" >]+)', content_html)
        # dict.fromkeys drops duplicates while keeping the order in which the
        # urls were found.
        urls = list(dict.fromkeys(candidates))

        result = []

        for url in urls:
            if url == '':
                continue

            # To avoid different encodings and quotes and so that youtube-dl downloads correctly
            # (See issues #96 and #103), we remove all encodings.
            url = html.unescape(url)
            url = urlparse.unquote(url)

            url_parts = urlparse.urlparse(url)
            on_moodle = url_parts.hostname == self.moodle_domain

            # Skip moodle urls only when the caller asked for it. The
            # parentheses are required: `and` binds tighter than `or`, so
            # without them every url on the moodle host would be skipped
            # regardless of the flag.
            if (on_moodle or url_parts.netloc == self.moodle_domain) and no_search_for_moodle_urls:
                continue

            # Skip the url if any filter matches. This has to continue the
            # outer loop; a `continue` inside a loop over the filters would
            # only move on to the next filter.
            if any(url.find(filter_str) >= 0 for filter_str in filter_urls_containing):
                continue

            if on_moodle and url_parts.path.find('/theme/image.php/') >= 0:
                # Replace the numeric path segment after theme and component
                # with -1.
                url = re.sub(
                    r"\/theme\/image.php\/(\w+)\/(\w+)\/\d+\/",
                    r"/theme/image.php/\g<1>/\g<2>/-1/",
                    url,
                )

            # The prefix of the module type records how the url is classified:
            # a moodle webservice url, any other moodle url, or an external url.
            if on_moodle and url_parts.path.find('/webservice/') >= 0:
                url_modname = 'index_mod-description-' + module_modname
            elif on_moodle:
                url_modname = 'cookie_mod-description-' + module_modname
            else:
                url_modname = 'url-description-' + module_modname

            first_guess_filename = url
            if first_guess_filename.startswith('data:image/'):
                # Inline image: take the extension from the mime type in front
                # of the first ';' and fall back to png.
                file_extension_guess = 'png'
                header_parts = first_guess_filename.split(';')
                if len(header_parts) > 1:
                    mime_parts = header_parts[0].split('/')
                    if len(mime_parts) > 1:
                        file_extension_guess = mime_parts[1]

                first_guess_filename = 'inline_image.' + file_extension_guess

            # Cap the length of the guessed filename.
            if len(first_guess_filename) > 254:
                first_guess_filename = first_guess_filename[:254]

            new_file = File(
                module_id=module_id,
                section_name=section_name,
                section_id=section_id,
                module_name=module_name,
                content_filepath=content_filepath,
                content_filename=first_guess_filename,
                content_fileurl=url,
                content_filesize=0,
                content_timemodified=0,
                module_modname=url_modname,
                content_type='description-url',
                content_isexternalfile=True,
                hash=None,
            )
            result.append(new_file)
        return result