def _handle_description(
    self,
    section_name: str,
    module_name: str,
    module_modname: str,
    module_id: str,
    module_description: str,
) -> [File]:
    """
    Builds the file entry for a module description together with one entry
    for every URL found inside that description.

    @param module_description: The description of the module
    @params: All necessary parameters to create a file.
    @return: The URL entries found in the description, followed by the
             description entry itself.
    """
    description_size = len(module_description)

    # The hash is taken over the output of _filter_changing_attributes, not
    # over the raw description text.
    description_digest = hashlib.sha1(
        ResultsHandler._filter_changing_attributes(module_description).encode('utf-8')
    ).hexdigest()

    # Descriptions of url / index_mod modules get their own module type. The
    # rewritten type is also the one handed to the URL search below.
    if module_modname.startswith(('url', 'index_mod')):
        module_modname = 'url_description'

    root_path = '/'
    description_entry = File(
        module_id=module_id,
        section_name=section_name,
        module_name=module_name,
        content_filepath=root_path,
        content_filename=module_name,
        content_fileurl='',
        content_filesize=description_size,
        content_timemodified=0,
        module_modname=module_modname,
        content_type='description',
        content_isexternalfile=False,
        hash=description_digest,
    )
    description_entry.text_content = module_description

    linked_entries = self._find_all_urls_in_description(
        section_name, module_name, module_modname, module_id, root_path, module_description
    )
    return [*linked_entries, description_entry]
def changes_to_notify(self) -> [Course]:
    """Collect, per course, every stored file that has not been notified yet.

    For a file flagged as modified or moved, the row whose ``old_file_id``
    points at it (if there is one) is attached as ``new_file``.

    Returns:
        [Course]: One Course per course id that has rows with
        ``notified = 0``; its ``files`` list holds those rows as File objects.
    """
    changed_courses = []
    conn = sqlite3.connect(self.db_file)
    try:
        conn.row_factory = sqlite3.Row
        cursor = conn.cursor()

        cursor.execute(
            """SELECT course_id, course_fullname
            FROM files WHERE notified = 0 GROUP BY course_id;"""
        )
        # fetchall() before the nested queries: the same cursor is reused below.
        course_rows = cursor.fetchall()

        for course_row in course_rows:
            course = Course(course_row['course_id'], course_row['course_fullname'])

            cursor.execute(
                """SELECT * FROM files WHERE notified = 0 AND course_id = ?;""",
                (course.id,),
            )
            file_rows = cursor.fetchall()

            course.files = []
            for file_row in file_rows:
                notify_file = File.fromRow(file_row)
                if notify_file.modified or notify_file.moved:
                    # add reference to new file
                    cursor.execute(
                        """SELECT * FROM files WHERE old_file_id = ?;""",
                        (notify_file.file_id,),
                    )
                    new_file_row = cursor.fetchone()
                    if new_file_row is not None:
                        notify_file.new_file = File.fromRow(new_file_row)
                course.files.append(notify_file)

            changed_courses.append(course)
    finally:
        # Close the connection even when a query or row conversion raises.
        conn.close()

    return changed_courses
def _handle_cookie_mod(
    self,
    section_name: str,
    module_name: str,
    module_modname: str,
    module_id: str,
    module_url: str,
) -> [File]:
    """
    Wraps a cookie module into a single external file entry that points at
    the module url.

    @param module_url: The url to the cookie module.
    @params: All necessary parameters to create a file.
    @return: A one-element list holding the file created for the module.
    """
    cookie_entry = File(
        module_id=module_id,
        section_name=section_name,
        module_name=module_name,
        content_filepath='/',
        content_filename=module_name,
        content_fileurl=module_url,
        content_filesize=0,
        content_timemodified=0,
        module_modname=module_modname,
        content_type='cookie_mod',
        content_isexternalfile=True,
        hash=None,
    )
    return [cookie_entry]
def move_file(self, file: File, course_id: int, course_fullname: str):
    """Store ``file`` as the new location of a moved file.

    When ``file.old_file`` is set, the new row is inserted already notified
    and linked to the old row through ``old_file_id``, and the old row is
    flagged ``moved = 1, notified = 0``. Without an old file, ``file`` is
    stored like a brand-new file with ``notified = 0``.

    Args:
        file (File): The file at its new location.
        course_id (int): Id of the course the file belongs to.
        course_fullname (str): Full name of that course.
    """
    conn = sqlite3.connect(self.db_file)
    try:
        cursor = conn.cursor()

        data_new = {'course_id': course_id, 'course_fullname': course_fullname}
        data_new.update(file.getMap())

        if file.old_file is not None:
            # insert a new file,
            # but it is already notified because the same file already exists
            # as moved
            data_new.update(
                {
                    'old_file_id': file.old_file.file_id,
                    'modified': 0,
                    'moved': 0,
                    'deleted': 0,
                    'notified': 1,
                }
            )
            cursor.execute(File.INSERT, data_new)

            data_old = {'course_id': course_id, 'course_fullname': course_fullname}
            data_old.update(file.old_file.getMap())

            cursor.execute(
                """UPDATE files
                SET notified = 0, moved = 1
                WHERE file_id = :file_id;
                """,
                data_old,
            )
        else:
            # this should never happen, but the old file is not saved in the
            # file descriptor, so we need to inform about the new file
            # notified = 0
            data_new.update({'modified': 0, 'deleted': 0, 'moved': 0, 'notified': 0})
            cursor.execute(File.INSERT, data_new)

        conn.commit()
    finally:
        # Closing without a commit discards a half-applied insert/update pair
        # and releases the connection even when a statement raises.
        conn.close()
def _find_all_urls_in_description(
    self,
    section_name: str,
    module_name: str,
    module_modname: str,
    module_id: str,
    content_filepath: str,
    description: str,
) -> [File]:
    """Parses a description to find all urls in it.

    Then it creates for every distinct url a file entry. Urls are taken from
    ``href=`` and ``src=`` attributes; a url that occurs more than once
    (including once as href and once as src) yields a single entry.

    Args:
        section_name (str): The name of the course section
        module_name (str): Name of the Module
        module_modname (str): Type of the Module
        module_id (str): Module Id
        content_filepath (str): File path stored on every created entry
        description (str): The description string

    Returns:
        [File]: A list of created file entries.
    """
    found = re.findall(r'href=[\'"]?([^\'" >]+)', description)
    found += re.findall(r'src=[\'"]?([^\'" >]+)', description)
    # De-duplicate across both attribute kinds; dict keys keep first-seen order.
    urls = list(dict.fromkeys(found))

    result = []
    for url in urls:
        if url == '':
            continue

        url_parts = urlparse.urlparse(url)
        on_moodle = url_parts.hostname == self.moodle_domain
        if on_moodle and url_parts.path.find('/webservice/') >= 0:
            url_modname = 'index_mod-description-' + module_modname
        elif on_moodle:
            url_modname = 'cookie_mod-description-' + module_modname
        else:
            url_modname = 'url-description-' + module_modname

        result.append(
            File(
                module_id=module_id,
                section_name=section_name,
                module_name=module_name,
                content_filepath=content_filepath,
                content_filename=url,
                content_fileurl=url,
                content_filesize=0,
                content_timemodified=0,
                module_modname=url_modname,
                content_type='description-url',
                content_isexternalfile=True,
                hash=None,
            )
        )
    return result
def new_file(self, file: File, course_id: int, course_fullname: str):
    """Save a file to the index as a fresh, not yet notified entry.

    Args:
        file (File): The file to store; its ``getMap()`` supplies the column values.
        course_id (int): Id of the course the file belongs to.
        course_fullname (str): Full name of that course.
    """
    conn = sqlite3.connect(self.db_file)
    try:
        cursor = conn.cursor()

        data = {'course_id': course_id, 'course_fullname': course_fullname}
        data.update(file.getMap())
        # The status flags are applied last so they override whatever getMap() returned.
        data.update({'modified': 0, 'deleted': 0, 'moved': 0, 'notified': 0})

        cursor.execute(File.INSERT, data)
        conn.commit()
    finally:
        # Release the connection even when the insert raises.
        conn.close()
def delete_file(self, file: File, course_id: int, course_fullname: str):
    """Flag a stored file as deleted and queue it for notification.

    The row whose ``file_id`` matches ``file`` gets ``deleted = 1``,
    ``notified = 0`` and the time stamp taken from ``file.getMap()``.

    Args:
        file (File): The file to flag; ``getMap()`` must provide
            ``file_id`` and ``time_stamp``.
        course_id (int): Id of the course (not used by the statement).
        course_fullname (str): Full name of the course (not used by the statement).
    """
    conn = sqlite3.connect(self.db_file)
    try:
        cursor = conn.cursor()

        data = {'course_id': course_id, 'course_fullname': course_fullname}
        data.update(file.getMap())

        cursor.execute(
            """UPDATE files
            SET notified = 0, deleted = 1, time_stamp = :time_stamp
            WHERE file_id = :file_id;
            """,
            data,
        )
        conn.commit()
    finally:
        # Release the connection even when the update raises.
        conn.close()
def get_old_files(self) -> [Course]:
    """Return, per course, the predecessor rows of files that replaced another file.

    Every row with a non-NULL ``old_file_id`` is looked at; the row that
    ``old_file_id`` refers to is loaded and added to the course's ``files``.

    Returns:
        [Course]: One Course per distinct (course_id, course_fullname) that
        has rows with ``old_file_id`` set.
    """
    stored_courses = []
    conn = sqlite3.connect(self.db_file)
    try:
        conn.row_factory = sqlite3.Row
        cursor = conn.cursor()

        cursor.execute(
            """SELECT DISTINCT course_id, course_fullname
            FROM files WHERE old_file_id IS NOT NULL"""
        )
        # fetchall() before the nested queries: the same cursor is reused below.
        course_rows = cursor.fetchall()

        for course_row in course_rows:
            course = Course(course_row['course_id'], course_row['course_fullname'])

            cursor.execute(
                """SELECT * FROM files
                WHERE course_id = ? AND old_file_id IS NOT NULL""",
                (course.id,),
            )
            updated_files = cursor.fetchall()

            course.files = []
            for updated_file in updated_files:
                cursor.execute(
                    """SELECT * FROM files WHERE file_id = ?""",
                    (updated_file['old_file_id'],),
                )
                old_file = cursor.fetchone()
                if old_file is None:
                    # The referenced row is missing; skip it, like
                    # changes_to_notify does before calling File.fromRow.
                    continue
                course.files.append(File.fromRow(old_file))

            stored_courses.append(course)
    finally:
        # Close the connection even when a query or row conversion raises.
        conn.close()

    return stored_courses
def get_stored_files(self) -> [Course]:
    """Return all stored files that are neither deleted, modified nor moved.

    Returns:
        [Course]: One Course per course id that has such rows; its ``files``
        list holds those rows as File objects.
    """
    stored_courses = []
    conn = sqlite3.connect(self.db_file)
    try:
        conn.row_factory = sqlite3.Row
        cursor = conn.cursor()

        cursor.execute(
            """SELECT course_id, course_fullname
            FROM files WHERE deleted = 0 AND modified = 0 AND moved = 0
            GROUP BY course_id;"""
        )
        # fetchall() before the nested query: the same cursor is reused below.
        course_rows = cursor.fetchall()

        for course_row in course_rows:
            course = Course(course_row['course_id'], course_row['course_fullname'])

            cursor.execute(
                """SELECT * FROM files
                WHERE deleted = 0 AND modified = 0 AND moved = 0
                AND course_id = ?;""",
                (course.id,),
            )
            course.files = [File.fromRow(file_row) for file_row in cursor.fetchall()]

            stored_courses.append(course)
    finally:
        # Close the connection even when a query or row conversion raises.
        conn.close()

    return stored_courses
def _handle_files(
    self,
    section_name: str,
    module_name: str,
    module_modname: str,
    module_id: str,
    module_contents: [],
) -> [File]:
    """
    Turns every content entry of a module or assignment into a file and,
    for description entries, adds the files for the urls found in the
    description text.

    @param module_contents: The list of content of the module or assignment.
    @params: All necessary parameters to create a file.
    @return: A list of files that exist in a module.
    """
    collected = []
    for entry in module_contents:
        file_url = entry.get('fileurl', '')
        # Link-like modules are useless without a url, so such entries are dropped.
        if file_url == '' and module_modname.startswith(('url', 'index_mod', 'cookie_mod')):
            continue

        kind = entry.get('type', '')
        file_path = entry.get('filepath', '/')
        file_path = '/' if file_path is None else file_path

        # index_mod entries are named after their module.
        file_name = module_name if module_modname.startswith('index_mod') else entry.get('filename', '')

        is_description = kind == 'description'
        description_text = None
        digest = None
        if is_description:
            description_text = entry.get('description', '')
            # Hash the filtered description, not the raw text.
            digest = hashlib.sha1(
                ResultsHandler._filter_changing_attributes(description_text).encode('utf-8')
            ).hexdigest()

        created = File(
            module_id=module_id,
            section_name=section_name,
            module_name=module_name,
            content_filepath=file_path,
            content_filename=file_name,
            content_fileurl=file_url,
            content_filesize=entry.get('filesize', 0),
            content_timemodified=entry.get('timemodified', 0),
            module_modname=module_modname,
            content_type=kind,
            content_isexternalfile=entry.get('isexternalfile', False),
            hash=digest,
        )

        if is_description:
            created.text_content = description_text
            # The url entries are listed before the description they came from.
            collected.extend(
                self._find_all_urls_in_description(
                    section_name, module_name, module_modname, module_id, file_path, description_text
                )
            )

        collected.append(created)
    return collected
def _handle_files(
    self,
    section_name: str,
    section_id: int,
    module_name: str,
    module_modname: str,
    module_id: str,
    module_contents: [],
) -> [File]:
    """
    Turns every content entry of a module or assignment into a file and,
    for description and html entries, adds the files for the urls found in
    their text.

    @param module_contents: The list of content of the module or assignment.
    @params: All necessary parameters to create a file.
    @return: A list of files that exist in a module.
    """
    collected = []
    for entry in module_contents:
        file_url = entry.get('fileurl', '')
        # Link-like modules are useless without a url, so such entries are dropped.
        if file_url == '' and module_modname.startswith(('url', 'index_mod', 'cookie_mod')):
            continue

        kind = entry.get('type', '')
        file_path = entry.get('filepath', '/')
        file_path = '/' if file_path is None else file_path

        file_name = entry.get('filename', '')
        # Only .html entries are renamed, so that pdf files or other content
        # downloaded from moodle pages keep their own file name.
        if module_modname.startswith('index_mod') and file_name.endswith('.html'):
            file_name = module_name

        is_description = kind == 'description'
        is_html = kind == 'html'

        # Description related: the description text doubles as the html that
        # is searched for urls.
        description_text = entry.get('description', '')
        searchable_html = description_text if is_description else entry.get('html', '')

        digest = None
        if is_description and not entry.get('no_hash', False):
            # Hash the filtered description, not the raw text.
            digest = hashlib.sha1(
                ResultsHandler._filter_changing_attributes(description_text).encode('utf-8')
            ).hexdigest()

        created = File(
            module_id=module_id,
            section_name=section_name,
            section_id=section_id,
            module_name=module_name,
            content_filepath=file_path,
            content_filename=file_name,
            content_fileurl=file_url,
            content_filesize=entry.get('filesize', 0),
            content_timemodified=entry.get('timemodified', 0),
            module_modname=module_modname,
            content_type=kind,
            content_isexternalfile=entry.get('isexternalfile', False),
            hash=digest,
        )

        if is_description:
            created.text_content = description_text
        elif is_html:
            created.html_content = searchable_html

        if (is_description or is_html) and not entry.get('no_search_for_urls', False):
            # The url entries are listed before the content they came from.
            collected.extend(
                self._find_all_urls(
                    section_name,
                    section_id,
                    module_name,
                    module_modname,
                    module_id,
                    file_path,
                    searchable_html,
                    entry.get('no_search_for_moodle_urls', False),
                    entry.get('filter_urls_during_search_containing', []),
                )
            )

        collected.append(created)
    return collected
def _find_all_urls(
    self,
    section_name: str,
    section_id: int,
    module_name: str,
    module_modname: str,
    module_id: str,
    content_filepath: str,
    content_html: str,
    no_search_for_moodle_urls: bool,
    filter_urls_containing: [str],
) -> [File]:
    """Parses a html string to find all urls in it.

    Then it creates for every url a file entry. Urls are taken from ``href=``
    and ``src=`` attributes and from ``<a>`` elements whose text starts with
    ``http``.

    Args:
        section_name (str): The name of the course section
        section_id (int): The id of the course section
        module_name (str): Name of the Module
        module_modname (str): Type of the Module
        module_id (str): Module Id
        content_filepath (str): File path stored on every created entry
        content_html (str): The html string
        no_search_for_moodle_urls (bool): If True, urls that point to the
            moodle domain are skipped
        filter_urls_containing ([str]): Urls containing any of these
            substrings are skipped

    Returns:
        [File]: A list of created file entries.
    """
    found = re.findall(r'href=[\'"]?([^\'" >]+)', content_html)
    found += re.findall(r'<a[^>]*>(http[^<]*)<\/a>', content_html)
    found += re.findall(r'src=[\'"]?([^\'" >]+)', content_html)
    # De-duplicate across all three patterns; dict keys keep first-seen order.
    urls = list(dict.fromkeys(found))

    result = []
    for url in urls:
        if url == '':
            continue

        # To avoid different encodings and quotes and so that youtube-dl downloads correctly
        # (See issues #96 and #103), we remove all encodings.
        url = html.unescape(url)
        url = urlparse.unquote(url)

        url_parts = urlparse.urlparse(url)
        on_moodle = url_parts.hostname == self.moodle_domain

        # Skip if no moodle urls should be found. The domain test is grouped
        # explicitly: `a or b and c` parses as `a or (b and c)`, which would
        # drop every moodle url regardless of the flag.
        if (on_moodle or url_parts.netloc == self.moodle_domain) and no_search_for_moodle_urls:
            continue

        # Skip url if a filter matches. This has to leave the url loop, so it
        # cannot be a `continue` inside a loop over the filters.
        if any(url.find(filter_str) >= 0 for filter_str in filter_urls_containing):
            continue

        if on_moodle and url_parts.path.find('/theme/image.php/') >= 0:
            # Replace the numeric path segment of theme image urls with -1.
            url = re.sub(
                r"\/theme\/image.php\/(\w+)\/(\w+)\/\d+\/",
                r"/theme/image.php/\g<1>/\g<2>/-1/",
                url,
            )

        if on_moodle and url_parts.path.find('/webservice/') >= 0:
            url_modname = 'index_mod-description-' + module_modname
        elif on_moodle:
            url_modname = 'cookie_mod-description-' + module_modname
        else:
            url_modname = 'url-description-' + module_modname

        first_guess_filename = url
        if first_guess_filename.startswith('data:image/'):
            # Inline images get a generic name; the extension is taken from
            # the part between 'data:image/' and the first ';', default png.
            file_extension_guess = 'png'
            header_parts = first_guess_filename.split(';')
            if len(header_parts) > 1:
                mime_parts = header_parts[0].split('/')
                if len(mime_parts) > 1:
                    file_extension_guess = mime_parts[1]
            first_guess_filename = 'inline_image.' + file_extension_guess

        # Cap the guessed file name at 254 characters.
        first_guess_filename = first_guess_filename[:254]

        result.append(
            File(
                module_id=module_id,
                section_name=section_name,
                section_id=section_id,
                module_name=module_name,
                content_filepath=content_filepath,
                content_filename=first_guess_filename,
                content_fileurl=url,
                content_filesize=0,
                content_timemodified=0,
                module_modname=url_modname,
                content_type='description-url',
                content_isexternalfile=True,
                hash=None,
            )
        )
    return result