def get_img(soup, fileDict, uc):
    """
    Return fileDict updated with addresses of image files used in a
    single presentation page.

    D2L formatting images (icons, background images) are appended to
    fileDict['imgUrls']; user-uploaded images served through a d2lFile
    URL are recorded by ePortfolio object id in fileDict['fileIds'],
    fileDict['fileUrls'], and fileDict['fileNames'].

    Parameters:
        soup: a BeautifulSoup object created from a presentation page
        fileDict: dict of all files linked to in a presentation
        uc: user context used for authenticated ePortfolio API calls
    Returns:
        the updated fileDict
    """
    for img in soup.findAll('img'):
        # Use a separate name for the URL instead of rebinding the loop
        # variable `img` (the original shadowed the tag with a string).
        src = img['src']
        if src.find('d2lFile') < 0:
            # Formatting image: record its absolute URL once.
            imgUrl = DOMAIN + src
            if imgUrl not in fileDict['imgUrls']:
                fileDict['imgUrls'].append(imgUrl)
        else:
            # User file: record id, absolute URL, and server-side
            # file name, skipping ids already seen on earlier pages.
            address = DOMAIN + src
            epoId = get_epo_id(src)
            if epoId not in fileDict['fileIds']:
                fileDict['fileIds'].append(epoId)
                fileDict['fileUrls'].append(address)
                fileName = eportfolio.get_ep_object_properties(uc, epoId).\
                    FileName.strip()
                fileDict['fileNames'].append(fileName)
    return fileDict
def download_presentation(epObject, uc):
    """
    Create and populate a fileDict and download every file it references.

    Creates a directory named after the presentation containing individual
    folders for each type of file downloaded, as outlined below:
        Presentation (includes index.html)
        |___Pages (HTML files)
        |___Content (user images, docs, and other files)
        |___Formatting (css and image files for layout and formatting)

    Parameters:
        epObject: the ePortfolio presentation object to download
        uc: user context used for authenticated ePortfolio API calls
    Returns:
        the populated fileDict
    """
    fileDict = make_file_dict()
    fileDict = populate_file_dict(epObject, uc, fileDict)
    # Take a single timestamp; the original called datetime.now() three
    # times, which could straddle a second (or minute) boundary.
    now = datetime.datetime.now()
    stamp = str(now.hour) + str(now.minute) + str(now.second)
    directoryName = epObject.Name.replace(" ", "") + "_presentation_" + stamp
    os.mkdir(directoryName)
    os.chdir(directoryName)
    # index.html lives at the top of the presentation directory.
    with tempfile.TemporaryFile() as temp:
        temp.write(urllib.request.urlopen(fileDict['pageUrls'][0]).read())
        temp.seek(0)
        update_page(temp, fileDict, "index.html", index=True)
    os.mkdir("Pages")
    os.chdir("Pages")
    for (pageUrl, pageFileName) in zip(fileDict['pageUrls'][1:],
                                       fileDict['pageFileNames'][1:]):
        with tempfile.TemporaryFile() as temp:
            temp.write(urllib.request.urlopen(pageUrl).read())
            # Rewind before handing the file to update_page; the original
            # omitted this seek, so update_page read from EOF.
            temp.seek(0)
            update_page(temp, fileDict, pageFileName)
    os.chdir("../")
    os.mkdir("Content")
    os.chdir("Content")
    for (fileUrl, fileId) in zip(fileDict['fileUrls'], fileDict['fileIds']):
        fileName = eportfolio.get_ep_object_properties(uc, fileId).\
            FileName.strip()
        urllib.request.urlretrieve(fileUrl, fileName)
    os.chdir("../")
    os.mkdir("Formatting")
    os.chdir("Formatting")
    for (cssUrl, cssFileName) in zip(fileDict['cssUrls'],
                                     fileDict['cssFileNames']):
        with tempfile.TemporaryFile() as temp:
            temp.write(urllib.request.urlopen(cssUrl).read())
            temp.seek(0)
            update_css_file(cssUrl, temp, cssFileName)
    for imgUrl in fileDict['imgUrls']:
        # +1 so the leading "/" is not kept: the original slice produced
        # names like "/icon.png", an absolute path at filesystem root.
        fileName = imgUrl[imgUrl.rfind("/") + 1:]
        if fileName.find("?") > 0:
            # Strip any query string from the downloaded file's name.
            fileName = fileName[:fileName.find("?")]
        urllib.request.urlretrieve(imgUrl, fileName)
    os.chdir("../")
    print(str(fileDict))
    return fileDict
def get_embedded_object(soup, fileDict, uc):
    """
    Return fileDict updated with ePortfolio object IDs of objects
    embedded in a single presentation page.

    For each anchor whose href contains a d2lfile URL, the object id,
    absolute URL, and server-side file name are appended (once) to
    fileDict['fileIds'], fileDict['fileUrls'], and fileDict['fileNames'].

    Parameters:
        soup: a BeautifulSoup object created from a presentation page
        fileDict: dict of all files linked to in a presentation
        uc: user context used for authenticated ePortfolio API calls
    """
    for anchor in soup.find_all('a'):
        href = str(anchor['href'])
        # NOTE(review): this tests for 'd2lfile' while get_img tests for
        # 'd2lFile' -- confirm which casing the server actually emits.
        if href.find('d2lfile') <= 0:
            continue
        epoId = get_epo_id(href)
        if epoId in fileDict['fileIds']:
            continue  # already recorded on an earlier page
        fileDict['fileIds'].append(epoId)
        fileDict['fileUrls'].append(DOMAIN + href)
        properties = eportfolio.get_ep_object_properties(uc, epoId)
        fileDict['fileNames'].append(properties.FileName.strip())
    return fileDict