def rename_slide(old, new, root=None, relink=True, verbose=False):
    """Rename the slide at ``old`` to ``new``, including its same-named files.

    The slide's inner file and the optional ``-thumb.jpg`` / ``-full.jpg``
    companions are renamed inside the folder first; the folder itself is
    renamed last.

    Args:
        old: Path of the existing slide.
        new: Path the slide should have afterwards.
        root: Folder handed to ``parse_folder`` when relinking; defaults to
            the current directory.
        relink: When true, run the ``mv_refs`` action over ``root`` before
            anything is renamed.
        verbose: Forwarded to ``parse_folder``.

    Raises:
        IOError: ``old`` does not exist.
        TypeError: ``is_slide(old)`` is false.
    """
    companion_suffixes = ("-thumb.jpg", "-full.jpg")

    # Fall back to the current directory when no root was supplied.
    search_root = '.' if root is None else root

    if not os.path.exists(old):
        raise IOError("%s does not exist" % old)
    if not is_slide(old):
        raise TypeError("%s is not a valid slide" % old)

    source_name = os.path.basename(old)
    target_name = os.path.basename(new)
    inner_path, _slide_type = parse_slide(old)

    # Relink before touching the filesystem so no file paths need adjusting.
    if relink:
        parse_folder(
            search_root,
            actions=[mv_refs(source_name, target_name)],
            verbose=verbose,
        )

    # The inner file keeps its extension and takes the new slide name.
    _stem, extension = os.path.splitext(inner_path)
    safe_rename(inner_path, os.path.join(old, target_name + extension))

    # Thumbnail and full-size images are optional; rename those that exist.
    for suffix in companion_suffixes:
        current = os.path.join(old, source_name + suffix)
        renamed = os.path.join(old, target_name + suffix)
        if os.path.exists(current):
            safe_rename(current, renamed)

    # Finally rename the enclosing folder itself.
    safe_rename(old, new)
def parse_slide_folders(path):
    """Collect the slides found in ``path`` and its immediate subfolders.

    Every visited directory is passed to ``parse_slide``; directories for
    which it returns a non-``None`` result are reported.

    Args:
        path: Directory to scan. A trailing path separator is tolerated.

    Returns:
        A ``(filepaths, dirs)`` tuple. ``filepaths`` is a list of
        ``os.path.split`` pairs built from the first element of each
        ``parse_slide`` result, in walk order. ``dirs`` is the sorted list
        of directories that were recognised as slides.
    """
    CUTOFF = 1  # only go one directory deep

    filepaths = []
    dirs = set()
    for root, dirnames, _filenames in os.walk(path):
        # Measure depth relative to ``path`` instead of counting separators
        # in the raw strings: a trailing separator on ``path`` used to make
        # the scan descend one level further than intended.
        relative = os.path.relpath(root, path)
        depth = 0 if relative == os.curdir else relative.count(os.sep) + 1

        # Stop os.walk from descending below the cutoff; clearing the list
        # in place prunes the traversal, so deeper roots are never yielded.
        if depth >= CUTOFF:
            del dirnames[:]

        parsed = parse_slide(root)
        if parsed is not None:
            filepaths.append(os.path.split(parsed[0]))
            dirs.add(root)

    # Sorted so the result is deterministic across runs.
    return filepaths, sorted(dirs)
def parse_meta(filename):
    """Read the title and description metadata stored in a zipped slide.

    ``parse_slide`` supplies the name of the inner file and its extension.
    Depending on that extension the metadata comes from:

    * ``.htm`` / ``.html`` - ``<meta name="veeva_title">`` and
      ``<meta name="veeva_description">`` tags,
    * ``.pdf`` - ``Title`` and ``Subject`` of the last document info entry,
    * ``.jpg`` / ``.jpeg`` - the ``x-default`` Dublin Core ``title`` and
      ``description`` in the embedded XMP packet.

    Args:
        filename: Path to the slide zip archive.

    Returns:
        ``None`` when ``parse_slide`` does not recognise ``filename``;
        otherwise a dict with keys ``filename``, ``veeva_title`` and
        ``veeva_description`` (the latter two are ``None`` when missing).
    """
    import tempfile  # local import: only the JPEG branch needs it

    slide_file = parse_slide(filename)
    if slide_file is None:
        return None

    inner_name = slide_file[0]
    slide_type = slide_file[1]
    title_string = None
    description_string = None

    with ZipFile(filename, 'r') as z:
        with z.open(inner_name) as f:
            if slide_type in (".htm", ".html"):
                soup = BeautifulSoup(f.read(), "lxml")
                title = soup.find('meta', {'name': 'veeva_title'})
                if title is not None:
                    title_string = title.get('content', None)
                description = soup.find('meta', {'name': 'veeva_description'})
                if description is not None:
                    description_string = description.get('content', None)

            if slide_type == ".pdf":
                # The parser and the document have to be wired to each other
                # before the info dictionaries can be read.
                doc = PDFDocument()
                parser = PDFParser(f)
                parser.set_document(doc)
                doc.set_parser(parser)
                metadata = doc.info
                if len(metadata) > 0:
                    latest = metadata[-1]
                    try:
                        if latest['Title'] != '':
                            title_string = latest['Title']
                    except KeyError:
                        title_string = None
                    try:
                        if latest['Subject'] != '':
                            description_string = latest['Subject']
                    except KeyError:
                        description_string = None

            if slide_type in (".jpg", ".jpeg"):
                # XMPFiles works on a path, so the image is extracted to a
                # temporary file. try/finally guarantees the file is removed
                # and the XMP handle closed even when parsing fails.
                fd, tmp_file_name = tempfile.mkstemp(suffix=".jpg")
                try:
                    with os.fdopen(fd, 'wb') as tf:
                        tf.write(f.read())
                    xmpfile = XMPFiles(file_path=tmp_file_name)
                    try:
                        xmp = xmpfile.get_xmp()
                    finally:
                        xmpfile.close_file()
                    # An image without an XMP packet yields no metadata
                    # object; leave both fields as None in that case.
                    if xmp is not None:
                        try:
                            title_string = xmp.get_localized_text(
                                consts.XMP_NS_DC, 'title', None, 'x-default')
                        except XMPError:
                            title_string = None
                        try:
                            description_string = xmp.get_localized_text(
                                consts.XMP_NS_DC, 'description', None,
                                'x-default')
                        except XMPError:
                            description_string = None
                finally:
                    os.remove(tmp_file_name)

    return {
        "filename": filename,
        "veeva_title": title_string,
        "veeva_description": description_string,
    }
def parse_meta(filename, htmlonly=False):
    """Read the title and description metadata stored in a zipped slide.

    ``parse_slide`` supplies the name of the inner file and its extension.
    Depending on that extension the metadata comes from:

    * ``.htm`` / ``.html`` - ``<meta name="veeva_title">`` and
      ``<meta name="veeva_description">`` tags,
    * ``.pdf`` - ``Title`` and ``Subject`` of the last document info entry,
    * ``.jpg`` / ``.jpeg`` - the ``x-default`` Dublin Core ``title`` and
      ``description`` in the embedded XMP packet.

    Args:
        filename: Path to the slide zip archive.
        htmlonly: When true, PDF and JPEG slides are not inspected and their
            title/description stay ``None``.

    Returns:
        A dict with keys ``filename``, ``veeva_title`` and
        ``veeva_description``. When ``parse_slide`` does not recognise
        ``filename``, ``filename`` is the base name and both metadata values
        are the base name without its extension.
    """
    import tempfile  # local import: only the JPEG branch needs it

    slide_file = parse_slide(filename)
    if slide_file is None:
        # Not a recognised slide: fall back to values derived from the name.
        base_name = os.path.basename(filename)
        stem = os.path.splitext(base_name)[0]
        return {
            'filename': base_name,
            'veeva_title': stem,
            'veeva_description': stem
        }

    inner_name = slide_file[0]
    slide_type = slide_file[1]
    title_string = None
    description_string = None

    with ZipFile(filename, 'r') as z:
        with z.open(inner_name) as f:
            if slide_type in (".htm", ".html"):
                soup = BeautifulSoup(f.read(), "lxml")
                title = soup.find('meta', {'name': 'veeva_title'})
                if title is not None:
                    title_string = title.get('content', None)
                description = soup.find('meta', {'name': 'veeva_description'})
                if description is not None:
                    description_string = description.get('content', None)

            if slide_type == ".pdf" and not htmlonly:
                # The parser and the document have to be wired to each other
                # before the info dictionaries can be read.
                doc = PDFDocument()
                parser = PDFParser(f)
                parser.set_document(doc)
                doc.set_parser(parser)
                metadata = doc.info
                if len(metadata) > 0:
                    latest = metadata[-1]
                    try:
                        if latest['Title'] != '':
                            title_string = latest['Title']
                    except KeyError:
                        title_string = None
                    try:
                        if latest['Subject'] != '':
                            description_string = latest['Subject']
                    except KeyError:
                        description_string = None

            # Membership test instead of `a or b and c`: `and` binds tighter
            # than `or`, so ".jpg" slides used to be parsed even when
            # htmlonly was set.
            if slide_type in (".jpg", ".jpeg") and not htmlonly:
                # XMPFiles works on a path, so the image is extracted to a
                # temporary file. try/finally guarantees the file is removed
                # and the XMP handle closed even when parsing fails.
                fd, tmp_file_name = tempfile.mkstemp(suffix=".jpg")
                try:
                    with os.fdopen(fd, 'wb') as tf:
                        tf.write(f.read())
                    xmpfile = XMPFiles(file_path=tmp_file_name)
                    try:
                        xmp = xmpfile.get_xmp()
                    finally:
                        xmpfile.close_file()
                    # An image without an XMP packet yields no metadata
                    # object; leave both fields as None in that case.
                    if xmp is not None:
                        try:
                            title_string = xmp.get_localized_text(
                                consts.XMP_NS_DC, 'title', None, 'x-default')
                        except XMPError:
                            title_string = None
                        try:
                            description_string = xmp.get_localized_text(
                                consts.XMP_NS_DC, 'description', None,
                                'x-default')
                        except XMPError:
                            description_string = None
                finally:
                    os.remove(tmp_file_name)

    return {
        "filename": filename,
        "veeva_title": title_string,
        "veeva_description": description_string
    }