def __init__(self, json_data, begin, end, blog_info, directory):
    """Persist one page of blog-list JSON under <directory>/<BLOG>/<category>.

    The saved file is named "<title>_<blog_id>_<begin>-<end-1>.json",
    sanitised for use as a file name.
    """
    category_dir = os.path.join(directory, QzonePath.BLOG)
    Saver.__init__(self, json_data, category_dir,
                   purge_file_name(blog_info.category))
    raw_name = "%s_%s_%05d-%05d.json" % (
        blog_info.title, blog_info.blog_id, begin, end - 1)
    self._filename = purge_file_name(raw_name)
def download_media(url, directory, filename, has_extension=False, *args, **kwargs):
    """Download *url* into *directory*/*filename*, thread-safely, skipping existing files.

    Returns True only when a new file was actually written; False when the
    target file already existed.  NOTE(review): the final "is downloaded"
    message prints in both cases, which can be misleading.
    Extra *args/**kwargs are accepted and ignored (signature compatibility
    with the other download_media variants).
    """
    result = False
    s = "\rdownloading " + url + " -> %05.2f%% "
    chunk_size = 1024 << 4  # 16 KiB per streamed chunk
    # Serialise directory creation so concurrent downloads don't race
    # on the exists-check + makedirs pair.
    with directory_lock:
        if not os.path.exists(directory):
            os.makedirs(directory)
    lock = None
    # One lock per URL so the same resource is never fetched twice at once.
    # NOTE(review): entries are never removed from _locks, so the dict grows
    # for the process lifetime — acceptable for a batch tool, worth confirming.
    with dict_lock:
        lock = _locks.setdefault(url, Lock())
    with lock:
        with requests.get(url, stream=True) as r:
            # Unknown length -> total stays inf and the percentage tends to 0.
            total_len = math.inf
            if "content-length" in r.headers:
                total_len = int(r.headers["content-length"])
            current_len = 0
            filename = purge_file_name(filename)
            if not has_extension:
                # Derive the extension from "<type>/<subtype>"; default "jpg".
                extension = "jpg"
                if "content-type" in r.headers:
                    content_type = r.headers["content-type"]
                    extension = content_type[content_type.find("/") + 1:]
                filename = "%s.%s" % (filename, extension)
            filename = os.path.join(directory, filename)
            # Skip files downloaded by an earlier run.
            if not os.path.exists(filename):
                with open(filename, "wb") as f:
                    for data in r.iter_content(chunk_size):
                        f.write(data)
                        current_len += len(data)
                        percent = 100 * current_len / total_len
                        print(s % (percent), end="")
                result = True
    print("\n%s is downloaded" % url)
    return result
def download_media(url, dir, file_name, otime):
    """Download *url* into *dir* as "<file_name>.<ext>", optionally stamping EXIF.

    When *otime* is truthy it is written into the saved image as the EXIF
    DateTimeOriginal tag after the download completes.  The extension is
    taken from the response content-type (default "jpg"); progress is
    echoed to stdout.
    """
    progress_fmt = "\rdownloading %s -> %05.2f%% "
    block_size = 1024
    # Build the EXIF payload up front so a bad timestamp fails before any I/O.
    if otime:
        exif_payload = piexif.dump(
            {"Exif": {piexif.ExifIFD.DateTimeOriginal: otime}})
    with closing(requests.get(url, stream=True)) as response:
        # Derive the extension from "<type>/<subtype>"; fall back to "jpg".
        extension = "jpg"
        content_type = response.headers.get("content-type")
        if content_type is not None:
            extension = content_type[content_type.find("/") + 1:]
        total_bytes = math.inf
        if "content-length" in response.headers:
            total_bytes = int(response.headers["content-length"])
        received = 0
        safe_name = purge_file_name(file_name)
        with _lock:
            # Directory creation is serialised across worker threads.
            if not os.path.exists(dir):
                os.makedirs(dir)
        file_name = os.path.join(dir, "%s.%s" % (safe_name, extension))
        with open(file_name, "wb") as sink:
            for chunk in response.iter_content(block_size):
                sink.write(chunk)
                received += len(chunk)
                print(progress_fmt % (url, 100 * received / total_bytes), end="")
    # Stamp the timestamp only once the file is fully on disk.
    if otime:
        piexif.insert(exif_payload, file_name)
    print("\n%s is downloaded" % url)
def download_media(url, dir, file_name):
    """Fetch *url* and store it in *dir* as "<file_name>.<ext>".

    The extension comes from the response content-type (default "jpg");
    a 30-second timeout guards the request.  Returns True once the body
    has been fully written to disk.
    """
    ok = False
    progress_fmt = "\rdownloading %s -> %05.2f%% "
    block_size = 1024
    with requests.get(url, stream=True, timeout=30) as response:
        # Derive the extension from "<type>/<subtype>"; fall back to "jpg".
        extension = "jpg"
        content_type = response.headers.get("content-type")
        if content_type is not None:
            extension = content_type[content_type.find("/") + 1:]
        total_bytes = math.inf
        if "content-length" in response.headers:
            total_bytes = int(response.headers["content-length"])
        received = 0
        safe_name = purge_file_name(file_name)
        with _lock:
            # Directory creation is serialised across worker threads.
            if not os.path.exists(dir):
                os.makedirs(dir)
        file_name = os.path.join(dir, "%s.%s" % (safe_name, extension))
        with open(file_name, "wb") as sink:
            for chunk in response.iter_content(block_size):
                sink.write(chunk)
                received += len(chunk)
                print(progress_fmt % (url, 100 * received / total_bytes), end="")
        ok = True
    print("\n%s is downloaded" % url)
    return ok
def get_photo_comment_media_list(json_data, uin, album_name, album_id, download_if_not_exist):
    """Return the media list referenced in an album's comments.

    Media for the album live under
    <uin>/<PHOTO>/<album_name>_<album_id>/<DOWNLOAD>; the download
    bookkeeping file is shared at the <uin>/<PHOTO> level.
    """
    album_folder = purge_file_name("%s_%s" % (album_name, album_id))
    media_dir = os.path.join(uin, QzonePath.PHOTO, album_folder,
                             QzonePath.DOWNLOAD)
    record_file = os.path.join(uin, QzonePath.PHOTO,
                               QzoneFileName.DOWNLOADED)
    return get_media_list(json_data, media_dir, record_file,
                          download_if_not_exist, photo=True)
def get_photo_url(url, uin, album_name, album_id, download_if_not_exist=False, comment=False):
    """Resolve the (possibly local) URL for a photo in an album.

    Looks the photo up under
    <uin>/<PHOTO>/<album_name>_<album_id>/<DOWNLOAD>, optionally
    downloading it when *download_if_not_exist* is True.

    :param comment: whether the photo comes from a comment; forwarded to
        get_url.
    """
    album_dir = purge_file_name("%s_%s" % (album_name, album_id))
    photo_download_dir = os.path.join(uin, QzonePath.PHOTO, album_dir,
                                      QzonePath.DOWNLOAD)
    downloaded_file = os.path.join(uin, QzonePath.PHOTO,
                                   QzoneFileName.DOWNLOADED)
    # BUG FIX: the caller's `comment` flag was previously ignored — a
    # literal comment=False was hard-coded here.  Forward it instead.
    u = get_url(url, photo_download_dir, url, False, download_if_not_exist,
                downloaded_file, photo=True, comment=comment)
    return u
def __init__(self, directory, blog_info, html_content, read_num=0):
    """Prepare the output path and parsed soup for saving one blog as HTML.

    :param directory: root output directory for this user.
    :param blog_info: blog metadata; supplies the category and file name.
    :param html_content: raw HTML of the blog page, parsed with html.parser.
    :param read_num: read counter to embed in the saved page (default 0).
    """
    self._html_content = html_content
    blog_path = os.path.join(
        directory, QzonePath.BLOG, purge_file_name(blog_info.category))
    # exist_ok avoids the check-then-create race when several savers
    # target the same category directory concurrently.
    os.makedirs(blog_path, exist_ok=True)
    filename = blog_info.get_file_name()
    self._blog_filename = os.path.join(blog_path, filename)
    self._blog_info = blog_info
    self._read = read_num
    self._bs_obj = BeautifulSoup(self._html_content, "html.parser")
def get_file_name(self):
    """Return this blog's sanitised "<title>_<blog_id>.html" file name."""
    raw = "%s_%s.html" % (self._title, self._blog_id)
    return purge_file_name(raw)
def purge_file(file):
    """Sanitise *file* for use as a file name.

    Thin wrapper that delegates to purge_file_name (presumably strips
    characters that are invalid in file names — see that helper).
    """
    return purge_file_name(file)
def batch(self, should_rename=True, should_add_exif=True):
    """Walk every album under <cwd>/<target_uin>/photo and post-process photos.

    For each album found in album_info.json this optionally (a) prefixes the
    album folder and each photo with a "YYYYMMDD HHMMSS " date taken from the
    album metadata / EXIF, and (b) restores EXIF data from the saved
    floatview/photo JSON via PhotoExifRecover.

    :param should_rename: prefix album folders and photos with a date.
    :param should_add_exif: write recovered EXIF into each photo.
    """
    # Precompiled patterns: a "YYYYMMDD HHMMSS" (or _-separated) date prefix,
    # and the two paginated JSON file-name shapes.
    p_date = re.compile(
        r"(\d{4})(0[1-9]|1[0-2])(0[1-9]|[1-2]\d|3[0-1])[ _]([0-1]\d|2[0-3])([0-5]\d)([0-5]\d)"
    )
    p_floatview_json = re.compile(r"^floatview_photo_\d{5}-\d{5}.json$")
    p_raw_json = re.compile(r"^photo_\d{5}-\d{5}.json$")
    target_dir = os.path.join(os.getcwd(), self.target_uin, "photo")
    # NOTE(review): execution continues even when the path is missing; the
    # open() below will then raise FileNotFoundError.
    if not os.path.exists(target_dir):
        print("路径不存在,请确认照片已下载,并在本文件尾部添加目标QQ号")
    album_info_dir = os.path.join(target_dir, "album_info.json")
    with open(album_info_dir, "r", encoding="utf-8") as album_info_f:
        album_info = json.load(album_info_f)
    for album in album_info["data"]["albumListModeSort"]:
        album_dir = ""
        files_in_target_dir = os.listdir(target_dir)
        album_id_purged = purge_file_name(album["id"])
        # Locate the album folder by its (sanitised) id substring.
        for file_name_in_target_dir in files_in_target_dir:
            if album_id_purged in file_name_in_target_dir:
                album_dir = os.path.join(target_dir,
                                         file_name_in_target_dir)
                # Rename the album folder with a date prefix if missing.
                if should_rename:
                    if not re.search(p_date, file_name_in_target_dir):
                        album_create_timestamp = int(
                            album["createtime"])  # album creation time
                        album_create_date = time.strftime(
                            '%Y%m%d %H%M%S',
                            time.localtime(album_create_timestamp))
                        file_name_in_target_dir_new = album_create_date + " " + file_name_in_target_dir
                        album_dir_new = os.path.join(
                            target_dir, file_name_in_target_dir_new)
                        os.rename(album_dir, album_dir_new)
                        album_dir = album_dir_new
                break
        if album_dir == "":
            print("相册文件夹缺失:", os.path.join(target_dir, album["name"]))
            continue
        # Collect floatview and raw JSON pages (albums with 500+ photos are
        # split across multiple files).
        files_in_album_dir = os.listdir(album_dir)
        floatview_json_dir_list = []
        raw_json_dir_list = []
        for file_name_in_album_dir in files_in_album_dir:
            if re.search(p_floatview_json, file_name_in_album_dir):
                floatview_json_dir_list.append(
                    os.path.join(album_dir, file_name_in_album_dir))
            elif re.search(p_raw_json, file_name_in_album_dir):
                raw_json_dir_list.append(
                    os.path.join(album_dir, file_name_in_album_dir))
        floatview_list = []
        raw_list = []
        # Merge all pages into flat photo lists.
        for floatview_json_dir in floatview_json_dir_list:
            with open(floatview_json_dir, "r",
                      encoding="utf-8") as floatview_json_f:
                floatview_json = json.load(floatview_json_f)
                for _floatview_info in floatview_json["data"]["photos"]:
                    floatview_list.append(_floatview_info)
        for raw_json_dir in raw_json_dir_list:
            with open(raw_json_dir, "r", encoding="utf-8") as raw_json_f:
                raw_json = json.load(raw_json_f)
                for _raw_info in raw_json["data"]["photoList"]:
                    raw_list.append(_raw_info)
        # Process each photo recorded in the floatview data.
        downloaded_dir = os.path.join(album_dir, "downloaded")
        photos_in_album_downloaded_dir = os.listdir(downloaded_dir)
        for floatview_info in floatview_list:
            lloc = floatview_info["lloc"]
            # Match the raw-list entry by its "lloc" id; may stay None.
            raw_info = None
            for _raw_info in raw_list:
                if _raw_info["lloc"] == lloc:
                    raw_info = _raw_info
                    break
            # Locate the downloaded file by the (sanitised) lloc substring.
            photo_dir = ""
            lloc_purged = purge_file_name(lloc)
            for photo_name in photos_in_album_downloaded_dir:
                if lloc_purged in photo_name:
                    photo_dir = os.path.join(downloaded_dir, photo_name)
                    break
            if photo_dir != "":
                if should_add_exif:
                    # NOTE(review): raw_info may be None here — presumably
                    # PhotoExifRecover tolerates that; confirm.
                    photoExifRecover = PhotoExifRecover(
                        photo_dir, floatview_info, raw_info)
                    photoExifRecover.recover()
                # Rename the photo with a date prefix taken from its EXIF.
                if should_rename:
                    [dir_name, photo_name] = os.path.split(photo_dir)
                    if not re.search(p_date, photo_name):
                        exif_in_file = piexif.load(photo_dir)
                        if "Exif" in exif_in_file.keys(
                        ) and piexif.ExifIFD.DateTimeOriginal in exif_in_file[
                                "Exif"].keys() and exif_in_file["Exif"][
                                    piexif.ExifIFD.DateTimeOriginal]:
                            photo_create_date = bytes.decode(
                                exif_in_file["Exif"]
                                [piexif.ExifIFD.DateTimeOriginal]).replace(
                                    ":", "")
                            photo_name_new = photo_create_date + " " + photo_name
                            photo_dir_new = os.path.join(
                                dir_name, photo_name_new)
                            os.rename(photo_dir, photo_dir_new)
                            # NOTE(review): if should_add_exif is False this
                            # reads an unbound (or stale, from a previous
                            # iteration) photoExifRecover -> NameError.
                            photoExifRecover.file_dir = photo_dir_new
                            photo_dir = photo_dir_new
            else:
                print("照片缺失:", os.path.join(downloaded_dir, lloc_purged))
def __init__(self, json_data):
    """Keep the album JSON and derive its sanitised "<name>_<id>" folder name."""
    self._json_data = json_data
    raw_dir = "%s_%s" % (json_data["name"], json_data["id"])
    self._album_dir = purge_file_name(raw_dir)
def batch(self, should_rename=True, should_add_exif=True):
    """Walk every album under <cwd>/<target_uin>/photo and post-process photos.

    Hardened variant: guards against a missing album list, missing JSON
    pages, and a missing "downloaded" folder, and records EXIF-write
    failures in self.e instead of aborting.  For each album it optionally
    (a) prefixes the album folder and each photo with a
    "YYYYMMDD HHMMSS " date and (b) restores EXIF via PhotoExifRecover.

    :param should_rename: prefix album folders and photos with a date.
    :param should_add_exif: write recovered EXIF into each photo.
    """
    # Precompiled patterns: a "YYYYMMDD HHMMSS" (or _-separated) date prefix,
    # and the two paginated JSON file-name shapes.
    p_date = re.compile(
        r"(\d{4})(0[1-9]|1[0-2])(0[1-9]|[1-2]\d|3[0-1])[ _]([0-1]\d|2[0-3])([0-5]\d)([0-5]\d)"
    )
    p_floatview_json = re.compile(r"^floatview_photo_\d{5}-\d{5}.json$")
    p_raw_json = re.compile(r"^photo_\d{5}-\d{5}.json$")
    target_dir = os.path.join(os.getcwd(), self.target_uin, "photo")
    # NOTE(review): execution continues even when the path is missing; the
    # open() below will then raise FileNotFoundError.
    if not os.path.exists(target_dir):
        print("路径不存在,请确认照片已下载,并在本文件尾部添加目标QQ号")
    album_info_dir = os.path.join(target_dir, "album_info.json")
    with open(album_info_dir, "r", encoding="utf-8") as album_info_f:
        album_info = json.load(album_info_f)
    # No album at all — nothing to do.
    if album_info["data"]["albumListModeSort"] is None:
        print("【json记录中无相册!】")
        return
    for album in album_info["data"]["albumListModeSort"]:
        album_dir = ""
        files_in_target_dir = os.listdir(target_dir)
        album_id_purged = purge_file_name(album["id"])
        # Locate the album folder by its (sanitised) id substring.
        for file_name_in_target_dir in files_in_target_dir:
            if album_id_purged in file_name_in_target_dir:
                album_dir = os.path.join(target_dir,
                                         file_name_in_target_dir)
                # Rename the album folder with a date prefix if missing.
                if should_rename:
                    if not re.search(p_date, file_name_in_target_dir):
                        album_create_timestamp = int(
                            album["createtime"])  # album creation time
                        album_create_date = time.strftime(
                            '%Y%m%d %H%M%S',
                            time.localtime(album_create_timestamp))
                        file_name_in_target_dir_new = album_create_date + " " + file_name_in_target_dir
                        album_dir_new = os.path.join(
                            target_dir, file_name_in_target_dir_new)
                        os.rename(album_dir, album_dir_new)
                        album_dir = album_dir_new
                break
        if album_dir == "":
            print("相册文件夹缺失:", os.path.join(target_dir, album["name"]))
            continue
        # Collect floatview and raw JSON pages (albums with 500+ photos are
        # split across multiple files).
        files_in_album_dir = os.listdir(album_dir)
        floatview_json_dir_list = []
        raw_json_dir_list = []
        for file_name_in_album_dir in files_in_album_dir:
            if re.search(p_floatview_json, file_name_in_album_dir):
                floatview_json_dir_list.append(
                    os.path.join(album_dir, file_name_in_album_dir))
            elif re.search(p_raw_json, file_name_in_album_dir):
                raw_json_dir_list.append(
                    os.path.join(album_dir, file_name_in_album_dir))
        # Either JSON flavour missing — skip the album.
        if len(floatview_json_dir_list) == 0 or len(
                raw_json_dir_list) == 0:
            print("【相册中照片json数据缺失】:", album_dir)
            continue
        floatview_list = []
        raw_list = []
        # Merge all pages into flat photo lists.
        for floatview_json_dir in floatview_json_dir_list:
            with open(floatview_json_dir, "r",
                      encoding="utf-8") as floatview_json_f:
                floatview_json = json.load(floatview_json_f)
                for _floatview_info in floatview_json["data"]["photos"]:
                    floatview_list.append(_floatview_info)
        for raw_json_dir in raw_json_dir_list:
            with open(raw_json_dir, "r", encoding="utf-8") as raw_json_f:
                raw_json = json.load(raw_json_f)
                for _raw_info in raw_json["data"]["photoList"]:
                    raw_list.append(_raw_info)
        # Find the downloaded folder and the file list within.
        downloaded_dir = os.path.join(album_dir, "downloaded")
        # Downloaded folder missing — skip the album.
        if not os.path.exists(downloaded_dir):
            print("【无downloaded文件夹】:", downloaded_dir)
            continue
        photos_in_album_downloaded_dir = os.listdir(downloaded_dir)
        # Handle every photo recorded in the floatview data.
        for floatview_info in floatview_list:
            lloc = floatview_info["lloc"]
            # Match the raw-list entry by its "lloc" id; may stay None.
            raw_info = None
            for _raw_info in raw_list:
                if _raw_info["lloc"] == lloc:
                    raw_info = _raw_info
                    break
            # Locate the downloaded file by the (sanitised) lloc substring.
            photo_dir = ""
            lloc_purged = purge_file_name(lloc)
            for photo_name in photos_in_album_downloaded_dir:
                if lloc_purged in photo_name:
                    photo_dir = os.path.join(downloaded_dir, photo_name)
                    break
            if photo_dir == "":
                print("照片缺失:", os.path.join(downloaded_dir, lloc_purged))
                continue
            # Recover EXIF; failures are logged to self.e and the photo is
            # skipped (no rename for files whose EXIF write failed).
            if should_add_exif:
                try:
                    # NOTE(review): raw_info may be None here — presumably
                    # PhotoExifRecover tolerates that; confirm.
                    photoExifRecover = PhotoExifRecover(
                        photo_dir, floatview_info, raw_info)
                    photoExifRecover.recover()
                except Exception as e:
                    error_message = "EXIF写入失败:" + photo_dir + "\n↘失败原因:" + str(
                        e)
                    print(error_message)
                    self.e.append(error_message)
                    continue  # skip renaming for files whose EXIF write failed
            # Rename the photo with a date prefix taken from its EXIF.
            if should_rename:
                [dir_name, photo_name] = os.path.split(photo_dir)
                if not re.search(p_date, photo_name):
                    exif_in_file = piexif.load(photo_dir)
                    if "Exif" in exif_in_file.keys() \
                            and piexif.ExifIFD.DateTimeOriginal in exif_in_file["Exif"].keys() \
                            and exif_in_file["Exif"][piexif.ExifIFD.DateTimeOriginal]:
                        photo_create_date = \
                            bytes.decode(exif_in_file["Exif"][piexif.ExifIFD.DateTimeOriginal]).replace(":", "")
                        photo_name_new = photo_create_date + " " + photo_name
                        photo_dir_new = os.path.join(
                            dir_name, photo_name_new)
                        os.rename(photo_dir, photo_dir_new)
                        # NOTE(review): if should_add_exif is False this reads
                        # an unbound (or stale, from a previous iteration)
                        # photoExifRecover -> NameError.
                        photoExifRecover.file_dir = photo_dir_new
                        photo_dir = photo_dir_new