Beispiel #1
0
    def __init__(self, json_data, begin, end, blog_info, directory):
        """Set up a saver for one slice of a blog's JSON data.

        ``Saver.__init__`` receives the JSON payload, the path
        ``<directory>/<QzonePath.BLOG>`` and the sanitized blog category.
        The output file name is built from the blog title, the blog id and
        the index range ``begin`` .. ``end - 1`` (both zero-padded to five
        digits) and is then sanitized with ``purge_file_name``.
        """
        blog_root = os.path.join(directory, QzonePath.BLOG)
        category = purge_file_name(blog_info.category)
        Saver.__init__(self, json_data, blog_root, category)

        # ``end`` is exclusive, so the last index covered is ``end - 1``.
        last_index = end - 1
        raw_name = "%s_%s_%05d-%05d.json" % (
            blog_info.title, blog_info.blog_id, begin, last_index)
        self._filename = purge_file_name(raw_name)
Beispiel #2
0
def download_media(url, directory, filename, has_extension=False, *args, **kwargs):
    """Stream ``url`` into ``directory`` unless the target file already exists.

    Args:
        url: Address of the media resource to fetch.
        directory: Destination directory; created when it does not exist.
        filename: Target file name; sanitized with ``purge_file_name``.
        has_extension: When false, an extension taken from the response's
            ``Content-Type`` header (``jpg`` if the header is absent) is
            appended to ``filename``.
        *args: Accepted for call compatibility; unused.
        **kwargs: Accepted for call compatibility; unused.

    Returns:
        True when a new file was written, False when the target file was
        already present and nothing was downloaded.
    """
    result = False
    # The URL is passed as a format argument rather than spliced into the
    # format string: percent-encoded URLs ("%20", "%E4", ...) would otherwise
    # be parsed as conversion specifiers and make the progress print raise.
    progress = "\rdownloading %s -> %05.2f%% "
    chunk_size = 1024 << 4
    with directory_lock:
        if not os.path.exists(directory):
            os.makedirs(directory)
    # One lock object per URL: calls for the same URL run one after another.
    with dict_lock:
        lock = _locks.setdefault(url, Lock())
    with lock:
        with requests.get(url, stream=True) as r:
            # Without a Content-Length the total is unknown; dividing by
            # infinity keeps the displayed percentage at zero.
            total_len = math.inf
            if "content-length" in r.headers:
                total_len = int(r.headers["content-length"])
            current_len = 0
            filename = purge_file_name(filename)
            if not has_extension:
                extension = "jpg"
                if "content-type" in r.headers:
                    # Drop media-type parameters ("image/jpeg; charset=...")
                    # so they do not end up in the file extension.
                    media_type = r.headers["content-type"].split(";", 1)[0].strip()
                    extension = media_type[media_type.find("/") + 1:]
                filename = "%s.%s" % (filename, extension)
            filename = os.path.join(directory, filename)
            if not os.path.exists(filename):
                try:
                    with open(filename, "wb") as f:
                        for data in r.iter_content(chunk_size):
                            f.write(data)
                            current_len += len(data)
                            # Guard against "Content-Length: 0" with a body.
                            if total_len:
                                percent = 100 * current_len / total_len
                            else:
                                percent = 100.0
                            print(progress % (url, percent), end="")
                except BaseException:
                    # A truncated file would make every later call skip this
                    # URL because of the existence check above, so discard it
                    # before propagating the error.
                    try:
                        os.remove(filename)
                    except OSError:
                        pass  # best effort; the original error matters more
                    raise
                result = True
                print("\n%s is downloaded" % url)
    return result
Beispiel #3
0
def download_media(url, dir, file_name, otime):
    """Download ``url`` into ``dir`` and optionally stamp an EXIF capture time.

    Args:
        url: Address of the media resource to fetch.
        dir: Destination directory; created when it does not exist.
        file_name: Base name without extension; sanitized with
            ``purge_file_name``. The extension comes from the response's
            ``Content-Type`` header (``jpg`` if the header is absent).
        otime: Value stored as EXIF ``DateTimeOriginal`` in the downloaded
            file; a falsy value skips the EXIF step entirely.
    """
    s = "\rdownloading %s -> %05.2f%% "
    chunk_size = 1024

    # Build the EXIF payload up front so a bad ``otime`` fails before any
    # network traffic happens.
    exif_bytes = None
    if otime:
        exif_ifd = {piexif.ExifIFD.DateTimeOriginal: otime}
        exif_dict = {"Exif": exif_ifd}
        exif_bytes = piexif.dump(exif_dict)

    with closing(requests.get(url, stream=True)) as r:
        extension = "jpg"
        if "content-type" in r.headers:
            # Drop media-type parameters ("image/jpeg; charset=...") so they
            # do not end up in the file extension.
            media_type = r.headers["content-type"].split(";", 1)[0].strip()
            extension = media_type[media_type.find("/") + 1:]
        # Without a Content-Length the total is unknown; dividing by infinity
        # keeps the displayed percentage at zero.
        total_len = math.inf
        if "content-length" in r.headers:
            total_len = int(r.headers["content-length"])
        current_len = 0
        file_name = purge_file_name(file_name)
        with _lock:
            if not os.path.exists(dir):
                os.makedirs(dir)
        file_name = os.path.join(dir, "%s.%s" % (file_name, extension))
        with open(file_name, "wb") as f:
            for data in r.iter_content(chunk_size):
                f.write(data)
                current_len += len(data)
                # Guard against "Content-Length: 0" accompanied by a body.
                if total_len:
                    percent = 100 * current_len / total_len
                else:
                    percent = 100.0
                print(s % (url, percent), end="")
    if exif_bytes is not None:
        # NOTE(review): piexif.insert presumably only accepts JPEG-like
        # files; confirm what should happen for other content types.
        piexif.insert(exif_bytes, file_name)
    print("\n%s is downloaded" % url)
Beispiel #4
0
def download_media(url, dir, file_name):
    """Download ``url`` into ``dir``, printing progress while streaming.

    Args:
        url: Address of the media resource to fetch (30 second timeout).
        dir: Destination directory; created when it does not exist.
        file_name: Base name without extension; sanitized with
            ``purge_file_name``. The extension comes from the response's
            ``Content-Type`` header (``jpg`` if the header is absent).

    Returns:
        True once the file has been written. Failures surface as exceptions
        from ``requests`` or the file system rather than as False.
    """
    result = False
    s = "\rdownloading %s -> %05.2f%% "
    chunk_size = 1024
    with requests.get(url, stream=True, timeout=30) as r:
        extension = "jpg"
        if "content-type" in r.headers:
            # Drop media-type parameters ("image/jpeg; charset=...") so they
            # do not end up in the file extension.
            media_type = r.headers["content-type"].split(";", 1)[0].strip()
            extension = media_type[media_type.find("/") + 1:]
        # Without a Content-Length the total is unknown; dividing by infinity
        # keeps the displayed percentage at zero.
        total_len = math.inf
        if "content-length" in r.headers:
            total_len = int(r.headers["content-length"])
        current_len = 0
        file_name = purge_file_name(file_name)
        with _lock:
            if not os.path.exists(dir):
                os.makedirs(dir)
        file_name = os.path.join(dir, "%s.%s" % (file_name, extension))
        with open(file_name, "wb") as f:
            for data in r.iter_content(chunk_size):
                f.write(data)
                current_len += len(data)
                # Guard against "Content-Length: 0" accompanied by a body.
                if total_len:
                    percent = 100 * current_len / total_len
                else:
                    percent = 100.0
                print(s % (url, percent), end="")
        result = True
        print("\n%s is downloaded" % url)
    return result
def get_photo_comment_media_list(json_data, uin, album_name, album_id, download_if_not_exist):
    """Get the list of media resources found in an album's comments.

    Builds the album's download directory
    (``<uin>/<QzonePath.PHOTO>/<album_name>_<album_id>/<QzonePath.DOWNLOAD>``,
    with the album folder name sanitized) and the path of the
    ``QzoneFileName.DOWNLOADED`` file under ``<uin>/<QzonePath.PHOTO>``, then
    delegates to ``get_media_list`` with ``photo=True``.
    """
    album_folder = purge_file_name("%s_%s" % (album_name, album_id))
    download_dir = os.path.join(
        uin, QzonePath.PHOTO, album_folder, QzonePath.DOWNLOAD)
    downloaded_record = os.path.join(
        uin, QzonePath.PHOTO, QzoneFileName.DOWNLOADED)
    media_list = get_media_list(
        json_data, download_dir, downloaded_record,
        download_if_not_exist, photo=True)
    return media_list
def get_photo_url(url, uin, album_name, album_id, download_if_not_exist=False, comment=False):
    """Get the URL of a photo.

    Args:
        url: Original photo URL; also passed to ``get_url`` as its third
            positional argument.
        uin: Account identifier used as the root folder of the export.
        album_name: Album name, combined with ``album_id`` to form the
            (sanitized) album folder name.
        album_id: Album identifier.
        download_if_not_exist: Forwarded to ``get_url``.
        comment: Forwarded to ``get_url``. Previously this argument was
            accepted but a hard-coded ``False`` was passed on instead, so
            callers could not influence the call.

    Returns:
        Whatever ``get_url`` returns for this photo.
    """
    album_dir = purge_file_name("%s_%s" % (album_name, album_id))
    photo_download_dir = os.path.join(uin, QzonePath.PHOTO, album_dir,
                                      QzonePath.DOWNLOAD)
    downloaded_file = os.path.join(
        uin, QzonePath.PHOTO, QzoneFileName.DOWNLOADED)
    return get_url(url, photo_download_dir, url, False,
                   download_if_not_exist, downloaded_file,
                   photo=True, comment=comment)
Beispiel #7
0
    def __init__(self, directory, blog_info, html_content, read_num=0):
        """Prepare parsing of one blog's HTML and decide where it is stored.

        Args:
            directory: Root export directory; the blog is placed under
                ``<directory>/<QzonePath.BLOG>/<sanitized category>``, which
                is created if necessary.
            blog_info: Object providing ``category`` and ``get_file_name()``.
            html_content: Raw HTML of the blog, parsed with BeautifulSoup's
                ``html.parser``.
            read_num: Stored as ``self._read``; defaults to 0.
        """
        self._html_content = html_content

        blog_path = os.path.join(
            directory, QzonePath.BLOG, purge_file_name(blog_info.category))
        # exist_ok avoids the check-then-create race when several blogs of
        # the same category are set up at the same time.
        os.makedirs(blog_path, exist_ok=True)
        filename = blog_info.get_file_name()
        self._blog_filename = os.path.join(blog_path, filename)

        self._blog_info = blog_info
        self._read = read_num

        self._bs_obj = BeautifulSoup(self._html_content, "html.parser")
Beispiel #8
0
 def get_file_name(self):
     """Return the sanitized ``<title>_<blog id>.html`` file name."""
     raw_name = "%s_%s.html" % (self._title, self._blog_id)
     return purge_file_name(raw_name)
def purge_file(file):
    """Return ``file`` after passing it through ``purge_file_name``."""
    purged = purge_file_name(file)
    return purged
    def batch(self, should_rename=True, should_add_exif=True):
        """Recover EXIF data and date-prefix names for every downloaded album.

        Works on ``<cwd>/<self.target_uin>/photo``: reads ``album_info.json``,
        finds each album's folder by its sanitized id, loads the album's
        ``floatview_photo_*.json`` / ``photo_*.json`` metadata and then
        handles every photo listed there that is present in the album's
        ``downloaded`` folder.

        Args:
            should_rename: Prefix album folders (album creation time, local
                time zone) and photos (EXIF ``DateTimeOriginal`` with the
                colons removed) with a date stamp, unless the name already
                contains one.
            should_add_exif: Run ``PhotoExifRecover.recover()`` on each photo.

        Missing folders are reported on stdout and skipped.
        """
        # Detects an existing "YYYYMMDD HHMMSS" / "YYYYMMDD_HHMMSS" stamp.
        p_date = re.compile(
            r"(\d{4})(0[1-9]|1[0-2])(0[1-9]|[1-2]\d|3[0-1])[ _]([0-1]\d|2[0-3])([0-5]\d)([0-5]\d)"
        )
        # The dot is escaped so only a literal ".json" suffix matches.
        p_floatview_json = re.compile(r"^floatview_photo_\d{5}-\d{5}\.json$")
        p_raw_json = re.compile(r"^photo_\d{5}-\d{5}\.json$")

        target_dir = os.path.join(os.getcwd(), self.target_uin, "photo")
        if not os.path.exists(target_dir):
            print("路径不存在,请确认照片已下载,并在本文件尾部添加目标QQ号")
            # Nothing below can work without the photo folder.
            return

        album_info_path = os.path.join(target_dir, "album_info.json")
        with open(album_info_path, "r", encoding="utf-8") as album_info_f:
            album_info = json.load(album_info_f)

        albums = album_info["data"]["albumListModeSort"]
        if albums is None:
            # The JSON records no album at all.
            print("【json记录中无相册!】")
            return

        for album in albums:
            album_id_purged = purge_file_name(album["id"])

            # Find the album folder. The directory is listed again for every
            # album because earlier iterations may have renamed folders.
            album_dir = ""
            for entry in os.listdir(target_dir):
                if album_id_purged not in entry:
                    continue
                album_dir = os.path.join(target_dir, entry)
                if should_rename and not p_date.search(entry):
                    # Use the album's creation time as the prefix.
                    album_create_date = time.strftime(
                        '%Y%m%d %H%M%S',
                        time.localtime(int(album["createtime"])))
                    album_dir_new = os.path.join(
                        target_dir, album_create_date + " " + entry)
                    os.rename(album_dir, album_dir_new)
                    album_dir = album_dir_new
                break
            if album_dir == "":
                print("相册文件夹缺失:", os.path.join(target_dir, album["name"]))
                continue

            # Collect floatview and raw JSON files (albums with more than
            # 500 photos have their JSON split across several files).
            floatview_json_paths = []
            raw_json_paths = []
            for entry in os.listdir(album_dir):
                if p_floatview_json.search(entry):
                    floatview_json_paths.append(os.path.join(album_dir, entry))
                elif p_raw_json.search(entry):
                    raw_json_paths.append(os.path.join(album_dir, entry))

            floatview_list = []
            for json_path in floatview_json_paths:
                with open(json_path, "r", encoding="utf-8") as json_f:
                    floatview_list.extend(json.load(json_f)["data"]["photos"])
            raw_list = []
            for json_path in raw_json_paths:
                with open(json_path, "r", encoding="utf-8") as json_f:
                    raw_list.extend(json.load(json_f)["data"]["photoList"])

            downloaded_dir = os.path.join(album_dir, "downloaded")
            if not os.path.exists(downloaded_dir):
                # Without the folder there is no photo to process.
                print("【无downloaded文件夹】:", downloaded_dir)
                continue
            photos_in_album_downloaded_dir = os.listdir(downloaded_dir)

            for floatview_info in floatview_list:
                lloc = floatview_info["lloc"]

                # First raw entry describing the same photo, if any.
                raw_info = next(
                    (info for info in raw_list if info["lloc"] == lloc), None)

                # Locate the downloaded file whose name contains the lloc.
                lloc_purged = purge_file_name(lloc)
                photo_dir = ""
                for photo_name in photos_in_album_downloaded_dir:
                    if lloc_purged in photo_name:
                        photo_dir = os.path.join(downloaded_dir, photo_name)
                        break
                if photo_dir == "":
                    print("照片缺失:", os.path.join(downloaded_dir, lloc_purged))
                    continue

                # Reset per photo so the rename step below never touches an
                # unbound name or the recoverer of a previous photo.
                exif_recover = None
                if should_add_exif:
                    exif_recover = PhotoExifRecover(
                        photo_dir, floatview_info, raw_info)
                    exif_recover.recover()

                if not should_rename:
                    continue
                dir_name, photo_name = os.path.split(photo_dir)
                if p_date.search(photo_name):
                    continue
                exif_in_file = piexif.load(photo_dir)
                exif_ifd = exif_in_file["Exif"] if "Exif" in exif_in_file else {}
                date_bytes = exif_ifd.get(piexif.ExifIFD.DateTimeOriginal)
                if not date_bytes:
                    # No capture time available: keep the current name.
                    continue
                photo_create_date = bytes.decode(date_bytes).replace(":", "")
                photo_dir_new = os.path.join(
                    dir_name, photo_create_date + " " + photo_name)
                os.rename(photo_dir, photo_dir_new)
                if exif_recover is not None:
                    exif_recover.file_dir = photo_dir_new
Beispiel #11
0
 def __init__(self, json_data):
     """Keep the album JSON and derive the sanitized album folder name.

     The folder name is ``<name>_<id>`` taken from ``json_data`` and passed
     through ``purge_file_name``.
     """
     self._json_data = json_data
     raw_dir_name = "%s_%s" % (json_data["name"], json_data["id"])
     self._album_dir = purge_file_name(raw_dir_name)
Beispiel #12
0
    def batch(self, should_rename=True, should_add_exif=True):
        """Recover EXIF data and date-prefix names for every downloaded album.

        Works on ``<cwd>/<self.target_uin>/photo``: reads ``album_info.json``,
        finds each album's folder by its sanitized id, loads the album's
        ``floatview_photo_*.json`` / ``photo_*.json`` metadata and then
        handles every photo listed there that is present in the album's
        ``downloaded`` folder.

        Args:
            should_rename: Prefix album folders (album creation time, local
                time zone) and photos (EXIF ``DateTimeOriginal`` with the
                colons removed) with a date stamp, unless the name already
                contains one.
            should_add_exif: Run ``PhotoExifRecover.recover()`` on each
                photo. Failures are printed, appended to ``self.e`` and the
                affected photo is not renamed.

        Missing folders or metadata are reported on stdout and skipped.
        """
        # Detects an existing "YYYYMMDD HHMMSS" / "YYYYMMDD_HHMMSS" stamp.
        p_date = re.compile(
            r"(\d{4})(0[1-9]|1[0-2])(0[1-9]|[1-2]\d|3[0-1])[ _]([0-1]\d|2[0-3])([0-5]\d)([0-5]\d)"
        )
        # The dot is escaped so only a literal ".json" suffix matches.
        p_floatview_json = re.compile(r"^floatview_photo_\d{5}-\d{5}\.json$")
        p_raw_json = re.compile(r"^photo_\d{5}-\d{5}\.json$")

        target_dir = os.path.join(os.getcwd(), self.target_uin, "photo")
        if not os.path.exists(target_dir):
            print("路径不存在,请确认照片已下载,并在本文件尾部添加目标QQ号")
            # Nothing below can work without the photo folder.
            return

        album_info_path = os.path.join(target_dir, "album_info.json")
        with open(album_info_path, "r", encoding="utf-8") as album_info_f:
            album_info = json.load(album_info_f)

        # No album at all.
        albums = album_info["data"]["albumListModeSort"]
        if albums is None:
            print("【json记录中无相册!】")
            return

        for album in albums:
            album_id_purged = purge_file_name(album["id"])

            # Find the album folder. The directory is listed again for every
            # album because earlier iterations may have renamed folders.
            album_dir = ""
            for entry in os.listdir(target_dir):
                if album_id_purged not in entry:
                    continue
                album_dir = os.path.join(target_dir, entry)
                if should_rename and not p_date.search(entry):
                    # Use the album's creation time as the prefix.
                    album_create_date = time.strftime(
                        '%Y%m%d %H%M%S',
                        time.localtime(int(album["createtime"])))
                    album_dir_new = os.path.join(
                        target_dir, album_create_date + " " + entry)
                    os.rename(album_dir, album_dir_new)
                    album_dir = album_dir_new
                break
            if album_dir == "":
                print("相册文件夹缺失:", os.path.join(target_dir, album["name"]))
                continue

            # Collect floatview and raw JSON files (albums with more than
            # 500 photos have their JSON split across several files).
            floatview_json_paths = []
            raw_json_paths = []
            for entry in os.listdir(album_dir):
                if p_floatview_json.search(entry):
                    floatview_json_paths.append(os.path.join(album_dir, entry))
                elif p_raw_json.search(entry):
                    raw_json_paths.append(os.path.join(album_dir, entry))

            # Either kind of metadata is missing: the album cannot be handled.
            if len(floatview_json_paths) == 0 or len(raw_json_paths) == 0:
                print("【相册中照片json数据缺失】:", album_dir)
                continue

            floatview_list = []
            for json_path in floatview_json_paths:
                with open(json_path, "r", encoding="utf-8") as json_f:
                    floatview_list.extend(json.load(json_f)["data"]["photos"])
            raw_list = []
            for json_path in raw_json_paths:
                with open(json_path, "r", encoding="utf-8") as json_f:
                    raw_list.extend(json.load(json_f)["data"]["photoList"])

            # Locate the downloaded folder and list the files inside it.
            downloaded_dir = os.path.join(album_dir, "downloaded")
            if not os.path.exists(downloaded_dir):
                print("【无downloaded文件夹】:", downloaded_dir)
                continue
            photos_in_album_downloaded_dir = os.listdir(downloaded_dir)

            # Handle every photo of the album.
            for floatview_info in floatview_list:
                lloc = floatview_info["lloc"]

                # First raw entry describing the same photo, if any.
                raw_info = next(
                    (info for info in raw_list if info["lloc"] == lloc), None)

                # Locate the downloaded file whose name contains the lloc.
                lloc_purged = purge_file_name(lloc)
                photo_dir = ""
                for photo_name in photos_in_album_downloaded_dir:
                    if lloc_purged in photo_name:
                        photo_dir = os.path.join(downloaded_dir, photo_name)
                        break
                if photo_dir == "":
                    print("照片缺失:", os.path.join(downloaded_dir, lloc_purged))
                    continue

                # Reset per photo so the rename step below never touches an
                # unbound name or the recoverer of a previous photo.
                exif_recover = None
                if should_add_exif:
                    try:
                        exif_recover = PhotoExifRecover(
                            photo_dir, floatview_info, raw_info)
                        exif_recover.recover()
                    except Exception as e:
                        # Deliberately broad: one bad photo must not abort
                        # the whole batch; the failure is recorded instead.
                        error_message = "EXIF写入失败:" + photo_dir + "\n↘失败原因:" + str(
                            e)
                        print(error_message)
                        self.e.append(error_message)
                        # Skip the rename step for files whose EXIF write
                        # failed.
                        continue

                if not should_rename:
                    continue
                dir_name, photo_name = os.path.split(photo_dir)
                if p_date.search(photo_name):
                    continue
                exif_in_file = piexif.load(photo_dir)
                exif_ifd = exif_in_file["Exif"] if "Exif" in exif_in_file else {}
                date_bytes = exif_ifd.get(piexif.ExifIFD.DateTimeOriginal)
                if not date_bytes:
                    # No capture time available: keep the current name.
                    continue
                photo_create_date = bytes.decode(date_bytes).replace(":", "")
                photo_dir_new = os.path.join(
                    dir_name, photo_create_date + " " + photo_name)
                os.rename(photo_dir, photo_dir_new)
                if exif_recover is not None:
                    exif_recover.file_dir = photo_dir_new