Example #1
def get_one_page_follow_list(account_id, cursor=None):
    query_url = "https://www.instagram.com/query/"
    # Fields supported by node: id, is_verified, followed_by_viewer, requested_by_viewer, full_name, profile_pic_url, username
    params = "nodes{username},page_info"
    if cursor is None:
        post_data = "q=ig_user(%s){follows.first(%s){%s}}" % (account_id, USER_COUNT_PER_PAGE, params)
    else:
        post_data = "q=ig_user(%s){follows.after(%s,%s){%s}}" % (account_id, cursor, USER_COUNT_PER_PAGE, params)
    # TODO: handle session id errors
    header_list = {
        "Referer": "https://www.instagram.com/",
        "X-CSRFToken": CSRF_TOKEN,
        "Cookie": "csrftoken=%s; sessionid=%s;" % (CSRF_TOKEN, SESSION_ID),
    }
    follow_list_return_code, follow_list_data = tool.http_request(query_url, post_data, header_list)[:2]
    if follow_list_return_code == 1:
        try:
            follow_list_data = json.loads(follow_list_data)
        except ValueError:
            pass
        else:
            if robot.check_sub_key(("follows",), follow_list_data):
                if robot.check_sub_key(("page_info", "nodes"), follow_list_data["follows"]):
                    if robot.check_sub_key(("end_cursor", "has_next_page"), follow_list_data["follows"]["page_info"]):
                        return follow_list_data["follows"]
    return None
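
Every example in this listing leans on a robot.check_sub_key helper whose definition is not shown. A minimal stand-in, inferred purely from how it is called here (a tuple of required keys plus the object to probe), might look like this; the real implementation may differ:

def check_sub_key(needed_keys, data):
    # Hypothetical reconstruction: succeed only when `data` is a dict
    # that contains every key listed in `needed_keys`.
    if not isinstance(data, dict):
        return False
    for key in needed_keys:
        if key not in data:
            return False
    return True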
Example #2
def get_one_page_photo_data(account_id, page_count):
    photo_page_url = "http://photo.weibo.com/photos/get_all"
    photo_page_url += "?uid=%s&count=%s&page=%s&type=3" % (account_id, IMAGE_COUNT_PER_PAGE, page_count)
    photo_page_data = auto_redirect_visit(photo_page_url)
    try:
        page = json.loads(photo_page_data)
    except ValueError:
        pass
    else:
        if robot.check_sub_key(("data",), page):
            if robot.check_sub_key(("total", "photo_list"), page["data"]):
                return page["data"]
    return None
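
A hypothetical caller for get_one_page_photo_data; the account id is made up, and the has-next test mirrors the paging arithmetic used in Example #28 (Python 2 integer division):

page_count = 1
while True:
    photo_page_data = get_one_page_photo_data("1234567890", page_count)
    if photo_page_data is None:
        break  # request or parse failure
    for image_info in photo_page_data["photo_list"]:
        pass  # handle each photo here
    # total count and page size decide whether another page exists
    if (photo_page_data["total"] / IMAGE_COUNT_PER_PAGE) > (page_count - 1):
        page_count += 1
    else:
        break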
Example #3
def get_video_url(video_vid, video_id):
    video_info_url = "http://wsi.weishi.com/weishi/video/downloadVideo.php?vid=%s&id=%s" % (video_vid, video_id)
    video_info_page_return_code, video_info_page = tool.http_request(video_info_url)[:2]
    if video_info_page_return_code == 1:
        try:
            video_info_page = json.loads(video_info_page)
        except ValueError:
            pass
        else:
            if robot.check_sub_key(("data", ), video_info_page):
                if robot.check_sub_key(("url", ), video_info_page["data"]):
                    return str(random.choice(video_info_page["data"]["url"]))
    return None
Example #4
def get_video_info(video_id):
    # http://api.xiaoka.tv/live/web/get_play_live?scid=qxonW5XeZru03nUB
    video_info_url = "http://api.xiaoka.tv/live/web/get_play_live?scid=%s" % video_id
    video_info_return_code, video_info_data = tool.http_request(video_info_url)[:2]
    if video_info_return_code == 1:
        try:
            video_info_data = json.loads(video_info_data)
        except ValueError:
            pass
        else:
            if robot.check_sub_key(("result", "data"), video_info_data) and int(video_info_data["result"]) == 1:
                if robot.check_sub_key(("createtime", "linkurl"), video_info_data["data"]):
                    return video_info_data
    return None
Example #5
def get_video_url_by_video_id(video_id):
    video_info_url = "http://gslb.miaopai.com/stream/%s.json?token=" % video_id
    video_info_page_return_code, video_info_page = tool.http_request(video_info_url)[:2]
    if video_info_page_return_code == 1:
        try:
            video_info_page = json.loads(video_info_page)
        except ValueError:
            pass
        else:
            if robot.check_sub_key(("status", "result"), video_info_page):
                if int(video_info_page["status"]) == 200:
                    for result in video_info_page["result"]:
                        if robot.check_sub_key(("path", "host", "scheme"), result):
                            return str(result["scheme"]) + str(result["host"]) + str(result["path"])
    return None
Example #6
def get_audio_url(audio_id, song_type):
    audio_info_url = "http://service.5sing.kugou.com/song/getPermission?songId=%s&songType=%s" % (audio_id, song_type)
    audio_info_return_code, audio_info = tool.http_request(audio_info_url)[:2]
    if audio_info_return_code == 1:
        try:
            audio_info = json.loads(audio_info)
        except ValueError:
            pass
        else:
            if robot.check_sub_key(("success", "data"), audio_info):
                if audio_info["success"] and robot.check_sub_key(("fileName",), audio_info["data"]):
                    return str(audio_info["data"]["fileName"])
                elif not audio_info["success"]:
                    return ""
    return None
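
get_audio_url deliberately distinguishes three outcomes: a file name on success, an empty string when the API responds but denies permission, and None on a request or parse failure. A hypothetical caller telling them apart (the song id and song type are made up):

audio_url = get_audio_url("123456", "yc")
if audio_url is None:
    pass  # request failed or the response was not JSON; worth a retry
elif audio_url == "":
    pass  # the API answered but refused permission for this song
else:
    pass  # audio_url holds the downloadable file name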
Example #7
def get_account_id(account_name):
    search_url = "https://www.instagram.com/web/search/topsearch/?context=blended&rank_token=1&query=%s" % account_name
    for i in range(0, 10):
        search_return_code, search_data = tool.http_request(search_url)[:2]
        if search_return_code == 1:
            try:
                search_data = json.loads(search_data)
            except ValueError:
                continue
            if robot.check_sub_key(("users",), search_data):
                for user in search_data["users"]:
                    if robot.check_sub_key(("user",), user) and robot.check_sub_key(("username", "pk"), user["user"]):
                        if account_name.lower() == str(user["user"]["username"]).lower():
                            return user["user"]["pk"]
        time.sleep(5)
    return None
Example #8
def unfollow_account(auth_token, account_id):
    unfollow_url = "https://twitter.com/i/user/unfollow"
    unfollow_data = {"user_id": account_id}
    header_list = {"Cookie": "auth_token=%s;" % auth_token, "Referer": "https://twitter.com/"}
    unfollow_return_code, unfollow_data = tool.http_request(unfollow_url, unfollow_data, header_list)[:2]
    if unfollow_return_code == 1:
        try:
            unfollow_data = json.loads(unfollow_data)
        except ValueError:
            pass
        else:
            if robot.check_sub_key(("new_state",), unfollow_data) and unfollow_data["new_state"] == "not-following":
                return True
    return False
Example #9
def get_one_page_video_data(account_id, page_time):
    video_data_url = "http://wsm.qq.com/weishi/t/other.php?uid=%s&reqnum=%s" % (account_id, VIDEO_COUNT_PER_PAGE)
    if page_time > 0:
        video_data_url += "&pageflag=02&pagetime=%s" % page_time
    else:
        video_data_url += "&pageflag=0"
    header_list = {"Referer": 'http://weishi.qq.com/'}
    video_data_return_code, video_data = tool.http_request(video_data_url, None, header_list)[:2]
    if video_data_return_code == 1:
        try:
            video_data = json.loads(video_data)
        except ValueError:
            pass
        else:
            if robot.check_sub_key(("ret", "data"), video_data) and int(video_data["ret"]) == 0:
                if robot.check_sub_key(("info", "hasNext"), video_data["data"]):
                    return video_data["data"]
    return None
Example #10
def get_one_page_image_data(user_id, page_count, api_key, request_id):
    page_data_url = "https://api.flickr.com/services/rest"
    # API documentation: https://www.flickr.com/services/api/flickr.people.getPhotos.html
    # All supported "extras" values:
    # extra_data = [
    #     "can_addmeta", "can_comment", "can_download", "can_share", "contact", "count_comments", "count_faves",
    #     "count_views", "date_taken", "date_upload", "description", "icon_urls_deep", "isfavorite", "ispro", "license",
    #     "media", "needs_interstitial", "owner_name", "owner_datecreate", "path_alias", "realname", "rotation",
    #     "safety_level", "secret_k", "secret_h", "url_c", "url_f", "url_h", "url_k", "url_l", "url_m", "url_n",
    #     "url_o", "url_q", "url_s", "url_sq", "url_t", "url_z", "visibility", "visibility_source", "o_dims",
    #     "is_marketplace_printable", "is_marketplace_licensable", "publiceditability"
    # ]
    extra_data = ["date_upload", "url_o"]
    post_data = {
        "per_page": IMAGE_COUNT_PER_PAGE,
        "page": page_count,
        "extras": ",".join(extra_data),
        "get_user_info": 0,
        # "jump_to": "",
        "user_id": user_id,
        "view_as": "use_pref",
        "sort": "use_pref",
        # "viewerNSID": "",
        "method": "flickr.people.getPhotos",
        # "csrf": "",
        "api_key": api_key,
        "format": "json",
        "hermes": 1,
        "reqId": request_id,
        "nojsoncallback": 1,
    }
    page_data_return_code, page_data = tool.http_request(page_data_url, post_data)[:2]
    if page_data_return_code == 1:
        try:
            page_data = json.loads(page_data)
        except ValueError:
            pass
        else:
            if robot.check_sub_key(("stat", "photos"), page_data) and page_data["stat"] == "ok":
                if robot.check_sub_key(("photo", "total"), page_data["photos"]):
                    return page_data
    return None
Example #11
def get_one_page_follow_data(suid, page_count):
    follow_list_url = "http://www.miaopai.com/gu/follow?page=%s&suid=%s" % (page_count, suid)
    follow_list_page_return_code, follow_list_data = tool.http_request(follow_list_url)[:2]
    if follow_list_page_return_code == 1:
        try:
            follow_list_data = json.loads(follow_list_data)
        except ValueError:
            pass
        else:
            if robot.check_sub_key(("msg", "stat"), follow_list_data) and follow_list_data["stat"].isdigit():
                return follow_list_data
    return None
Example #12
def get_message_page_data(account_name, target_id):
    image_page_url = "https://api.7gogo.jp/web/v2/talks/%s/images" % account_name
    image_page_url += "?targetId=%s&limit=%s&direction=PREV" % (target_id, MESSAGE_COUNT_PER_PAGE)
    image_page_return_code, image_page_data = tool.http_request(image_page_url)[:2]
    if image_page_return_code == 1:
        try:
            image_page_data = json.loads(image_page_data)
        except ValueError:
            pass
        else:
            if robot.check_sub_key(("data",), image_page_data):
                return image_page_data["data"]
    return None
Example #13
def get_one_page_video_data(suid, page_count):
    # http://www.miaopai.com/gu/u?page=1&suid=0r9ewgQ0v7UoDptu&fen_type=channel
    media_page_url = "http://www.miaopai.com/gu/u?page=%s&suid=%s&fen_type=channel" % (page_count, suid)
    media_page_return_code, media_page = tool.http_request(media_page_url)[:2]
    if media_page_return_code == 1:
        try:
            media_page = json.loads(media_page)
        except ValueError:
            pass
        else:
            if robot.check_sub_key(("isall", "msg"), media_page):
                return media_page
    return None
Example #14
def get_one_page_album_data(page_count):
    album_url = "http://www.zunguang.com/index.php?c=api&yc=blog&ym=getOneBlog"
    post_data = {"bid": page_count}
    album_return_code, album_data = tool.http_request(album_url, post_data, None, None, False)[:2]
    if album_return_code != 1:
        return -1, None
    try:
        album_data = json.loads(album_data)
    except ValueError:
        return -2, None  # JSON decode error
    if robot.check_sub_key(("body",), album_data) and robot.check_sub_key(("blog",), album_data["body"]):
        if not album_data["body"]["blog"]:
            return 2, None  # 相册已被已被删除
        blog_type = int(album_data["body"]["blog"][0]["type"])
        if blog_type == 2:
            return 3, None  # 歌曲类型的相册
        elif blog_type == 3:
            album_body = album_data["body"]["blog"][0]
            if robot.check_sub_key(("title", "attr"), album_body) and robot.check_sub_key(("img",), album_body["attr"]):
                return 1, album_body
        else:
            return 4, blog_type
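
get_one_page_album_data reports its outcome as a (return_code, value) pair instead of a bare value. A hypothetical caller dispatching on the codes (the bid is made up):

return_code, album_body = get_one_page_album_data(100)
if return_code == 1:
    pass  # photo album; album_body carries title and attr.img
elif return_code == 2:
    pass  # the album has been deleted
elif return_code == 3:
    pass  # song-type album, nothing to download
elif return_code == 4:
    pass  # unsupported type; here album_body is the raw blog type
else:
    pass  # -1 request failure, -2 JSON failure, 0 unrecognized structure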
Example #15
def get_media_page_data(account_name, data_tweet_id):
    media_page_url = "https://twitter.com/i/profiles/show/%s/media_timeline" % account_name
    media_page_url += "?include_available_features=1&include_entities=1&max_position=%s" % data_tweet_id
    media_page_return_code, media_page_response = tool.http_request(media_page_url)[:2]
    if media_page_return_code == 1:
        try:
            media_page = json.loads(media_page_response)
        except ValueError:
            pass
        else:
            if robot.check_sub_key(("has_more_items", "items_html", "new_latent_count", "min_position"), media_page):
                return media_page
    return None
Example #16
def get_follow_page_data(account_name, auth_token, position_id):
    follow_list_url = "https://twitter.com/%s/following/users?max_position=%s" % (account_name, position_id)
    header_list = {"Cookie": "auth_token=%s;" % auth_token}
    follow_list_return_code, follow_list_data = tool.http_request(follow_list_url, None, header_list)[:2]
    if follow_list_return_code == 1:
        try:
            follow_list_data = json.loads(follow_list_data)
        except ValueError:
            pass
        else:
            if robot.check_sub_key(("min_position", "has_more_items", "items_html"), follow_list_data):
                return follow_list_data
    return None
Example #17
def get_one_page_video_data(account_id, page_count):
    # http://www.meipai.com/users/user_timeline?uid=22744352&page=1&count=20&single_column=1
    video_page_url = "http://www.meipai.com/users/user_timeline"
    video_page_url += "?uid=%s&page=%s&count=%s&single_column=1" % (account_id, page_count, VIDEO_COUNT_PER_PAGE)
    video_page_return_code, video_page = tool.http_request(video_page_url)[:2]
    if video_page_return_code == 1:
        try:
            video_page = json.loads(video_page)
        except ValueError:
            pass
        else:
            if robot.check_sub_key(("medias", ), video_page):
                return video_page["medias"]
    return None
Example #18
def follow_account(auth_token, account_id):
    follow_url = "https://twitter.com/i/user/follow"
    follow_data = {"user_id": account_id}
    header_list = {"Cookie": "auth_token=%s;" % auth_token, "Referer": "https://twitter.com/"}
    follow_return_code, follow_data = tool.http_request(follow_url, follow_data, header_list)[:2]
    if follow_return_code == 1:
        try:
            follow_data = json.loads(follow_data)
        except ValueError:
            pass
        else:
            if robot.check_sub_key(("new_state",), follow_data) and follow_data["new_state"] == "following":
                return True
    return False
Example #19
def get_one_page_media_data(account_id, cursor):
    # https://www.instagram.com/query/?q=ig_user(490060609){media.after(9999999999999999999,12){nodes{code,date,display_src,is_video},page_info}}
    # Fields supported by node: caption, code, comments{count}, date, dimensions{height,width}, display_src, id, is_video, likes{count}, owner{id}, thumbnail_src, video_views
    media_page_url = "https://www.instagram.com/query/"
    params = "nodes{code,date,display_src,is_video},page_info"
    post_data = "q=ig_user(%s){media.after(%s,%s){%s}}" % (account_id, cursor, IMAGE_COUNT_PER_PAGE, params)
    header_list = {
        "Referer": "https://www.instagram.com/",
        "X-CSRFToken": CSRF_TOKEN,
        "Cookie": "csrftoken=%s" % CSRF_TOKEN,
    }
    media_data_return_code, media_data = tool.http_request(media_page_url, post_data, header_list)[:2]
    if media_data_return_code == 1:
        try:
            media_data = json.loads(media_data)
        except ValueError:
            pass
        else:
            if robot.check_sub_key(("media",), media_data):
                if robot.check_sub_key(("page_info", "nodes"), media_data["media"]):
                    if robot.check_sub_key(("has_next_page", "end_cursor"), media_data["media"]["page_info"]):
                        return media_data["media"]
    return None
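
A hypothetical loop that collects every media node through get_one_page_media_data, mirroring the cursor handling of Example #24; the account id and the oversized initial cursor are taken from the sample URL in the comment above:

cursor = "9999999999999999999"
media_list = []
while True:
    media_data = get_one_page_media_data("490060609", cursor)
    if media_data is None:
        break  # request or parse failure
    media_list.extend(media_data["nodes"])
    if media_data["page_info"]["has_next_page"]:
        cursor = media_data["page_info"]["end_cursor"]
    else:
        break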
Example #20
def get_one_page_post_info_list(site_id, post_time):
    # https://deer-vision.tuchong.com/rest/sites/1186455/posts/2016-11-11%2011:11:11?limit=20
    post_page_url = "https://www.tuchong.com/rest/sites/%s/posts/" % site_id
    post_page_url += "%s?limit=%s" % (post_time, IMAGE_COUNT_PER_PAGE)
    post_page_return_code, post_page_data = tool.http_request(post_page_url)[:2]
    if post_page_return_code == 1:
        try:
            post_page_data = json.loads(post_page_data)
        except ValueError:
            pass
        else:
            if robot.check_sub_key(("posts", "result"), post_page_data) and post_page_data["result"] == "SUCCESS":
                return post_page_data["posts"]
    return None
Example #21
def get_account_info_from_file():
    if not os.path.exists("account.data"):
        return None, None
    with open("account.data", "r") as file_handle:
        account_info = file_handle.read()
    try:
        account_info = json.loads(base64.b64decode(account_info))
    except (TypeError, ValueError):
        account_info = {}
    if robot.check_sub_key(("email", "password"), account_info):
        return account_info["email"], account_info["password"]
    return None, None
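
For reference, a hypothetical counterpart that writes the base64-wrapped JSON file read above; save_account_info_to_file is not part of the original code (Python 2, like the rest of this listing):

import base64
import json

def save_account_info_to_file(email, password):
    # mirror image of get_account_info_from_file: JSON first, then base64
    account_info = json.dumps({"email": email, "password": password})
    with open("account.data", "w") as file_handle:
        file_handle.write(base64.b64encode(account_info))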
Example #22
def get_one_page_video_data(account_page_id, since_id):
    video_album_url = "http://weibo.com/p/aj/album/loading"
    video_album_url += "?type=video&since_id=%s&page_id=%s&page=1&ajax_call=1" % (since_id, account_page_id)
    for i in range(0, 50):
        video_page = auto_redirect_visit(video_album_url)
        if video_page:
            try:
                video_page = json.loads(video_page)
            except ValueError:
                pass
            else:
                if robot.check_sub_key(("code", "data"), video_page):
                    if int(video_page["code"]) == 100000:
                        return video_page[u"data"].encode("utf-8")
        time.sleep(5)
    return None
Example #23
def get_one_page_audio_list(user_id, page_count):
    # http://changba.com/member/personcenter/loadmore.php?userid=4306405&pageNum=1
    audio_album_url = "http://changba.com/member/personcenter/loadmore.php?userid=%s&pageNum=%s" % (user_id, page_count)
    audio_album_return_code, audio_album_page = tool.http_request(audio_album_url)[:2]
    if audio_album_return_code == 1:
        try:
            audio_album_page = json.loads(audio_album_page)
        except ValueError:
            pass
        else:
            audio_list = []
            for audio_info in audio_album_page:
                if robot.check_sub_key(("songname", "workid", "enworkid"), audio_info):
                    audio_id = str(audio_info["workid"])
                    audio_name = audio_info["songname"].encode("utf-8")
                    audio_url = str(audio_info["enworkid"])
                    audio_list.append([audio_id, audio_name, audio_url])
            return audio_list
    return None
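
A hypothetical caller unpacking the [audio_id, audio_name, audio_url] triples that get_one_page_audio_list returns (the user id is made up):

audio_list = get_one_page_audio_list("4306405", 1)
if audio_list is None:
    pass  # request or parse failure
else:
    for audio_id, audio_name, audio_url in audio_list:
        pass  # download each work here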
Example #24
def get_follow_by_list(account_id):
    # read the session id value from the cookies
    set_session_id()
    # fetch the csrf token value from the page
    if CSRF_TOKEN is None:
        set_csrf_token()

    cursor = None
    follow_by_list = []
    while True:
        follow_by_page_data = get_one_page_follow_by_list(account_id, cursor)
        if follow_by_page_data is not None:
            for node in follow_by_page_data["nodes"]:
                if robot.check_sub_key(("username",), node):
                    follow_by_list.append(node["username"])
            if follow_by_page_data["page_info"]["has_next_page"]:
                cursor = follow_by_page_data["page_info"]["end_cursor"]
            else:
                break
        else:
            break
    return follow_by_list
Example #25
    def run(self):
        global TOTAL_VIDEO_COUNT

        account_id = self.account_info[0]
        if len(self.account_info) >= 3 and self.account_info[2]:
            account_name = self.account_info[2]
        else:
            account_name = self.account_info[0]

        try:
            log.step(account_name + " 开始")

            # 如果需要重新排序则使用临时文件夹,否则直接下载到目标目录
            if IS_SORT:
                video_path = os.path.join(VIDEO_TEMP_PATH, account_name)
            else:
                video_path = os.path.join(VIDEO_DOWNLOAD_PATH, account_name)

            # 获取视频信息列表
            video_info_list = get_video_info_list(account_id)
            if video_info_list is None:
                log.error(account_name + " 视频列表获取失败")
                tool.process_exit()

            video_count = 1
            first_video_id = "0"
            need_make_video_dir = True
            for video_info in video_info_list:
                if not robot.check_sub_key(("item_data",), video_info) or \
                        not robot.check_sub_key(("watch_id", "title"), video_info["item_data"]):
                    log.error(account_name + " failed to parse video info %s" % video_info)
                    tool.process_exit()

                # e.g. sm30043563
                video_id = str(video_info["item_data"]["watch_id"])

                # use the first video's id as the new archive record
                if first_video_id == "0":
                    first_video_id = video_id

                # strip characters that are not supported in file names from the title
                video_title = robot.filter_text(video_info["item_data"]["title"])

                # first video: create the download directory
                if need_make_video_dir:
                    if not tool.make_dir(video_path, 0):
                        log.error(account_name + " failed to create video download directory %s" % video_path)
                        tool.process_exit()
                    need_make_video_dir = False

                # get the video download URL
                video_url = get_video_url(video_id)
                log.step(account_name + " start downloading video %s: %s %s" % (video_count, video_id, video_url))
                print "%s %s" % (video_id, video_title)
                file_path = os.path.join(video_path, "%s %s.mp4" % (video_id, video_title))
                if tool.save_net_file(video_url, file_path):
                    log.step(account_name + " video %s downloaded successfully" % video_count)
                    video_count += 1
                else:
                    log.error(account_name + " video %s (%s %s) download failed" % (video_count, video_id, video_url))

            log.step(account_name + " 下载完毕,总共获得%s个视频" % (video_count - 1))

            # 排序
            if IS_SORT:
                if first_video_id != "0":
                    destination_path = os.path.join(VIDEO_DOWNLOAD_PATH, account_name)
                    if robot.sort_file(video_path, destination_path, int(self.account_info[3]), 4):
                        log.step(account_name + " 视频从下载目录移动到保存目录成功")
                    else:
                        log.error(account_name + " 创建视频保存目录 %s 失败" % destination_path)
                        tool.process_exit()

            # 新的存档记录
            if first_video_id != "0":
                self.account_info[1] = first_video_id

            # 保存最后的信息
            tool.write_file("\t".join(self.account_info), NEW_SAVE_DATA_PATH)
            self.thread_lock.acquire()
            TOTAL_VIDEO_COUNT += video_count - 1
            ACCOUNTS.remove(account_id)
            self.thread_lock.release()

            log.step(account_name + " 完成")
        except SystemExit, se:
            if se.code == 0:
                log.step(account_name + " 提前退出")
            else:
                log.error(account_name + " 异常退出")
Example #26
    def run(self):
        global TOTAL_VIDEO_COUNT

        account_id = self.account_info[0]
        if len(self.account_info) >= 4 and self.account_info[3]:
            account_name = self.account_info[3]
        else:
            account_name = self.account_info[0]

        try:
            log.step(account_name + " 开始")

            # 如果需要重新排序则使用临时文件夹,否则直接下载到目标目录
            if IS_SORT:
                video_path = os.path.join(VIDEO_TEMP_PATH, account_name)
            else:
                video_path = os.path.join(VIDEO_DOWNLOAD_PATH, account_name)

            video_count = 1
            page_time = 0
            first_video_time = "0"
            need_make_video_dir = True
            is_over = False
            while not is_over:
                # 获取一页视频信息
                video_data = get_one_page_video_data(account_id, page_time)
                if video_data is None:
                    log.error(account_name + " 视频列表获取失败")
                    tool.process_exit()

                for video_info in video_data["info"]:
                    if not robot.check_sub_key(("newvideos", "id", "timestamp"), video_info):
                        log.error(account_name + " 视频信息 %s 解析失败" % video_info)
                        continue

                    page_time = int(video_info["timestamp"])
                    # 检查是否已下载到前一次的视频
                    if page_time <= int(self.account_info[2]):
                        is_over = True
                        break

                    # 将第一个视频的上传时间做为新的存档记录
                    if first_video_time == "0":
                        first_video_time = str(page_time)

                    # todo 处理如果有多个视频
                    if len(video_info["newvideos"]) != 1:
                        log.error(account_name + " 视频信息 %s 发现多个视频下载信息" % video_info)
                        continue
                    if not robot.check_sub_key(("vid",), video_info["newvideos"][0]):
                        log.error(account_name + " 视频信息 %s 解析vid失败" % video_info)
                        continue

                    # 获取视频下载地址
                    video_url = get_video_url(video_info["newvideos"][0]["vid"], video_info["id"])
                    log.step(account_name + " 开始下载第%s个视频 %s" % (video_count, video_url))

                    # 第一个视频,创建目录
                    if need_make_video_dir:
                        if not tool.make_dir(video_path, 0):
                            log.error(account_name + " 创建图片下载目录 %s 失败" % video_path)
                            tool.process_exit()
                        need_make_video_dir = False

                    file_type = video_url.split(".")[-1].split("?")[0]
                    file_path = os.path.join(video_path, "%04d.%s" % (video_count, file_type))
                    if tool.save_net_file(video_url, file_path):
                        log.step(account_name + " 第%s个视频下载成功" % video_count)
                        video_count += 1
                    else:
                        log.error(account_name + " 第%s个视频 %s 下载失败" % (video_count, video_url))

                    # 达到配置文件中的下载数量,结束
                    if 0 < GET_VIDEO_COUNT < video_data:
                        is_over = True
                        break

                if not is_over:
                    if not video_data["hasNext"]:
                        is_over = True

            log.step(account_name + " 下载完毕,总共获得%s个视频" % (video_count - 1))

            # 排序
            if IS_SORT:
                if first_video_time != "0":
                    destination_path = os.path.join(VIDEO_DOWNLOAD_PATH, account_name)
                    if robot.sort_file(video_path, destination_path, int(self.account_info[3]), 4):
                        log.step(account_name + " 视频从下载目录移动到保存目录成功")
                    else:
                        log.error(account_name + " 创建视频保存目录 %s 失败" % destination_path)
                        tool.process_exit()

            # 新的存档记录
            if first_video_time != "0":
                self.account_info[3] = str(int(self.account_info[3]) + video_count - 1)
                self.account_info[4] = first_video_time

            # 保存最后的信息
            tool.write_file("\t".join(self.account_info), NEW_SAVE_DATA_PATH)
            self.thread_lock.acquire()
            TOTAL_VIDEO_COUNT += video_count - 1
            ACCOUNTS.remove(account_id)
            self.thread_lock.release()

            log.step(account_name + " 完成")
        except SystemExit, se:
            if se.code == 0:
                log.step(account_name + " 提前退出")
            else:
                log.error(account_name + " 异常退出")
Example #27
    def run(self):
        global TOTAL_VIDEO_COUNT

        account_id = self.account_info[0]
        if len(self.account_info) >= 4 and self.account_info[3]:
            account_name = self.account_info[3]
        else:
            account_name = self.account_info[0]

        try:
            log.step(account_name + " 开始")

            # 如果需要重新排序则使用临时文件夹,否则直接下载到目标目录
            if IS_SORT:
                video_path = os.path.join(VIDEO_TEMP_PATH, account_name)
            else:
                video_path = os.path.join(VIDEO_DOWNLOAD_PATH, account_name)

            page_count = 1
            video_count = 1
            first_video_id = "0"
            unique_list = []
            is_over = False
            need_make_download_dir = True
            while not is_over:
                # 获取指定一页的视频信息
                medias_data = get_one_page_video_data(account_id, page_count)
                if medias_data is None:
                    log.error(account_name + " 视频列表获取失败")
                    tool.process_exit()

                for media in medias_data:
                    if not robot.check_sub_key(("video", "id"), media):
                        log.error(account_name + " failed to parse video %s info: %s" % (video_count, media))
                        continue

                    video_id = str(media["id"])

                    # check whether the video id is not newer than the previous archive record
                    if int(video_id) <= int(self.account_info[2]):
                        is_over = True
                        break

                    # skip duplicates caused by videos added while paging
                    if video_id in unique_list:
                        continue
                    else:
                        unique_list.append(video_id)
                    # use the first video's id as the new archive record
                    if first_video_id == "0":
                        first_video_id = video_id

                    video_url = str(media["video"])
                    log.step(account_name + " start downloading video %s: %s" % (video_count, video_url))

                    # first video: create the download directory
                    if need_make_download_dir:
                        if not tool.make_dir(video_path, 0):
                            log.error(account_name + " failed to create video download directory %s" % video_path)
                            tool.process_exit()
                        need_make_download_dir = False

                    file_path = os.path.join(video_path, "%04d.mp4" % video_count)
                    if tool.save_net_file(video_url, file_path):
                        log.step(account_name + " video %s downloaded successfully" % video_count)
                        video_count += 1
                    else:
                        log.error(account_name + " video %s (%s) download failed" % (video_count, video_url))

                    # reached the download limit from the config file, stop
                    if 0 < GET_VIDEO_COUNT < video_count:
                        is_over = True
                        break

                if not is_over:
                    if len(medias_data) >= VIDEO_COUNT_PER_PAGE:
                        page_count += 1
                    else:
                        # fewer items than requested: no videos left
                        is_over = True

            log.step(account_name + " 下载完毕,总共获得%s个视频" % (video_count - 1))

            # 排序
            if IS_SORT and video_count > 1:
                destination_path = os.path.join(VIDEO_DOWNLOAD_PATH, account_name)
                if robot.sort_file(video_path, destination_path, int(self.account_info[1]), 4):
                    log.step(account_name + " 视频从下载目录移动到保存目录成功")
                else:
                    log.error(account_name + " 创建视频保存目录 %s 失败" % destination_path)
                    tool.process_exit()

            # 新的存档记录
            if first_video_id != "":
                self.account_info[1] = str(int(self.account_info[1]) + video_count - 1)
                self.account_info[2] = first_video_id

            # 保存最后的信息
            tool.write_file("\t".join(self.account_info), NEW_SAVE_DATA_PATH)
            self.thread_lock.acquire()
            TOTAL_VIDEO_COUNT += video_count - 1
            ACCOUNTS.remove(account_id)
            self.thread_lock.release()

            log.step(account_name + " 完成")
        except SystemExit, se:
            if se.code == 0:
                log.step(account_name + " 提前退出")
            else:
                log.error(account_name + " 异常退出")
Example #28
    def run(self):
        global TOTAL_IMAGE_COUNT
        global TOTAL_VIDEO_COUNT

        account_id = self.account_info[0]
        if len(self.account_info) >= 6 and self.account_info[5]:
            account_name = self.account_info[5]
        else:
            account_name = self.account_info[0]

        try:
            log.step(account_name + " 开始")

            # 如果需要重新排序则使用临时文件夹,否则直接下载到目标目录
            if IS_SORT:
                image_path = os.path.join(IMAGE_TEMP_PATH, account_name)
                video_path = os.path.join(VIDEO_TEMP_PATH, account_name)
            else:
                image_path = os.path.join(IMAGE_DOWNLOAD_PATH, account_name)
                video_path = os.path.join(VIDEO_DOWNLOAD_PATH, account_name)

            # 视频
            video_count = 1
            account_page_id = None
            first_video_url = ""
            is_over = False
            need_make_video_dir = True
            since_id = INIT_SINCE_ID
            while IS_DOWNLOAD_VIDEO and (not is_over):
                # 获取page_id
                if account_page_id is None:
                    account_page_id = get_account_page_id(account_id)
                    if account_page_id is None:
                        log.error(account_name + " 微博主页没有获取到page_id")
                        break

                # 获取指定时间点后的一页视频信息
                video_page_data = get_one_page_video_data(account_page_id, since_id)
                if video_page_data is None:
                    log.error(account_name + " 视频列表解析异常")
                    first_video_url = ""  # 存档恢复
                    break

                # 匹配获取全部的视频页面
                video_play_url_list = get_video_play_url_list(video_page_data)
                log.trace(account_name + "since_id:%s中的全部视频:%s" % (since_id, video_play_url_list))
                for video_play_url in video_play_url_list:
                    # check whether this is the last downloaded video from the previous run
                    if self.account_info[4] == video_play_url:
                        is_over = True
                        break

                    # use the first video's URL as the new archive record
                    if first_video_url == "":
                        first_video_url = video_play_url

                    # get the download URLs for this video
                    return_code, video_url_list = get_video_url(video_play_url)
                    if return_code != 1:
                        if return_code == -1:
                            log.error(account_name + " no source URL found for video %s (%s)" % (video_count, video_play_url))
                        elif return_code == -2:
                            log.error(account_name + " video %s (%s) is not accessible" % (video_count, video_play_url))
                        elif return_code == -3:
                            log.error(account_name + " video %s (%s) uses an unsupported video source" % (video_count, video_play_url))
                        continue
                    log.step(account_name + " start downloading video %s: %s" % (video_count, video_play_url))

                    # first video: create the download directory
                    if need_make_video_dir:
                        if not tool.make_dir(video_path, 0):
                            log.error(account_name + " failed to create video download directory %s" % video_path)
                            tool.process_exit()
                        need_make_video_dir = False

                    video_file_path = os.path.join(video_path, "%04d.mp4" % video_count)
                    for video_url in video_url_list:
                        if tool.save_net_file(video_url, video_file_path):
                            log.step(account_name + " video %s downloaded successfully" % video_count)
                            video_count += 1
                        else:
                            log.error(account_name + " video %s (%s) download failed" % (video_count, video_url))

                    # reached the download limit from the config file, stop
                    if 0 < GET_VIDEO_COUNT < video_count:
                        is_over = True
                        break

                if not is_over:
                    # get the since_id of the next page
                    since_id = tool.find_sub_string(video_page_data, "type=video&owner_uid=&since_id=", '">')
                    if not since_id:
                        break

            # there is a history record but the normal-end marker was never hit, so the last archived video must have been deleted
            if self.account_info[4] != "" and video_count > 1 and not is_over:
                log.error(account_name + " could not find the last downloaded video from the previous run")

            # images
            image_count = 1
            page_count = 1
            first_image_time = "0"
            unique_list = []
            is_over = False
            need_make_image_dir = True
            while IS_DOWNLOAD_IMAGE and (not is_over):
                # fetch one page of image info
                photo_page_data = get_one_page_photo_data(account_id, page_count)
                if photo_page_data is None:
                    log.error(account_name + " failed to get the image list")
                    first_image_time = "0"  # reset so the archive record is left unchanged
                    break

                log.trace(account_name + " all image info on page %s: %s" % (page_count, photo_page_data))
                for image_info in photo_page_data["photo_list"]:
                    if not robot.check_sub_key(("pic_host", "pic_name", "timestamp"), image_info):
                        log.error(account_name + " failed to parse image %s info: %s" % (image_count, image_info))
                        continue

                    # check whether the image time is not newer than the previous archive record
                    if int(image_info["timestamp"]) <= int(self.account_info[2]):
                        is_over = True
                        break

                    # skip duplicates caused by images added while paging
                    if image_info["pic_name"] in unique_list:
                        continue
                    else:
                        unique_list.append(image_info["pic_name"])
                    # use the upload time of the first image as the new archive record
                    if first_image_time == "0":
                        first_image_time = str(image_info["timestamp"])

                    image_url = str(image_info["pic_host"]) + "/large/" + str(image_info["pic_name"])
                    log.step(account_name + " start downloading image %s: %s" % (image_count, image_url))

                    # fetch the image bytes and check whether the image is usable
                    image_status, image_byte = get_image_byte(image_url)
                    if image_status != 1:
                        if image_status == -1:
                            log.error(account_name + " image %s (%s) download failed" % (image_count, image_url))
                        elif image_status == -2:
                            log.error(account_name + " image %s (%s) has been deleted, skipping" % (image_count, image_url))
                        continue

                    # first image: create the download directory
                    if need_make_image_dir:
                        if not tool.make_dir(image_path, 0):
                            log.error(account_name + " failed to create image download directory %s" % image_path)
                            tool.process_exit()
                        need_make_image_dir = False

                    file_type = image_url.split(".")[-1]
                    if file_type.find("/") != -1:
                        file_type = "jpg"
                    image_file_path = os.path.join(image_path, "%04d.%s" % (image_count, file_type))
                    save_image(image_byte, image_file_path)
                    log.step(account_name + " image %s downloaded successfully" % image_count)
                    image_count += 1

                    # reached the download limit from the config file, stop
                    if 0 < GET_IMAGE_COUNT < image_count:
                        is_over = True
                        break

                if not is_over:
                    # use the total image count and the page size to work out whether there is another page
                    if (photo_page_data["total"] / IMAGE_COUNT_PER_PAGE) > (page_count - 1):
                        page_count += 1
                    else:
                        # all images downloaded
                        is_over = True

            log.step(account_name + " 下载完毕,总共获得%s张图片和%s个视频" % (image_count - 1, video_count - 1))

            # 排序
            if IS_SORT:
                if first_image_time != "0":
                    destination_path = os.path.join(IMAGE_DOWNLOAD_PATH, account_name)
                    if robot.sort_file(image_path, destination_path, int(self.account_info[1]), 4):
                        log.step(account_name + " 图片从下载目录移动到保存目录成功")
                    else:
                        log.error(account_name + " 创建图片保存目录 %s 失败" % destination_path)
                        tool.process_exit()
                if first_video_url != "":
                    destination_path = os.path.join(VIDEO_DOWNLOAD_PATH, account_name)
                    if robot.sort_file(video_path, destination_path, int(self.account_info[3]), 4):
                        log.step(account_name + " 视频从下载目录移动到保存目录成功")
                    else:
                        log.error(account_name + " 创建视频保存目录 %s 失败" % destination_path)
                        tool.process_exit()

            # 新的存档记录
            if first_image_time != "0":
                self.account_info[1] = str(int(self.account_info[1]) + image_count - 1)
                self.account_info[2] = first_image_time
            if first_video_url != "":
                self.account_info[3] = str(int(self.account_info[3]) + video_count - 1)
                self.account_info[4] = first_video_url

            # 保存最后的信息
            tool.write_file("\t".join(self.account_info), NEW_SAVE_DATA_PATH)
            self.thread_lock.acquire()
            TOTAL_IMAGE_COUNT += image_count - 1
            TOTAL_VIDEO_COUNT += video_count - 1
            ACCOUNTS.remove(account_id)
            self.thread_lock.release()

            log.step(account_name + " 完成")
        except SystemExit, se:
            if se.code == 0:
                log.step(account_name + " 提前退出")
            else:
                log.error(account_name + " 异常退出")
Example #29
    def run(self):
        global TOTAL_IMAGE_COUNT
        global TOTAL_VIDEO_COUNT

        account_name = self.account_info[0]

        try:
            log.step(account_name + " 开始")

            # 如果需要重新排序则使用临时文件夹,否则直接下载到目标目录
            if IS_SORT:
                image_path = os.path.join(IMAGE_TEMP_PATH, account_name)
                video_path = os.path.join(VIDEO_TEMP_PATH, account_name)
            else:
                image_path = os.path.join(IMAGE_DOWNLOAD_PATH, account_name)
                video_path = os.path.join(VIDEO_DOWNLOAD_PATH, account_name)

            image_count = 1
            video_count = 1
            target_id = INIT_TARGET_ID
            first_post_id = "0"
            is_over = False
            need_make_image_dir = True
            need_make_video_dir = True

            while not is_over:
                # 获取一页日志信息
                message_page_data = get_message_page_data(account_name, target_id)
                if message_page_data is None:
                    log.error(account_name + " 媒体列表解析异常")
                    tool.process_exit()
                # 没有了
                if len(message_page_data) == 0:
                    break

                for message_info in message_page_data:
                    if not robot.check_sub_key(("post",), message_info):
                        log.error(account_name + " failed to parse media info %s" % message_info)
                        continue
                    if not robot.check_sub_key(("body", "postId"), message_info["post"]):
                        log.error(account_name + " failed to parse media info %s" % message_info)
                        continue

                    target_id = message_info["post"]["postId"]
                    # check whether we have reached the previous run's record
                    if int(target_id) <= int(self.account_info[3]):
                        is_over = True
                        break

                    # use the first post's postId as the new archive record
                    if first_post_id == "0":
                        first_post_id = str(target_id)

                    for media_info in message_info["post"]["body"]:
                        if not robot.check_sub_key(("bodyType",), media_info):
                            log.error(account_name + " failed to parse bodyType in the media list")
                            continue

                        # bodyType: 1 = text, 2 = emoji, 3 = image, 7 = repost, 8 = video
                        body_type = int(media_info["bodyType"])
                        if body_type == 1:  # text
                            pass
                        elif body_type == 2:  # emoji
                            pass
                        elif body_type == 3:  # image
                            if IS_DOWNLOAD_IMAGE:
                                if not robot.check_sub_key(("image",), media_info):
                                    log.error(account_name + " failed to parse image %s: %s" % (image_count, media_info))
                                    continue

                                image_url = str(media_info["image"])
                                log.step(account_name + " start downloading image %s: %s" % (image_count, image_url))

                                # first image: create the download directory
                                if need_make_image_dir:
                                    if not tool.make_dir(image_path, 0):
                                        log.error(account_name + " failed to create image download directory %s" % image_path)
                                        tool.process_exit()
                                    need_make_image_dir = False

                                file_type = image_url.split(".")[-1]
                                image_file_path = os.path.join(image_path, "%04d.%s" % (image_count, file_type))
                                if tool.save_net_file(image_url, image_file_path):
                                    log.step(account_name + " image %s downloaded successfully" % image_count)
                                    image_count += 1
                                else:
                                    log.error(account_name + " image %s (%s) download failed" % (image_count, image_url))
                        elif body_type == 8:  # video
                            if IS_DOWNLOAD_VIDEO:
                                if not robot.check_sub_key(("movieUrlHq",), media_info):
                                    log.error(account_name + " failed to parse video %s: %s" % (video_count, media_info))
                                    continue

                                video_url = str(media_info["movieUrlHq"])
                                log.step(account_name + " start downloading video %s: %s" % (video_count, video_url))

                                # first video: create the download directory
                                if need_make_video_dir:
                                    if not tool.make_dir(video_path, 0):
                                        log.error(account_name + " failed to create video download directory %s" % video_path)
                                        tool.process_exit()
                                    need_make_video_dir = False

                                file_type = video_url.split(".")[-1]
                                video_file_path = os.path.join(video_path, "%04d.%s" % (video_count, file_type))
                                if tool.save_net_file(video_url, video_file_path):
                                    log.step(account_name + " video %s downloaded successfully" % video_count)
                                    video_count += 1
                                else:
                                    log.error(account_name + " video %s (%s) download failed" % (video_count, video_url))
                        elif body_type == 7:  # repost
                            pass
                        else:
                            log.error(account_name + " image %s / video %s: unknown bodyType %s, %s" % (image_count, video_count, body_type, media_info))

            # sort
            if IS_SORT:
                if image_count > 1:
                    destination_path = os.path.join(IMAGE_DOWNLOAD_PATH, account_name)
                    if robot.sort_file(image_path, destination_path, int(self.account_info[1]), 4):
                        log.step(account_name + " images moved from download directory to save directory")
                    else:
                        log.error(account_name + " failed to create image save directory %s" % destination_path)
                        tool.process_exit()
                if video_count > 1:
                    destination_path = os.path.join(VIDEO_DOWNLOAD_PATH, account_name)
                    if robot.sort_file(video_path, destination_path, int(self.account_info[2]), 4):
                        log.step(account_name + " videos moved from download directory to save directory")
                    else:
                        log.error(account_name + " failed to create video save directory %s" % destination_path)
                        tool.process_exit()

            # new archive record
            if first_post_id != "0":
                self.account_info[1] = str(int(self.account_info[1]) + image_count - 1)
                self.account_info[2] = str(int(self.account_info[2]) + video_count - 1)
                self.account_info[3] = first_post_id

            # save the final record
            tool.write_file("\t".join(self.account_info), NEW_SAVE_DATA_PATH)
            self.thread_lock.acquire()
            TOTAL_IMAGE_COUNT += image_count - 1
            TOTAL_VIDEO_COUNT += video_count - 1
            ACCOUNTS.remove(account_name)
            self.thread_lock.release()

            log.step(account_name + " done")
        except SystemExit, se:
            if se.code == 0:
                log.step(account_name + " exited early")
            else:
                log.error(account_name + " exited abnormally")
Example #30
    def run(self):
        global TOTAL_IMAGE_COUNT

        account_name = self.account_info[0]

        try:
            log.step(account_name + " 开始")

            if account_name.isdigit():
                site_id = account_name
            else:
                site_id = get_site_id(account_name)
            if site_id is None:
                log.error(account_name + " 主页无法访问")
                tool.process_exit()

            if not site_id:
                log.error(account_name + " site id解析失败")
                tool.process_exit()

            image_path = os.path.join(IMAGE_DOWNLOAD_PATH, account_name)

            this_account_total_image_count = 0
            post_count = 0
            first_post_id = "0"
            post_time = "2016-11-16 14:12:00"
            is_over = False
            while not is_over:
                # 获取一页的相册信息列表
                post_info_list = get_one_page_post_info_list(site_id, post_time)
                if post_info_list is None:
                    log.error(account_name + " 相册信息列表无法访问")
                    tool.process_exit()

                # 如果为空,表示已经取完了
                if len(post_info_list) == 0:
                    break

                for post_info in post_info_list:
                    if not robot.check_sub_key(("title", "post_id", "published_at", "images"), post_info):
                        log.error(account_name + " failed to parse post info: %s" % post_info)
                        continue

                    post_id = str(post_info["post_id"])

                    # check whether the post id is not newer than the previous archive record
                    if int(post_id) <= int(self.account_info[1]):
                        is_over = True
                        break

                    # use the first post's id as the new archive record
                    if first_post_id == "0":
                        first_post_id = post_id

                    # strip characters that are not supported in file names from the title
                    title = robot.filter_text(post_info["title"])
                    if title:
                        post_path = os.path.join(image_path, "%s %s" % (post_id, title))
                    else:
                        post_path = os.path.join(image_path, post_id)
                    if not tool.make_dir(post_path, 0):
                        # directory creation failed: retry without the title, and give up if that fails too
                        log.error(account_name + " failed to create post directory %s, retrying without the title" % post_path)
                        post_path = os.path.join(image_path, post_id)
                        if not tool.make_dir(post_path, 0):
                            log.error(account_name + " failed to create post directory %s" % post_path)
                            tool.process_exit()

                    image_count = 0
                    for image_info in post_info["images"]:
                        image_count += 1
                        if not robot.check_sub_key(("img_id",), image_info):
                            log.error(account_name + " post %s: failed to parse image %s" % (post_id, image_count))
                            continue
                        image_url = generate_large_image_url(site_id, image_info["img_id"])
                        log.step(account_name + " post %s: start downloading image %s: %s" % (post_id, image_count, image_url))

                        file_path = os.path.join(post_path, "%s.jpg" % image_count)
                        if tool.save_net_file(image_url, file_path):
                            log.step(account_name + " post %s: image %s downloaded successfully" % (post_id, image_count))
                        else:
                            log.error(account_name + " post %s: image %s (%s) download failed" % (post_id, image_count, image_url))
                    this_account_total_image_count += image_count

                    if not is_over:
                        # reached the page-count limit from the config file, stop
                        if 0 < GET_PAGE_COUNT < post_count:
                            is_over = True
                        else:
                            # the publish time of this post, used as the cursor for the next page
                            post_time = post_info["published_at"]
                            post_count += 1

            log.step(account_name + " download finished, got %s images in total" % this_account_total_image_count)

            # new archive record
            if first_post_id != "0":
                self.account_info[1] = first_post_id

            # save the final record
            tool.write_file("\t".join(self.account_info), NEW_SAVE_DATA_PATH)
            self.thread_lock.acquire()
            TOTAL_IMAGE_COUNT += this_account_total_image_count
            ACCOUNTS.remove(account_name)
            self.thread_lock.release()

            log.step(account_name + " done")
        except SystemExit, se:
            if se.code == 0:
                log.step(account_name + " exited early")
            else:
                log.error(account_name + " exited abnormally")