Exemple #1
0
def get_basic_info(html_info):
    """
    Extract a game's basic information from an app detail page.

    The page is parsed with BeautifulSoup. Most fields come from elements
    looked up by CSS class; download URL, size, version, package name and
    icon come from the ``data_*`` attributes of the install button.
    A missing element or attribute leaves the field at its default value
    instead of raising.

    :param html_info: HTML source of the detail page. A falsy value
        (``None`` or ``""``) produces an empty dict.
    :return: dict of extracted fields, or ``{}`` when there is no HTML.
        ``display_name`` and ``introduction`` are ``None`` when their
        element is absent from the page.
    """
    result = {}

    html = html_info
    if not html:
        return result

    screenshot_url = ""
    developers = ""
    category = ""
    pname = ""
    download = ""
    install_num = ""
    version = ""
    size = 0
    api_level = ""
    icon_url = ""

    soup = BeautifulSoup(html)
    display_name = soup.find("h1", {"class": "app-name"})
    introduction = soup.find("div", {"class": "brief-long"})
    short_desc = soup.find("span", {"class": "head-content"})
    app_tags = soup.find("div", {"class": "nav"})
    screenshot_info = soup.find("div", {"class": "section-body"})
    star_percent = soup.find("span", {"class": "star-percent"})
    params_download_num = soup.find("span", {"class": "download-num"})
    params_platform = soup.find("span", {"class": "params-platform"})
    data_info = soup.find("a", {"class": "inst-btn-big highspeed"})

    # Install button: read its data_* attributes with .get() so that a
    # button lacking one of them does not abort the whole parse.
    if data_info:
        download = data_info.get("data_url", "")
        size = data_info.get("data_size", "") or 0
        version = data_info.get("data_versionname", "")
        pname = data_info.get("data_package", "")
        icon_url = data_info.get("data_icon", "")

    # Display name
    if display_name:
        display_name = display_name.text

    # Introduction: turn <br /> into newlines via a placeholder that
    # survives the second parse, then drop the "collapse" link label.
    if introduction:
        intro_markup = str(introduction).replace("<br />", "$##$")
        intro_text = BeautifulSoup(intro_markup).text
        intro_text = intro_text.replace("$##$", "\n")
        introduction = intro_text.replace(u"收起", "")

    # Category: second segment of the breadcrumb navigation. Only taken
    # when the separator is actually present.
    if app_tags:
        crumbs = (app_tags.text or "").split("&gt;")
        if len(crumbs) > 1:
            category = crumbs[1]

    # Screenshot URLs, one per line
    if screenshot_info:
        for img in screenshot_info.findAll('img'):
            src = img.get('src')
            if src:
                screenshot_url += src + "\n"

    # Install count and platform requirement
    if params_download_num:
        install_num = params_download_num.text
    if params_platform:
        api_level = params_platform.text

    # Rating: the raw inline style is handed to utils.format_star_num
    if star_percent:
        star_percent = star_percent.get("style")

    # Short description
    if short_desc:
        short_desc = short_desc.text

    result["display_name"] = display_name
    result["introduction"] = introduction
    result["screenshot_url"] = screenshot_url
    result["developers"] = developers
    result["category"] = category
    result["icon_url"] = icon_url
    result["pkg_name"] = pname
    result["version"] = version
    result["url1"] = download
    result["language"] = ""
    result["version_code"] = 0
    result["install_num"] = utils.format_install_num(install_num)
    result["size"] = size
    result["min_sdk_version"] = utils.format_android_level(api_level)
    result["star_num"] = utils.format_star_num(star_percent)
    result["short_desc"] = short_desc or ""

    return result
def get_basic_info(html_info):
    """
    Extract a game's basic information from an app detail page.

    Visible fields are read from elements looked up by CSS class or id.
    Package name, download URL, install count and version code come from
    the inline ``var appDetailData = {...}`` script, whose bare keys are
    quoted so it can be parsed as JSON.

    :param html_info: HTML source of the detail page. A falsy value
        (``None`` or ``""``) produces an empty dict.
    :return: dict of extracted fields, or ``{}`` when there is no HTML.
    :raises ValueError: if the ``appDetailData`` literal is still not
        valid JSON after its keys are quoted (raised by ``json.loads``).
    :raises KeyError: if the parsed ``appDetailData`` lacks an expected key.
    """
    result = {}

    html = html_info
    if not html:
        return result

    soup = BeautifulSoup(html)
    name = soup.find("div", {"class": "det-name-int"})
    detail = soup.find("div", {"class": "det-app-data-info"})
    app_tags = soup.find(id="J_DetCate")
    icon_info = soup.find("div", {"class": "det-icon"})
    screenshot_info = soup.findAll("div", {"class": "pic-img-box"})
    developers_info = soup.findAll("div", {"class": "det-othinfo-data"})
    params_size = soup.find("div", {"class": "det-size"})
    version_info = soup.find("div", {"class": "det-othinfo-data"})
    star = soup.find("div", {"class": "com-blue-star-num"})

    display_name = ""
    introduction = ""
    screenshot_url = ""
    category = ""
    pname = ""
    download = ""
    install_num = ""
    language = ""
    size = ""
    version = ""
    version_code = ""
    star_num = ""
    icon_url = ""
    developers = ""

    # Display name
    if name:
        display_name = name.text
        # NOTE(review): this cuts at the first "v" anywhere in the name,
        # presumably to drop a trailing "vX.Y" suffix; names that contain
        # a lowercase "v" are truncated too. Confirm against real pages.
        if "v" in display_name:
            display_name = display_name.split("v")[0]

    # Introduction
    if detail:
        introduction = detail.text

    # Category
    if app_tags:
        category = app_tags.text

    # Icon: the wrapper div may not contain an <img>
    if icon_info:
        icon_img = icon_info.find('img')
        if icon_img:
            icon_url = icon_img.get('src', "")

    # Screenshot URLs, one per line; boxes without an <img> or without a
    # data-src attribute are skipped.
    for screen in screenshot_info:
        img = screen.find('img')
        src = img.get('data-src') if img else None
        if src:
            screenshot_url += src + "\n"

    # Developer: text of the last "det-othinfo-data" element
    if developers_info:
        developers = developers_info[-1].text

    # Size, version and rating
    if params_size:
        size = params_size.text
    if version_info:
        version = version_info.text.replace("V", "")
    if star:
        star_num = star.text

    # Package name and download URL from the inline script. The object
    # literal uses bare keys, so the first occurrence of each known key is
    # quoted (in this fixed order) before the text is parsed as JSON.
    bare_keys = ("orgame", "apkName", "apkCode", "appId", "appName",
                 "iconUrl", "appScore", "downTimes", "downUrl",
                 "tipsUpDown")
    for script in soup.findAll('script'):
        script_text = script.text
        if "var appDetailData = {" not in script_text:
            continue
        search = re.search(r"{([\s\S]*)}", script_text, re.M | re.I)
        if not search:
            continue
        data = search.group()
        for key in bare_keys:
            data = data.replace(key, '"%s"' % key, 1)
        pkg = json.loads(data)
        download = pkg["downUrl"]
        pname = pkg["apkName"]
        install_num = pkg["downTimes"]
        version_code = pkg['apkCode']

    result["display_name"] = display_name
    result["introduction"] = introduction
    result["screenshot_url"] = screenshot_url
    result["developers"] = developers
    result["category"] = category
    result["icon_url"] = icon_url
    result["version"] = version
    result["pkg_name"] = pname
    result["url1"] = download
    result["language"] = language
    result["version_code"] = version_code
    result["install_num"] = utils.format_install_num(install_num)
    result["size"] = utils.format_file_size(size)
    result["star_num"] = utils.format_star_num(star_num, 2)
    result["min_sdk_version"] = 0
    # This page has no short description; the key is still emitted so the
    # result has the same keys as the other store parsers.
    result["short_desc"] = ""

    return result
Exemple #3
0
def get_basic_info(html_info):
    """
    Extract an app's basic information from an apk.91.com detail page.

    Version, install count, size, platform requirement, developer and
    category are read by position from the ``<li>`` items of the
    ``s_info`` list. The remaining fields come from elements looked up by
    CSS class or id. A missing element or attribute leaves the field at
    its default value instead of raising.

    :param html_info: HTML source of the detail page. A falsy value
        (``None`` or ``""``) produces an empty dict.
    :return: dict of extracted fields, or ``{}`` when there is no HTML.
    """
    result = {}

    html = html_info
    if not html:
        return result

    pname = ""
    download = ""
    install_num = ""
    language = ""
    version = ""
    size = ""
    display_name = ""
    introduction = ""
    api_level = ""
    screenshot_url = ""
    developers = ""
    category = ""
    icon_url = ""
    star_num = ""

    soup = BeautifulSoup(html)
    short_desc = soup.find("meta", {"name": "description"})
    download_info = soup.find("a", {"class": "s_btn s_btn4"})
    details = soup.find("ul", {"class": "s_info"})
    name = soup.find("h1", {"class": "ff f20 fb fl"})
    introduction_info = soup.find("div", {"class": "o-content"})
    screenshot_info = soup.find(id="lstImges")
    icon_star = soup.find("div", {"class": "s_intro_pic fl"})

    # Download link: the href is prefixed with the site host
    if download_info:
        href = download_info.get("href")
        if href:
            download = "http://apk.91.com" + href

    # Detail list: each field sits at a fixed position in the list
    if details:
        for i, li in enumerate(details.findAll("li")):
            if i == 0:
                version = li.text.replace(u"版本:", "").replace(u"历史版本", "")
            elif i == 1:
                install_num = li.text.replace(u"下载次数:", "")
            elif i == 2:
                size = li.text.replace(u"文件大小:", "")
            elif i == 3:
                api_level = li.text
            elif i == 8:
                developers = li.text.replace(u"开发商:", "")
            elif i == 9:
                for tag in li.findAll("a"):
                    category += tag.text + "\n"

    # Icon and rating: the rating is the raw class attribute of the first
    # link, handed to utils.format_star_num below.
    if icon_star:
        icon = icon_star.find('img')
        star = icon_star.find('a')
        if icon:
            icon_url = icon.get('src', "")
        if star:
            star_num = star.get('class', "")

    if introduction_info:
        introduction = introduction_info.text

    if name:
        display_name = name.text

    # Screenshot URLs, one per line
    if screenshot_info:
        for img in screenshot_info.findAll('img'):
            src = img.get('src')
            if src:
                screenshot_url += src + u"\n"

    result["pkg_name"] = pname
    result["version"] = version
    result["url1"] = download
    result["language"] = language
    result["display_name"] = display_name
    result["introduction"] = introduction
    result["screenshot_url"] = screenshot_url
    result["developers"] = developers
    result["category"] = category
    result["icon_url"] = icon_url
    result["version_code"] = ""
    result["install_num"] = utils.format_install_num(install_num)
    result["size"] = utils.format_file_size(size)
    result["star_num"] = utils.format_star_num(star_num, 2)
    result["min_sdk_version"] = utils.format_android_level(api_level)
    # The description meta tag is optional; fall back to "" when the tag
    # or its content attribute is missing.
    result["short_desc"] = (short_desc.get("content") or "") if short_desc else ""

    return result
Exemple #4
0
def get_basic_info(html_info):
    """
    Extract a game's basic information from an app detail page.

    The page is parsed with BeautifulSoup. Most fields come from elements
    looked up by CSS class; download URL, size, version, package name and
    icon come from the ``data_*`` attributes of the install button.
    A missing element or attribute leaves the field at its default value
    instead of raising.

    :param html_info: HTML source of the detail page. A falsy value
        (``None`` or ``""``) produces an empty dict.
    :return: dict of extracted fields, or ``{}`` when there is no HTML.
        ``display_name`` and ``introduction`` are ``None`` when their
        element is absent from the page.
    """
    result = {}

    html = html_info
    if not html:
        return result

    screenshot_url = ""
    developers = ""
    category = ""
    pname = ""
    download = ""
    install_num = ""
    version = ""
    size = 0
    api_level = ""
    icon_url = ""

    soup = BeautifulSoup(html)
    display_name = soup.find("h1", {"class": "app-name"})
    introduction = soup.find("div", {"class": "brief-long"})
    short_desc = soup.find("span", {"class": "head-content"})
    app_tags = soup.find("div", {"class": "nav"})
    screenshot_info = soup.find("div", {"class": "section-body"})
    star_percent = soup.find("span", {"class": "star-percent"})
    params_download_num = soup.find("span", {"class": "download-num"})
    params_platform = soup.find("span", {"class": "params-platform"})
    data_info = soup.find("a", {"class": "inst-btn-big highspeed"})

    # Install button: read its data_* attributes with .get() so that a
    # button lacking one of them does not abort the whole parse.
    if data_info:
        download = data_info.get("data_url", "")
        size = data_info.get("data_size", "") or 0
        version = data_info.get("data_versionname", "")
        pname = data_info.get("data_package", "")
        icon_url = data_info.get("data_icon", "")

    # Display name
    if display_name:
        display_name = display_name.text

    # Introduction: turn <br /> into newlines via a placeholder that
    # survives the second parse, then drop the "collapse" link label.
    if introduction:
        intro_markup = str(introduction).replace("<br />", "$##$")
        intro_text = BeautifulSoup(intro_markup).text
        intro_text = intro_text.replace("$##$", "\n")
        introduction = intro_text.replace(u"收起", "")

    # Category: second segment of the breadcrumb navigation. Only taken
    # when the separator is actually present.
    if app_tags:
        crumbs = (app_tags.text or "").split("&gt;")
        if len(crumbs) > 1:
            category = crumbs[1]

    # Screenshot URLs, one per line
    if screenshot_info:
        for img in screenshot_info.findAll('img'):
            src = img.get('src')
            if src:
                screenshot_url += src + "\n"

    # Install count and platform requirement
    if params_download_num:
        install_num = params_download_num.text
    if params_platform:
        api_level = params_platform.text

    # Rating: the raw inline style is handed to utils.format_star_num
    if star_percent:
        star_percent = star_percent.get("style")

    # Short description
    if short_desc:
        short_desc = short_desc.text

    result["display_name"] = display_name
    result["introduction"] = introduction
    result["screenshot_url"] = screenshot_url
    result["developers"] = developers
    result["category"] = category
    result["icon_url"] = icon_url
    result["pkg_name"] = pname
    result["version"] = version
    result["url1"] = download
    result["language"] = ""
    result["version_code"] = 0
    result["install_num"] = utils.format_install_num(install_num)
    result["size"] = size
    result["min_sdk_version"] = utils.format_android_level(api_level)
    result["star_num"] = utils.format_star_num(star_percent)
    result["short_desc"] = short_desc or ""

    return result
Exemple #5
0
def get_basic_info(html_info):
    """
    Extract an app's basic information from an app.mi.com detail page.

    Size, version and package name are read by position from the ``<li>``
    items of the details block. Name, icon, category, developer and
    rating come from the ``app-info`` block. A missing element or
    attribute leaves the field at its default value instead of raising.

    :param html_info: HTML source of the detail page. A falsy value
        (``None`` or ``""``) produces an empty dict.
    :return: dict of extracted fields, or ``{}`` when there is no HTML.
    """
    result = {}

    html = html_info
    if not html:
        return result

    pname = ""
    download = ""
    language = ""
    version = ""
    size = ""
    display_name = ""
    developer = ""
    category = ""
    icon_url = ""
    star_num = ""
    screenshot_url = ""
    introduction = ""

    soup = BeautifulSoup(html)
    app_info = soup.find("div", {"class": "app-info"})
    download_info = soup.find("a", {"class": "download"})
    details = soup.find("div", {"class": "details preventDefault"})
    introductions = soup.find("p", {"class": "pslide"})
    screenshot_info = soup.find(id="J_thumbnail_wrap")

    # Download link: add the site host only when the href does not already
    # carry it. The check has to be made on the href string; testing the
    # Tag itself looks at its child nodes, not at the link target.
    if download_info:
        href = download_info.get('href')
        if href:
            if "http://app.mi.com" in href:
                download = href
            else:
                download = "http://app.mi.com" + href

    # Details list: each field sits at a fixed position in the list
    if details:
        detail_list = details.find("ul")
        list_li = detail_list.findAll("li") if detail_list else []
        for index, element in enumerate(list_li):
            if index == 1:
                size = element.text
            elif index == 3:
                version = element.text
            elif index == 7:
                pname = element.text

    if app_info:
        name = app_info.find("h3")
        icon = app_info.find("img")
        categorys = app_info.find("p", {"class": "special-font action"})
        paragraphs = app_info.findAll("p")
        star = app_info.find("div",
                             attrs={"class": re.compile(r"star1-hover")})
        if name:
            display_name = name.text
        if icon:
            icon_url = icon.get('src', "")
        if categorys:
            category = categorys.text
            if "|" in category:
                category = category.split("|")[0].replace(u"分类:", "")
        # The developer is only taken when the block has exactly two <p>
        # elements; it is then the first of them.
        if len(paragraphs) == 2:
            developer = paragraphs[0].text
        if star:
            star_class = star.get('class', "")
            # Beautiful Soup 4 returns the multi-valued "class" attribute
            # as a list; normalise it to one string so the split below
            # works with either parser version.
            if isinstance(star_class, (list, tuple)):
                star_class = " ".join(star_class)
            nums = star_class.split('-', 2)
            if len(nums) == 3:
                star_num = nums[2]

    # Screenshot URLs, one per line
    if screenshot_info:
        for img in screenshot_info.findAll('img'):
            src = img.get('src')
            if src:
                screenshot_url += src + "\n"

    # Introduction: turn <br /> into newlines via a placeholder that
    # survives the second parse.
    if introductions:
        intro_markup = str(introductions).replace("<br />", "$##$")
        introduction = BeautifulSoup(intro_markup).text.replace("$##$", "\n")

    result["pkg_name"] = pname
    result["version"] = version
    result["url1"] = download
    result["language"] = language
    result["display_name"] = display_name
    result["introduction"] = introduction
    result["screenshot_url"] = screenshot_url
    result["developers"] = developer
    result["category"] = category
    result["icon_url"] = icon_url
    result["version_code"] = ""
    result["install_num"] = ""
    result["size"] = utils.format_file_size(size)
    result["star_num"] = utils.format_star_num(star_num)
    result["min_sdk_version"] = 0
    result["short_desc"] = ""

    return result
Exemple #6
0
def get_basic_info(html_info):
    """
    Extract an app's basic information from an apk.91.com detail page.

    Version, install count, size, platform requirement, developer and
    category are read by position from the ``<li>`` items of the
    ``s_info`` list. The remaining fields come from elements looked up by
    CSS class or id. A missing element or attribute leaves the field at
    its default value instead of raising.

    :param html_info: HTML source of the detail page. A falsy value
        (``None`` or ``""``) produces an empty dict.
    :return: dict of extracted fields, or ``{}`` when there is no HTML.
    """
    result = {}

    html = html_info
    if not html:
        return result

    pname = ""
    download = ""
    install_num = ""
    language = ""
    version = ""
    size = ""
    display_name = ""
    introduction = ""
    api_level = ""
    screenshot_url = ""
    developers = ""
    category = ""
    icon_url = ""
    star_num = ""

    soup = BeautifulSoup(html)
    short_desc = soup.find("meta", {"name": "description"})
    download_info = soup.find("a", {"class": "s_btn s_btn4"})
    details = soup.find("ul", {"class": "s_info"})
    name = soup.find("h1", {"class": "ff f20 fb fl"})
    introduction_info = soup.find("div", {"class": "o-content"})
    screenshot_info = soup.find(id="lstImges")
    icon_star = soup.find("div", {"class": "s_intro_pic fl"})

    # Download link: the href is prefixed with the site host
    if download_info:
        href = download_info.get("href")
        if href:
            download = "http://apk.91.com" + href

    # Detail list: each field sits at a fixed position in the list
    if details:
        for i, li in enumerate(details.findAll("li")):
            if i == 0:
                version = li.text.replace(u"版本:", "").replace(u"历史版本", "")
            elif i == 1:
                install_num = li.text.replace(u"下载次数:", "")
            elif i == 2:
                size = li.text.replace(u"文件大小:", "")
            elif i == 3:
                api_level = li.text
            elif i == 8:
                developers = li.text.replace(u"开发商:", "")
            elif i == 9:
                for tag in li.findAll("a"):
                    category += tag.text + "\n"

    # Icon and rating: the rating is the raw class attribute of the first
    # link, handed to utils.format_star_num below.
    if icon_star:
        icon = icon_star.find('img')
        star = icon_star.find('a')
        if icon:
            icon_url = icon.get('src', "")
        if star:
            star_num = star.get('class', "")

    if introduction_info:
        introduction = introduction_info.text

    if name:
        display_name = name.text

    # Screenshot URLs, one per line
    if screenshot_info:
        for img in screenshot_info.findAll('img'):
            src = img.get('src')
            if src:
                screenshot_url += src + u"\n"

    result["pkg_name"] = pname
    result["version"] = version
    result["url1"] = download
    result["language"] = language
    result["display_name"] = display_name
    result["introduction"] = introduction
    result["screenshot_url"] = screenshot_url
    result["developers"] = developers
    result["category"] = category
    result["icon_url"] = icon_url
    result["version_code"] = ""
    result["install_num"] = utils.format_install_num(install_num)
    result["size"] = utils.format_file_size(size)
    result["star_num"] = utils.format_star_num(star_num, 2)
    result["min_sdk_version"] = utils.format_android_level(api_level)
    # The description meta tag is optional; fall back to "" when the tag
    # or its content attribute is missing.
    result["short_desc"] = (short_desc.get("content") or "") if short_desc else ""

    return result
Exemple #7
0
def get_basic_info(html_info):
    """
    Extract a game's basic information from an app detail page.

    Visible fields are read from elements looked up by id or CSS class.
    Package name, download URL and version code come from the inline
    ``var detail = (function () {...})`` script, whose returned object
    literal is converted to JSON text and parsed.

    :param html_info: HTML source of the detail page. A falsy value
        (``None`` or ``""``) produces an empty dict.
    :return: dict of extracted fields, or ``{}`` when there is no HTML.
        ``display_name``, ``introduction`` and ``icon_url`` are ``None``
        when their top-level page element is absent.
    :raises ValueError: if the script's object literal is not valid JSON
        after conversion (raised by ``json.loads``).
    :raises KeyError: if the parsed object lacks an expected key.
    """
    result = {}

    html = html_info
    if not html:
        return result

    soup = BeautifulSoup(html)
    display_name = soup.find(id="app-name")
    introduction = soup.find(id="html-brief")
    app_tags = soup.find("div", {"class": "app-tags"})
    icon_url = soup.find("dt")
    short_desc = soup.find("dl", {"class": "clearfix"})

    screenshot_url = ""
    developers = ""
    category = ""
    pname = ""
    download = ""
    install_num = ""
    language = ""
    version_name = ""
    version_code = 0
    size = 0
    star_num = 0
    min_sdk_version = 0

    # Display name: text of the <span> inside the name element, when the
    # span exists.
    if display_name:
        name_span = display_name.find("span")
        display_name = name_span.text if name_span else ""

    # If the primary introduction container is missing, try the fallback
    if not introduction:
        introduction = soup.find("div", {"class": "infors"})

    # Screenshots and introduction text
    if introduction:
        # Screenshot URLs: prefer the comma-separated data-snaps attribute
        # of the scrollbar, otherwise collect <img> sources. Anything
        # whose URL contains "icon.png" is skipped.
        scroll = introduction.find(id="scrollbar")
        desc = introduction.find("div", {"class": "breif"})
        if scroll:
            for img in scroll.get("data-snaps", "").split(","):
                if img and "icon.png" not in img:
                    screenshot_url += img + "\n"
        else:
            for img in introduction.findAll("img"):
                src = img.get('src')
                if src and "icon.png" not in src:
                    screenshot_url += src + "\n"

        # Introduction text: mark line breaks with a placeholder that
        # survives the second parse, then turn the placeholder into
        # newlines. Paragraph ends are only marked when the whole
        # container is used, i.e. when there is no dedicated description
        # element.
        markup = str(desc if desc else introduction)
        markup = markup.replace("<br />", "$##$")
        markup = markup.replace("</td>", "$##$</td>")
        if not desc:
            markup = markup.replace("</p>", "$##$</p>")
        text = BeautifulSoup(markup).text
        text = text.replace("$##$", "\n").replace("&nbsp;", " ")
        text = text.replace("versioncode", "\nversioncode")
        introduction = text.replace("updatetime", "\nupdatetime")

    # Package name and download URL from the inline script
    for script in soup.findAll('script'):
        pkg_infos = script.text
        if "var detail = (function () {" not in pkg_infos:
            continue
        search = re.search(r"return {([\s\S]*)};", pkg_infos, re.M | re.I)
        if search:
            data = search.group().replace("return", "").replace(";", "").replace("'", "\"")
            pkg = json.loads(data)
            pname = pkg['pname']
            download = pkg['downloadUrl']
            version_code = pkg['vcode']

    # Language, version, install count and size
    pf = soup.find("div", {"class": "pf"})
    basic_info = soup.find("div", {"class": "base-info"})

    if pf:
        pf_s3 = pf.findAll("span", {"class": "s-3"})  # install count, size
        star = pf.find("span", {"class": "s-1 js-votepanel"})  # rating
        if len(pf_s3) == 2:
            install_num = pf_s3[0].text
            size = pf_s3[1].text
            install_num = install_num.replace(u"下载:", "").replace(u"次", "")
        if star:
            star_num = star.text

    if basic_info:
        for info in basic_info.findAll("td"):
            text = info.text
            if u"作者:" in text:
                developers = text.replace(u"作者:", "")
            if u"语言" in text:
                language = text.replace(u"语言:", "")
            if u"版本" in text:
                # Keep only the part before the "versioncode" marker
                version_name = text.split("versioncode")[0].replace(u"版本:", "")
            if u"系统:" in text:
                min_sdk_version = utils.format_android_level(text)

    # Category tags, one per line; tags mentioning "360" are skipped
    if app_tags:
        for tag in app_tags.findAll("a"):
            if "360" in tag.text:
                continue
            category += tag.text + "\n"

    # Icon: the <dt> may not contain an <img>
    if icon_url:
        icon_img = icon_url.find("img")
        icon_url = icon_img.get("src", "") if icon_img else ""

    # Short description: first <p> of the block, minus the editor prefix
    if short_desc:
        short_desc = short_desc.find("p")
        if short_desc:
            short_desc = short_desc.text
            short_desc = short_desc.replace(u"【小编点评】", "")

    result["display_name"] = display_name
    result["introduction"] = introduction
    result["screenshot_url"] = screenshot_url
    result["developers"] = developers
    result["category"] = category
    result["icon_url"] = icon_url
    result["pkg_name"] = pname
    result["version"] = version_name
    result["url1"] = download
    result["language"] = language
    result["version_code"] = version_code
    result["install_num"] = utils.format_install_num(install_num)
    result["size"] = utils.format_file_size(size)
    result["star_num"] = utils.format_star_num(star_num)
    result["min_sdk_version"] = min_sdk_version
    result["short_desc"] = short_desc or ""

    return result
Exemple #8
0
def get_basic_info(html_info):
    """
    Extract an app's basic information from an apk.hiapk.com detail page.

    Name and version are split out of the title element, whose text has
    the form ``Name(version)``. Install count, size, category and language
    are read by position from the ``font14`` spans of the details box.
    A missing element or attribute leaves the field at its default value
    instead of raising.

    :param html_info: HTML source of the detail page. A falsy value
        (``None`` or ``""``) produces an empty dict.
    :return: dict of extracted fields, or ``{}`` when there is no HTML.
        ``introduction`` is ``None`` when its element is absent.
    """
    result = {}

    html = html_info
    if not html:
        return result

    pname = ""
    download = ""
    install_num = ""
    language = ""
    version = ""
    size = ""
    display_name = ""
    category = ""
    icon_url = ""
    star_num = ""
    screenshot_url = ""
    api_level = ""

    soup = BeautifulSoup(html)
    app_version = soup.find(id="appSoftName")
    details = soup.find("div", {"class": "code_box_border"})
    introduction = soup.find(id="softIntroduce")
    screenshot_info = soup.find(id="screenImgUl")
    icon_star = soup.find("div", {"class": "detail_content"})

    # Title: "Name(version)". When the parenthesised part is missing the
    # whole title is kept as the name and the version stays empty, rather
    # than failing on a non-matching regex.
    if app_version:
        title = app_version.text
        display_name = title.split("(")[0] if "(" in title else title
        search = re.search(r"\(.*\)", title, re.M | re.I)
        if search:
            version = search.group().replace("(", "").replace(")", "")

    if details:
        # Each field sits at a fixed position among the font14 spans
        for i, span in enumerate(details.findAll("span", {"class": "font14"})):
            if i == 1:
                install_num = span.text
                if install_num:
                    install_num = install_num.replace(u"热度", "")
            elif i == 3:
                size = span.text
            elif i == 4:
                category = span.text
            elif i == 6:
                language = span.text

        # Minimum platform requirement; stays "" when the span is absent
        api_span = details.find("span", {"class": "font14 detailMiniSdk d_gj_line left"})
        if api_span:
            api_level = api_span.text

        # Download link: for "appdown" links the package name is taken
        # from the third path segment of the site-relative href, and the
        # site host is then prefixed to the link.
        download_info = details.find("a", {"class": "link_btn"})
        if download_info:
            download = download_info.get('href', "")
            if "appdown" in download:
                pname_info = download.split("/")
                download = "http://apk.hiapk.com" + download
                if len(pname_info) > 2:
                    pname = pname_info[2].split("?")[0]

    # Icon and rating: the rating is the raw class attribute, handed to
    # utils.format_star_num below.
    if icon_star:
        icon = icon_star.find("img")
        star = icon_star.find("div", attrs={"class": re.compile(r"star_bg  star_m")})
        if icon:
            icon_url = icon.get('src', "")
        if star:
            star_num = star.get('class', "")

    # Screenshot URLs, one per line
    if screenshot_info:
        for img in screenshot_info.findAll('img'):
            src = img.get('src')
            if src:
                screenshot_url += src + "\n"

    if introduction:
        introduction = introduction.text

    result["pkg_name"] = pname
    result["version"] = version
    result["url1"] = download
    result["language"] = language
    result["display_name"] = display_name
    result["introduction"] = introduction
    result["screenshot_url"] = screenshot_url
    result["developers"] = ""
    result["category"] = category
    result["icon_url"] = icon_url
    result["version_code"] = ""
    result["install_num"] = utils.format_install_num(install_num)
    result["size"] = utils.format_file_size(size)
    result["star_num"] = utils.format_star_num(star_num, 0.2)
    result["min_sdk_version"] = utils.format_android_level(api_level)
    result["short_desc"] = ""

    return result
Exemple #9
0
def _flatten_360_markup(node, paragraph_breaks=False):
    """
    Render a parsed HTML node as plain text while keeping its line breaks.

    ``<br />`` and ``</td>`` (and ``</p>`` when requested) are tagged with a
    ``$##$`` placeholder before the markup is parsed again, so the breaks
    survive tag stripping and can be turned into newlines afterwards.

    :param node: parsed element to flatten
    :param paragraph_breaks: also start a new line at every ``</p>``
    :return: text content of the node
    """
    markup = str(node).replace("<br />", "$##$")
    markup = markup.replace("</td>", "$##$</td>")
    if paragraph_breaks:
        markup = markup.replace("</p>", "$##$</p>")
    text = BeautifulSoup(markup).text
    text = text.replace("$##$", "\n").replace("&nbsp;", " ")
    # Put the "versioncode" / "updatetime" markers on lines of their own.
    text = text.replace("versioncode", "\nversioncode")
    return text.replace("updatetime", "\nupdatetime")


def get_basic_info(html_info):
    """
    Get the basic information of a game from its detail page.

    Missing inner tags or attributes (no ``<span>`` in the name element, no
    ``<img>`` in the icon element, images without ``src``) are skipped
    instead of raising.

    :param html_info: HTML source of the detail page
    :return: dict of the extracted fields; empty when ``html_info`` is empty
    """
    result = {}

    html = html_info
    if not html:
        return result

    soup = BeautifulSoup(html)
    name_tag = soup.find(id="app-name")
    intro_tag = soup.find(id="html-brief")
    app_tags = soup.find("div", {"class": "app-tags"})
    icon_tag = soup.find("dt")
    short_desc = soup.find("dl", {"class": "clearfix"})

    screenshot_url = ""
    developers = ""
    category = ""
    pname = ""
    download = ""
    install_num = ""
    language = ""
    version_name = ""
    version_code = 0
    size = 0
    star_num = 0
    min_sdk_version = 0

    # Display name: prefer the inner <span>, fall back to the element itself.
    display_name = None
    if name_tag:
        name_span = name_tag.find("span")
        display_name = (name_span if name_span is not None else name_tag).text

    # If the primary description container is missing, try the second one.
    if not intro_tag:
        intro_tag = soup.find("div", {"class": "infors"})

    # Description and screenshots.
    introduction = intro_tag
    if intro_tag:
        scroll = intro_tag.find(id="scrollbar")
        desc = intro_tag.find("div", {"class": "breif"})

        # Screenshot URLs come from the scrollbar's comma separated
        # "data-snaps" attribute, or from the <img> tags when it is absent.
        if scroll:
            shots = (scroll.get("data-snaps") or "").split(",")
        else:
            shots = [img.get("src") for img in intro_tag.findAll("img")]
        screenshot_url = "".join(
            shot + "\n" for shot in shots if shot and "icon.png" not in shot)

        if desc:
            introduction = _flatten_360_markup(desc)
        else:
            introduction = _flatten_360_markup(intro_tag,
                                               paragraph_breaks=True)

    # Package name and download address are embedded in an inline script.
    for script in soup.findAll('script'):
        pkg_infos = script.text
        if "var detail = (function () {" not in pkg_infos:
            continue
        search = re.search(r"return {([\s\S]*)};", pkg_infos, re.M | re.I)
        if search:
            # Strip the JS wrapper and switch to double quotes so the object
            # literal can be handed to the JSON parser.
            data = search.group().replace("return", "").replace(
                ";", "").replace("'", "\"")
            pkg = json.loads(data)
            pname = pkg['pname']
            download = pkg['downloadUrl']
            version_code = pkg['vcode']

    # Install count, size and rating.
    pf = soup.find("div", {"class": "pf"})
    basic_info = soup.find("div", {"class": "base-info"})

    if pf:
        pf_s3 = pf.findAll("span", {"class": "s-3"})  # download count, size
        star = pf.find("span", {"class": "s-1 js-votepanel"})  # rating
        if len(pf_s3) == 2:
            install_num = pf_s3[0].text.replace(u"下载:", "").replace(
                u"次", "")
            size = pf_s3[1].text
        if star:
            star_num = star.text

    # Author, language, version and minimum system version.
    if basic_info:
        for info in basic_info.findAll("td"):
            text = info.text
            if u"作者:" in text:
                developers = text.replace(u"作者:", "")
            if u"语言" in text:
                language = text.replace(u"语言:", "")
            if u"版本" in text:
                # Keep only the part before the "versioncode" marker.
                version_name = text.split("versioncode")[0].replace(
                    u"版本:", "")
            if u"系统:" in text:
                min_sdk_version = utils.format_android_level(text)

    # Tags / category, one per line; tags mentioning "360" are dropped.
    if app_tags:
        for tag in app_tags.findAll("a"):
            if "360" in tag.text:
                continue
            category += tag.text + "\n"

    # Icon URL; stays None when the page has no <dt> at all.
    icon_url = icon_tag
    if icon_tag:
        icon_img = icon_tag.find("img")
        icon_url = (icon_img.get("src") or "") if icon_img else ""

    # Short description.
    if short_desc:
        short_desc = short_desc.find("p")
        if short_desc:
            short_desc = short_desc.text.replace(u"【小编点评】", "")

    result["display_name"] = display_name
    result["introduction"] = introduction
    result["screenshot_url"] = screenshot_url
    result["developers"] = developers
    result["category"] = category
    result["icon_url"] = icon_url
    result["pkg_name"] = pname
    result["version"] = version_name
    result["url1"] = download
    result["language"] = language
    result["version_code"] = version_code
    result["install_num"] = utils.format_install_num(install_num)
    result["size"] = utils.format_file_size(size)
    result["star_num"] = utils.format_star_num(star_num)
    result["min_sdk_version"] = min_sdk_version
    result["short_desc"] = short_desc or ""

    return result
Exemple #10
0
def get_basic_info(html_info):
    """
    Get the basic information of a game from its detail page.

    Icon and screenshot containers without an ``<img>`` (or whose image
    lacks the expected attribute) are skipped instead of raising. The result
    also carries an empty ``short_desc`` entry so it has the same keys as
    the other ``get_basic_info`` variants.

    :param html_info: HTML source of the detail page
    :return: dict of the extracted fields; empty when ``html_info`` is empty
    """
    result = {}

    html = html_info
    if not html:
        return result

    soup = BeautifulSoup(html)
    name = soup.find("div", {"class": "det-name-int"})
    detail = soup.find("div", {"class": "det-app-data-info"})
    app_tags = soup.find(id="J_DetCate")
    icon_info = soup.find("div", {"class": "det-icon"})
    screenshot_info = soup.findAll("div", {"class": "pic-img-box"})
    developers_info = soup.findAll("div", {"class": "det-othinfo-data"})
    params_size = soup.find("div", {"class": "det-size"})
    version_info = soup.find("div", {"class": "det-othinfo-data"})
    star = soup.find("div", {"class": "com-blue-star-num"})

    display_name = ""
    introduction = ""
    screenshot_url = ""
    category = ""
    pname = ""
    download = ""
    install_num = ""
    language = ""
    size = ""
    version = ""
    version_code = ""
    star_num = ""
    icon_url = ""
    developers = ""

    # Display name: keep only what precedes the first "v".
    # NOTE(review): this also truncates names that merely contain a "v";
    # confirm whether only a trailing version suffix should be removed.
    if name:
        display_name = name.text.split("v")[0]

    # Description.
    if detail:
        introduction = detail.text

    # Tags / category.
    if app_tags:
        category = app_tags.text

    # Icon.
    if icon_info:
        icon_img = icon_info.find('img')
        if icon_img:
            icon_url = icon_img.get('src') or ""

    # Screenshot URLs, one per line.
    for screen in screenshot_info:
        img = screen.find('img')
        src = img.get('data-src') if img else None
        if src:
            screenshot_url += src + "\n"

    # The developer is read from the last "det-othinfo-data" block ...
    if developers_info:
        developers = developers_info[-1].text

    # ... and the version from the first one.
    if params_size:
        size = params_size.text
    if version_info:
        version = version_info.text.replace("V", "")
    if star:
        star_num = star.text

    # Package name and download address are embedded in an inline script.
    # The object literal uses bare keys, so the first occurrence of each
    # known key is quoted (in this order) before handing it to the JSON
    # parser.
    bare_keys = ("orgame", "apkName", "apkCode", "appId", "appName",
                 "iconUrl", "appScore", "downTimes", "downUrl", "tipsUpDown")
    for script in soup.findAll('script'):
        pkg_infos = script.text
        if "var appDetailData = {" not in pkg_infos:
            continue
        search = re.search(r"{([\s\S]*)}", pkg_infos, re.M | re.I)
        if search:
            data = search.group()
            for key in bare_keys:
                data = data.replace(key, '"%s"' % key, 1)
            pkg = json.loads(data)
            download = pkg["downUrl"]
            pname = pkg["apkName"]
            install_num = pkg["downTimes"]
            version_code = pkg['apkCode']

    result["display_name"] = display_name
    result["introduction"] = introduction
    result["screenshot_url"] = screenshot_url
    result["developers"] = developers
    result["category"] = category
    result["icon_url"] = icon_url
    result["version"] = version
    result["pkg_name"] = pname
    result["url1"] = download
    result["language"] = language
    result["version_code"] = version_code
    result["install_num"] = utils.format_install_num(install_num)
    result["size"] = utils.format_file_size(size)
    result["star_num"] = utils.format_star_num(star_num, 2)
    result["min_sdk_version"] = 0
    result["short_desc"] = ""

    return result
Exemple #11
0
def perpare_data(info, download_url, file_size, ver_code, ver_name, channel,
                 apk_info):
    """
    Build the label record and the package record for one app.

    :param info: details payload; must contain a "docV2" entry
    :param download_url: download URL(s); surrounding whitespace is stripped
    :param file_size: stored unchanged in both records
    :param ver_code: stored unchanged in the package record
    :param ver_name: version name; only digits and dots are kept
    :param channel: market channel name ("samsung" is stored under its
        Chinese channel name)
    :param apk_info: dict with "pkg_name" and optionally "gameid" and
        "ggvercode"; "ggvercode" is set to "null" on it when missing
    :return: ``(label_info, pkg_info)`` dicts, or None when ``info`` is empty
    :raises KeyError: when a required entry is missing from ``info`` or
        ``apk_info``
    """
    if not info:
        return
    doc = info["docV2"]

    market_channel = channel
    if channel == "samsung":
        market_channel = "三星"

    game_name = doc["title"]
    pkg_name = apk_info["pkg_name"]
    game_desc = doc["descriptionHtml"]
    if game_desc:
        # NOTE(review): the original replaced "<p>" twice (the second call
        # was a no-op); "</p>" may have been intended - confirm.
        game_desc = game_desc.replace("<br>", "\n").replace("<p>", "\n")

    app_details = doc['details']['appDetails']
    game_types = app_details["appCategory"][0]
    try:
        game_types = google_game_type[game_types]
    except (LookupError, TypeError):
        # Unknown category: fall back to the generic type.
        game_types = '其他'
    downloaded_cnts = app_details["numDownloads"]
    developer = app_details['developerName']
    utils.check_developer(developer)
    game_language = "多国语言"

    # Image type 4 is used as the icon, type 1 entries as screenshots.
    screen_shot_urls = ""
    icon_url = ""
    for image in doc["image"] or ():
        image_type = image["imageType"]
        image_url = image["imageUrl"]
        if image_type == 4:
            icon_url = image_url
        if image_type == 1:
            screen_shot_urls += image_url + "\n"

    is_crack_apk = 1  # cracked build
    star_num = doc["aggregateRating"]["starRating"]
    now = str(int(time.time()))

    game_name = ftoj(game_name)
    game_desc = ftoj(game_desc)

    # The label id is derived from the name with its channel suffix.
    g_name = game_name + u"(%s)" % channel
    if channel == "samsung":
        g_name = game_name + u"(samsung)"
    game_id = utils.gen_label_info_id(g_name)
    g_name = g_name.replace(u"(GG官方)", "")
    if ver_name:
        # Keep digits and dots only. A join (rather than filter()) yields a
        # string on every Python version.
        ver_name = "".join(ch for ch in ver_name if ch in '0123456789.')
    if 'gameid' in apk_info:
        # An explicit game id overrides the generated one.
        game_id = apk_info['gameid']

    label_info = dict()
    label_info["game_id"] = game_id
    label_info["game_name"] = g_name
    label_info["game_types"] = game_types
    label_info["origin_types"] = game_types
    label_info["screen_shot_urls"] = screen_shot_urls
    label_info["icon_url"] = icon_url
    label_info["detail_desc"] = game_desc
    label_info["star_num"] = utils.format_star_num(str(star_num), 2)
    label_info["download_counts"] = utils.format_install_num(downloaded_cnts)
    label_info["game_language"] = game_language
    label_info["now"] = now
    label_info["file_size"] = file_size
    label_info["ver_name"] = ver_name
    label_info["developer"] = developer

    pkg_info = dict()
    pkg_info["market_channel"] = market_channel
    pkg_info["game_name"] = g_name
    pkg_info["pkg_name"] = pkg_name
    pkg_info["ver_code"] = ver_code
    pkg_info["ver_name"] = ver_name
    pkg_info["file_size"] = file_size
    pkg_info["download_urls"] = download_url.strip()
    pkg_info["game_desc"] = game_desc
    pkg_info["game_types"] = game_types
    pkg_info["origin_types"] = game_types
    pkg_info["downloaded_cnts"] = utils.format_install_num(downloaded_cnts)
    pkg_info["game_language"] = game_language
    pkg_info["screen_shot_urls"] = screen_shot_urls
    pkg_info["icon_url"] = icon_url
    pkg_info["now"] = now
    pkg_info["is_crack_apk"] = is_crack_apk
    # The caller's dict is updated in place, as before.
    apk_info.setdefault("ggvercode", "null")
    pkg_info["apk_id"] = utils.gen_pkg_info_id(0, pkg_name, ver_name,
                                               market_channel,
                                               apk_info["ggvercode"])
    pkg_info["game_id"] = game_id
    pkg_info["url4details"] = (
        "https://play.google.com/store/apps/details?id=%s" % pkg_name)
    return label_info, pkg_info
Exemple #12
0
def get_basic_info(html_info):
    """
    Get the basic information of an app from its detail page.

    Relative download links are resolved against ``http://app.mi.com``; a
    link that already contains that prefix is used as is. Missing inner
    tags or attributes are skipped instead of raising.

    :param html_info: HTML source of the detail page
    :return: dict of the extracted fields; empty when ``html_info`` is empty
    """
    result = {}

    html = html_info
    if not html:
        return result

    pname = ""
    download = ""
    language = ""
    version = ""
    size = ""
    display_name = ""
    developer = ""
    category = ""
    icon_url = ""
    star_num = ""
    screenshot_url = ""
    introduction = ""

    soup = BeautifulSoup(html)

    app_info = soup.find("div", {"class": "app-info"})
    download_info = soup.find("a", {"class": "download"})
    details = soup.find("div", {"class": "details preventDefault"})
    introductions = soup.find("p", {"class": "pslide"})
    screenshot_info = soup.find(id="J_thumbnail_wrap")

    # Download address. The prefix test has to look at the href itself:
    # testing it against the tag only compares with the tag's children.
    if download_info:
        href = download_info.get('href') or ""
        if "http://app.mi.com" in href:
            download = href
        elif href:
            download = "http://app.mi.com" + href

    # Size, version and package name sit at fixed positions of the list.
    if details:
        detail_list = details.find("ul")
        items = detail_list.findAll("li") if detail_list else []
        if len(items) > 1:
            size = items[1].text
        if len(items) > 3:
            version = items[3].text
        if len(items) > 7:
            pname = items[7].text

    if app_info:
        name = app_info.find("h3")
        icon = app_info.find("img")
        category_tag = app_info.find("p", {"class": "special-font action"})
        paragraphs = app_info.findAll("p")
        star = app_info.find("div",
                             attrs={"class": re.compile(r"star1-hover")})
        if name:
            display_name = name.text
        if icon:
            icon_url = icon.get('src') or ""
        if category_tag:
            category = category_tag.text
            if "|" in category:
                # Keep the part before the first "|", minus its label.
                category = category.split("|")[0].replace(u"分类:", "")
        # The developer is only taken when exactly two paragraphs exist.
        if len(paragraphs) == 2:
            developer = paragraphs[0].text
        if star:
            # The rating is the third "-" separated piece of the class value.
            nums = star['class'].split('-', 2)
            if len(nums) == 3:
                star_num = nums[2]

    # Screenshot URLs, one per line.
    if screenshot_info:
        for img in screenshot_info.findAll('img'):
            src = img.get('src')
            if src:
                screenshot_url += src + "\n"

    # Description: tag <br /> with a placeholder so the line breaks survive
    # tag stripping, then turn the placeholder into newlines.
    if introductions:
        introductions = str(introductions).replace("<br />", "$##$")
        introductions = BeautifulSoup(introductions)
        introduction = introductions.text.replace("$##$", "\n")

    result["pkg_name"] = pname
    result["version"] = version
    result["url1"] = download
    result["language"] = language
    result["display_name"] = display_name
    result["introduction"] = introduction
    result["screenshot_url"] = screenshot_url
    result["developers"] = developer
    result["category"] = category
    result["icon_url"] = icon_url
    result["version_code"] = ""
    result["install_num"] = ""
    result["size"] = utils.format_file_size(size)
    result["star_num"] = utils.format_star_num(star_num)
    result["min_sdk_version"] = 0
    result["short_desc"] = ""

    return result
def perpare_data(info, download_url, file_size, ver_code, ver_name, channel, apk_info):
    """
    Build the label record and the package record for one app.

    :param info: details payload; must contain a "docV2" entry
    :param download_url: download URL(s); surrounding whitespace is stripped
    :param file_size: stored unchanged in both records
    :param ver_code: stored unchanged in the package record
    :param ver_name: version name; only digits and dots are kept
    :param channel: market channel name ("samsung" is stored under its
        Chinese channel name)
    :param apk_info: dict with "pkg_name" and optionally "gameid" and
        "ggvercode"; "ggvercode" is set to "null" on it when missing
    :return: ``(label_info, pkg_info)`` dicts, or None when ``info`` is empty
    :raises KeyError: when a required entry is missing from ``info`` or
        ``apk_info``
    """
    if not info:
        return
    doc = info["docV2"]

    market_channel = channel
    if channel == "samsung":
        market_channel = "三星"

    game_name = doc["title"]
    pkg_name = apk_info["pkg_name"]
    game_desc = doc["descriptionHtml"]
    if game_desc:
        # NOTE(review): the original replaced "<p>" twice (the second call
        # was a no-op); "</p>" may have been intended - confirm.
        game_desc = game_desc.replace("<br>", "\n").replace("<p>", "\n")

    app_details = doc['details']['appDetails']
    game_types = app_details["appCategory"][0]
    try:
        game_types = google_game_type[game_types]
    except (LookupError, TypeError):
        # Unknown category: fall back to the generic type.
        game_types = '其他'
    downloaded_cnts = app_details["numDownloads"]
    developer = app_details['developerName']
    utils.check_developer(developer)
    game_language = "多国语言"

    # Image type 4 is used as the icon, type 1 entries as screenshots.
    screen_shot_urls = ""
    icon_url = ""
    for image in doc["image"] or ():
        image_type = image["imageType"]
        image_url = image["imageUrl"]
        if image_type == 4:
            icon_url = image_url
        if image_type == 1:
            screen_shot_urls += image_url + "\n"

    is_crack_apk = 1  # cracked build
    star_num = doc["aggregateRating"]["starRating"]
    now = str(int(time.time()))

    game_name = ftoj(game_name)
    game_desc = ftoj(game_desc)

    # The label id is derived from the name with its channel suffix.
    g_name = game_name + u"(%s)" % channel
    if channel == "samsung":
        g_name = game_name + u"(samsung)"
    game_id = utils.gen_label_info_id(g_name)
    g_name = g_name.replace(u"(GG官方)", "")
    if ver_name:
        # Keep digits and dots only. A join (rather than filter()) yields a
        # string on every Python version.
        ver_name = "".join(ch for ch in ver_name if ch in '0123456789.')
    if 'gameid' in apk_info:
        # An explicit game id overrides the generated one.
        game_id = apk_info['gameid']

    label_info = dict()
    label_info["game_id"] = game_id
    label_info["game_name"] = g_name
    label_info["game_types"] = game_types
    label_info["origin_types"] = game_types
    label_info["screen_shot_urls"] = screen_shot_urls
    label_info["icon_url"] = icon_url
    label_info["detail_desc"] = game_desc
    label_info["star_num"] = utils.format_star_num(str(star_num), 2)
    label_info["download_counts"] = utils.format_install_num(downloaded_cnts)
    label_info["game_language"] = game_language
    label_info["now"] = now
    label_info["file_size"] = file_size
    label_info["ver_name"] = ver_name
    label_info["developer"] = developer

    pkg_info = dict()
    pkg_info["market_channel"] = market_channel
    pkg_info["game_name"] = g_name
    pkg_info["pkg_name"] = pkg_name
    pkg_info["ver_code"] = ver_code
    pkg_info["ver_name"] = ver_name
    pkg_info["file_size"] = file_size
    pkg_info["download_urls"] = download_url.strip()
    pkg_info["game_desc"] = game_desc
    pkg_info["game_types"] = game_types
    pkg_info["origin_types"] = game_types
    pkg_info["downloaded_cnts"] = utils.format_install_num(downloaded_cnts)
    pkg_info["game_language"] = game_language
    pkg_info["screen_shot_urls"] = screen_shot_urls
    pkg_info["icon_url"] = icon_url
    pkg_info["now"] = now
    pkg_info["is_crack_apk"] = is_crack_apk
    # The caller's dict is updated in place, as before.
    apk_info.setdefault("ggvercode", "null")
    pkg_info["apk_id"] = utils.gen_pkg_info_id(0, pkg_name, ver_name,
                                               market_channel,
                                               apk_info["ggvercode"])
    pkg_info["game_id"] = game_id
    pkg_info["url4details"] = (
        "https://play.google.com/store/apps/details?id=%s" % pkg_name)
    return label_info, pkg_info