Code Example #1
def get_app_details(app_id):
    """Scrape the title, genre, description, score and meta info for a Play Store app id."""
    # print("Getting details for app with id '{}'".format(app_id))
    details = {}
    app_page_url = "https://play.google.com/store/apps/details?id=" + app_id + "&hl=en_GB"
    raw_html = simple_get(app_page_url)
    if not raw_html:
        return app_id, None
    soup = BeautifulSoup(raw_html, 'html.parser')
    title = soup.find("div", {"class": "id-app-title"}).text
    details['title'] = title.strip()
    genre = soup.find("span", {"itemprop": "genre"}).text
    details['genre'] = genre.strip()
    desc = soup.find("div", {"itemprop": "description"}).text
    desc = desc.replace('<br>', '\n')
    details['description'] = desc.strip()
    score = soup.find("div", {"class": "score"})
    details['score'] = score.text.strip() if score else None

    meta_info = soup.findAll("div", {"class": "meta-info"})
    for info in meta_info:
        info_title = info.find("div", {"class": "title"}).text
        info_content = info.find(attrs={"class": "content"}).text
        details[info_title.strip()] = info_content.strip()

    # pprint(details)
    print("App details retrieved for app with id '{}'".format(app_id))

    return app_id, details
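
Most of these examples call a simple_get helper that is not shown on this page (Code Example #2 imports it from a get_url module). A minimal sketch of what it might look like, assuming the common requests-based pattern of returning the raw HTML on success and None on failure; the use_proxies keyword is accepted only because some of the examples pass it:

import requests

def simple_get(url, use_proxies=False):
    # Hypothetical sketch of the helper assumed by these examples; the real
    # implementation lives elsewhere. use_proxies is ignored in this sketch.
    try:
        resp = requests.get(url, timeout=10)
        resp.raise_for_status()
        if "html" in resp.headers.get("Content-Type", ""):
            return resp.text
        return None
    except requests.RequestException:
        return None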
Code Example #2
def get_html(url):
    """Fetch a URL via get_url.simple_get, returning None if the request fails."""
    print('Getting URL...')
    try:
        raw_html = get_url.simple_get(url)
        print('Retrieved URL!')
    except Exception:
        # Swallow the error and signal failure with None instead of crashing.
        return None
    return raw_html
Code Example #3
def get_similar_properties(Zoopla_url):
    """Return the anchor tags linking to similar properties on a Zoopla listing page."""
    house = simple_get(Zoopla_url)
    house_html = BeautifulSoup(house, 'html.parser')

    similar_properties = house_html.find_all("a",
                                             class_="ui-property-card__link",
                                             href=True)

    return similar_properties
Code Example #4
def get_apps_on_page(url):
    """Collect the app ids linked from a Play Store page."""
    app_ids = set()
    raw_html = simple_get(url, use_proxies=False)
    soup = BeautifulSoup(raw_html, 'html.parser')
    signature = "/store/apps/details?id="
    for a in soup.select('a'):
        href = a.get('href', '')  # some anchors carry no href attribute
        if href.startswith(signature):
            app_ids.add(href[len(signature):])
    return app_ids
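
A hypothetical usage of this function, reusing the category URL that appears in the commented-out test in Code Example #9:

# Hypothetical usage: list the app ids linked from one Play Store category page.
page_url = "https://play.google.com/store/apps/category/COMMUNICATION"
for app_id in sorted(get_apps_on_page(page_url)):
    print(app_id)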
Code Example #5
def get_sub_categorys(url):
    """Map sub-category names to their URLs from a Play Store category page."""
    sub_categories = {}
    raw_html = simple_get(url, use_proxies=False)
    html = BeautifulSoup(raw_html, 'html.parser')
    for h2 in html.findAll('h2'):
        for a in h2.select('a'):
            name = a.text.strip()
            link = a['href']  # avoid shadowing the url parameter
            if name not in ("See more", "Recommended for you"):
                sub_categories[name] = link
    # pprint(sub_categories)
    return sub_categories
Code Example #6
def get_house_views(Zoopla_url):
    '''
    For a given house URL on Zoopla, return the number of page views in the last 30 days
    and the number of page views since the house was listed.
    '''
    house = simple_get(Zoopla_url)
    house_html = BeautifulSoup(house, 'html.parser')

    views = house_html.find_all("span", class_="dp-view-count__value")

    last_30_days = get_int_from_string(views[0].text)
    since_listed = get_int_from_string(views[1].text)

    return last_30_days, since_listed
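
This function relies on a get_int_from_string helper that is not shown. A minimal sketch, assuming it simply pulls the digits out of a string such as "1,234 views":

import re

def get_int_from_string(s):
    # Hypothetical helper: keep only the digits and return them as an int (0 if none found).
    digits = re.sub(r"\D", "", s)
    return int(digits) if digits else 0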
Code Example #7
def get_houses_in_postcode_segment(postcode):
    """Average the last-30-day view counts over all listings in a postcode segment."""
    zoopla_url = "https://www.zoopla.co.uk/for-sale/property/{}/?page_size=100".format(
        postcode)
    #print(zoopla_url)
    houses = BeautifulSoup(simple_get(zoopla_url), 'html.parser')
    house_links = houses.find_all("h2", class_="listing-results-attr")

    total_views = 0
    count = 0

    for link in house_links:
        url = link.find('a', href=True)['href']
        house_url = "https://www.zoopla.co.uk{}".format(url)
        last_30, all_views = get_house_views(house_url)
        address = get_address(house_url)
        #print("{a} has had {v} views in the last 30 days".format(a=address, v=last_30))
        total_views += last_30
        count += 1
    if count > 0:
        avg_views = total_views / count
    else:
        avg_views = 0
    #print("the average views in the last 30 days for {postcode} is: {a}".format(postcode=postcode, a=avg_views))
    return avg_views
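
A hypothetical call; "SW1A" is only a placeholder postcode segment:

avg = get_houses_in_postcode_segment("SW1A")
print("Average views in the last 30 days: {:.1f}".format(avg))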
Code Example #8
# Tail of the check_ignore_list(str1, time) helper used in the loop below; the
# start of the function is not included in this snippet.
        if str(int(time) - 1173) in ignore_list[str1]:
            return True
    return False


for location in location_list:
    print("============================")
    print(location['loc'] + ':')
    print("============================")
    # Build the date as URL-encoded MM/DD/YYYY (slashes encoded as %2F).
    date = datetime.datetime.now().strftime("%m%%2F%d%%2F%Y")
    for time in location['time']:
        print("----------------------------")
        print("Time: " + time)
        print("----------------------------")
        # Shift the configured time by the same fixed offset that check_ignore_list reverses.
        time = str(int(time) + 1173)
        #print(base_url.format(location['loc'], time, date))

        raw_html = simple_get(base_url.format(location['loc'], time, date))
        html = BeautifulSoup(raw_html, 'html.parser')
        for station in html.find_all(class_="menu__station"):
            station_name = station.find('h2').text
            if not check_ignore_list(station_name, time):
                print(station_name + ":")
                for meal_item in station.find_all("li"):
                    name = meal_item.find(class_="item__name")
                    name_pre = "\t- "
                    if name and not name.find("a"):
                        print(name_pre + name.string.strip())
                    elif name and name.find("a"):
                        print(name_pre + name.find("a").string.strip())
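
This snippet refers to base_url, location_list and ignore_list, which are defined elsewhere in the script. Hypothetical placeholder definitions that match the shapes the code expects:

# Hypothetical placeholders only; the real values are defined elsewhere.
base_url = "https://example.com/menu?loc={}&time={}&date={}"  # template taking location, time code and date
location_list = [
    {"loc": "north-dining", "time": ["1", "2", "3"]},
]
ignore_list = {"Salad Bar": ["1"]}  # station name -> original time strings to skip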
Code Example #9
    # Tail of get_apps_on_page(url) (see Code Example #4); the start of the function is cut off in this snippet.
    raw_html = simple_get(url, use_proxies=False)
    soup = BeautifulSoup(raw_html, 'html.parser')
    signature = "/store/apps/details?id="
    for a in soup.select('a'):
        href = a.get('href', '')  # some anchors carry no href attribute
        if href.startswith(signature):
            app_ids.add(href[len(signature):])
    return app_ids


if __name__ == "__main__":
    main_page_url = "https://play.google.com"

    # get main categories
    app_store_page = main_page_url + "/store/apps"
    raw_html = simple_get(app_store_page, use_proxies=False)
    categories = get_app_categories(raw_html)
    pprint(categories)

    # get sub categories (test)
    # url = "https://play.google.com/store/apps/category/COMMUNICATION"
    # sub_cats = get_sub_categorys(url)

    # get all app ids from page (test)
    # url = "https://play.google.com/store/apps/collection/promotion_cp_messaging_apps?clp=SjIKIQobcHJvbW90aW9uX2NwX21lc3NhZ2luZ19hcHBzEAcYAxINQ09NTVVOSUNBVElPTg%3D%3D:S:ANO1ljIyLhU"
    # print(get_apps_on_page(url))

    # get app ids from multiple pages
    all_app_ids = set()
    for category in categories:
        c_url = main_page_url + category
Code Example #10
def get_address(Zoopla_url):
    """Return the display address from a Zoopla listing page."""
    house = simple_get(Zoopla_url)
    house_html = BeautifulSoup(house, 'html.parser')

    return house_html.find_all("h2",
                               class_="ui-property-summary__address")[0].text