# -*- coding: utf-8 -*-
# Python 2 scraper helpers for Play Store app pages, app reviews, store
# categories, YouTube stats, and Google+ counts. The imports below are what
# this module uses directly; db_app, db_zoom, db_sql, err, util, the
# *_http(s)_get/post helpers, and the android_root/youtube_root constants
# are project-local and assumed to be defined elsewhere.
import json
import urllib
import urlparse
from datetime import datetime

import bs4
from bs4 import BeautifulSoup

def video_read(video_id, app_id, video_href):
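    """Scrape total views, likes, dislikes, and comment count from a YouTube
    watch page and store them; on a 404 the row is flagged instead."""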
    view_total = ''
    view_likes = ''
    view_dislikes = ''
    comments = ''
    param = {
        'v':video_id,
        }
    url_body = urllib.urlencode(param)
    url = '/%s?%s'%(youtube_root, url_body)
    print '** youtube : %s **'%(url)
    status, body = youtube_http_get(url)
    if status == 404:
        db_app.db_execute_g(db_sql.sql_video_update_404, (str(datetime.now()), 1, app_id, video_href))
        return 
    if status != 200:
        raise Exception('youtube http connection status:%s'%(str(status)))
    soup = BeautifulSoup(body, 'html.parser')
    view_total_fa = soup.find_all(name='span', attrs={'class':'watch-view-count'})
    for view_total_f in view_total_fa:
        if view_total_f.strong != None and view_total_f.strong.text != None:
            view_total = view_total_f.strong.text.strip()
    view_likes_fa = soup.find_all(name='span', attrs={'class':'likes'})
    for view_likes_f in view_likes_fa:
        view_likes = view_likes_f.text.strip()
    view_dislikes_fa = soup.find_all(name='span', attrs={'class':'dislikes'})
    for view_dislikes_f in view_dislikes_fa:
        view_dislikes = view_dislikes_f.text.strip()
    comments_fa = soup.find_all(name='span', attrs={'class':'comments-section-stat'})
    for comments_f in comments_fa:
        comments = comments_f.text.replace('(', '').replace(')', '').strip()
    print view_total, view_likes, view_dislikes, comments
    db_app.db_execute_g(db_sql.sql_video_update, (view_total, view_likes, view_dislikes, comments, str(datetime.now()), 1, app_id, video_href))
def review_read_loop(app_id, page_num, review_type, review_sort_order):
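    """Fetch one page of reviews from the Play Store getreviews endpoint,
    parse and store it, and return (status, next page number)."""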
    params = {
        'id':app_id, 
        'reviewSortOrder':review_sort_order,
        'reviewType':review_type,
        'pageNum':page_num
        }
    param = urllib.urlencode(params)
    url = '/store/getreviews'
    print param, url
    status, body = android_https_post(url, param)
    if status == 404:
        print '==: 404'
        db_app.db_execute_g(db_sql.sql_review_read_status_update, (app_id, ))
        return status, page_num
    if status != 200:
        print 'app review https connection error: %s'%(str(status))
        return status, page_num
        #raise Exception('app getreview ajax status != 200')
    body = body.lstrip(")]}'").strip()  # drop Google's anti-XSSI guard prefix before parsing as JSON
    try:
        review_read(app_id, body)
        db_app.db_execute_g(db_sql.sql_review_read_update, (page_num, app_id, ))
        page_num = int(page_num) + 1
    except Exception as e:
        err.except_p(e)
    return status, page_num
def s_task_review_read_main(client_id, limit=1):
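    """Claim up to `limit` pending review-read tasks, mark each app as
    assigned to client_id, and return the jobs keyed by index."""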
    rows = db_app.db_get_g(db_sql.sql_review_read_get_server_task, (limit, ))
    i_t = len(rows)
    i = 0
    jobs = {}
    for row in rows:
        i = i + 1
        #print '%d of %d'%(i, i_t), 
        app_id = row[0]
        page_num = row[1]
        #page_num = 490
        review_type = row[2]
        review_sort_order = row[3]
        job = {
            'app_id':app_id,
            'pageNum':page_num,
            'review_type':review_type,
            'review_sort_order':review_sort_order
            }
        jobs[i] = job
    print jobs
    for job in jobs:
        j = jobs[job]
        app_id = j['app_id']
        db_app.db_execute_g(db_sql.sql_review_read_update_server_task, (client_id, str(datetime.now()), app_id, ))
    return jobs
def app_read_screenshot(app_id, soup):
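    """Insert every screenshot URL found in the overview markup."""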
    screenshots_fa = soup.find_all(name='div', attrs={'class':'doc-overview-screenshots'})
    for screenshots_f in screenshots_fa:
        screenshot_fa = screenshots_f.find_all(name='img', attrs={'itemprop':'screenshots'})
        for screenshot_f in screenshot_fa:
            if screenshot_f.has_attr('src'):
                screenshot = screenshot_f['src'].strip()
                db_app.db_execute_g(db_sql.sql_app_screenshot_insert, (app_id, screenshot))
def s_cron_all_review_read(client_id='dtc'):
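    """Refresh the review-read cron timestamp for every app row returned
    for client_id; returns the number of rows touched."""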
    rows = db_app.db_get_g(db_sql.sql_review_read_get_cron_all, (client_id, ))
    for row in rows:
        app_id = row[0]
        #print app_id
        db_app.db_execute_g(db_sql.sql_review_read_update_cron, (str(datetime.now()), app_id, ))
    return len(rows)
def app_read_video(app_id, soup):
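    """Insert every promo-video URL (flash <param name="movie"> values)
    found in the overview markup."""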
    videos_fa = soup.find_all(name='div', attrs={'class':'doc-overview-videos'})
    for videos_f in videos_fa:
        video_fa = videos_f.find_all(name='param', attrs={'name':'movie'})
        for video_f in video_fa:
            if video_f.has_attr('value'):
                video = video_f['value'].strip()
                db_app.db_execute_g(db_sql.sql_app_video_insert, (app_id, video))
def review_read(app_id, body):
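    """Parse the htmlContent block of a getreviews response and insert each
    review (id, author, date, device, version, title, text, rating)."""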
    j = json.loads(body)
    if 'htmlContent' in j:
        contents = j['htmlContent'].strip()
        soup = BeautifulSoup(contents, 'html.parser')
        review_fa = soup.find_all(name='div', attrs={'class':'doc-review'})
        for review_f in review_fa:
            review_author = ''
            review_date = ''
            review_device = ''
            review_version = ''
            review_id = ''
            review_rating = ''
            review_title = ''
            review_text = ''
            author_fa = review_f.find_all(name='span', attrs={'class':'doc-review-author'})
            for author_f in author_fa:
                review_author = author_f.strong.text.strip()
            date_fa = review_f.find_all(name='span', attrs={'class':'doc-review-date'})
            for date_f in date_fa:
                review_date = date_f.text.replace('-', '').strip()
                if date_f.next_sibling != None:
                    if type(date_f.next_sibling) != bs4.element.NavigableString:
                        continue
                    device_version = date_f.next_sibling.replace('-', '').strip()
                    device_version = device_version.split('with version')
                    if len(device_version) == 2:
                        review_device = device_version[0].strip()
                        review_version = device_version[1].strip()
                    if len(device_version) == 1:
                        review_device = device_version[0].strip()
            id_fa = review_f.find_all(name='div', attrs={'class':'goog-inline-block review-permalink'})
            for id_f in id_fa:
                if id_f.parent.has_attr('href'):
                    review_id = id_f.parent['href'].strip()
                    review_id = urlparse.urlparse(review_id).query
                    review_id = urlparse.parse_qs(review_id)
                    if 'reviewId' in review_id and len(review_id['reviewId']) > 0:
                        review_id = review_id['reviewId'][0]
                    else:
                        review_id = ''
            rating_fa = review_f.find_all(name='div', attrs={'class':'ratings goog-inline-block'})
            for rating_f in rating_fa:
                if rating_f.has_attr('title'):
                    review_rating = rating_f['title'].strip()
                    review_rating = review_rating.split(' ')
                    if len(review_rating) >= 2:
                        review_rating = review_rating[1].strip()
            title_fa = review_f.find_all(name='h4', attrs={'class':'review-title'})
            for title_f in title_fa:
                review_title = title_f.text.strip()
            text_fa = review_f.find_all(name='p', attrs={'class':'review-text'})
            for text_f in text_fa:
                review_text = review_text + text_f.text.strip() + ' '
            if review_id != '':
                db_app.db_execute_g(db_sql.sql_review_insert, (review_id, app_id, review_author, review_date, review_device, review_version, review_title, review_text, review_rating, str(datetime.now()),))
def s_sync_review_read_main(client_id, results):
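    """Write client-reported review-read progress (page number, read status)
    back to the server DB; returns the number of results processed."""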
    #client_id = 'dtc'
    #results = c_app_review.c_sync_review_read_main()
    i = 0
    for result in results:
        i = i + 1
        r = results[result]
        app_id = r['app_id']
        page_num = r['pageNum']
        read_status = r['read_status']
        db_app.db_execute_g(db_sql.sql_review_read_update_server_sync, (page_num, read_status, str(datetime.now()), app_id, ))
    return i
def app_read_tab_permission(app_id, soup):
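    """Store each permission description on the Permissions tab together
    with the title of the group it belongs to."""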
    perm_group_title = ''
    tab_permissions_fa = soup.find_all(name='div', attrs={'class':'doc-specs padded-content2'})
    if len(tab_permissions_fa) <= 0:
        raise Exception('app tab permission len <= 0')
    tab_permissions_fa = tab_permissions_fa[0]
    perm_fa = tab_permissions_fa.find_all(name='li', attrs={'class':'doc-permission-group'})
    for perm_f in perm_fa:
        for pc in perm_f.contents:
            if isinstance(pc, bs4.element.Tag) and pc.has_attr('class'):
                pcc = pc['class']
                if 'doc-permission-group-title' in pcc:
                    perm_group_title = pc.text.strip()
                if 'doc-permission-description' in pcc:
                    perm_each_desc = pc.text.strip()
                    db_app.db_execute_g(db_sql.sql_app_perm_insert, (app_id, perm_group_title, perm_each_desc))
def app_read_banner(app_id, soup):
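    """Parse the app banner (title, developer name/link, icon, badges,
    rating figure, rater count, price) and update the app row."""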
    banner_title = ''
    banner_developer_href = ''
    banner_developer_name = ''
    banner_icon_src = ''
    rating_figure = ''
    raters = ''
    price = ''
    banner_title_fa = soup.find_all(name='td', attrs={'class':'doc-banner-title-container'})
    if len(banner_title_fa) == 1:
        banner_title_f = banner_title_fa[0]
        if banner_title_f.h1 != None:
            banner_title = banner_title_f.h1.text
        if banner_title_f.a != None:
            if banner_title_f.a.has_attr('href'):
                banner_developer_href = banner_title_f.a['href'].strip()
            if banner_title_f.a.text != None:
                banner_developer_name = banner_title_f.a.text.strip()
    banner_icon_fa = soup.find_all(name='div', attrs={'class':'doc-banner-icon'})
    for banner_icon in banner_icon_fa:
        if banner_icon.img != None:
            if banner_icon.img.has_attr('src'):
                banner_icon_src = banner_icon.img['src'].strip()
    banner_annotation_fa = soup.find_all(name='div', attrs={'class':'badges-badge-title goog-inline-block'})
    for banner_annotation in banner_annotation_fa:
        banner_annotation_text = banner_annotation.text.strip()
        db_app.db_execute_g(db_sql.sql_app_awards_insert, (app_id, banner_annotation_text))
    rating_price_fa = soup.find_all(name='td', attrs={'class':'doc-details-ratings-price'})
    if len(rating_price_fa) == 1:
        rating_fa = rating_price_fa[0].find_all(name='div', attrs={'class':'ratings goog-inline-block'})
        for rating_f in rating_fa:
            if rating_f.has_attr('title'):
                rating_title = rating_f['title'].strip()
                rating_figure = rating_title.split(' ')
                if len(rating_figure) >= 2:
                    rating_figure = rating_figure[1].strip()
            if rating_f.next_sibling != None:
                raters_f = rating_f.next_sibling
                if raters_f.text != None:
                    raters = raters_f.text
                    raters = raters.replace('(', '').replace(')', '').strip()
        price_fa = rating_price_fa[0].find_all(name='span', attrs={'class':'buy-button-price'})
        for price_f in price_fa:
            price = price_f.text
            price = price.upper().replace('BUY', '').strip()
    db_app.db_execute_g(db_sql.sql_app_banner_update, (banner_title, banner_icon_src, banner_developer_name, banner_developer_href, rating_figure, raters, price, app_id))
def app_read_metadata(app_id, soup):
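    """Parse the doc-metadata sidebar (updated date, current version,
    required Android, installs, size, category, content rating) and update
    the app row."""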
    meta_update = ''
    meta_current = ''
    meta_require = ''
    meta_install = ''
    meta_size = ''
    meta_category = ''
    meta_rating = ''
    metadata_fa = soup.find_all(name='div', attrs={'class':'doc-metadata'})
    for metadata_f in metadata_fa:
        meta_google_plus_fa = metadata_f.find_all(name='div', attrs={'class':'plus-share-container'})
        for meta_google_plus_f in meta_google_plus_fa:
            first = meta_google_plus_f.contents[0] if len(meta_google_plus_f.contents) > 0 else None
            if isinstance(first, bs4.element.Tag) and first.has_attr('href'):
                meta_google_plus_href = first['href'].strip()
                db_app.db_execute_g(db_sql.sql_app_google_plus_insert, (app_id, meta_google_plus_href))
        meta_update_fa = metadata_f.find_all(name='dt', text='Updated:')
        if len(meta_update_fa) > 0 and meta_update_fa[0].next_sibling != None:
            meta_update_f = meta_update_fa[0].next_sibling
            if meta_update_f.time != None:
                meta_update = meta_update_f.time.text.strip()
        meta_current_fa = metadata_f.find_all(name='dt', text='Current Version:')
        if len(meta_current_fa) > 0 and meta_current_fa[0].next_sibling != None:
            meta_current_f = meta_current_fa[0].next_sibling
            meta_current = meta_current_f.text.strip()
        meta_require_fa = metadata_f.find_all(name='dt', text='Requires Android:')
        if len(meta_require_fa) > 0 and meta_require_fa[0].next_sibling != None:
            meta_require_f = meta_require_fa[0].next_sibling
            meta_require = meta_require_f.text.strip()
        meta_category_fa = metadata_f.find_all(name='dt', text='Category:')
        if len(meta_category_fa) > 0 and meta_category_fa[0].next_sibling != None:
            meta_category_f = meta_category_fa[0].next_sibling
            meta_category = meta_category_f.text.strip()
        meta_install_fa = metadata_f.find_all(name='dt', text='Installs:')
        if len(meta_install_fa) > 0 and meta_install_fa[0].next_sibling != None:
            meta_install_f = meta_install_fa[0].next_sibling
            meta_install = meta_install_f.text
            meta_install = meta_install.upper().replace('LAST 30 DAYS', '').strip()
        meta_size_fa = metadata_f.find_all(name='dt', text='Size:')
        if len(meta_size_fa) > 0 and meta_size_fa[0].next_sibling != None:
            meta_size_f = meta_size_fa[0].next_sibling
            meta_size = meta_size_f.text.strip()
        meta_rating_fa = metadata_f.find_all(name='dt', text='Content Rating:')
        if len(meta_rating_fa) > 0 and meta_rating_fa[0].next_sibling != None:
            meta_rating_f = meta_rating_fa[0].next_sibling
            meta_rating = meta_rating_f.text.strip()
    db_app.db_execute_g(db_sql.sql_app_metadata_update, (meta_update, meta_current, meta_require, meta_install, meta_size, meta_category, meta_rating, app_id))
def s_sync_review_main(client_id, results):
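    """Insert client-collected reviews into the server DB; returns the
    number of reviews processed."""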
    #client_id = 'dtc'
    #results = c_app_review.c_sync_review_main()
    print client_id
    i = 0
    for result in results:
        i = i + 1
        r = results[result]
        review_id = r['review_id']
        app_id = r['app_id']
        reviewer = r['reviewer']
        date = r['date']
        device = r['device']
        version = r['version']
        title = r['title']
        comment = r['comment']
        review_star = r['review_star']
        db_app.db_execute_g(db_sql.sql_review_insert_server_sync, (review_id, app_id, reviewer, date, device, version, title, comment, review_star, ))
    return i
def s_task_category_read_main():
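    """Claim up to `limit` pending category-read tasks, mark each category
    path as assigned to the client, and return the jobs keyed by index."""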
    client_id = 'dtc'
    limit = 10
    rows = db_zoom.db_get_g(db_sql.sql_zoom_cate_read_get_server, (limit, ))
    i_t = len(rows)
    i = 0
    jobs = {}
    for row in rows:
        i = i + 1
        print '%d of %d'%(i, i_t)
        cate_path = row[0]
        cate_param = row[1]
        print cate_path, cate_param
        job = {'cate_path':cate_path, 'cate_param':cate_param}
        jobs[i] = job
    for j in jobs:
        job = jobs[j]
        cate_path = job['cate_path']
        db_app.db_execute_g(db_sql.sql_zoom_cate_read_update_server_task, (client_id, cate_path, ))
    return jobs
def google_plus_read(app_id, google_plus_href):
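    """Fetch the Google+ +1 fastbutton page for google_plus_href and store
    the aggregate +1 count; stores -1 on a 404."""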
    params = {
        'url':google_plus_href,
        }
    param = urllib.urlencode(params)
    url = '/u/0/_/+1/fastbutton?%s'%(param)
    #print param, url
    status, body = plus_https_get(url)
    if status == 404:
        print '==: 404'
        db_app.db_execute_g(db_sql.sql_app_google_plus_update, ('-1', str(datetime.now()), app_id, google_plus_href, ))
        return
    if status != 200:
        raise Exception('app google plus https connection error: %s'%(str(status)))
    soup = BeautifulSoup(body, 'html.parser')
    div_fa = soup.find_all(name='div', attrs={'id':'aggregateCount'})
    for div_f in div_fa:
        google_plus_figure = div_f.text.strip()
        print google_plus_figure
        db_app.db_execute_g(db_sql.sql_app_google_plus_update, (google_plus_figure, str(datetime.now()), app_id, google_plus_href, ))
def app_read(app_id):
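    """Fetch an app detail page and run every section parser over it, then
    mark the app row as read (also marked on a 404)."""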
    try:
        url = '/%s/details?id=%s'%(android_root, app_id)
        print '** app %s **'%(url)
        status, body = android_https_get(url)
        #print status, body
        if status == 404:
            print '== 404'
            db_app.db_execute_g(db_sql.sql_app_read_update, (1, str(datetime.now()), app_id))
            return 
        if status != 200:
            raise Exception('app read https connection error: %s'%(str(status)))
        soup = BeautifulSoup(body, 'html.parser')
        app_read_banner(app_id, soup)
        app_read_tab_overview(app_id, soup)  # defined outside this excerpt; presumably the overview-tab counterpart of app_read_overview below
        app_read_tab_review(app_id, soup)
        app_read_tab_permission(app_id, soup)
        db_app.db_execute_g(db_sql.sql_app_read_update, (1, str(datetime.now()), app_id))
        #util.sleep()
    except Exception as e:
        err.except_p(e)
def categories_read_main():
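    """Scrape the category list from the zoom home page and insert
    (group, name, path) rows."""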
    url = '/'
    print '** categories main %s **'%(url)
    status, body = zoom_http_get(url)
    if status != 200:
        raise Exception('zoom app home http connection error:%s'%(str(status)))
    soup = BeautifulSoup(body, 'html.parser')
    if soup.body.text.strip().find('Access not allowed. If you think this is an error, please contact us at [email protected]') != -1:
        raise Exception('Access not allowed. If you think this is an error, please contact us at [email protected]')
    divs = soup.body.find_all(name='div', attrs={'id':'categories-list'})
    for div in divs:
        for d in div:
            if getattr(d, 'name', None) != 'div':
                continue
            cate_group_name = d.h3.text.strip()
            ul = d.ul
            for li in ul.find_all(name='li'):
                if li.a != None and li.a.has_attr('href'):
                    cate_name = li.a.text.strip()
                    cate_path = li.a['href'].strip()
                    print cate_group_name, cate_name, cate_path
                    db_app.db_execute_g(db_sql.sql_zoom_cate_insert, (cate_group_name, cate_name, cate_path, str(datetime.now())))
def app_read_tab_review(app_id, soup): ## needs more work
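    """Parse the per-star ratings histogram on the Reviews tab and update
    the app's star counts."""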
    rating_0 = ''
    rating_1 = ''
    rating_2 = ''
    rating_3 = ''
    rating_4 = ''
    rating_5 = ''
    tab_review = soup.find_all(name='div', attrs={'class':'doc-reviews padded-content2'})
    if len(tab_review) <= 0:
        raise Exception('app tab review len <= 0')
    tab_review = tab_review[0]
    review_head_fa = tab_review.find_all(name='div', attrs={'class':'reviews-heading-container'})
    for review_head_f in review_head_fa:
        user_rating_fa = review_head_f.find_all(name='div', attrs={'class':'user-ratings'})
        if len(user_rating_fa) <= 0:
            return 
            #raise Exception('app tab review user rating len <= 0')
        user_rating_fa = user_rating_fa[0]
        rating_tr_fa = user_rating_fa.find_all(name='span', attrs={'class':'histogram-label'})
        for rating_tr_f in rating_tr_fa:
            rating_star = rating_figure = 'None'
            if rating_tr_f.has_attr('data-rating'):
                rating_star = rating_tr_f['data-rating'].strip()
            if rating_tr_f.parent != None and rating_tr_f.parent.next_sibling != None:
                rating_figure = rating_tr_f.parent.next_sibling
                rating_figure = rating_figure.text.strip()
            if rating_star == '0':
                rating_0 = rating_figure
            if rating_star == '1':
                rating_1 = rating_figure
            if rating_star == '2':
                rating_2 = rating_figure
            if rating_star == '3':
                rating_3 = rating_figure
            if rating_star == '4':
                rating_4 = rating_figure
            if rating_star == '5':
                rating_5 = rating_figure
    db_app.db_execute_g(db_sql.sql_app_rating_update, (rating_0, rating_1, rating_2, rating_3, rating_4, rating_5, app_id))
def app_read_overview(app_id, soup):
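    """Parse the Overview tab (description, developer website, developer
    email, privacy-policy link) and update the app row."""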
    desc = ''
    developer_website = ''
    developer_email = ''
    developer_privacy = ''
    overview_fa = soup.find_all(name='div', attrs={'class':'doc-overview'})
    for overview_f in overview_fa:
        desc_fa = overview_f.find_all(name='div', attrs={'id':'doc-original-text'})
        for desc_f in desc_fa:
            desc = desc_f.text.strip()
        developer_website_fa = overview_f.find_all(name='a', text="Visit Developer's Website")
        for developer_website_f in developer_website_fa:
            if developer_website_f.has_attr('href'):
                developer_website = developer_website_f['href'].strip()
        developer_email_fa = overview_f.find_all(name='a', text='Email Developer')
        for developer_email_f in developer_email_fa:
            if developer_email_f.has_attr('href'):
                developer_email = developer_email_f['href']
                developer_email = developer_email.replace('mailto:', '').strip()
        developer_privacy_fa = overview_f.find_all(name='a', text='Privacy Policy')
        for developer_privacy_f in developer_privacy_fa:
            if developer_privacy_f.has_attr('href'):
                developer_privacy = developer_privacy_f['href'].strip()
    db_app.db_execute_g(db_sql.sql_app_overview_update, (desc, developer_website, developer_email, developer_privacy, app_id))
def review_read_main_init():
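    """Seed the review-read queue with one row per app that still needs
    its reviews collected."""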
    rows = db_app.db_get_g(db_sql.sql_review_read_app_get, ())
    for row in rows:
        app_id = row[0]
        db_app.db_execute_g(db_sql.sql_review_read_insert, (app_id,))
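
# The sketch below is not part of the scraped source: it is a minimal,
# hedged example of how the review pipeline above appears intended to run
# (seed the queue, claim jobs for a client, then fetch one page per job).
# The default client id 'dtc' mirrors the value used elsewhere in this module.
def run_review_jobs(client_id='dtc', limit=1):
    review_read_main_init()                            # one queue row per app
    jobs = s_task_review_read_main(client_id, limit)   # claim pending jobs
    for key in jobs:
        job = jobs[key]
        status, next_page = review_read_loop(job['app_id'], job['pageNum'],
                                             job['review_type'], job['review_sort_order'])
        print '%s -> status %s, next page %s'%(job['app_id'], status, next_page)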