def review_read_loop(app_id, page_num, review_type, review_sort_order): params = { 'id':app_id, 'reviewSortOrder':review_sort_order, 'reviewType':review_type, 'pageNum':page_num } param = urllib.urlencode(params) url = '/store/getreviews' print param, url ### only for test, this part #if int(page_num)>2: # db_review.db_execute_g(db_sql.sql_review_read_status_update, (app_id, )) # return 404, page_num ### status, body = android_https_post(url, param) if status == 404: print '==: 404' db_review.db_execute_g(db_sql.sql_review_read_status_update, (app_id, )) return status, page_num if status != 200: print 'app review https connection error: %s'%(str(status)) return status, page_num #raise Exception('app getreview ajax status != 200') body = body.lstrip(")]}'").strip() try: review_read(app_id, body) db_review.db_execute_g(db_sql.sql_review_read_update, (page_num, app_id, )) page_num = int(page_num) + 1 except Exception as e: err.except_p(e) return status, page_num
def main():
    """Run the full read pipeline once: categories, category, then apps."""
    db_init()
    # The first two stages can be commented out after the very first run.
    stages = (categories_read_main, category_read_main, app_read_main)
    try:
        for stage in stages:
            stage()
    except Exception as e:
        err.except_p(e)
def main():
    """Keep running video_read_main until it reports no more work."""
    db_app.db_init()
    done = False
    while not done:
        try:
            done = video_read_main()
        except Exception as e:
            err.except_p(e)
def app_read_main():
    """Run one pass of app reading.

    Returns the finish flag from app_read_main_temp(), or False when the
    pass raised, so the caller keeps looping.
    """
    try:
        return app_read_main_temp()
    except Exception as e:
        # Bug fix: was err.except_p('e') — it logged the literal string 'e'
        # instead of the caught exception (every sibling passes e itself).
        err.except_p(e)
        return False
def main():
    """Loop category reading until category_read_main reports no work left."""
    db_init()
    pending = True  # comment this if run after first time
    while pending:
        try:
            categories_read_main()
            pending = category_read_main()
        except Exception as e:
            # On failure the flag keeps its value, so the loop retries.
            err.except_p(e)
def main():
    """One-shot lib pipeline: languages, home pages, then categories."""
    db_init()
    try:
        # The first three stages only need the very first run; comment
        # them out on later runs.
        language_read_main()
        home_read_main()
        home_read_max_main()
        cate_read_main()
        cate_link_read_main()
    except Exception as e:
        err.except_p(e)
def task_app_review_get(): page = page_get(client_id, 'task', '', '') if page is None: err.except_p('page is none') try: j = json.loads(page) print 'TASK review:', len(j) app_review.c_task_review_read_main(j) except Exception as e: err.except_p(e)
def main():
    """Drive review reading: one priming pass, then a retry loop."""
    db_app.db_init()
    review_read_main_init()
    # NOTE: the first pass runs outside the try/except, as in the original.
    done = review_read_main()
    while not done:
        try:
            done = review_read_main()
        except Exception as e:
            err.except_p(e)
def google_plus_read_main():
    """Read the Google+ page for every pending app row.

    Returns True when there was nothing left to process, False otherwise —
    consistent with the other *_read_main loops (e.g. video_read_main).
    """
    finish = True
    rows = db_app.db_get_g(db_sql.sql_app_google_plus_get, ())
    for row in rows:
        finish = False
        app_id = row[0]
        google_plus_href = row[1]
        try:
            google_plus_read(app_id, google_plus_href, )
            util.sleep()
        except Exception as e:
            err.except_p(e)
    # Bug fix: the finish flag was computed but never returned, so callers
    # could not tell when the work queue was empty.
    return finish
def category_read_main():
    """Process every category still pending a read.

    Returns True when no rows were pending, False otherwise.
    """
    rows = db_play.db_get_g(db_sql.sql_cate_read_get, ())
    done = True
    for row in rows:
        done = False
        cate_name, cate_path, cate_param, cate_type = row[0], row[1], row[2], row[3]
        try:
            category_read(cate_path, cate_name, cate_type, cate_param)
            # Mark this category as read only after a successful parse.
            db_play.db_execute_g(db_sql.sql_cate_read_update,
                                 (cate_name, cate_path, cate_param, cate_type, ))
        except Exception as e:
            err.except_p(e)
        util.sleep()
    return done
def cate_link_read_main():
    """Walk every pending (language, category) link and read it."""
    global lib_host_http
    global lib_conn_http
    rows = db_lib.db_get_g(db_sql.sql_lib_lang_cate_link_read_get, ())
    for row in rows:
        lang_href, cate_path, cate_param, link_href = row[0], row[1], row[2], row[3]
        host = lang_href.replace('http://', '').replace('/', '').strip()
        # Rebuild the shared connection only when the language host changes.
        if lib_host_http != host:
            lib_host_http = host
            lib_conn_http = http.get_conn_http(lib_host_http)
        try:
            cate_link_read(link_href, lang_href, cate_path, cate_param)
            util.sleep_i(1)
        except Exception as e:
            err.except_p(e)
def developer_read_store(developer_href, start_num): url = '%s&start=%d&num=12'%(developer_href, start_num) print '** developer %s **'%(url) try: status, body = android_https_get(url) if status == 404: print '== 404' db_developer.db_execute_g(db_sql.sql_developer_store_read_status_update, (developer_href, )) return False if status != 200: raise Exception('app read https connection error: %s'%(str(status))) soup = BeautifulSoup(body) developer_read_store_website(developer_href, soup) developer_read_store_app(developer_href, soup) db_developer.db_execute_g(db_sql.sql_developer_store_start_num_update, (start_num, developer_href,)) ## record this page has been successfully read return True except Exception as e: err.except_p(e) return False
def related_read(app_id): try: url = '/%s/details?id=%s'%(android_root, app_id) print '** related %s **'%(url) status, body = android_https_get(url) #print status, body if status == 404: print '== 404' db_related.db_execute_g(db_sql.sql_related_read_update, (1, str(datetime.now()), app_id)) return if status != 200: raise Exception('related read https connection error: %s'%(str(status))) soup = BeautifulSoup(body) related_view(app_id, soup) related_install(app_id, soup) db_related.db_execute_g(db_sql.sql_related_read_update, (1, str(datetime.now()), app_id)) util.sleep() except Exception as e: err.except_p(e)
def video_read_main(): finish = True rows = db_app.db_get_g(db_sql.sql_video_get, ()) i_t = len(rows) i = 0 for row in rows: i = i + 1 print '%d of %d'%(i, i_t), finish = False app_id = row[0] video_href = row[1] view_total = row[2] video_href_d = video_href.split('/')[-1] video_id = video_href_d.split('?')[0].strip() try: video_read(video_id, app_id, video_href) util.sleep() except Exception as e: err.except_p(e) return finish
def cate_read_main(): global lib_host_http global lib_conn_http rows = db_lib.db_get_g(db_sql.sql_lib_lang_cate_read_get, ()) i_t = len(rows) i = 0 for row in rows: i = i + 1 print '%d of %d'%(i, i_t), lang_href = row[0] cate_path = row[1] cate_param = row[2] if lib_host_http != lang_href.replace('http://', '').replace('/', '').strip(): lib_host_http = lang_href.replace('http://', '').replace('/', '').strip() lib_conn_http = http.get_conn_http(lib_host_http) try: cate_read(lang_href, cate_path, cate_param) util.sleep_i(1) except Exception as e: err.except_p(e)
def home_read_main():
    """Read the home page of every language row in the lib table."""
    global lib_host_http
    global lib_conn_http
    rows = db_lib.db_get_g(db_sql.sql_lib_lang_get, ())
    for row in rows:
        lang_href = row[0]
        lang_title = row[1]
        # A fresh connection per language. NOTE(original): when running on
        # the horizon pc, switch this to the "reconnect only when the host
        # changes" check used by cate_read_main instead.
        lib_host_http = lang_href.replace('http://', '').replace('/', '').strip()
        lib_conn_http = http.get_conn_http(lib_host_http)
        try:
            home_read(lang_href)
            db_lib.db_execute_g(db_sql.sql_lib_lang_update, (lang_href,))
            util.sleep_i(1)
        except Exception as e:
            err.except_p(e)
def app_read(app_id): try: url = '/%s/details?id=%s'%(android_root, app_id) print '** app %s **'%(url) status, body = android_https_get(url) #print status, body if status == 404: print '== 404' db_app.db_execute_g(db_sql.sql_app_read_update, (1, str(datetime.now()), app_id)) return if status != 200: raise Exception('app read https connection error: %s'%(str(status))) soup = BeautifulSoup(body) app_read_banner(app_id, soup) app_read_tab_overview(app_id, soup) app_read_tab_review(app_id, soup) app_read_tab_permission(app_id, soup) db_app.db_execute_g(db_sql.sql_app_read_update, (1, str(datetime.now()), app_id)) #util.sleep() except Exception as e: err.except_p(e)
def home_read_max_main(): global lib_host_http global lib_conn_http rows = db_lib.db_get_g(db_sql.sql_lib_lang_cate_get, ()) for row in rows: lang_href = row[0] cate_path = row[1] cate_title = row[2] cate_param_max = row[3] lib_host_http = lang_href.replace('http://', '').replace('/', '').strip() lib_conn_http = http.get_conn_http(lib_host_http) ''' # if working on horizon pc, uncommit this if, otherwise, commit above two lines. if lib_host_http != lang_href.replace('http://', '').replace('/', '').strip(): lib_host_http = lang_href.replace('http://', '').replace('/', '').strip() lib_conn_http = http.get_conn_http(lib_host_http) ''' try: home_read_max(lang_href, cate_path, cate_title) except Exception as e: err.except_p(e) print '======='
def website_read(developer_website, real_href):
    """Fetch a developer's website and scrape its social-network links.

    Marks the website row as read only when the fetch and parse succeed.
    """
    url = real_href
    try:
        opener = urllib2.build_opener()
        # Bug fix: the urllib2 attribute is 'addheaders' (all lowercase);
        # the original 'addHeaders' set an unused attribute, so the custom
        # User-agent was silently never sent.
        opener.addheaders = [('User-agent', 'Mozilla/5.1 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/9.0.1')]
        f = opener.open(url, timeout=20)
        body = f.read()
        soup = BeautifulSoup(body)
        website_twitter(developer_website, soup)
        website_facebook(developer_website, soup)
        website_youtube(developer_website, soup)
        website_google_plus(developer_website, soup)
        db_developer.db_execute_g(db_sql.sql_developer_website_read_status_update, (developer_website, ))
    except Exception as e:
        err.except_p(e)