# Read the first category block (div.itmCatLstContent) on a menu page and pass its link to p_num_read.
def home_read_max(lang_href, menu_href, menu_name):
    url = menu_href
    print '** home cate max %s %s **'%(menu_name, url)
    status, body = lib_http_get(url)
    if status != 200:
        raise Exception('lib home_read_max http error %s'%(str(status)))
    soup = BeautifulSoup(body)
    div_fa = soup.find_all(name='div', attrs={'class':'itmCatLstContent'})
    if len(div_fa) > 0:
        div_f = div_fa[0]
        if div_f.a is not None and div_f.a.has_key('href'):
            cate_href = div_f.a['href'].strip()
            cate_name = div_f.a.text.strip()
            p_num_read(lang_href, menu_href, cate_href, cate_name)
            util.sleep_i(1)
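# Read pending category links from the lib DB and scrape each one, reopening the shared HTTP connection only when the target host changes.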
def cate_link_read_main():
    global lib_host_http
    global lib_conn_http
    rows = db_lib.db_get_g(db_sql.sql_lib_lang_cate_link_read_get, ())
    for row in rows:
        lang_href = row[0]
        cate_path = row[1]
        cate_param = row[2]
        link_href = row[3]
        if lib_host_http != lang_href.replace('http://', '').replace('/', '').strip():
            lib_host_http = lang_href.replace('http://', '').replace('/', '').strip()
            lib_conn_http = http.get_conn_http(lib_host_http)
        try:
            cate_link_read(link_href, lang_href, cate_path, cate_param)
            util.sleep_i(1)
        except Exception as e:
            err.except_p(e)
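# Read the home page for every stored language and mark the row as read on success.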
def home_read_main():
    global lib_host_http
    global lib_conn_http
    rows = db_lib.db_get_g(db_sql.sql_lib_lang_get, ())
    for row in rows:
        lang_href = row[0]
        lang_title = row[1]
        lib_host_http = lang_href.replace('http://', '').replace('/', '').strip()
        lib_conn_http = http.get_conn_http(lib_host_http)
        ''' # if working on the horizon pc, uncomment this if-block and comment out the two lines above.
        if lib_host_http != lang_href.replace('http://', '').replace('/', '').strip():
            lib_host_http = lang_href.replace('http://', '').replace('/', '').strip()
            lib_conn_http = http.get_conn_http(lib_host_http)
        '''
        try:
            home_read(lang_href)
            db_lib.db_execute_g(db_sql.sql_lib_lang_update, (lang_href,))
            util.sleep_i(1)
        except Exception as e:
            err.except_p(e)
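# Scrape every pending category row from the lib DB, printing progress as 'i of total'.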
def cate_read_main():
    global lib_host_http
    global lib_conn_http
    rows = db_lib.db_get_g(db_sql.sql_lib_lang_cate_read_get, ())
    i_t = len(rows)
    i = 0
    for row in rows:
        i = i + 1
        print '%d of %d'%(i, i_t), 
        lang_href = row[0]
        cate_path = row[1]
        cate_param = row[2]
        if lib_host_http != lang_href.replace('http://', '').replace('/', '').strip():
            lib_host_http = lang_href.replace('http://', '').replace('/', '').strip()
            lib_conn_http = http.get_conn_http(lib_host_http)
        try:
            cate_read(lang_href, cate_path, cate_param)
            util.sleep_i(1)
        except Exception as e:
            err.except_p(e)
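# Page through a zoom category via the ?p= offset parameter (stepping by 10), storing every app link (a.goTo inside the apps-list), until a 404 or a page with no 'next' link.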
def category_read(cate_name, cate_path, cate_param):
    #cate_param = 16270
    status = 200
    while status == 200:
        url = '%s/?p=%s'%(cate_path, cate_param)
        #url = '%s/?p=%s&nav=halloween'%(cate_path, cate_param) # for special cases
        print '** zoom category %s %s **'%(cate_param, cate_path)
        status, body = zoom_http_get(url)
        if status == 404:
            print '==: %s '%(str(status))
            db_zoom.db_execute_g(db_sql.sql_zoom_cate_read_param_update, (cate_param, cate_path, ))
            db_zoom.db_execute_g(db_sql.sql_zoom_cate_read_update, (cate_path, ))
            break
        if status != 200:
            raise Exception('zoom app category https connection error: %s'%(str(status)))
        soup = BeautifulSoup(body)
        if soup.body.text.strip().find('Access not allowed. If you think this is an error, please contact us at [email protected]') != -1:
            raise Exception('Access not allowed. If you think this is an error, please contact us at [email protected]')
        ul_fa = soup.find_all(name='ul', attrs={'id':'apps-list'})
        for ul_f in ul_fa:
            a_fa = ul_f.find_all(name='a', attrs={'class':'goTo'})
            for a_f in a_fa:
                if a_f.has_key('href'):
                    a_href = a_f['href'].strip()
                    a_title = a_f.text.strip()
                    db_zoom.db_execute_g(db_sql.sql_zoom_app_insert, (a_title, a_href, ))
        db_zoom.db_execute_g(db_sql.sql_zoom_cate_read_param_update, (cate_param, cate_path, ))
        # 16290
        # stop paging when there is no "next" link on the page
        next_fa = soup.find_all(name='li', attrs={'class':'next'})
        finish = len(next_fa) == 0
        if finish:
            print '== no next'
            db_zoom.db_execute_g(db_sql.sql_zoom_cate_read_update, (cate_path, ))
            break
        # advance cate_param to the next page offset
        cate_param = str(int(cate_param)+10)
        util.sleep_i(10)
        #break
    print cate_path
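# Fetch the _download page for every stored zoom app, extract the package id from the span.package element, and save it; returns True only when no rows were left to process.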
def app_read_main():
    finish = True
    rows = db_zoom.db_get_g(db_sql.sql_zoom_app_get, ())
    i_t = len(rows)
    i = 0
    for row in rows:
        i = i + 1
        print '%d of %d'%(i, i_t), 
        finish = False
        app_name = row[0]
        app_path = row[1]
        app_id = row[2]
        app_read_status = row[3]
        url = app_path.replace('.html', '_download.html').strip()
        #url = app_path.replace('.html', '_download.html?nav=halloween').strip()
        print '** zoom app %s **'%(url)
        status, body = zoom_http_get(url)
        if status == 404:
            print '== 404'
            db_zoom.db_execute_g(db_sql.sql_zoom_app_update, ('', app_path, ))
            continue
        if status != 200:
            print '== http error %s'%(str(status))
            continue
        soup = BeautifulSoup(body)
        if soup.body.text.strip().find('Access not allowed. If you think this is an error, please contact us at [email protected]') != -1:
            raise Exception('Access not allowed. If you think this is an error, please contact us at [email protected]')
        # parse the package id from the download page and record it
        app_id = None
        span_fa = soup.find_all(name='span', attrs={'class':'package'})
        for span_f in span_fa:
            app_id = span_f.text.replace('Package ', '').strip()
            print app_id
            db_zoom.db_execute_g(db_sql.sql_app_insert, (app_id, ))
        db_zoom.db_execute_g(db_sql.sql_zoom_app_update, (app_id, app_path, ))
        util.sleep_i(10)
    return finish

def db_merge_main(): # assumed name for the previously unreachable merge block
    db_app.db_init()
    print '** start db_merge **'
    db_app.db_merge(db_play.db_path, db_app.db_path)
    print
    db_app.db_merge(db_lib.db_path, db_app.db_path)
    print
    db_app.db_merge(db_zoom.db_path, db_app.db_path)
    print
    print '** end db_merge **'

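# Run the per-app detail readers (overview, YouTube, Google+).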
def app_read():
    app_read_overview.main()
    app_read_youtube.main()
    app_read_google_plus.main()
    #app_read_review.main()

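# Run the per-source category readers (Google Play, AndroidZoom, Androlib).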
def cate_read():
    cate_read_google_play.main()
    cate_read_android_zoom.main()
    cate_read_androlib.main()
    

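# Entry point: run main() twice, sleeping between passes.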
if __name__ == '__main__':
    #main()
    for i in range(1, 3):
        main()
        util.sleep_i(10)
        print '==== main %d ===='%(i)