コード例 #1
def p_num_read(lang_href, menu_href, cate_href, cate_name):
    """Find the last page number of a category and queue every page for reading.

    Fetches ``cate_href`` and collects the ``p`` query parameter of every
    ``<a href>`` found inside the ``div.pg_suiv`` elements.  Exactly two such
    values are expected; they are taken as ``last_next`` and ``last_final``
    in document order.  One row is then inserted per page index from 0
    through ``last_final`` (inclusive), and ``last_final`` is written with
    ``db_sql.sql_lib_lang_cate_update``.

    Args:
        lang_href: language href, passed to both SQL statements.
        menu_href: menu href, used as the key of the update statement.
        cate_href: category URL to fetch; also passed to the insert statement.
        cate_name: category name, used only in the progress message.

    Raises:
        Exception: on a non-200 response, when no ``div.pg_suiv`` element is
            found, or when the final page number could not be determined
            (i.e. the page did not yield exactly two numeric ``p`` values).
    """
    url = '%s'%(cate_href)
    print('** cate page max p_num %s %s **'%(cate_name, url))
    status, body = lib_http_get(url)
    if status != 200:
        raise Exception('=== p_num_read http error %s '%(str(status)))
    soup = BeautifulSoup(body)
    last_fa = soup.find_all(name='div', attrs={'class':'pg_suiv'})
    if len(last_fa) <= 0:
        raise Exception('p_num_read len(last_fa) <= 0')
    last_ps = []
    for last_f in last_fa:
        for last_a in last_f.find_all(name='a'):
            if not last_a.has_key('href'):
                continue
            last_a_qs = urlparse.urlparse(last_a['href'].strip()).query
            p_values = urlparse.parse_qs(last_a_qs).get('p')
            if not p_values:
                continue
            try:
                # parse_qs returns strings; the value must be an int because
                # it is used as a range() bound below.  Previously the value
                # was never stored at all, so the function always raised.
                last_ps.append(int(p_values[0]))
            except ValueError:
                # A non-numeric p value cannot be a page number; ignore it.
                continue
    last_next = -1
    last_final = -1
    if len(last_ps) == 2:
        last_next, last_final = last_ps
    if last_final == -1:
        raise Exception('lib p_num_read last_final == -1')
    for i in range(0, last_final+1):
        db_lib.db_execute_g(db_sql.sql_lib_lang_cate_read_insert, (lang_href, cate_href, i, str(datetime.now())))
    # Be careful: the update is keyed by menu_href, which is different from cate_href.
    db_lib.db_execute_g(db_sql.sql_lib_lang_cate_update, (last_final, lang_href, menu_href))
    print('%s %s %s'%(last_ps, last_next, last_final))
コード例 #2
def language_read_main():
    """Fetch '/' and store every language link found in the ``div.flagt`` elements.

    For each direct child of a ``div.flagt`` that is an ``<a>`` tag with an
    ``href`` attribute, the stripped ``href`` and ``title`` attributes are
    inserted with ``db_sql.sql_lib_lang_insert`` together with the current
    timestamp.

    Raises:
        Exception: when the HTTP status is not 200.
    """
    url = '/'
    print('** language option %s **'%(url))
    status, body = lib_http_get(url)
    if status != 200:
        raise Exception('lib lang htp error %s'%(str(status)))
    flag_divs = BeautifulSoup(body).find_all(name='div', attrs={'class':'flagt'})
    for flag_div in flag_divs:
        for child in flag_div:
            # Only direct <a href=...> children carry a language link.
            if child.name != 'a' or not child.has_key('href'):
                continue
            href = child['href'].strip()
            title = child['title'].strip()
            row = (href, title, str(datetime.now()))
            db_lib.db_execute_g(db_sql.sql_lib_lang_insert, row)
コード例 #3
def home_read(lang_href):
    """Fetch '/' and store the two ``a.menulink`` entries for ``lang_href``.

    Each menu link that has an ``href`` attribute is inserted with
    ``db_sql.sql_lib_lang_cate_insert`` as (lang_href, href, link text,
    timestamp).

    Args:
        lang_href: language href stored with each menu row; it is also shown
            in the progress message but is not part of the requested URL.

    Raises:
        Exception: when the HTTP status is not 200, or when the page does not
            contain exactly two ``a.menulink`` elements.
    """
    url = '/'  # original note: looks like the proxy needs the path to be '/'
    print('** home %s | %s **'%(url, lang_href))
    status, body = lib_http_get(url)
    if status != 200:
        raise Exception('lib home_read http error %s'%str(status))
    menu_links = BeautifulSoup(body).find_all(name='a', attrs={'class':'menulink'})
    if len(menu_links) != 2:
        raise Exception('lib home_read len(menu_fa) != 2')
    for link in menu_links:
        if not link.has_key('href'):
            continue
        href = link['href'].strip()
        name = link.text.strip()
        row = (lang_href, href, name, str(datetime.now()))
        db_lib.db_execute_g(db_sql.sql_lib_lang_cate_insert, row)
コード例 #4
def cate_link_read(link_href, lang_href, cate_path, cate_param):
    """Fetch a link page and record the app id found in its asset-extra boxes.

    For every ``div`` with class ``ui-widget-content ui-corner-all
    assetextra`` that contains more than three ``<b>`` elements, the stripped
    text of the third ``<b>`` is taken as the app id.  The id is inserted
    with ``db_sql.sql_app_insert`` and then written with
    ``db_sql.sql_lib_lang_cate_link_read_update``.

    Args:
        link_href: URL of the page to fetch.
        lang_href: language href, passed to the update statement.
        cate_path: category path, passed to the update statement.
        cate_param: category page parameter, passed to the update statement.

    Raises:
        Exception: when the HTTP status is not 200.
    """
    url = link_href
    print('** lib cate_link_read %s **'%(url))
    status, body = lib_http_get(url)
    if status != 200:
        # The message used to say 'cate_read', which made failures here
        # indistinguishable from failures raised by cate_read itself.
        raise Exception('lib cate_link_read http error %s'%(str(status)))
    soup = BeautifulSoup(body)
    div_fa = soup.find_all(name='div', attrs={'class':'ui-widget-content ui-corner-all assetextra'})
    for div_f in div_fa:
        b_fa = div_f.find_all(name='b')
        # NOTE(review): the guard requires at least four <b> elements although
        # only index 2 is read -- presumably a page-layout check; confirm.
        if len(b_fa) > 3:
            app_id = b_fa[2].text.strip()
            db_lib.db_execute_g(db_sql.sql_app_insert, (app_id,))
            db_lib.db_execute_g(db_sql.sql_lib_lang_cate_link_read_update, (app_id, lang_href, cate_path, cate_param))
            print(app_id)
コード例 #5
def cate_read(lang_href, cate_path, cate_param):
    """Fetch one category page and store every ``a.asLsttitle`` link on it.

    The page URL is ``cate_path`` with ``?p=<cate_param>`` appended.  Each
    matching anchor that has an ``href`` attribute is inserted with
    ``db_sql.sql_lib_lang_cate_link_read_insert``; the anchor's ``id``
    attribute is stored as the link id, or ``'0'`` when it has none.  After
    all links are processed, ``db_sql.sql_lib_lang_cate_read_update`` is
    executed for the (lang_href, cate_path, cate_param) triple.

    Args:
        lang_href: language href, passed to both SQL statements.
        cate_path: category path used to build the URL.
        cate_param: value of the ``p`` query parameter.

    Raises:
        Exception: when the HTTP status is not 200.
    """
    url = '%s?p=%s'%(cate_path, cate_param)
    print('** lib cate_read %s **'%(url))
    status, body = lib_http_get(url)
    if status != 200:
        raise Exception('lib cate_read http error %s'%(str(status)))
    title_links = BeautifulSoup(body).find_all(name='a', attrs={'class':'asLsttitle'})
    for anchor in title_links:
        if not anchor.has_key('href'):
            continue
        link_href = anchor['href'].strip()
        link_name = anchor.text.strip()
        # Anchors without an id attribute are stored with the placeholder '0'.
        link_id = anchor['id'].strip() if anchor.has_key('id') else '0'
        row = (lang_href, cate_path, cate_param, link_href, link_name, link_id)
        db_lib.db_execute_g(db_sql.sql_lib_lang_cate_link_read_insert, row)
    db_lib.db_execute_g(db_sql.sql_lib_lang_cate_read_update, (lang_href, cate_path, cate_param))
コード例 #6
def home_read_main():
    global lib_host_http
    global lib_conn_http
    rows = db_lib.db_get_g(db_sql.sql_lib_lang_get, ())
    for row in rows:
        lang_href = row[0]
        lang_title = row[1]
        lib_host_http = lang_href.replace('http://', '').replace('/', '').strip()
        lib_conn_http = http.get_conn_http(lib_host_http)
        ''' # if working on horizon pc, uncommit this if and commit above two lines. 
        if lib_host_http != lang_href.replace('http://', '').replace('/', '').strip():
            lib_host_http = lang_href.replace('http://', '').replace('/', '').strip()
            lib_conn_http = http.get_conn_http(lib_host_http)
            db_lib.db_execute_g(db_sql.sql_lib_lang_update, (lang_href,))
        except Exception as e: