Ejemplo n.º 1
0
def carwing_content(start, cartoon_id):
    """Download the page images of every chapter from ``start`` onwards.

    Chapters of ``cartoon_id`` whose ``chapter_sort`` is at least ``start``
    are read from the database in ``chapter_sort`` order.  For each chapter
    the chapter page is fetched, the image-link prefix is extracted from its
    HTML, and ``paqu`` is called once per entry of the module-level
    ``picture_ids`` until it returns a falsy value.

    Args:
        start: First ``chapter_sort`` value to process.  It is also used as
            the running chapter number in progress messages (formatted with
            ``%d``) and is incremented once per processed row.
        cartoon_id: Cartoon identifier as a string; it is concatenated
            directly into the SQL text.

    Relies on names defined outside this function: ``selectsql``,
    ``load_url``, ``paqu``, ``baseurl``, ``picUrl``, ``picture_ids`` and
    ``imglist``.
    """
    # NOTE(review): the query is assembled by string concatenation, so both
    # arguments must come from trusted code rather than from user input.
    sql = 'SELECT c.chapter_id, chapter_name, chapter_url, ct.cartoon_name from chapter c LEFT JOIN cartoon_title ct on ct.cartoon_id = c.cartoon_id where c.chapter_sort >= '\
          + str(start) + ' and c.cartoon_id = ' + cartoon_id + ' ORDER BY chapter_sort'
    chapters = selectsql(sql)
    for item in chapters:
        cartoon_name = item[3]
        chapter_id = item[0]
        # '?' is removed from the name before it is used as a directory name.
        chapter_name = item[1].replace('?', '')
        html = load_url(baseurl + item[2])

        match = re.search(r'L2ZpbGV.{20}', html)
        if match is None:
            # Without the link prefix no image URL can be built.  Skip this
            # chapter instead of failing on ``None.group()`` and aborting
            # the remaining chapters; keep the chapter counter in step.
            print('第%d集:未找到图片链接,已跳过' % start)
            start += 1
            continue
        link = match.group()

        save_dir = 'E:/漫画/' + cartoon_name + "/" + chapter_name
        tag = True
        i = 0
        while i < len(picture_ids):
            # Indexes above 8 use the second suffix in ``imglist``, the
            # others the first (presumably one- vs two-digit page numbers
            # -- TODO confirm against where ``imglist`` is defined).
            suffix = imglist[1] if i > 8 else imglist[0]
            param = ('deimgtxtimg.js?txtimg=' + link + picture_ids[i]
                     + suffix + '&lid=' + str(i))
            # The insert statement is not executed here; it is handed to
            # ``paqu`` together with the download request.
            insertSql = ('insert into content (content_url,content_sort,chapter_id) value (\''
                         + param + '\', ' + str(i + 1) + ', '
                         + str(chapter_id) + ')')
            tag = paqu(picUrl + param, save_dir, str(i), tag, insertSql)
            if not tag:
                # A falsy result ends the chapter; ``i`` equals the number
                # of pages that succeeded before this one.
                print('------------------第%d集完成:共%d页--------------' %
                      (start, i))
                break
            i += 1
            print('第%d集:第%d页完成' % (start, i))
        start += 1
Ejemplo n.º 2
0
def paquindex(url, key):
    """Collect the decrypted titles and list-page links of an index page.

    The page at ``url`` is fetched with ``load_url``.  Every list link and
    every encrypted title span found in it is extracted by regular
    expression; titles are decrypted with ``aesDecrypt(key, ...)``.  Results
    are appended pairwise, by position, to the module-level ``nameArray``
    and ``urlArray``.

    Args:
        url: Address of the index page to fetch.
        key: Key passed through to ``aesDecrypt``.

    Raises:
        IndexError: If fewer titles than links were found on the page.
    """
    page = load_url(url)
    encrypted_names = re.findall(r'[te]" id="detxt">.{10,100}</span>', page)
    links = re.findall(r'listde.php\?act=list&aid=[0-9]{2,3}', page)

    for position, raw_link in enumerate(links):
        clean_link = raw_link.replace('"', '')
        # The first 14 characters are the matched tag prefix
        # (one of t/e followed by '" id="detxt">'); the closing tag is
        # removed as well, leaving only the encrypted payload.
        payload = encrypted_names[position][14:].replace('</span>', '')
        title = aesDecrypt(key, payload)
        nameArray.append(title)
        urlArray.append(clean_link)
Ejemplo n.º 3
0
def crawling_chapter_one(base_url, cartoon_id):
    """Scrape the chapter list at ``base_url + '0'`` and insert it.

    The page is fetched with ``load_url`` and handed to ``analysis_html``
    together with the link and title patterns.  Every link/title pair
    becomes one row of a single multi-row ``insert into chapter``
    statement, with ``chapter_sort`` numbered from 1 in page order and
    ``project_id`` fixed to 1.

    Args:
        base_url: URL prefix; ``'0'`` is appended to address the page.
        cartoon_id: Cartoon identifier written into every row.  It is
            placed in the SQL text unquoted.

    Returns:
        True once the insert statement has been passed to ``insertsql``;
        False when no chapter links were found and nothing was inserted.

    Raises:
        IndexError: If fewer titles than links were extracted.
    """
    html = load_url(base_url + '0')
    url_pattern = r'style.php\?act=style&aid=.{3}&cid=[0-9]{4,6}'
    title_pattern = r'detxt\">.{20,100}</span>'
    url_list, title_list = analysis_html(html, url_pattern, title_pattern)

    # With no rows the statement would end at "values", which is malformed
    # SQL; skip the insert and report that nothing was stored.
    if not url_list:
        return False

    rows = []
    for index, chapter_url in enumerate(url_list):
        # Titles and links are scraped text that ends up inside
        # single-quoted SQL literals, so embedded quotes are doubled.
        # NOTE(review): this covers quote characters only; a parameterized
        # query would be the complete fix but needs ``insertsql`` to accept
        # parameters, which cannot be confirmed from here.
        title = title_list[index].replace("'", "''")
        link = chapter_url.replace("'", "''")
        rows.append("('{}','{}',{},1,{})".format(
            title, link, index + 1, cartoon_id))

    sql = ('insert into chapter (chapter_name,chapter_url,chapter_sort,project_id,cartoon_id) values '
           + ', '.join(rows))
    insertsql(sql)
    return True