def _novelcrawl_fetch_rows(sql):
    """Run *sql* on a fresh MySQLHelper and return every row.

    The connection is closed in a ``finally`` clause so it is released
    even when the query itself raises.
    """
    helper = MySQLHelper()
    try:
        return helper.SqlFecthAll(sql)
    finally:
        helper.CloseCon()


def _novelcrawl_visit_rows(rows, crawl, proxies):
    """Call ``crawl(row[0], header, proxy)`` for each row, logging failures.

    A fresh header and a randomly chosen proxy are used per row. Any
    exception is printed and the loop moves on to the next row, so one
    bad URL does not abort the batch.
    """
    for row in rows:
        try:
            header = get_header()
            proxy = random.choice(proxies)
            print(row[0])
            crawl(row[0], header, proxy)
        except Exception as e:
            # Deliberate best-effort: report and continue.
            print('error')
            print(e.message)


def NovelCrawl_Main(count):
    """Run the three crawl stages in sequence.

    1. Call ``CrawlNovelList`` ``count`` times, starting from the
       booklist URL; each successful call's return value becomes the URL
       passed to the next call (presumably the next page - confirm
       against ``CrawlNovelList``). On failure the same URL is retried
       on the next iteration with a different random proxy.
    2. Read the distinct ``novellisturl`` values from ``pagenovel`` and
       pass each to ``CrawlNovel``.
    3. Read the distinct ``novelurl`` values from ``novelurl`` and pass
       each to ``CrawlNovelData``.

    The proxy list is fetched once and shared by all three stages.

    :param count: number of ``CrawlNovelList`` attempts in stage 1.
    """
    url = r'http://www.yousuu.com/booklist'
    proxies = fetchproxies()

    # Stage 1: walk the book-list pages.
    for _ in range(count):
        try:
            header = get_header()
            proxy = random.choice(proxies)
            url = CrawlNovelList(url, header, proxy)
        except Exception as e:
            # Keep `url` unchanged so the next iteration retries it.
            print('error')
            print(e.message)

    # Stage 2: crawl every stored novel-list page.
    novel_list_rows = _novelcrawl_fetch_rows(
        'select distinct novellisturl from pagenovel ')
    print(len(novel_list_rows))
    _novelcrawl_visit_rows(novel_list_rows, CrawlNovel, proxies)

    # Stage 3: crawl the detail data of every stored novel.
    novel_rows = _novelcrawl_fetch_rows(
        'select distinct novelurl from novelurl')
    print(len(novel_rows))
    _novelcrawl_visit_rows(novel_rows, CrawlNovelData, proxies)
def Main_CrawlNovelData(pp, page_size=300, proxy_count=40):
    """Crawl the detail data for one page of stored novel URLs.

    Selects distinct ``novelurl`` values ordered by ``rundate`` descending,
    taking ``page_size`` rows starting at offset ``pp * page_size``, and
    passes each URL to ``CrawlNovelData`` with a fresh header and a
    randomly chosen proxy. A failure on one URL is printed and the loop
    continues with the next one.

    :param pp: zero-based page index; coerced with ``int()``.
    :param page_size: rows per page (default 300, the original constant).
    :param proxy_count: argument forwarded to ``fetchproxies``
        (default 40, the original constant).
    """
    # Both values are coerced to int before formatting, so nothing but
    # digits can reach the SQL text.
    limit = int(page_size)
    offset = int(pp) * limit
    sql = 'select distinct novelurl from novelurl order by rundate desc limit %s offset %s' % (
        limit, offset)

    a = MySQLHelper()
    try:
        NovelUrl = a.SqlFecthAll(sql)
    finally:
        # Release the connection even if the query raises.
        a.CloseCon()

    proxies = fetchproxies(proxy_count)
    for row in NovelUrl:
        try:
            header = get_header()
            proxy = random.choice(proxies)
            print(row[0])
            CrawlNovelData(row[0], header, proxy)
        except Exception as e:
            # Deliberate best-effort: report and move on to the next URL.
            print('error')
            print(e.message)
#coding= utf-8 import re import time import cx_Oracle from MySQLHelper import MySQLHelper a = MySQLHelper() sql = 'select * from fun.noveldata order by rundate desc ' rawdata = a.SqlFecthAll(sql) a.CloseCon() conn = cx_Oracle.connect('system/Syy19930119@localhost:1521/orcl') cursor = conn.cursor() try: cursor.execute('drop table SYY_NOVELDATA') except: pass sql = """ CREATE TABLE system.SYY_NOVELDATA (\ RUNDATE DATE\ ,NOVELURL VARCHAR2(250)\ ,NOVELNAME VARCHAR2(250)\ ,NOVEL_ORGURL VARCHAR2(250)\ ,TAG_CATEGORY VARCHAR2(250)\ ,AUTHOR VARCHAR2(250)\ ,NOVELRANK VARCHAR2(250)\ ,WORLDCOUNT VARCHAR2(250)\ ,SECTIONCOUNT VARCHAR2(250)\