#!/usr/bin/python
"""Manual smoke test for the dytt8 spider.

Builds a ``dytt8`` instance with the argument 4, prints whatever
``list_url()`` returns, then prints each item produced by ``http_url()``.
"""
from dytt8.dytt8 import dytt8

# NOTE(review): the argument is presumably a page count -- confirm against
# the dytt8 class, which is not visible from this script.
spider = dytt8(4)

# A single-argument print(...) behaves identically under the Python 2 print
# statement and the Python 3 print function.
print(spider.list_url())
for url in spider.http_url():
    print(url)
#!/usr/bin/python #coding=utf-8 import threading from Queue import Queue from dytt8.dytt8 import dytt8 from xunbo.xunbo import xunbo #下面的dy 就是dytt8的这个class的一个引用, dy = dytt8(10) xunbo = xunbo(3) ftp_urls = [] class ThreadUrl(threading.Thread): ''' 封装多线程库,用来多线程跑啊 ''' def __init__(self,queue,site): threading.Thread.__init__(self) self.queue = queue self.site = site #传递的是一个class的实例或者引用 def run(self): while True: try: url = self.queue.get() t = self.site.ftp_url(url) if len(t) > 1: ftp_urls.append(t) except:
# coding=utf-8
"""Prepare the SQLite database used by the spider.

Opens ``./spider.db``, creates the ``ftp_url`` table if it does not exist
yet, builds a ``dytt8`` instance, commits, and closes the connection.
Storing crawled URLs is not wired up yet (see the TODO below).
"""
import sqlite3

from dytt8.dytt8 import dytt8

db = sqlite3.connect('./spider.db')
try:
    link = db.cursor()

    # Look the table up in the schema catalogue. String values use single
    # quotes: double-quoted literals are a legacy SQLite quirk that some
    # builds disable.
    link.execute(
        "select * from sqlite_master where type = 'table' and name = 'ftp_url';")
    if not link.fetchone():
        link.execute(
            """CREATE TABLE 'ftp_url'('id' INTEGER PRIMARY KEY NOT NULL,'url' varchar(120) DEFAULT NULL)"""
        )
        db.commit()

    print("开始爬取")
    # The instance is not used further in this script; the call is kept in
    # case constructing it has side effects.
    dytt = dytt8(5)

    # TODO: persist the crawled URLs once a `urls` iterable is available.
    # Use a parameterized statement instead of string formatting:
    #     for url in urls:
    #         link.execute("INSERT INTO ftp_url VALUES (NULL, ?)", (url,))
    db.commit()
finally:
    # Release the database handle even if one of the steps above raised.
    db.close()
#!/usr/bin/python #coding=utf-8 import threading from Queue import Queue from dytt8.dytt8 import dytt8 from xunbo.xunbo import xunbo #下面的dy 就是dytt8的这个class的一个引用, dy = dytt8(10) xunbo = xunbo(3) ftp_urls = [] class ThreadUrl(threading.Thread): ''' 封装多线程库,用来多线程跑啊 ''' def __init__(self, queue, site): threading.Thread.__init__(self) self.queue = queue self.site = site #传递的是一个class的实例或者引用 def run(self): while True: try: url = self.queue.get() t = self.site.ftp_url(url) if len(t) > 1: ftp_urls.append(t) except:
#!/usr/bin/python
# coding=utf-8
"""Collect FTP links through ``mythread`` and write them to ``test.txt``.

Builds a ``dytt8`` instance with the argument 5, passes it to ``mythread``,
and writes every returned URL to ``test.txt`` as one UTF-8 encoded line.

Written for Python 2: the encoded value is written to a text-mode file,
which relies on Python 2 ``str`` semantics.
"""
from dytt8.dytt8 import dytt8
from xunbo.xunbo import xunbo
from mythread import mythread

# Use a separate name for the instance so the imported class stays reachable.
spider = dytt8(5)

# NOTE: crawling movie links from the first three pages of xunbo is
# currently disabled; re-enable by building `xunbo(3)` and passing it to
# mythread.

print("begin print")
ftp_urls = mythread(spider)

# The context manager closes the file even if a write fails part-way.
with open('test.txt', 'w+') as out:
    for ftp_url in ftp_urls:
        out.write((ftp_url + '\n').encode('utf-8'))
        # Push each line to disk immediately rather than waiting for close.
        out.flush()