def getFtpFileMain(self):
    """Interactively look up serial numbers and fetch their logs over FTP.

    Every pass of the outer loop asks for a serial number (or, when the
    answer contains a "/", the path of a file listing serial numbers), an
    area and a log type.  For each serial number the candidate FTP folders
    are obtained from ``GetLogPath`` and every file whose name contains the
    child or parent serial number and the log-type filter is downloaded,
    unless a file of that name already exists in the current directory.

    NOTE(review): in the code visible here nothing leaves the outer
    ``while True`` loop and ``logList`` is only appended to -- confirm
    whether more processing is expected after the download step.
    """
    while True:
        ml = MyLog()
        SN = ScanSernum().scanMain()

        if "/" in SN:
            # The answer is a path to a file: keep only lines carrying "FDO".
            with open(SN, 'r') as snfile:
                snlist = [asn.strip() for asn in snfile if "FDO" in asn]
        else:
            snlist = [SN]

        myarea = ScanAreaType()
        logarea = myarea.askArea()
        logtype = myarea.asklogType().lower()
        if "&" in logtype:
            # Both kinds requested: "FDO" is used as the file-name filter.
            logtype = "FDO"
        else:
            # Drop the first character of the answer to get the filter.
            logtype = logtype[1:]

        logList = []
        for sn in snlist:
            pcsntuple = GetPCSN().getMain(sn)
            childsn = pcsntuple[0]
            parentsn = pcsntuple[1]
            self.chDir()
            childFolderList = GetLogPath().getMain(parentsn, childsn, logarea)

            input_str = '>>>>Start find SN: %s/%s test log in ftp server' % (childsn, parentsn)
            ml.info(input_str)
            print(UseStyle(input_str, fore='blue'))

            for childFolder in childFolderList:
                # Use a per-folder filter: the legacy override below must not
                # leak into later folders or later serial numbers.
                folderLogType = logtype
                if "KFCR" in childFolder:
                    pathParts = childFolder.split('/')
                    KFCRYear = int(pathParts[1])
                    KFCRWeek = int(pathParts[3][2:])
                    # KFCR logs were renamed when copied to the FTP server
                    # from 2018/3/2 on; older folders hold one zip with both
                    # kinds of log, e.g. FDO2114B0KD_1491454226.zip.
                    if (KFCRYear <= 2018 and KFCRWeek < 10) or KFCRYear == 2017:
                        folderLogType = "FDO"

                # NOTE(review): a new FTP session is opened per folder and is
                # never closed here -- confirm whether LoginFTP manages that.
                ftp = LoginFTP().ftpserver()
                ftp.cwd('/')  # start from the FTP top folder
                ftp.cwd(self.baseFtpFolder)
                try:
                    ftp.cwd(childFolder)
                    for fileList in ftp.nlst():
                        matchesSn = childsn in fileList or parentsn in fileList
                        if not (matchesSn and folderLogType in fileList):
                            continue
                        input_str = '--->Copy file:%s to %s' % (fileList, os.getcwd())
                        print(UseStyle(input_str, fore='blue'))
                        ml.info(input_str)
                        # Skip the transfer when the log is already on disk.
                        if not os.path.lexists(fileList):
                            DLFtpFile().downloadFile(ftp, fileList)
                        logList.append(fileList)
                except Exception as err:
                    # Best effort: report the problem and move on to the
                    # next folder instead of aborting the whole run.
                    outStr = r"!!!No file in C:/Backup/BU3/%s in FTP server, ERR:%s" % (childFolder, err)
                    print(UseStyle(outStr, fore='black', back='yellow'))
def process_item(self, item, spider):
    """Insert one scraped weather item into the ``weather`` MySQL table.

    Args:
        item: mapping with the keys ``cityName``, ``img``, ``week``,
            ``weather``, ``shidu`` and ``air``.
        spider: the spider that produced the item (unused).

    Returns:
        The unchanged ``item``.

    Raises:
        Whatever the database driver raises; the transaction is rolled back
        and the connection is closed before the error propagates.
    """
    m1 = MyLog()
    cityName = item['cityName'].encode('utf8')
    img = os.path.basename(item['img'])  # only the file name is stored
    week = item['week'].encode('utf8')
    weather = item['weather'].encode('utf8')
    shidu = item['shidu'].encode('utf8')
    air = item['air'].encode('utf8')

    m1.info('进行mysql存储')
    conn = MySQLdb.connect(
        host='localhost',
        port=3306,
        user='******',
        password='******',
        db='scrapyDB',
        charset='utf8'
    )
    try:
        cur = conn.cursor()
        try:
            # Parameterized query: the driver does the quoting.
            cur.execute(
                "insert into weather(cityName,img,week,weather,shidu,air) values(%s,%s,%s,%s,%s,%s)",
                (cityName, img, week, weather, shidu, air))
        finally:
            cur.close()
        conn.commit()
    except Exception:
        # Do not leave a half-finished transaction behind.
        conn.rollback()
        raise
    finally:
        # Always release the connection, even when the insert failed.
        conn.close()
    m1.info('mysql存储完成')
    return item
class TestTime(object):
    """Print a short demonstration of several ``time`` module functions.

    Creating an instance runs every demonstration once, in order.
    """

    def __init__(self):
        self.log = MyLog()
        demos = (
            self.testTime,
            self.testLocaltime,
            self.testSleep,
            self.testStrftime,
        )
        for runDemo in demos:
            runDemo()

    def testTime(self):
        """Show the value returned by time.time()."""
        self.log.info(u'开始测试time.time()函数')
        stamp = time.time()
        print(u'当前时间戳为:time.time()=%f' % stamp)
        print(u'这里返回的是一个浮点型的数值,它是从1970纪元后经过的浮点秒数')
        print('\n')

    def testLocaltime(self):
        """Show the value returned by time.localtime()."""
        self.log.info(u'开始测试time.localtime()函数')
        localText = str(time.localtime())
        print(u'当前本地时间为:time.localtime()= %s' % localText)
        print(u'这里返回的是一个struct_time结构的元组')
        print('\n')

    def testSleep(self):
        """Pause for five seconds with time.sleep()."""
        self.log.info(u'开始测试time.sleep()函数')
        for line in (u'这是个计时器:time.sleep(5)', u'闭上眼睛数上5s就可以'):
            print(line)
        time.sleep(5)
        print('\n')

    def testStrftime(self):
        """Show the current local time formatted by time.strftime()."""
        self.log.info(u'开始测试time.strftime()函数')
        print(u'这个函数返回的是一个格式化的时间')
        formatted = time.strftime("%Y-%m-%d %X", time.localtime())
        print(u'time.strftime("%%Y-%%m-%%d %%X",time.localtime())= %s' % formatted)
        print('\n')
class TestTime(object):
    """Print a short demonstration of several ``time`` module functions.

    Creating an instance runs every demonstration once, in order
    (including a five second pause in testSleep).
    """

    def __init__(self):
        self.log = MyLog()
        self.testTime()
        self.testLocaltime()
        self.testSleep()
        self.testStrftime()

    def testTime(self):
        """Show the value returned by time.time()."""
        self.log.info(u'開始測試time.time()函數')
        print(u'現在時間戳為:time.time() = %f' % time.time())
        print(u'這裡返回的是一個浮點型的數值,它是從1970紀元後經過的浮點秒數')
        print('\n')

    def testLocaltime(self):
        """Show the value returned by time.localtime()."""
        self.log.info(u'開始測試time.localtime()函數')
        # struct_time is a tuple subclass, so passing it bare to "%" would be
        # unpacked as the argument list and raise TypeError; wrap it in a
        # one-element tuple to format the whole object.
        print(u'現在本地時間為:time.localtime() = %s' % (time.localtime(),))
        print(u'這裡返回的是一個struct_time結構的元組')
        print('\n')

    def testSleep(self):
        """Pause for five seconds with time.sleep()."""
        self.log.info(u'開始測試time.sleep()函數')
        print(u'這是個計時器:time.sleep(5)')
        print(u'閉上眼睛數上5秒就可以了')
        time.sleep(5)
        print('\n')

    def testStrftime(self):
        """Show the current local time formatted by time.strftime()."""
        self.log.info(u'開始測試time.strftime()函數')
        print(u'這個函數返回的是一個格式化的時間')
        print('time.strftime("%%Y-%%m-%%d %%X",time.localtime()) = %s'
              % time.strftime("%Y-%m-%d %X", time.localtime()))
        print('\n')
def process_item(self, item, spider):
    """Append one weather item to today's text file and fetch its icon.

    The record (city, weather, icon file name, humidity, air) is appended
    to ``weather<YYYYMMDD>.txt`` in the current directory.  The icon at
    ``item['img']`` is downloaded next to it unless a file with the same
    base name already exists.

    Args:
        item: mapping with the keys ``cityName``, ``weather``, ``img``,
            ``shidu`` and ``air``.
        spider: the spider that produced the item (unused).

    Returns:
        The unchanged ``item``.
    """
    m1 = MyLog()
    today = time.strftime('%Y%m%d', time.localtime())
    fileName = 'weather' + today + '.txt'
    m1.info('同步开始')
    with open(fileName, 'a') as fp:
        fp.write(item['cityName'].encode('utf-8') + '\t')
        fp.write(item['weather'].encode('utf-8') + '\t')
        imgName = os.path.basename(item['img'])
        fp.write(imgName + '\t')
        if not os.path.exists(imgName):
            # Download first, then create the file: a failed request must
            # not leave an empty image behind that blocks later retries.
            response = urllib2.urlopen(item['img'])
            try:
                imgData = response.read()
            finally:
                response.close()
            # A distinct name is required here: reusing ``fp`` would rebind
            # it to the (soon closed) image file and break the writes below.
            with open(imgName, 'wb') as imgFp:
                imgFp.write(imgData)
        fp.write(item['shidu'].encode('utf-8') + '\t')
        fp.write(item['air'].encode('utf-8') + '\n\n')
        time.sleep(1)
    m1.info('同步结束')
    return item
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Send one message through each severity method of MyLog."""
from myLog import MyLog

if __name__ == '__main__':
    ml = MyLog()
    # Lowest to highest severity, same order as the logger's methods.
    emitters = (
        (ml.debug, "I am the debug message"),
        (ml.info, "I am the info message"),
        (ml.warn, "I am the warn message"),
        (ml.error, "I am the error message"),
        (ml.critical, "I am the critical message"),
    )
    for emit, text in emitters:
        emit(text)
#!/usr/bin/env python
#-*- coding: utf-8 -*-
"""Exercise every severity method of MyLog with a sample message."""
__author__ = 'hstking [email protected]'

from myLog import MyLog

if __name__ == '__main__':
    ml = MyLog()
    # (method name, message) pairs, from least to most severe.
    samples = [
        ('debug', 'I am debug message'),
        ('info', 'I am info message'),
        ('warn', 'I am warn message'),
        ('error', 'I am error message'),
        ('critical', 'I am critical message'),
    ]
    for level, text in samples:
        getattr(ml, level)(text)
class DownloadYinyuetaiMv(object):
    """Interactive downloader for one MV hosted on yinyuetai.com.

    Creating an instance runs the whole workflow: clear the screen, print
    the banner, ask for the play-page URL, resolve the video title and
    download address, then save the video as ``./<title>.mp4``.
    """

    def __init__(self):
        clear()
        self.tip()
        self.log = MyLog()
        self.title = 'unknow'
        self.packageSize = 1024 * 1024  # bytes requested per read
        self.mvPlayUrl = self.getMvPlayUrl()

    def getMvPlayUrl(self):
        '''Ask for the MV play-page URL, process it and return it.'''
        self.log.info('获取mv的播放地址')
        self.mvPlayUrl = raw_input(
            '输入音乐台中MV的播放地址\n如http://v.yinyuetai.com/video/615494:\n')
        self.checkMvPlayUrl(self.mvPlayUrl)
        # Return the URL so the assignment in __init__ does not replace the
        # stored address with None.
        return self.mvPlayUrl

    def checkMvPlayUrl(self, url):
        '''Validate the play-page URL, then fetch the title and download.

        Logs an error and returns without any network access when the part
        after the play-page prefix is not a numeric video id.
        '''
        self.log.info('检查mv播放地址')
        url = url.strip()
        videoId = url.replace('http://v.yinyuetai.com/video/', '')
        if not videoId.isdigit():
            self.log.error('输入的mv播放地址有误,退出程序')
            return
        res = urllib2.urlopen(url, timeout=5)
        try:
            page = res.read()
        finally:
            res.close()
        titles = re.findall(r'<h3 class="fl f18">(.*?)</h3>', page)
        if titles:
            # Keep the default title when the page carries no heading.
            self.title = titles[0]
        print('MV:%s' % self.title)
        downUrl = self.getMvDownloadUrl(videoId)
        if downUrl is None:
            return
        self.downloadMv(downUrl)

    def getMvDownloadUrl(self, id):
        '''Return the last .flv address listed for video ``id``.

        Returns None (after logging an error) when the info page cannot be
        fetched or lists no matching address.
        '''
        self.log.info('获取mv下载地址')
        url = 'http://www.yinyuetai.com/insite/get-video-info?flex=true&videoId=' + id
        try:
            res = urllib2.urlopen(url, timeout=5)
        except Exception:
            self.log.error('网页连接错误')
            return None
        try:
            body = res.read()
        finally:
            res.close()
        urls = re.findall(
            r'http://h.?.yinyuetai.com/uploads/videos/common/.*?\.flv', body)
        if not urls:
            self.log.error('未找到mv下载地址')
            return None
        return urls[-1]

    def downloadMv(self, url):
        '''Download ``url`` to ./<title>.mp4 and print the elapsed time.'''
        fileName = './' + self.title + '.mp4'
        res = urllib2.urlopen(url, timeout=5)
        self.log.info('开始下载MV %s' % fileName)
        # A missing content-length header is treated as size 0, which simply
        # disables the progress output in pLen.
        rSize = int(dict(res.headers).get('content-length') or 0)
        t1 = time.time()
        offset = 0
        try:
            with open(fileName, 'wb') as fp:
                st = res.read(self.packageSize)
                while st:
                    fp.write(st)
                    # Count the chunk just written, so the reported offset
                    # matches what is on disk.
                    offset += len(st)
                    # pLen sleeps for a second, so it runs in its own
                    # process to keep the download loop moving.
                    p = multiprocessing.Process(target=self.pLen, args=(
                        fileName,
                        offset,
                        rSize,
                    ))
                    p.start()
                    st = res.read(self.packageSize)
        finally:
            res.close()
        t2 = time.time()
        time.sleep(2)
        print(u'\n下载时间共%ds\n' % (t2 - t1))

    def pLen(self, fileName, offset, rSize):
        '''Print one progress line while the download is incomplete.'''
        if offset < rSize:
            print('%s\t%dbytes/%dbytes\r' % (fileName, offset, rSize)),
            time.sleep(1)

    def tip(self):
        '''Print the banner shown at start-up.'''
        print('|' + '-' * 40)
        print('|' + u'这是一个下载音悦台MV的脚本')
        print('|' + '-' * 40)
class DownloadYinyuetaiMv(object):
    """Interactive downloader for one MV hosted on yinyuetai.com.

    Creating an instance runs the whole workflow: clear the screen, print
    the banner, ask for the play-page URL, resolve the video title and
    download address, then save the video as ``./<title>.mp4``.
    """

    def __init__(self):
        clear()
        self.tip()
        self.log = MyLog()
        self.title = 'unknow'
        self.packageSize = 1024*1024  # bytes requested per read
        self.mvPlayUrl = self.getMvPlayUrl()

    def getMvPlayUrl(self):
        '''Ask for the MV play-page URL, process it and return it.'''
        self.log.info('获取mv的播放地址')
        self.mvPlayUrl = raw_input('输入音乐台中MV的播放地址\n如http://v.yinyuetai.com/video/615494:\n')
        self.checkMvPlayUrl(self.mvPlayUrl)
        # Return the URL so the assignment in __init__ does not replace the
        # stored address with None.
        return self.mvPlayUrl

    def checkMvPlayUrl(self, url):
        '''Validate the play-page URL, then fetch the title and download.

        Logs an error and returns without any network access when the part
        after the play-page prefix is not a numeric video id.
        '''
        self.log.info('检查mv播放地址')
        url = url.strip()
        videoId = url.replace('http://v.yinyuetai.com/video/', '')
        if not videoId.isdigit():
            self.log.error('输入的mv播放地址有误,退出程序')
            return
        res = urllib2.urlopen(url, timeout=5)
        try:
            page = res.read()
        finally:
            res.close()
        titles = re.findall(r'<h3 class="fl f18">(.*?)</h3>', page)
        if titles:
            # Keep the default title when the page carries no heading.
            self.title = titles[0]
        print('MV:%s' % self.title)
        downUrl = self.getMvDownloadUrl(videoId)
        if downUrl is None:
            return
        self.downloadMv(downUrl)

    def getMvDownloadUrl(self, id):
        '''Return the last .flv address listed for video ``id``.

        Returns None (after logging an error) when the info page cannot be
        fetched or lists no matching address.
        '''
        self.log.info('获取mv下载地址')
        url = 'http://www.yinyuetai.com/insite/get-video-info?flex=true&videoId=' + id
        try:
            res = urllib2.urlopen(url, timeout=5)
        except Exception:
            self.log.error('网页连接错误')
            return None
        try:
            body = res.read()
        finally:
            res.close()
        urls = re.findall(r'http://h.?.yinyuetai.com/uploads/videos/common/.*?\.flv', body)
        if not urls:
            self.log.error('未找到mv下载地址')
            return None
        return urls[-1]

    def downloadMv(self, url):
        '''Download ``url`` to ./<title>.mp4 and print the elapsed time.'''
        fileName = './' + self.title + '.mp4'
        res = urllib2.urlopen(url, timeout=5)
        self.log.info('开始下载MV %s' % fileName)
        # A missing content-length header is treated as size 0, which simply
        # disables the progress output in pLen.
        rSize = int(dict(res.headers).get('content-length') or 0)
        t1 = time.time()
        offset = 0
        try:
            with open(fileName, 'wb') as fp:
                st = res.read(self.packageSize)
                while st:
                    fp.write(st)
                    # Count the chunk just written, so the reported offset
                    # matches what is on disk.
                    offset += len(st)
                    # pLen sleeps for a second, so it runs in its own
                    # process to keep the download loop moving.
                    p = multiprocessing.Process(target=self.pLen, args=(fileName, offset, rSize,))
                    p.start()
                    st = res.read(self.packageSize)
        finally:
            res.close()
        t2 = time.time()
        time.sleep(2)
        print(u'\n下载时间共%ds\n' % (t2 - t1))

    def pLen(self, fileName, offset, rSize):
        '''Print one progress line while the download is incomplete.'''
        if offset < rSize:
            print('%s\t%dbytes/%dbytes\r' % (fileName, offset, rSize)),
            time.sleep(1)

    def tip(self):
        '''Print the banner shown at start-up.'''
        print('|' + '-'*40)
        print('|' + u'这是一个下载音悦台MV的脚本')
        print('|' + '-'*40)
"""Send one message through each severity method of MyLog."""
from myLog import MyLog

if __name__ == '__main__':
    ml = MyLog()
    # The debug text used to start with the digit "1" instead of "I".
    ml.debug("I'm a debug message")
    ml.info("I'm an info message")
    ml.warn("I'm a warn message")
    ml.error("I'm an error message")
    ml.critical("I'm a critical message")
class WBSpider(object):
    '''Selenium-driven scraper for the posts of one Weibo account.

    Attributes:
        username: Weibo user name used to log in.
        password: Weibo password used to log in.
        driver: the Selenium browser driver (Chrome).
        isLogin: 1 once loginWeibo() completed without error, otherwise 0.
        uid: uid of the account whose posts are scraped ("" until set).
    '''

    def __init__(self, username, password):
        self.log = MyLog()  # logger used for all status output
        self.username = username
        self.password = password
        self.driver = webdriver.Chrome()
        self.driver.implicitly_wait(5)  # wait up to 5 s for elements
        self.isLogin = 0
        self.uid = ""

    def __del__(self):
        '''Shut the browser down when the instance is destroyed.'''
        # The attribute is missing when __init__ failed before creating it.
        driver = getattr(self, 'driver', None)
        if driver is None:
            return
        try:
            # quit() also ends the driver process, not just the window.
            driver.quit()
        except Exception:
            # Nothing useful can be done about a failure during teardown.
            pass

    def loginWeibo(self):
        '''Log in with the stored user name and password.

        Sets isLogin to 1 when the form was submitted without an error and
        to 0 otherwise.
        '''
        self.driver.get("http://login.sina.com.cn/")
        self.driver.implicitly_wait(5)
        elem_user = self.driver.find_element_by_name("username")
        elem_user.send_keys(self.username)
        elem_pwd = self.driver.find_element_by_name("password")
        elem_pwd.send_keys(self.password)
        try:
            time.sleep(5)
            elem_pwd.send_keys(Keys.RETURN)  # submit the form with Enter
            time.sleep(2)
            self.log.info('登陆成功...')
            self.isLogin = 1
        except Exception:
            self.log.error("Login Error")
            self.isLogin = 0

    def setUid(self, Uid):
        '''Set the uid of the account whose posts will be scraped.'''
        self.uid = Uid

    def _findNextPage(self):
        '''Return the "next page" link element, or None when absent.'''
        try:
            return self.driver.find_element_by_link_text('下一页')
        except Exception:
            return None

    def getWeibo(self, PageNum):
        '''Scrape the posts of the configured account.

        Args:
            PageNum: page counter; it is decremented after every click on
                the "next page" link and scraping stops once a page was
                read while the counter is 0.
                NOTE(review): this reads PageNum + 1 pages in total --
                confirm that this matches what callers expect.

        Returns:
            A list with the text of every post found, or None when the
            instance is not logged in, no uid is set or PageNum is negative.
        '''
        total = PageNum
        # Preconditions.
        if self.isLogin == 0:
            self.log.error("没有登录微博!")
            return
        if self.uid == "":
            self.log.error("待爬取的微博主的uid为空,请设置!")
            return
        if PageNum < 0:
            self.log.error("页数设置不合法")
            return

        weiboList = []
        url = "http://weibo.com/" + self.uid
        self.driver.get(url)
        self.driver.implicitly_wait(5)
        self.log.debug("准备访问个人网站....." + str(url))
        self.log.info('个人详细信息')
        print(u'用户id: ' + self.uid)
        self.driver.implicitly_wait(5)

        str_name = self.driver.find_element_by_xpath(
            "//div[@class='pf_username']/h1")
        name = str_name.text
        if not isinstance(name, str):
            # Python 2 unicode: encode explicitly, str() would fail on
            # non-ASCII nicknames.
            name = name.encode('utf-8')
        self.log.info("昵称:" + name)
        self.driver.implicitly_wait(5)

        try:
            while True:
                print("正在爬取第" + str(total - PageNum + 1) + "页")
                # The link only shows up after the page is scrolled to the
                # bottom, so keep scrolling and retrying (200 tries at most).
                next_page = self._findNextPage()
                Count = 0
                while next_page is None:
                    next_page = self._findNextPage()
                    Count = Count + 1
                    print(Count)
                    time.sleep(3)
                    self.driver.execute_script(
                        "window.scrollTo(0, document.body.scrollHeight);")
                    time.sleep(3)
                    if Count == 200:
                        break

                weiboelem = self.driver.find_elements_by_xpath(
                    "//div[@action-type='feed_list_item']/div[@node-type='feed_content']/div[@class='WB_detail']/div[@node-type='feed_list_content']"
                )
                weiboList.extend(elem.text for elem in weiboelem)

                if next_page is None:
                    break
                if PageNum == 0:
                    self.log.info("到达尾页")
                    break
                # The link may be covered by another element, hence the
                # move-and-click through ActionChains.
                # NOTE(review): the element is clicked a second time right
                # after -- confirm both clicks are really wanted.
                ActionChains(self.driver).move_to_element(next_page).click(
                    next_page).perform()
                next_page.click()
                PageNum = PageNum - 1
                self.driver.implicitly_wait(5)
        except Exception:
            # Best effort: keep whatever was collected so far.
            self.log.error("爬取异常")
        return weiboList
#!/usr/bin/env python
#-*- coding:utf-8 -*-
"""Emit one short message per severity level through MyLog."""
from myLog import MyLog

if __name__ == "__main__":
    ml = MyLog()
    # Each message text equals the name of the method that emits it.
    for emit, text in (
        (ml.debug, 'debug'),
        (ml.info, 'info'),
        (ml.warn, 'warn'),
        (ml.error, 'error'),
        (ml.critical, 'critical'),
    ):
        emit(text)