def check(self):
    """Check the twfanti novel page for a new chapter marker and notify.

    Returns:
        0    : check completed (no update, or update handled)
        2    : config section for this checker is not defined
        None : the site request / parsing failed (logged)
    """
    config = self.setConfig()
    aheaders = {'Host': 'm.twfanti.com'}
    CW = self.Sec['check word']  # marker value seen on the previous run
    if not crab.Config.has_section(self.secName):
        self.logging('\t未定義 {} Config\r\n'.format(self.secName))
        return 2
    url = self.Sec['Href']
    try:
        res = MYSITE(url, headers=aheaders)
        # the second <span> on the page carries the latest-chapter marker
        # NOTE(review): fixed index — assumes the page layout is stable
        chkD = str(res.soup.select('span')[1].text.strip())
        if chkD == CW:
            self.logging('\t{0} {1} 未有更新\n'.format(self.Sec['Name'], chkD))
            return 0
        msg = '{0} 發佈更新 {1}\n'.format(self.Sec['Name'], chkD)
        # fetch the new chapters before notifying
        A = Book1(config)
        A.getChapters()
        bkhref = config['bsurl'] + config['sUrl'].format(str(config['startChptr']))
        self.notify('{0} 發佈更新 [{1}]({2})\n#小說更新 #{3}\n'.format(
            self.Sec['Name'], chkD, bkhref, config['title']))
        self.logging('\t{}'.format(msg))
        # persist the new marker so the next run compares against it
        self.Sec['check word'] = chkD
        self.Sec['Last Update'] = self.chk_date
        return 0  # explicit success return, consistent with sibling checkers
    except Exception as e:
        print(e)
        self.logging(str(e))
        self.logging('\t{}網站連結失敗\r\n'.format(self.secName))
def check(self):
    """Generic checker: scrape one element, compare it with the stored
    check word, and notify when it changed.

    Returns:
        0 : site reachable (content updated or unchanged)
        1 : site request failed
        2 : config section for this checker is not defined
    """
    if not crab.Config.has_section(self.secName):
        self.logging('\t未定義 {} Config\r\n'.format(self.secName))
        return 2
    url = 'http://{}/{}'.format(self.Sec['Site'], self.Sec['Page'])
    status = self.Sec['Site Status']  # site state from the previous run
    res = None  # pre-bind so `del res` below is safe even when MYSITE raised
    try:
        res = MYSITE(url)
        tempD = res.soup.select(
            self.Sec['Crab word'])[self.Sec.getint('Crab raw')]
        chkD = tempD.text
        self.Sec['Site Status'] = 'OK'
    except Exception:  # narrowed from bare except: still best-effort
        self.logging('\t網站連結失敗\r\n')
        self.Sec['Site Status'] = 'Error'
    finally:
        chkName = self.Sec['Name']
        if self.Sec['Site Status'] == 'Error':  # site down this run
            msgtxt = '{}網站服務異常'.format(self.secName)
            self.logging('\t{}\r\n'.format(msgtxt))
            del res
            return 1
        else:  # site reachable
            if status == 'Error':  # site recovered since last run
                msgtxt = '{}網站服務恢復'.format(self.secName)
                self.logging('\t{}\r\n'.format(msgtxt))
            DD = chkD
            keyVal = self.Sec['check word']
            if keyVal != DD:  # content changed -> record and notify
                msgtxt = '{}{}已更新'.format(chkName, DD)
                self.logging('\t{}\r\n'.format(msgtxt))
                self.Sec['check word'] = DD
                self.Sec['Last Update'] = self.chk_date
                self.notify(msgtxt)
            del res
            return 0
def check(self):
    """Search www.36dm.club for the configured keyword and notify when the
    newest search result carries a new release date.

    Returns:
        0    : check completed (update recorded or nothing new)
        2    : config section for this checker is not defined
        None : the site request / parsing failed (logged)
    """
    aheaders = {
        'Cookie':
        '__cfduid=daf56fbfbdd23f35e5d37891a102af8c11548041550; Hm_lvt_dfa59ae97c988b755b7dfc45bbf122ae=1569204591,1569206478; ftwwwacgsoucom=1; Hm_lpvt_dfa59ae97c988b755b7dfc45bbf122ae=1569206486',
        'Host': 'www.36dm.club',
        'Upgrade-Insecure-Requests': '1'
    }
    # selector for the first search result's date cell
    CW = 'table#listTable tr.alt1 td'
    if not crab.Config.has_section(self.secName):
        self.logging('\t未定義 {} Config\r\n'.format(self.secName))
        return 2
    # always use the search page; the old non-search branch was disabled,
    # so only the keyword parameter is ever sent
    url = 'https://www.36dm.club/' + 'search.php'
    sleep(15)  # throttle so parallel checkers do not hammer the site
    aparams = {'keyword': self.Sec['keyword']}
    Last_Update = self.Sec['Last Update']
    try:
        res = MYSITE(url, headers=aheaders, params=aparams)
        chkD = str(res.soup.select(CW)[0].text)
        if chkD == Last_Update:
            self.logging('\t自{0}後,{1} 未有更新\n'.format(
                chkD, self.Sec['Name']))
            return 0
        msg = '{0} 於 {1} 發佈更新\n'.format(self.Sec['Name'], chkD)
        self.Sec['Last Update'] = chkD
        self.notify(msg + '\n#影片發佈\n')
        self.logging('\t{}'.format(msg))
        return 0  # explicit success return, consistent with sibling checkers
    except Exception:
        self.logging('\t{}網站連結失敗\r\n'.format(self.secName))
def check(self):
    """Check a btdy movie page; on update, queue every listed file on the
    Synology Download Station and track progress in the local HQC DB.

    Returns:
        0 : site reachable (download bookkeeping or new tasks handled)
        1 : site request failed
        2 : config section for this checker is not defined
    """
    if not crab.Config.has_section(self.secName):
        self.logging('\t未定義 {} Config\r\n'.format(self.secName))
        return 2
    url = 'http://{}/{}'.format(self.Sec['Site'], self.Sec['Page'])
    # companion listing page: /vidlist/<id> derived from the btdy/dy page path
    url_ = 'http://{}/vidlist/{}'.format(
        self.Sec['Site'], self.Sec['Page'].split('btdy/dy')[1])
    status = self.Sec['Site Status']  # site state from the previous run
    res = None  # pre-bind so `del res` below is safe even when MYSITE raised
    try:
        res = MYSITE(url)
        tempD = res.soup.select(
            self.Sec['Crab word'])[self.Sec.getint('Crab raw')]
        chkD = tempD.text
        self.Sec['Site Status'] = 'OK'
    except Exception:  # narrowed from bare except: still best-effort
        self.logging('\t網站連結失敗\r\n')
        self.Sec['Site Status'] = 'Error'
    finally:
        chkName = self.Sec['Name']
        if self.Sec['Site Status'] == 'Error':  # site down this run
            msgtxt = '{}網站服務異常'.format(self.secName)
            self.logging('\t{}\r\n'.format(msgtxt))
            del res
            return 1
        else:  # site reachable
            if status == 'Error':  # site recovered since last run
                msgtxt = '{}網站服務恢復'.format(self.secName)
                self.logging('\t{}\r\n'.format(msgtxt))
            uri = self.Sec['Last Uri']
            DD = chkD
            keyVal = self.Sec['check word']
            LastUpdate = self.Sec['Last Update']
            with MyDS() as MDS:
                MDS.CONNECT(crab.dsm)
                if keyVal == DD:  # no page update
                    if self.Sec['DL Status'] == 'downloading':
                        # previous run left tasks downloading: poll them
                        if MDS.SID != '':  # Download Station reachable
                            crab.lock.acquire()
                            with HQCDB(crab.DBFile, self.secName) as tempDB:
                                tasks = tempDB.GetNotFinished()
                                if tasks != []:
                                    for task in tasks:
                                        info = MDS.GetInfo(task[0])
                                        if info['success']:
                                            DL_status = info['data']['tasks'][0]['status']
                                            DL_uri = info['data']['tasks'][0][
                                                'additional']['detail']['uri']
                                            if (task[1] != 'Down') and (task[1] != DL_status):
                                                # status changed -> record it
                                                tempDB.Update([task[0], DL_uri, DL_status])
                                        else:
                                            # task vanished on the NAS: mark done
                                            tempDB.UpdateStatus([task[0], 'Down'])
                                    # re-read after the updates above
                                    tasks = tempDB.GetNotFinished()
                                if tasks == []:  # everything finished
                                    self.Sec['DL Status'] = 'finished'
                                    msgtxt = '{}{} 已下載完成'.format(chkName, DD)
                                    for task in tempDB.GetTasks().keys():
                                        tempDB.UpdateStatus([task, 'Down'])
                                        MDS.Delete(task)
                                    msgtxt = ('\t{}\r\n'.format(msgtxt))
                                else:  # still downloading
                                    msgtxt = ('\t{}{} 還有{}個下載排程\r\n'.format(
                                        chkName, DD, len(tasks)))
                                del tempDB
                            crab.lock.release()
                            self.logging(msgtxt)
                        else:
                            self.logging('\t無法連結 NAS\r\n')
                    else:  # previous downloads already finished
                        self.logging('\t自 {} 後,{} 沒有更新\r\n'.format(
                            LastUpdate, chkName + DD))
                else:  # page updated: collect file links from the vidlist page
                    Files = []
                    res.get(url_)
                    tempf = res.soup.select('div.p_list li span a.d1')
                    tempt = res.soup.select('div.p_list li a.ico_1')
                    for i in range(len(tempf)):
                        Files.append([tempf[i]['href'], tempt[i]['title'].strip()])
                    if Files != []:
                        crab.lock.acquire()
                        with HQCDB(crab.DBFile, self.secName) as tempDB:
                            if keyVal == '':  # first run for this section
                                tempDB.ClearDB()
                            tempcount = 0
                            for File in Files:
                                uri = File[0]
                                title = File[1].strip()
                                if not tempDB.IsExistFile(uri):
                                    if MDS.AddTask(uri=uri,
                                                   des=self.Sec['destination']):
                                        tempcount += 1
                                        # newest task is last in the DS list
                                        temptask = MDS.List()['data']['tasks'][-1]
                                        tempDB.AddFile([uri, temptask['id'],
                                                        temptask['status'], title])
                            if tempcount > 0:
                                msgtxt = '{}{}已新增 {} 個下載排程'.format(
                                    chkName, DD, tempcount)
                                self.Sec['DL Status'] = 'downloading'
                            else:
                                msgtxt = '{}{}已更新,但無新增下載連結'.format(chkName, DD)
                            del tempDB
                        crab.lock.release()
                        self.logging('\t{}\r\n'.format(msgtxt))
                        self.Sec['check word'] = DD
                        self.Sec['Last Update'] = self.chk_date
                        self.Sec['Last Uri'] = uri
                        self.notify(msgtxt)
                    else:
                        msgtxt = '網頁服務異常'
                    del Files
            del res
            del MDS
            return 0
def check(self):
    """Check the gscq forum thread; on update, download every attached
    torrent, queue it on the Download Station and track it in the HQC DB.

    Returns:
        0 : site reachable (download bookkeeping or new tasks handled)
        1 : site request failed
        2 : config section for this checker is not defined
    """
    if not crab.Config.has_section(self.secName):
        self.logging('\t未定義 {} Config\r\n'.format(self.secName))
        return 2
    hd = {
        'Host': 'www.gscq.me',
        'Upgrade-Insecure-Requests': '1',
        'Cookie':
        'bbs_token=RVz5QEmUkfLAIfn8RPO4egxAtbgGslmQr0Kc0M17Uz1aLZtm; bbs_sid=s1lemcj23bsbhgg9tfbt705r92'
    }
    url = 'https://{}/{}'.format(self.Sec['Site'], self.Sec['Page'])
    status = self.Sec['Site Status']  # site state from the previous run
    try:
        with MYSITE(url, headers=hd) as res:
            tempD = res.soup.select(
                self.Sec['Crab word'])[self.Sec.getint('Crab raw')]
            chkD = str(tempD)
            # each <fieldset> holds the attachment list of one post
            FileSets = res.soup.find_all('fieldset')
            # keep session headers for the authenticated torrent downloads
            self.headers = res.headers.copy()
            uri = self.Sec['Last Uri']
        self.Sec['Site Status'] = 'OK'
    except Exception:  # narrowed from bare except: still best-effort
        self.logging('\t網站連結失敗\r\n')
        self.Sec['Site Status'] = 'Error'
    finally:
        chkName = self.Sec['Name']
        if self.Sec['Site Status'] == 'Error':  # site down this run
            try:
                msgtxt = '{}網站服務異常'.format(self.secName)
                self.logging('\t{}\r\n'.format(msgtxt))
                del res
            except Exception:
                pass  # `res` may be unbound when the request itself failed
            finally:
                return 1
        else:  # site reachable
            if status == 'Error':  # site recovered since last run
                msgtxt = '{}網站服務恢復'.format(self.secName)
                self.logging('\t{}\r\n'.format(msgtxt))
            # extract the delimited section used as the update marker
            SLD = self.Sec['Section Left Delimiter']
            SRD = self.Sec['Section Right Delimiter']
            tempDD = chkD.split(SLD)[self.Sec.getint('check Section')]
            DD = '{}{}{}'.format(SLD, tempDD.split(SRD)[0], SRD)
            keyVal = self.Sec['check word']
            LastUpdate = self.Sec['Last Update']
            with MyDS() as MDS:
                MDS.CONNECT(crab.dsm)
                if keyVal == DD:  # no update
                    if self.Sec['DL Status'] == 'downloading':
                        # previous run left tasks downloading: poll them
                        if MDS.SID != '':  # Download Station reachable
                            crab.lock.acquire()
                            with HQCDB(crab.DBFile, self.secName) as tempDB:
                                tasks = tempDB.GetNotFinished()
                                if tasks != []:
                                    for task in tasks:
                                        info = MDS.GetInfo(task[0])
                                        if info['success']:
                                            DL_status = info['data']['tasks'][0]['status']
                                            if (task[1] != 'Down') and (task[1] != DL_status):
                                                # status changed -> record it
                                                tempDB.UpdateStatus([task[0], DL_status])
                                        else:
                                            # task vanished on the NAS: mark done
                                            tempDB.UpdateStatus([task[0], 'Down'])
                                    # re-read after the updates above
                                    tasks = tempDB.GetNotFinished()
                                if tasks == []:  # everything finished
                                    self.Sec['DL Status'] = 'finished'
                                    msgtxt = '{}{} 已下載完成'.format(chkName, DD)
                                    for task in tempDB.GetTasks().keys():
                                        tempDB.UpdateStatus([task, 'Down'])
                                        MDS.Delete(task)
                                    msgtxt = ('\t{}\r\n'.format(msgtxt))
                                else:  # still downloading
                                    msgtxt = ('\t{}{} 還有{}個下載排程\r\n'.format(
                                        chkName, DD, len(tasks)))
                                del tempDB
                            crab.lock.release()
                            self.logging(msgtxt)
                        else:
                            self.logging('\t無法連結 NAS\r\n')
                    else:  # previous downloads already finished
                        self.logging('\t自 {} 後,{} 沒有更新\r\n'.format(
                            LastUpdate, chkName + DD))
                else:  # thread updated
                    if FileSets != []:
                        # re-parse the first <fieldset> for the attachments
                        FileSet = str(FileSets[0])
                        soupHref = BeautifulSoup(FileSet, 'html.parser')
                        # pick <ul class='attachlist'> <li> <a>
                        Files = soupHref.select('ul.attachlist li a')
                        crab.lock.acquire()
                        try:
                            tempcount = 0  # how many tasks were queued
                            with HQCDB(crab.DBFile, self.secName) as tempDB:
                                if keyVal == '':  # first run for this section
                                    tempDB.ClearDB()
                                for File in Files:
                                    uri = 'https://{}/{}'.format(
                                        self.Sec['Site'], File['href'])
                                    title = File.text.strip()
                                    if not tempDB.IsExistFile(uri):
                                        # build local torrent file names
                                        if crab.plat_Linux:
                                            torrentPath = '/volume1/{}/{}'.format(
                                                self.Sec['destination'], 'Torrent')
                                            if not os.path.exists(torrentPath):
                                                os.makedirs(torrentPath)
                                            tfn = '{}/{}'.format(torrentPath, title)
                                            fn = '{}/{}.{}'.format(
                                                torrentPath, 'HQC', title.split('.')[-1])
                                        else:
                                            try:
                                                torrentPath = '{}\\{}'.format(
                                                    crab.Config['DEBUG']['Windows Work Path'],
                                                    'Torrent')
                                            except Exception:
                                                torrentPath = 'C:\\MyCoding\\D_Check\\Torrent'
                                            if not os.path.exists(torrentPath):
                                                os.makedirs(torrentPath)
                                            tfn = '{}\\{}'.format(torrentPath, title)
                                            fn = '{}\\{}.{}'.format(
                                                torrentPath, 'HQC', title.split('.')[-1])
                                        # fetch the torrent and save two copies
                                        # (archival name and fixed 'HQC' name)
                                        try:
                                            res1 = requests.get(uri,
                                                                headers=self.headers,
                                                                verify=False,
                                                                timeout=15)
                                            with open(tfn, 'wb') as ff:
                                                ff.write(res1._content)
                                            with open(fn, 'wb') as ff:
                                                ff.write(res1._content)
                                        except Exception as e:
                                            print('torrent get Exception:', e)
                                        # (removed `del ff`: it raised NameError
                                        # whenever the download failed before
                                        # the first open() succeeded)
                                        # queue the saved torrent file
                                        if MDS.AddTask(file=fn,
                                                       des=self.Sec['destination']):
                                            tempcount += 1
                                            temptask = MDS.List()['data']['tasks'][-1]
                                            tempDB.AddFile([uri, temptask['id'],
                                                            temptask['status'], title])
                                msgtxt = '{}{}已新增 {} 個下載排程'.format(
                                    chkName, DD, tempcount)
                                del tempDB
                        except Exception as e:
                            print(e)
                        crab.lock.release()
                        self.logging('\t{}\r\n'.format(msgtxt))
                        if tempcount > 0:
                            self.Sec['check word'] = DD
                            self.Sec['Last Update'] = self.chk_date
                            self.Sec['Last Uri'] = uri
                            self.Sec['DL Status'] = 'downloading'
                            self.notify(msgtxt)
                    else:
                        msgtxt = '網頁服務異常'
            return 0
def check(self):
    '''Check a video page for new episode links and append them to a DPL
    playlist (optionally queueing them on the Download Station).

    return 0 : check completed normally
    return 1 : site request failed (documented; not produced here)
    return 2 : Config section not defined

    Config keys:
        Site / Page / Name : site, page and display name
        Crab word          : CSS selector for the link elements
        check word         : episode marker from the previous run
        Last Update        : date of the previous update
        Last Uri           : last link already processed
        Generate TXT       : whether to emit a plain-text link file
        DL Path            : Download Station destination (optional)
    '''
    if not crab.Config.has_section(self.secName):
        self.logging('\t未定義 {} Config\r\n'.format(self.secName))
        return 2
    res = MYSITE(self.url)
    # episode marker shown in the page header
    ep = res.soup.select('div.vodh span')[0].text
    if self.Sec['keyword'] not in res.soup.select('div.vodh h2')[0].text:
        self.logging("{}\t找不到資源\n".format(self.chk_date))
        return 0
    if ep == self.Sec['check word']:
        self.logging("\t{},沒有更新\n".format(self.Sec['Name']))
        return 0
    # every stream uri listed on the page, in page order
    _uris = [x['value'] for x in res.soup.select(self.Sec['Crab word'])]
    # resume right after the last uri we already processed
    _indexNew = (_uris.index(self.Sec['Last Uri']) +
                 1) if self.Sec['Last Uri'] in _uris else 0
    if _indexNew < len(_uris):
        with DPLfile(self.Sec['destination']) as DPL:

            def _append(x):
                # write to both the playlist and the dated OK-log playlist;
                # try/finally guarantees the shared lock is always released
                crab.lock.acquire()
                try:
                    DPL.append(x)
                    _DPL = DPLfile(crab.Config['LOG']['OK Log File'].format(
                        self.chk_date.split('(')[0]))
                    _DPL.append(x)
                    del _DPL
                finally:
                    crab.lock.release()

            # append each new link unless it is already in the playlist
            for x in _uris[_indexNew:]:
                if not DPL.isexist(x):
                    _append(x)
                else:
                    DPL.bypass()
            if self.GenTXT:
                DPL.toTXT()
                self.GenTXT = 'no'
            msg = '{},已增加連結'.format(self.Sec['Name'])
            msgs = [
                '[新連結{}]({})'.format(i + 1, x)
                for (i, x) in enumerate(_uris[_indexNew:])
            ]
            self.Sec['check word'] = ep
            self.Sec['Last Update'] = self.chk_date
            self.Sec['Last Uri'] = _uris[-1]
            self.logging('\t' + msg + '\n')
            self.notify(msg)
            if self.ShowLink:
                for x in msgs:
                    self.notify(x)
            # optionally hand the new links to the Download Station
            if 'DL Path' in self.Sec and self.Sec['DL Path'] != '':
                with MyDS() as MDS:
                    MDS.CONNECT(crab.dsm)
                    for x in _uris[_indexNew:]:
                        MDS.AddTask(uri=x, des=self.Sec['DL Path'])
    else:
        self.logging("\t{}沒有更新\n".format(self.Sec['Name']))
    return 0
def check(self):
    '''Search 36dm for the configured keyword; on a new release, fetch its
    magnet/torrent and queue one task on the Download Station, tracking it
    via a single DL ID in the config and the HQC DB.

    return 0 : 網站服務正常
    return 1 : 網站服務異常
    return 2 : Config 未設定

    Config keys: Site, Page, Name, Crab word, Crab raw,
        Section Left/Right Delimiter, check Section, check word,
        Last Update, Site Status, Last Uri, DL ID, DL Status
    '''
    aheaders = {
        'Cookie':
        '__cfduid=daf56fbfbdd23f35e5d37891a102af8c11548041550; Hm_lvt_dfa59ae97c988b755b7dfc45bbf122ae=1569204591,1569206478; ftwwwacgsoucom=1; Hm_lpvt_dfa59ae97c988b755b7dfc45bbf122ae=1569206486',
        'Host': 'www.36dm.club',
        'Upgrade-Insecure-Requests': '1'
    }
    if not crab.Config.has_section(self.secName):
        self.logging('\t未定義 {} Config\r\n'.format(self.secName))
        return 2
    # always use the search page; the old non-search branch was disabled,
    # so only the keyword parameter is ever sent
    urla = 'https://{}/'.format(self.Sec['Site']) + 'search.php'
    sleep(15)  # throttle so parallel checkers do not hammer the site
    aparams = {'keyword': self.Sec['keyword']}
    status = self.Sec['Site Status']  # site state from the previous run
    res = None  # pre-bind so `del res` below is safe even when MYSITE raised
    try:
        res = MYSITE(urla, headers=aheaders, params=aparams)
        tempD = res.soup.select(
            self.Sec['Crab word'])[self.Sec.getint('Crab raw')]
        chkD = str(tempD.text)
        uri = self.Sec['Last Uri']
        self.Sec['Site Status'] = 'OK'
    except Exception:
        self.logging('\t{}網站連結失敗\r\n'.format(self.secName))
        self.Sec['Site Status'] = 'Error'
    finally:
        chkName = self.Sec['Name']
        if self.Sec['Site Status'] == 'Error':  # site down this run
            msgtxt = '{}網站服務異常'.format(self.secName)
            self.logging('\t{}\r\n'.format(msgtxt))
            del res
            return 1
        else:  # site reachable
            if status == 'Error':  # site recovered since last run
                msgtxt = '{}網站服務恢復'.format(self.secName)
                self.logging('\t{}\r\n'.format(msgtxt))
            # extract the delimited section used as the update marker
            SLD = self.Sec['Section Left Delimiter']
            SRD = self.Sec['Section Right Delimiter']
            tempDD = chkD.split(SLD)[self.Sec.getint('check Section')]
            DD = '{}{}{}'.format(SLD, tempDD.split(SRD)[0], SRD)
            keyVal = self.Sec['check word']
            LastUpdate = self.Sec['Last Update']
            with MyDS() as MDS:
                MDS.CONNECT(crab.dsm)
                if keyVal == DD:  # no update
                    if self.Sec['DL Status'] == 'downloading':
                        # previous run left a task downloading: poll it
                        if MDS.SID != '':  # Download Station reachable
                            TK_id = self.Sec['DL ID']
                            DL_Info = MDS.GetInfo(TK_id)
                            if DL_Info['success']:  # task found on the NAS
                                if DL_Info['data']['tasks'][0]['status'] == 'finished':
                                    TK_Info = DL_Info['data']['tasks'][0]
                                    TK_status = TK_Info['status']
                                    TK_title = TK_Info['title'].strip()
                                    TK_uri = TK_Info['additional']['detail']['uri']
                                    crab.lock.acquire()
                                    with HQCDB(crab.DBFile, self.secName) as tempDB:
                                        if tempDB.IsExistTask(TK_id):
                                            tempDB.Update([TK_id, TK_uri,
                                                           TK_status, TK_title])
                                        del tempDB
                                    crab.lock.release()
                                    del TK_Info
                                    msgtxt = '{}{} 已下載完成'.format(chkName, DD)
                                    MDS.Delete(self.Sec['DL ID'])
                                    self.Sec['DL ID'] = ''
                                    self.Sec['DL Status'] = 'finished'
                                    self.logging('\t{}\r\n'.format(msgtxt))
                                else:  # still downloading
                                    self.logging('\t{}{} 下載中\r\n'.format(
                                        chkName, DD))
                            else:  # task was removed manually on the NAS
                                crab.lock.acquire()
                                with HQCDB(crab.DBFile, self.secName) as tempDB:
                                    if tempDB.IsExistTask(TK_id):
                                        tempDB.DelTask(TK_id)
                                    del tempDB
                                crab.lock.release()
                                self.Sec['DL ID'] = ''
                                self.Sec['DL Status'] = ''
                                self.logging('\t{}{} NAS 任務已被手動移除\r\n'.format(
                                    chkName, DD))
                            del DL_Info
                        else:
                            self.logging('\t無法連結 NAS\r\n')
                    else:  # previous download already finished
                        self.logging('\t自 {} 後,{} 沒有更新\r\n'.format(
                            LastUpdate, chkName + DD))
                else:  # new release: fetch magnet + torrent
                    try:
                        title = chkD.strip() + '.torrent'
                        res.get('http://{}/{}'.format(self.Sec['Site'],
                                                      tempD['href']))
                        # pick <a id='magnet'> and <a id='download'>
                        uri = res.soup.select('a#magnet')[0]['href']
                        tUri = res.soup.select('a#download')[0]['href']
                        torrentURI = 'http://{}/{}'.format(self.Sec['Site'], tUri)
                        # build local torrent file names
                        if crab.plat_Linux:
                            torrentPath = '/volume1/{}/{}'.format(
                                self.Sec['destination'], 'Torrent')
                            if not os.path.exists(torrentPath):
                                os.makedirs(torrentPath)
                            tfn = '{}/{}'.format(torrentPath, title)
                            fn = '{}/{}.{}'.format(torrentPath, 'HQC',
                                                   title.split('.')[-1])
                        else:
                            try:
                                torrentPath = '{}\\{}'.format(
                                    crab.Config['DEBUG']['Windows Work Path'],
                                    'Torrent')
                            except Exception:
                                torrentPath = 'C:\\MyCoding\\D_Check\\Torrent'
                            if not os.path.exists(torrentPath):
                                os.makedirs(torrentPath)
                            tfn = '{}\\{}'.format(torrentPath, title)
                            fn = '{}\\{}.{}'.format(torrentPath, 'HQC',
                                                    title.split('.')[-1])
                        # fetch the torrent and save two copies
                        # (archival name and fixed 'HQC' name)
                        try:
                            res1 = requests.get(torrentURI,
                                                headers=aheaders,
                                                timeout=15)
                            with open(tfn, 'wb') as ff:
                                ff.write(res1._content)
                            with open(fn, 'wb') as ff:
                                ff.write(res1._content)
                        except Exception as e:
                            print('torrent get Exception:', e)
                        # (removed `del ff`: it raised NameError whenever the
                        # download failed before the first open() succeeded)
                    except Exception:
                        self.logging('\t{}無法取得磁力鏈結\r\n'.format(self.secName))
                    self.Sec['check word'] = DD
                    self.Sec['Last Update'] = self.chk_date
                    msgtxt = '{}{} 發佈'.format(chkName, DD)
                    if (uri != self.Sec['Last Uri']) and (MDS.SID != '') and (uri != ''):
                        # Download Station reachable and the magnet is new
                        if MDS.AddTask(uri=uri, des=self.Sec['destination']):
                            msgtxt = '{},已加入下載排程'.format(msgtxt)
                            # newest task is last in the DS list
                            TK_Info = MDS.List()['data']['tasks'][-1]
                            TK_status = TK_Info['status']
                            TK_id = TK_Info['id']
                            TK_title = tempD.text.strip()
                            TK_uri = TK_Info['additional']['detail']['uri']
                            crab.lock.acquire()
                            with HQCDB(crab.DBFile, self.secName) as tempDB:
                                tempDB.AddFile([TK_uri, TK_id, TK_status, TK_title])
                                del tempDB
                            crab.lock.release()
                            del TK_Info
                            self.Sec['Last Uri'] = uri
                            self.Sec['DL ID'] = MDS.List()['data']['tasks'][-1]['id']
                            self.Sec['DL Status'] = 'downloading'
                    self.notify(msgtxt + '\n#影片發佈')
                    self.logging('\t{}\r\n'.format(msgtxt))
            del res
            return 0
def check(self):
    '''Check the ucppweb page for a new document version; follow its
    redirect link and notify when the short link changed.

    return 0 : 程式正常完成
    return 1 : 網站服務異常
    return 2 : Config 未設定

    Config keys:
        Url         : page to crawl
        update date : date of the last recorded update
        version     : last recorded document version
        S_Link      : last recorded redirect target
        Site Status : site state at the previous check
    '''
    if not crab.Config.has_section(self.secName):
        self.logging('\t未定義 {} Config\r\n'.format(self.secName))
        return 2
    temp_hd = {
        'Accept': 'text/html,application/xhtml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-TW,zh;q=0.8,en-US;q=0.5,en;q=0.3',
        'Connection': 'keep-alive',
        'Cookie': 'ASPSESSIONIDSCQSTABC=LIFMIBDAKCPBIMNMLEMMPHHB',
        "Host": 'www.ucppweb.com',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent':
        'Mozilla/5.0 (Windows NT 10.0; ) Gecko/20100101 Firefox/65.0'
    }
    status = self.Sec['Site Status']  # site state from the previous run
    try:
        with MYSITE(self.Sec['Url']) as DD:
            # version text lives in the second cell of the nested table
            ver = DD.soup.select(
                'table#ProceM1_dalist1_ctl00_dalist2 tr td tr td')[1].text
            tempURL = DD.soup.select(
                'table#ProceM1_dalist1_ctl00_dalist2 tr td tr td a')[0]['href']
            # do not follow the redirect: the Location header IS the payload
            with requests.get(tempURL, headers=temp_hd,
                              allow_redirects=False, timeout=10) as temp_res:
                S_Link = str(temp_res.headers['Location'])
        self.Sec['Site Status'] = 'OK'
    except Exception:  # narrowed from bare except: still best-effort
        self.Sec['Site Status'] = 'Error'
    finally:
        if self.Sec['Site Status'] == 'Error':  # site down this run
            self.logging('\t{}網站服務異常\r\n'.format(self.secName))
            if status == 'OK':  # only notify on the OK -> Error transition
                self.notify('{} 網站服務異常'.format(self.secName))
            return 1
        else:  # site reachable
            if status == 'Error':  # site recovered since last run
                self.logging('\t{}網站服務恢復\r\n'.format(self.secName))
                self.notify('{} 網站服務恢復'.format(self.secName))
            DocName = ver
            update_in_DB = self.Sec['update date']
            Doc_in_DB = self.Sec['version']
            link_in_DB = self.Sec['S_Link']
            if S_Link == link_in_DB:  # redirect unchanged: no new release
                self.logging('\t自 {} 後,大D沒有更新\r\n'.format(update_in_DB))
            else:
                # same version but new link means a sub-edition release
                if Doc_in_DB == DocName:
                    tempDoc = '子版大D'
                else:
                    tempDoc = DocName
                self.Sec['update date'] = self.chk_date
                self.Sec['version'] = DocName
                self.Sec['S_Link'] = S_Link
                msgtxt = '{} 發佈'.format(tempDoc)
                self.logging('\t{}\r\n'.format(msgtxt))
                self.notify(msgtxt)
            return 0