def __init__(self, mailhost, fromaddr, toaddrs, subject, credentials=None):
    """SMTP log handler that keeps a bounded queue of undelivered messages.

    Delegates standard SMTP handler setup to logging.handlers.SMTPHandler.
    """
    logging.handlers.SMTPHandler.__init__(self, mailhost, fromaddr, toaddrs, subject, credentials)
    self.dq_size = 2  # TODO make parameter: capacity of the undelivered queue
    # Bounded FIFO of messages that failed to send; once full, the oldest
    # entry is silently dropped by the deque itself.
    self.notDelivered = collections.deque(maxlen=self.dq_size)
    # Counter of messages pushed out of the full queue and lost for good.
    self.lostForever = 0
    self.myMailer = SendMail()  # helper that performs the actual SMTP send
def run(self):
    """Fetch the hnjyt list page, record new articles and mail matches.

    Returns False when the list page cannot be fetched; otherwise None.
    Side effects: persists each new item via self.saveOne and sends mail.
    """
    response = requests.get(self.url, headers=self.headers)
    if not response.status_code == 200:
        print('请求失败,地址有误' + self.url)
        return False
    print('请求地址:' + self.url)
    response.encoding = 'utf-8'
    self.html = response.text
    soup = BeautifulSoup(self.html, 'html.parser')
    urls = soup.select('.list ul li')
    print(len(urls))
    for item in urls:
        pushtime = item.span.get_text()
        title = item.a.get_text()
        url_c = item.a['href']
        # Fix: the store was queried twice per item (once for the print,
        # once for the test); query once and reuse the result.
        seen = self.getOne(pushtime, 'hnjyt')
        print(seen)
        if seen > 0:
            print(title + '已存在')
            continue
        self.saveOne(pushtime, 'hnjyt', title, url_c, '')
        r = requests.get(url_c)
        r.encoding = 'utf-8'
        self.html = r.text
        # Parse the article detail page.
        soup_c = BeautifulSoup(self.html, 'html.parser')
        articetext = soup_c.select('.article')[0].get_text()  # article body text
        # NOTE(review): the keyword filter is disabled — every article counts
        # as a match and is mailed. Restore the re.search call to filter.
        matchFlag = True  # re.search(u'辅导员|化学', articetext)
        if matchFlag:
            print(pushtime + '|' + title + ':匹配到了')
            SendMail.mail(SendMail(), title, url_c + '\n\t' + articetext)
        else:
            print('文章:' + title + '未匹配到')
def run(self):
    """Download up to three list entries, then crawl their child pages and
    mail any article whose body matches the keyword filter.

    Returns False when the summary page cannot be fetched; otherwise None.
    Relies on self.download() leaving the fetched markup in self.html.
    """
    #pageNo = input('输入页数:')
    pageNo = '1'
    if not pageNo:
        pageNo = 'index_1.html'
    else:
        pageNo = 'index_' + pageNo + '.html'
    response = requests.get(self.url + pageNo, headers=self.headers)
    if not response.status_code == 200:
        print('请求失败,地址有误' + self.url + pageNo)
        return False
    print('请求地址:' + self.url + pageNo)
    self.download(self.url + pageNo, pageNo)
    soup = BeautifulSoup(self.html, 'html.parser')
    urls = soup.select('.list_b_info.right')
    urlcount = 0
    for item in urls:
        if (urlcount > 2):  # cap: download at most three list entries
            break
        else:
            urlcount = urlcount + 1
        print('当前下载数量' + str(urlcount))
        dir_name = item.h2.a['title']  # renamed: do not shadow builtin dir()
        url_c = item.h2.a['href']
        if not os.path.exists(dir_name):
            os.makedirs(dir_name)
        fileName = dir_name + '/' + item.h2.a['title'] + '.html'
        self.download(item.h2.a['href'], fileName)
        # Parse the list page just downloaded into self.html.
        soup_c = BeautifulSoup(self.html, 'html.parser', from_encoding="gb18030")
        urls_c = soup_c.select('.article_body p a')
        count = 0
        for item_c in urls_c:
            if item_c.span:
                count = count + 1
                url_t = item_c['href']
                name = item_c.get_text()
                childFileName = dir_name + '/' + str(count) + self.replaceName(name) + '.html'
                if os.path.exists(childFileName):
                    print(childFileName + '文件已存在')
                    continue
                else:
                    self.download(url_t, childFileName)
                    soup_s = BeautifulSoup(self.html, 'html.parser', from_encoding="gb18030")
                    try:
                        articetextBody = soup_s.select('.article_body')
                        if not articetextBody:
                            articetextBody = soup_s.select('.detail-content')
                        articetext = articetextBody[0].get_text()
                        # Fix: the original called .decode('utf8') on the text
                        # returned by get_text(), which raises (AttributeError
                        # on py3, UnicodeEncodeError on py2 for CJK text) and
                        # was silently masked by the except below, so no
                        # article ever matched. Search the text directly.
                        matchFlag = re.search(u'辅导员|化学', articetext)
                        if matchFlag:
                            SendMail.mail(SendMail(), name, url_t + '\n\t' + articetext)
                        else:
                            print('文章 名称:' + name + '未匹配到')
                    except Exception:
                        print(childFileName + ' 解析内容失败')
def getNBCNews():
    """Scrape the NBC tech-media index, then mail each article's text and
    lead image (downloaded into a per-day folder named YYYY-MM-DD)."""
    now = int(time.time())
    timeArray = time.localtime(now)
    Ymd = time.strftime('%Y-%m-%d', timeArray)
    if Ymd not in os.listdir():
        os.mkdir(Ymd)  # one folder per day for downloaded images
    base_url = 'https://www.nbcnews.com'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.143 Safari/537.36'
    }
    result = getHtml(base_url + '/tech-media', headers)
    soup = result[1]
    newslist = soup.findAll('article')
    news_url = []
    for news in newslist:
        # Reuse the tag from the first lookup instead of running find('a')
        # twice per card; also drops the unused index counter.
        link = news.find('a')
        if link:
            news_url.append(base_url + link.get('href'))
    for new_url in news_url:
        print(new_url)
        try:
            result = getHtml(new_url, headers)
            soup = result[1]
            article = soup.find('div', class_='article')
            if not article:
                continue
            img_url = article.find('picture')
            imgpath = None
            if img_url:
                img_url = img_url.find('img').get('src')
                img_name = new_url[new_url.rfind('/') + 1:] + '.jpg'
                imgpath = os.path.join(Ymd, img_name)
                downloadImg(img_url, imgpath)
            text = article.get_text()
            dst = ''  # translation currently disabled
            SendMail.mail(SendMail, img_url, text + '\n' + dst, imgpath)
        except Exception:
            # Fix: the bare except also swallowed KeyboardInterrupt/SystemExit.
            traceback.print_exc()
            print('解析:', new_url, '失败')
class App():
    """Watches configured sites for content changes and mails subscribed contacts."""
    config = object
    mail = object
    debug = False

    def __init__(self):
        self.config = ParseConfig()
        self.mail = SendMail(self.config.getMailConfig())
        self.debug = self.config.getGeneral()['debug']

    def run(self):
        """Check every configured site; on a hash change, persist the new
        hash and notify every contact subscribed to that site."""
        if self.debug:
            print("Running in Debug Mode")
        sites = self.config.getSites()  # hoisted: one lookup instead of one per access
        for site in sites:
            try:
                regex = sites[site]['regex']
            except KeyError:
                regex = None  # regex is optional per site
            url = sites[site]['url']
            obj = TmpFileHandler(site)
            fetcher = FetchSite(url)
            if regex is not None:
                if self.debug:
                    print("Using Regex for " + site)
                fetcher.useRegex(regex)
                if self.debug:
                    print("Regex Result (first 100 chars): " + fetcher.getData()[0:100])
            # Fix: `is 'init'` compared object identity with a string literal,
            # which depends on interpreter interning; use equality for the
            # 'init' sentinel instead.
            if obj.getHash() == 'init':
                if self.debug:
                    print("First Time fetching " + site)
                obj.setHash(fetcher.getHash())
            if obj.getHash() != fetcher.getHash():
                if self.debug:
                    print(
                        "Hash from File: %s \nHash from Site: %s" %
                        (obj.getHash(), fetcher.getHash())
                    )
                # Send mail and store the new hash.
                obj.setHash(fetcher.getHash())
                contacts = self.config.getContacts()
                for contact in contacts:
                    to = contacts[contact]['email']
                    if site in contacts[contact]['sites']:
                        self.mail.sendMail(to, url, site)
def send(self):
    """Send the program-update notification mail when the state flag is '1'."""
    # Any state other than '1' means nothing should be sent.
    if self.state != '1':
        return
    CreateHtml(self.filename, self.platform, self.program, self.date)
    if self.platform == 'SinaShow':
        Title = u'[重要通知]-[程序更新]-[SHOW平台程序更新]-[%s]' % self.date
    else:
        Title = u'[重要通知]-[程序更新]-[疯播平台程序更新]-[%s]' % self.date
    mailer = SendMail(self.platform, self.program, self.group, self.date,
                      title=Title, file=self.filename)
    mailer.send_mail()
def actionListener_continuer(self):
    """Copy the form fields into the mail object, close the dialog and send.

    Reads sender ("expediteur"), subject ("objet") and body from the Tk
    widgets, then hands the populated mail object to SendMail.
    """
    self.mail.setExpediteur(self.expediteur.get())  # sender field
    self.mail.setObjet(self.objet.get())  # subject field
    self.mail.setMessage(self.message.get('1.0', END))  # whole Text widget body
    self.destroy()  # close the window before sending
    SendMail(self.mail)
    #Mailer(Mail()).mainloop()
def run():
    """Process the 163 entertainment feed: skip already-seen URLs, rewrite
    each new article via the NLP pipeline and mail the result with files."""
    newslist = getNewList('http://ent.163.com/special/00032IAD/ent_json.js')
    for url in newslist:
        try:
            r = synonym.getByUrl(url)
            if r is not None:  # non-None means the URL was already processed
                continue
            news = parseUrl(url)
            text = bdnlp.nplParse(news[1])
            synonym.downloadText(text, news[0] + '/dest.txt', 'utf-8')
            files = news[2]
            files.append(news[0] + '/dest.txt')
            SendMail.mail(SendMail, news[0], news[1] + '\n' + text, files)
        except Exception:
            # Fix: the bare except also swallowed KeyboardInterrupt/SystemExit;
            # keep the per-URL best-effort behaviour but only for real errors.
            traceback.print_exc()
def run(self):
    """Crawl the summary page, download every list entry and its child
    articles, and mail articles whose body matches the keyword filter.

    Returns False when the summary page cannot be fetched; otherwise None.
    NOTE(review): self.download() appears to leave the fetched markup in
    self.html, which each BeautifulSoup call below then parses — confirm
    against the download() implementation.
    """
    #pageNo = input('输入页数:')
    pageNo = '1'
    if not pageNo:
        pageNo = 'index_1.html'
    else:
        pageNo = 'index_'+pageNo+'.html'
    response = requests.get(self.url + pageNo, headers=self.headers)
    if not response.status_code == 200:
        print('请求失败,地址有误'+self.url + pageNo)
        return False
    print('请求地址:'+self.url + pageNo)
    self.download(self.url + pageNo, pageNo)
    # Parse the summary page.
    soup = BeautifulSoup(self.html,'html.parser')
    urls = soup.select('.list_b_info.right')
    for item in urls:
        dir = item.h2.a['title']  # NOTE(review): shadows the builtin dir()
        url_c = item.h2.a['href']
        if not os.path.exists(dir):
            os.makedirs(dir)
        self.download(item.h2.a['href'],dir +'/'+item.h2.a['title']+'.html')
        # Parse the list page just downloaded.
        soup_c = BeautifulSoup(self.html,'html.parser', from_encoding="gb18030")
        urls_c = soup_c.select('.article_body p a')
        count = 0
        for item_c in urls_c:
            if item_c.span:  # only links that wrap a <span> are articles here
                count = count + 1
                url_t = item_c['href']
                name = item_c.get_text()
                self.download(url_t,dir +'/'+ str(count) +self.replaceName(name) +'.html')
                soup_s = BeautifulSoup(self.html,'html.parser', from_encoding="gb18030")
                articetext = soup_s.select('.article_left.border')[0].get_text()
                # Keyword filter: mail the article only when it matches.
                matchFlag = re.search('辅导员|化学',articetext)
                if matchFlag:
                    SendMail.mail(SendMail,name,url_t+ '\t\n' +articetext)
                else:
                    print('文章 名称:' + name + '未匹配到')
def run():
    """Process today's Sohu entertainment feed: skip photo galleries and
    already-seen URLs, rewrite each article and mail the result."""
    newslist = getNewList('http://yule.sohu.com/_scroll_newslist/%s/news.inc' % (getToday()))
    for url in newslist:
        try:
            if url.find('picture') > 0:  # photo-gallery pages — skip (or url.find('music') > 0)
                continue
            r = synonym.getByUrl(url)
            if r is not None:  # non-None means the URL was already processed
                continue
            news = parseUrl(url)
            text = bdnlp.nplParse(news[1])
            synonym.downloadText(text, news[0] + '/dest.txt', 'utf-8')
            files = news[2]
            files.append(news[0] + '/dest.txt')
            SendMail.mail(SendMail, news[0], news[1] + '\n' + text, files)
        except Exception:
            # Fix: the bare except also swallowed KeyboardInterrupt/SystemExit;
            # keep the per-URL best-effort behaviour but only for real errors.
            traceback.print_exc()
def run(cat):
    """Process one CRI entertainment category feed (appended to the roll URL):
    skip galleries and seen URLs, rewrite each article and mail the result."""
    try:
        newslist = getNewList('http://ent.cri.cn/roll/' + cat)
        for url in newslist:
            try:
                if url.find('picture') > 0:  # photo-gallery pages — skip
                    continue
                r = synonym.getByUrl(url)
                if r is not None:  # non-None means the URL was already processed
                    continue
                news = parseUrl(url)
                text = bdnlp.nplParse(news[1])
                synonym.downloadText(text, news[0] + '/dest.txt', 'utf-8')
                files = news[2]
                files.append(news[0] + '/dest.txt')
                SendMail.mail(SendMail, news[0], news[1] + '\n' + text, files)
            except Exception:
                # Fix: bare except also caught KeyboardInterrupt/SystemExit.
                traceback.print_exc()
            #break
    except Exception:
        # Fix: the outer bare `except: pass` hid every failure (including a
        # bad category URL) with no trace; stay best-effort but log it.
        traceback.print_exc()
def run(self):
    """Scan every configured target list page, record new articles and mail
    those whose body matches the keyword filter.

    Returns False on the first target whose page cannot be fetched.
    """
    for target in self.targets:
        self.url_addr = self.url + target
        response = requests.get(self.url_addr, headers=self.headers)
        if not response.status_code == 200:
            print('请求失败,地址有误' + self.url_addr)
            return False
        response.encoding = 'utf-8'
        self.html = response.text
        soup = BeautifulSoup(self.html, 'html.parser')
        urls = soup.select('.main ul li')
        print(len(urls))
        for item in urls:
            pushtime = item.span.get_text()
            title = item.a.get_text()
            url_c = item.a['href']
            # Fix: the store was queried twice per item (once for the print,
            # once for the test); query once and reuse the result.
            seen = self.getOne(pushtime, target)
            print(seen)
            if seen > 0:
                print(title + '已发送')
                continue
            self.saveOne(pushtime, target, title, url_c, '')
            response = requests.get(url_c, headers=self.headers)
            response.encoding = 'utf-8'
            self.html = response.text
            # Parse the article detail page.
            soup_c = BeautifulSoup(self.html, 'html.parser')
            articetext = soup_c.select('.main')[0].get_text()  # article body text
            matchFlag = re.search(u'辅导员|化学|长垣', articetext)
            if matchFlag:
                print(pushtime + '|' + title + ':匹配到了')
                SendMail.mail(SendMail(), title, url_c + '\n\t' + articetext)
            else:
                print('文章:' + title + '未匹配到')
def sendmailfromserver(x, col):
    """Mail an alert for record *x*, attaching its picture when one exists.

    x: mapping with at least 'idrasp' and optionally a base64 'picture'.
    col: passed through to SendMail unchanged.
    """
    mail1 = SendMail(user=user, password=password, receiver=receiver_email, id=x["idrasp"], col=col)
    mail1.setmessage('content.txt')
    keysx = x.keys()
    # Fix: attach_bytes was called unconditionally, which raised NameError
    # ('unpickled' never bound) whenever the record carried no picture.
    # Decode and attach only when the picture is present.
    if "picture" in keysx:
        ser_pic = x["picture"]
        unpickled = base64.b64decode(ser_pic)
        mail1.attach_bytes(unpickled, 'unknown.png')
    mail1.start()
def addperson2db(col, emb, rgb, rec=False, lh=0):
    """Persist a new person record and, when unrecognized, mail a snapshot.

    col: collection/column handed to SendMail.
    emb: serialized embedding stored as seralize_pic.
    rgb: image array; stringified into the record and rendered for the mail.
    rec: whether the person was recognized (no mail is sent when True).
    lh:  recognition likelihood.
    Returns the generated sha256 id of the record.
    """
    idh = hashlib.sha256(str(time.time()).encode()).hexdigest()
    # Fix: utcnow was passed as the function object instead of being called,
    # so last_in did not receive a timestamp value.
    PersonRasp(idrasp=idh, last_in=dt.datetime.utcnow(), is_recognized=rec,
               seralize_pic=emb, picture=str(rgb), likelihood=lh).save()
    if rec == False:
        mail1 = SendMail(user=user, password=password, receiver=receiver_email, id=idh, col=col)
        mail1.setmessage('content.txt')
        buffer = io.BytesIO()
        plt.imsave(buffer, rgb)  # render the image into an in-memory PNG
        mail1.attach_bytes(buffer.getbuffer(), 'unknown.png')
        mail1.start()
    return idh
def main():
    """Camera-trap loop (Python 2): watch a PIR sensor on GPIO pin 4 and,
    on motion, push a notification, capture a photo and e-mail it."""
    try:
        sensor = 4  # BCM pin number of the motion sensor
        GPIO.setmode(GPIO.BCM)
        GPIO.setup(sensor, GPIO.IN, GPIO.PUD_DOWN)  # pull-down: idle reads LOW
        previous_state = False
        current_state = False
        cam = picamera.PiCamera()
        # NOTE(review): hard-coded Pushetta credentials; move to config.
        API_KEY="e224478433d166114af5e762433790fb5f5921a5"
        CHANNEL_NAME="CzechBery"
        p=Pushetta(API_KEY)
        print "...Camera trap is ready... "
        while True:
            time.sleep(0.1)  # poll the sensor ten times a second
            previous_state = current_state
            current_state = GPIO.input(sensor)
            if current_state != previous_state:  # react only on edges
                new_state = "HIGH" if current_state else "LOW"
                print("GPIO pin %s is %s" % (sensor, new_state))
                if current_state:
                    # Rising edge: motion detected — notify, capture, mail.
                    fileName = get_file_name()
                    cam.start_preview()
                    p.pushMessage(CHANNEL_NAME, "Motion Detected... Look at mail!")
                    time.sleep(2)  # let the camera settle before capturing
                    cam.capture(fileName)
                    time.sleep(1)
                    SendMail(fileName)  # e-mail the captured file (fileName)
                else:
                    cam.stop_preview()
    except KeyboardInterrupt:
        print " System is terminated"
    except Exception:
        print "Nastal Error"
def runFuncAndSendmail(emailList, func, *args):
    """Run *func(*args)* and mail its result (or the error) to emailList
    (Python 2).

    Call shapes accepted:
      runFuncAndSendmail(addr_or_list, func, *args)  -- explicit recipients
      runFuncAndSendmail(func, *args)                -- recipients default to
                                                        the built-in address
    The default address is always appended so it gets a copy.
    """
    from SendMail import SendMail
    email = '*****@*****.**'  # default/fallback recipient (redacted)
    startTime = time.time()
    if isinstance(emailList, types.StringType):
        emailList = [emailList]
    elif not isinstance(emailList, types.ListType):
        # First positional arg was actually the function: shift everything.
        args = (func, ) + args
        func = emailList
        emailList = [email]
    if email not in emailList:
        emailList.append(email)
    subject = str(func)
    try:
        msg = func(*args)
    except:
        # Best-effort: report the failure by mail instead of raising.
        msg = getLastErrorMessage()
        subject = 'Error ' + subject
    print msg
    SendMail().send(
        '*****@*****.**', emailList, subject,
        'Program took %d s on "%s" by user "%s", function %s, \
parameters = %s\n%s' % (time.time() - startTime, os.environ.get(
            'HOSTNAME', '?'), os.environ.get('USER', '?'), func, args, msg))
def getWeatherInfo():
    """Fetch the HeWeather daily forecast for Guangzhou, write it to
    today_weather.csv and trigger the mail sender."""
    location = 'guangzhou'
    # NOTE(review): hard-coded API key checked into source; move it to
    # configuration or an environment variable.
    key = '1e9f3d6ab04c484395685a41b3fdbec4'
    url = 'https://free-api.heweather.net/s6/weather/forecast?location=' + location + '&key=' + key
    print(url)
    # Fetch the JSON payload.
    res = requests.get(url)
    # Convert the JSON text into a dict.
    res_dict = json.loads(res.text)
    # The 'basic' block carries the city names.
    location = res_dict['HeWeather6'][0]['basic']
    print(res_dict['HeWeather6'][0]['daily_forecast'])
    result = res_dict['HeWeather6'][0]['daily_forecast']
    city = location['parent_city'] + location['location']
    names = ['城市', '时间', '天气状况', '最高温', '最低温', '日出', '日落']
    with open('today_weather.csv', 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(names)
        for data in result:
            date = data['date']
            cond = data['cond_txt_d']
            # Renamed from max/min: the originals shadowed the builtins.
            tmp_max = data['tmp_max']
            tmp_min = data['tmp_min']
            sr = data['sr']  # sunrise
            ss = data['ss']  # sunset
            # writerow: a single row does not need writerows([...]).
            writer.writerow((city, date, cond, tmp_max, tmp_min, sr, ss))
    SendMail()
class YafaSMTPHandler(logging.handlers.SMTPHandler):
    """SMTPHandler variant that delivers via a SendMail helper and keeps a
    small bounded queue of messages that failed to send, for later retry.

    NOTE(review): this file mixes py2 (`print e`) and py3 print calls —
    under Python 3 the `print e` below is a syntax error; confirm target.
    """

    def __init__(self, mailhost, fromaddr, toaddrs, subject, credentials=None):
        logging.handlers.SMTPHandler.__init__(self, mailhost, fromaddr, toaddrs, subject, credentials)
        self.dq_size = 2  # TODO make parameter: capacity of the retry queue
        # Bounded FIFO of failed messages; the deque drops the oldest itself.
        self.notDelivered = collections.deque(maxlen=self.dq_size)
        self.lostForever = 0  # messages pushed out of the full queue
        self.myMailer = SendMail()  # performs the actual SMTP delivery

    def configMailer(self):
        """Copy this handler's SMTP settings onto the SendMail helper."""
        import string
        self.myMailer.sender = self.fromaddr  # no tuple needed - see SendMail defaults?
        self.myMailer.recipients = self.toaddrs  # no tuple needed - see SendMail defaults?
        self.myMailer.smtp_host = self.mailhost
        self.myMailer.smtp_login = self.username
        self.myMailer.smtp_password = self.password
        port = self.mailport
        if not port:
            port = smtplib.SMTP_PORT  # fall back to the standard SMTP port
        self.myMailer.smtp_port = port
        # NOTE(review): this local import/assignment is never used afterwards
        # — formatdate goes out of scope when the method returns.
        try:
            from email.utils import formatdate
        except ImportError:
            formatdate = self.date_time

    def tryDelivery(self):
        """Attempt to flush the queue of previously undelivered messages as
        one combined mail; on failure the queue is left intact for retry."""
        try:
            if self.notDelivered:
                self.configMailer()
                l = len(self.notDelivered)
                msg = [
                    'These {0} messages could not be delivered previously\n\n'.
                    format(l)
                ]
                # Render each queued (error, time, subject, message) tuple.
                for idx, item in enumerate(self.notDelivered):
                    msg.append(
                        "\n============== {0} ================\n\n".format(
                            idx))
                    msg.append('Time: ' + item[1] + '\n')
                    msg.append('Not delivered because: ' + str(item[0]) + '\n')
                    msg.append('Subject: ' + item[2] + '\n')
                    msg.append('Message: ' + item[3] + '\n')
                #print(msg)
                subj = 'Previously undeliverable messages ({0})'.format(l)
                #print(subj)
                self.myMailer.SendMessage(subj, ''.join(msg))
                self.notDelivered.clear()  # flushed successfully
        except Exception as e:
            print('oops this went wrong')
            print e
            pass  # we will try again later

    def emit(self, record):
        ## should this really go here? Isn't the handler doing this already?
        ## for now checking levels myself
        ## see how this interacts with the logger level
        ##print record.levelno
        ##print self.getEffectiveLevel
        try:
            self.configMailer()
            msg = self.format(record)
            #raise Exception('boe')
            self.myMailer.SendMessage(self.getSubject(record), msg)
        except (KeyboardInterrupt, SystemExit):
            raise
        except Exception as e:
            # TODO: set raiseException correctly
            # TODO: log what we don't log here
            t = time.strftime("%d/%m/%Y %H:%M:%S: ")
            # Queue the failed message; when the queue is already full the
            # deque will evict the oldest entry, so count it as lost.
            if len(self.notDelivered) >= self.dq_size:
                self.lostForever = self.lostForever + 1
            self.notDelivered.append((e, t, self.getSubject(record), msg))
            self.handleError(record)
import os
from Weather import Weather
from TimeCompare import TimeCompare
import SunTimes
from SendMail import SendMail
from MiningState import MiningState
import psutil
from ConfigController import ConfigController
import sunmine_logger
import datetime

# Module-level singletons shared by the application.
config = ConfigController()
send_mail = SendMail()
logger = sunmine_logger.get_logger()


def main():
    """Entry point of the Sunmine application: gathers weather and
    sunrise/sunset data to decide about the miner state.

    NOTE(review): this snippet appears truncated — the function ends right
    after building the sun API URL; confirm against the full source.
    """
    logger.info('Logging works!')
    weather = Weather()
    acceptable_weather_codes = config.get_weather_codes()
    logger.info('Started Sunmine application')
    current_state = MiningState.get_state()
    logger.info("Current state of the miner: " + current_state)
    # weather
    # sunset/rise
    logger.info("Getting sunrise/sunset data from the internet...")
    sun_api_url = SunTimes.build_sun_api_url(config)
def TianchengTest():
    '''
    Main test flow (Python 2): read config flags, start optional HTTP/MQ
    mock servers, run the test and assertion threads, generate the HTML
    report, optionally mail it, and exit 0/-1 for Jenkins.
    '''
    try:
        # Start the text log.
        GenerateTxtLog.GenTxtLog()
        # Read control flags from the ini config.
        runmode = int(Config.ConfigIni.get_runmode())
        iscontrol = str(Config.ConfigIni.get_iscontrol())
        isstdebug = str(Config.ConfigIni.get_isstdebug())
        isHTTPMock = int(Config.ConfigIni.get_isHTTPMock())
        isMQMock = int(Config.ConfigIni.get_isMQMock())
        memdata.write(ch2unicode(iscontrol + u'+++' + isstdebug))  # write flags to shared memory
        # Lock shared between the run and assert threads.
        tresult_qlock = threading.Lock()
        # Record the test start time.
        start_time = getnowstamp()
        start_now = getnowtime()
        PrintLog('info', '测试开始时间: %s', start_now)
        # Collect the test case ids to execute for this run mode.
        TestIds = getModTestid(runmode)
        PrintLog('info', '待执行用例: %s', TestIds)
        # Map identity_card / UserMobile values to sheet ids; False signals
        # duplicated data, which aborts the run.
        ModMockO = ModMock.ModMock()
        sheetid_identity_card = ModMockO.SheetId_identity_card(TestIds)
        sheetid_UserMobile = ModMockO.SheetId_UserMobile(TestIds)
        if sheetid_identity_card is False:
            raise ValueError(u'identity_card存在重复数据!!!')
        if sheetid_UserMobile is False:
            raise ValueError(u'UserMobile存在重复数据!!!')
        PrintLog('debug', 'sheetid_identity_card: %s\nsheetid_UserMobile: %s',
                 sheetid_identity_card, sheetid_UserMobile)
        memdata.write(u'+++' + ch2unicode(sheetid_identity_card) + u'+++' +
                      ch2unicode(sheetid_UserMobile))  # write mappings to shared memory
        # Start the HTTP mock server thread (daemon) when enabled.
        if isHTTPMock:
            HttpServerO = Mock_HttpServer.HttpServer()
            Thread_HTTPO = threading.Thread(target=HttpServerO.Start, name='HttpServerThread')
            Thread_HTTPO.setDaemon(True)
            Thread_HTTPO.start()
            time.sleep(1)
        # Start the MQ mock server thread (daemon) when enabled.
        if isMQMock:
            MQServerO = Mock_MQServer.MQServer()
            Thread_MQO = threading.Thread(target=MQServerO.Start, name='MQServerThread')
            Thread_MQO.setDaemon(True)
            Thread_MQO.start()
            time.sleep(1)
        # Start the test-execution thread.
        PrintLog('info', 'Starting thread: TestRunThread')
        Thread_runO = TestRunThread('TestRunThread', tresult_qlock, TestIds)
        Thread_runO.setDaemon(True)
        Thread_runO.start()
        time.sleep(1)
        # Start the assertion thread.
        PrintLog('info', 'Starting thread: TestAssertThread')
        Thread_assertO = TestAssertThread('TestAssertThread', tresult_qlock)
        Thread_assertO.setDaemon(True)
        Thread_assertO.start()
        # Wait for both worker threads to finish.
        PrintLog('info', '等待子线程TestRunThread结束...')
        Thread_runO.join()
        PrintLog('info', '子线程:TestRunThread结束')
        PrintLog('info', '等待子线程TestAssertThread结束...')
        Thread_assertO.join()
        PrintLog('info', '子线程:TestAssertThread结束')
        # Wait for the task queue to drain (disabled).
        #global taskassert_queue
        #taskassert_queue.join()
        # Record the test end time.
        end_time = getnowstamp()
        end_now = getnowtime()
        PrintLog('info', '测试结束时间: %s', end_now)
        # Generate the HTML test report.
        global testcase_result
        PrintLog('info', 'testcase_result: %s', testcase_result)
        HtmlReportO = HtmlReport(testcase_result, end_time-start_time)
        report_filename = HtmlReportO.generate_html()
        # Mail the report when configured.
        isSendMail = int(Config.ConfigIni.get_isSendMail())
        if isSendMail:
            SendMailO = SendMail()
            SendMailO.getmsg(report_filename)
            SendMailO.sendmail()
        # Jenkins integration: exit 0 only when every case passed.
        resultValue = [x for x in testcase_result.values() if x[0] != 'PASS']
        if len(resultValue) == 0:
            exit(0)
        else:
            exit(-1)
    except ValueError as e:
        print unicode(e.args[0])
        exit(-1)
    except Exception as e:
        print unicode(e)
        exit(-1)
def sendMail(self):
    """Notify this user by mail that booking has opened at the tracked venue."""
    body = 'Tickets at ' + self.name + ' are open now'
    params = {
        'subject': 'Book My Show Tracker',
        'mail_body': body,
        'to_addr': self.username,
    }
    mailer = SendMail(self.username, self.password, params)
    mailer.start()
def getHollywoodNews():
    """Scrape Hollywood Reporter topic pages, translate title/deck via bdfy
    and mail each article with its lead image (saved under YYYY-MM-DD)."""
    now = int(time.time())
    timeArray = time.localtime(now)
    Ymd = time.strftime('%Y-%m-%d', timeArray)
    if Ymd not in os.listdir():
        os.mkdir(Ymd)  # one folder per day for downloaded images
    base_url = 'https://www.hollywoodreporter.com'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.143 Safari/537.36'
    }
    topics = [
        '/topic/movies', '/topic/tv', '/topic/entertainment-industry',
        '/topic/technology'
    ]
    for topic in topics:
        result = getHtml(base_url + topic, headers)
        soup = result[1]
        newslist = soup.findAll('article')
        news_url = []
        for news in newslist:
            if not news.find('a'):
                continue
            link = news.find('a', class_='topic-card__link')
            if not link:  # robustness: card without the expected link class
                continue
            href = link.get('href')
            # Fix: str.index raises ValueError for a relative URL (it never
            # returns a negative value), killing the whole function outside
            # any try; use find() for the containment test.
            if href.find('http') < 0:
                href = base_url + href
            news_url.append(href)
        for new_url in news_url:
            print(new_url)
            try:
                result = getHtml(new_url, headers)
                soup = result[1]
                title = soup.find('h1', class_='article__headline')
                if not title:
                    continue
                title = title.get_text().replace('\n', '')  # headline
                deck = soup.find('h2', class_='article__deck')
                if deck:
                    deck = deck.get_text().replace('\n', '')  # subtitle
                else:
                    # Fix: the original assigned the fallback string first and
                    # then called .get_text() on it, raising AttributeError.
                    deck = 'no deck'
                text = soup.find('div', class_='article__body')
                if not text:
                    continue
                result = bdfy.translate(title)
                title_dst = result['trans_result'][0].get('dst')
                print('原文', title, '译文', title_dst)
                result = bdfy.translate(deck)
                deck_dst = result['trans_result'][0].get('dst')
                print('原文', deck, '译文', deck_dst)
                dstText = ''  # per-paragraph body translation currently disabled
                srcText = ''.join(
                    p.get_text().replace('\n', '') for p in text.select('p'))
                figure = soup.find('figure')
                img_url = figure.find('img') if figure else None
                imgpath = None
                if img_url:
                    img_url = img_url.get('src')
                    img_name = new_url[new_url.rfind('/') + 1:] + '.jpg'
                    imgpath = os.path.join(Ymd, img_name)
                    downloadImg(img_url, imgpath)
                SendMail.mail(
                    SendMail, title_dst, title + '|' + deck + '|' + srcText +
                    '\n' + title_dst + '|' + deck_dst + '|' + dstText, imgpath)
            except Exception:
                # Fix: bare except also swallowed KeyboardInterrupt/SystemExit.
                traceback.print_exc()
                print('解析:', new_url, '失败')
def getKorNews():
    """Scrape the Naver entertainment ranking, translate each title
    (kor -> zh) via bdfy and mail the article with its lead image."""
    now = int(time.time())
    timeArray = time.localtime(now)
    Ymd = time.strftime('%Y-%m-%d', timeArray)
    if Ymd not in os.listdir():
        os.mkdir(Ymd)  # one folder per day for downloaded images
    base_url = 'https://entertain.naver.com'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.143 Safari/537.36'
    }
    result = getKorHtml(base_url + '/ranking', headers)
    soup = result[1]
    newslist = soup.select('#ranking_news li')
    news_url = []
    for news in newslist:
        # Reuse the tag from the first lookup instead of calling find('a')
        # twice per ranking entry.
        link = news.find('a')
        if link:
            news_url.append(base_url + link.get('href'))
    for new_url in news_url:
        print(new_url)
        try:
            result = getKorHtml(new_url, headers)
            soup = result[1]
            title = soup.find('h2', class_='end_tit')
            if not title:
                continue
            title = title.get_text().strip().replace('\n', '')  # headline
            text = soup.find('div', id="articeBody")
            if not text:
                continue
            result = bdfy.translateOther(title, 'kor', 'zh')
            print(result)
            title_dst = result['trans_result'][0].get('dst')
            srcText = text.get_text().strip().replace('\n', '')
            dstText = ''  # body translation currently disabled
            img_url = text.find('img')
            imgpath = None
            if img_url:
                img_url = img_url.get('src')
                # NOTE(review): title_dst may contain characters that are not
                # legal in file names — confirm before relying on this path.
                img_name = title_dst + '.jpg'
                imgpath = os.path.join(Ymd, img_name)
                downloadImg(img_url, imgpath)
            SendMail.mail(
                SendMail, title_dst,
                title + '|' + srcText + '\n' + title_dst + '|' + dstText,
                imgpath)
        except Exception:
            # Fix: bare except also swallowed KeyboardInterrupt/SystemExit.
            traceback.print_exc()
            print('解析:', new_url, '失败')
def __init__(self):
    """Load the configuration, build the mailer and cache the debug flag."""
    cfg = ParseConfig()
    self.config = cfg
    self.mail = SendMail(cfg.getMailConfig())
    self.debug = cfg.getGeneral()['debug']