def run(self):
    response = requests.get(self.url, headers=self.headers)
    if response.status_code != 200:
        print('Request failed, bad URL: ' + self.url)
        return False
    print('Requesting: ' + self.url)
    response.encoding = 'utf-8'
    self.html = response.text
    soup = BeautifulSoup(self.html, 'html.parser')
    urls = soup.select('.list ul li')
    print(len(urls))
    for item in urls:
        pushtime = item.span.get_text()
        title = item.a.get_text()
        url_c = item.a['href']
        seen = self.getOne(pushtime, 'hnjyt')  # query the store once instead of twice
        print(seen)
        if seen > 0:
            print(title + ' already exists')
            continue
        self.saveOne(pushtime, 'hnjyt', title, url_c, '')
        r = requests.get(url_c)
        r.encoding = 'utf-8'
        self.html = r.text
        # Parse the article page
        soup_c = BeautifulSoup(self.html, 'html.parser')
        articetext = soup_c.select('.article')[0].get_text()  # article body
        matchFlag = True  # keyword filter disabled: re.search(u'辅导员|化学', articetext)
        if matchFlag:
            print(pushtime + '|' + title + ': matched')
            SendMail.mail(SendMail(), title, url_c + '\n\t' + articetext)
        else:
            print('Article: ' + title + ' did not match')
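# getOne/saveOne are not defined in this file; the run(self) above and the
# run(self) with self.targets later in the file both rely on them for
# de-duplication. A minimal sketch, assuming methods on the same scraper class
# backed by a local SQLite store keyed on (pushtime, site) -- the database
# file, table name, and columns here are hypothetical, not confirmed by the
# source:
import sqlite3

def getOne(self, pushtime, site):
    # Returns a count > 0 if this (pushtime, site) pair was already saved.
    conn = sqlite3.connect('news.db')
    try:
        conn.execute('CREATE TABLE IF NOT EXISTS news '
                     '(pushtime TEXT, site TEXT, title TEXT, url TEXT, extra TEXT)')
        cur = conn.execute('SELECT COUNT(*) FROM news WHERE pushtime=? AND site=?',
                           (pushtime, site))
        return cur.fetchone()[0]
    finally:
        conn.close()

def saveOne(self, pushtime, site, title, url, extra):
    # Records an article so later runs can skip it.
    conn = sqlite3.connect('news.db')
    try:
        conn.execute('CREATE TABLE IF NOT EXISTS news '
                     '(pushtime TEXT, site TEXT, title TEXT, url TEXT, extra TEXT)')
        conn.execute('INSERT INTO news VALUES (?, ?, ?, ?, ?)',
                     (pushtime, site, title, url, extra))
        conn.commit()
    finally:
        conn.close()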
def run(self):
    #pageNo = input('Enter page number: ')
    pageNo = '1'
    if not pageNo:
        pageNo = 'index_1.html'
    else:
        pageNo = 'index_' + pageNo + '.html'
    response = requests.get(self.url + pageNo, headers=self.headers)
    if response.status_code != 200:
        print('Request failed, bad URL: ' + self.url + pageNo)
        return False
    print('Requesting: ' + self.url + pageNo)
    self.download(self.url + pageNo, pageNo)
    soup = BeautifulSoup(self.html, 'html.parser')
    urls = soup.select('.list_b_info.right')
    urlcount = 0
    for item in urls:
        if urlcount > 2:
            break
        urlcount += 1
        print('Current download count: ' + str(urlcount))
        dir = item.h2.a['title']
        url_c = item.h2.a['href']
        if not os.path.exists(dir):
            os.makedirs(dir)
        fileName = dir + '/' + item.h2.a['title'] + '.html'
        self.download(url_c, fileName)
        # Parse the list page
        soup_c = BeautifulSoup(self.html, 'html.parser', from_encoding="gb18030")
        urls_c = soup_c.select('.article_body p a')
        count = 0
        for item_c in urls_c:
            if item_c.span:
                count += 1
                url_t = item_c['href']
                name = item_c.get_text()
                childFileName = dir + '/' + str(count) + self.replaceName(name) + '.html'
                if os.path.exists(childFileName):
                    print(childFileName + ' already exists')
                    continue
                self.download(url_t, childFileName)
                soup_s = BeautifulSoup(self.html, 'html.parser', from_encoding="gb18030")
                try:
                    articetextBody = soup_s.select('.article_body')
                    if not articetextBody:
                        articetextBody = soup_s.select('.detail-content')
                    articetext = articetextBody[0].get_text()
                    # get_text() already returns str on Python 3; the original
                    # articetext.decode('utf8') would raise AttributeError.
                    matchFlag = re.search(u'辅导员|化学', articetext)  # keywords: "counselor|chemistry"
                    if matchFlag:
                        SendMail.mail(SendMail(), name, url_t + '\n\t' + articetext)
                    else:
                        print('Article "' + name + '" did not match')
                except Exception:
                    print(childFileName + ' failed to parse content')
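# self.download(url, fileName) is referenced above (and in the similar
# run(self) further down) but defined elsewhere. A minimal sketch, assuming it
# fetches the page, caches the markup on self.html for the follow-up
# BeautifulSoup calls, and also saves it to disk -- the caching behavior and
# the gb18030 encoding are assumptions inferred from the call sites:
def download(self, url, fileName):
    response = requests.get(url, headers=self.headers)
    response.encoding = 'gb18030'  # assumed: these list/article pages are GB-encoded
    self.html = response.text
    with open(fileName, 'w', encoding='utf-8') as f:
        f.write(self.html)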
def getNBCNews():
    now = int(time.time())
    timeArray = time.localtime(now)
    Ymd = time.strftime('%Y-%m-%d', timeArray)
    if Ymd not in os.listdir():
        os.mkdir(Ymd)
    base_url = 'https://www.nbcnews.com'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.143 Safari/537.36'
    }
    result = getHtml(base_url + '/tech-media', headers)
    soup = result[1]
    newslist = soup.findAll('article')
    news_url = []
    for news in newslist:
        href = news.find('a')
        if href:
            href = news.find('a').get('href')
            news_url.append(base_url + href)
    for new_url in news_url:
        print(new_url)
        try:
            result = getHtml(new_url, headers)
            soup = result[1]
            article = soup.find('div', class_='article')
            if not article:
                continue
            img_url = article.find('picture')
            imgpath = None
            if img_url:
                img_url = img_url.find('img').get('src')
                img_name = new_url[new_url.rfind('/') + 1:] + '.jpg'
                imgpath = os.path.join(Ymd, img_name)
                downloadImg(img_url, imgpath)
            text = article.get_text()
            dst = ''  # translation disabled: result = bdfy.translate(text)
            SendMail.mail(SendMail, img_url, text + '\n' + dst, imgpath)
        except Exception:
            traceback.print_exc()
            print('Failed to parse:', new_url)
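# getHtml(url, headers) and downloadImg(url, path) are used throughout this
# file but defined elsewhere. A minimal sketch, inferred from the call sites
# (getHtml's return value is indexed with [1] for the soup, so a
# (response, soup) tuple is assumed):
def getHtml(url, headers):
    response = requests.get(url, headers=headers, timeout=30)
    soup = BeautifulSoup(response.text, 'html.parser')
    return (response, soup)

def downloadImg(url, path):
    # Fetches the image bytes and writes them to disk.
    response = requests.get(url, timeout=30)
    with open(path, 'wb') as f:
        f.write(response.content)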
def run():
    newslist = getNewList('http://ent.163.com/special/00032IAD/ent_json.js')
    for url in newslist:
        try:
            r = synonym.getByUrl(url)
            if r is not None:  # None means the URL has not been seen yet
                continue
            news = parseUrl(url)
            text = bdnlp.nplParse(news[1])
            synonym.downloadText(text, news[0] + '/dest.txt', 'utf-8')
            files = news[2]
            files.append(news[0] + '/dest.txt')
            SendMail.mail(SendMail, news[0], news[1] + '\n' + text, files)
        except Exception:
            traceback.print_exc()
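# getNewList is defined elsewhere. The 163.com feed above is a .js file rather
# than plain JSON, so a sketch has to strip the JavaScript wrapper before
# parsing -- the wrapper format, the 'docurl' field, and the gbk encoding are
# assumptions about this particular feed, not confirmed by the source:
import json

def getNewList(feed_url):
    raw = requests.get(feed_url, timeout=30)
    raw.encoding = 'gbk'  # assumed encoding for the 163.com feed
    # Keep only the {...} payload between the first '{' and the last '}'.
    body = raw.text[raw.text.find('{'):raw.text.rfind('}') + 1]
    data = json.loads(body)
    urls = []
    for items in data.values():
        if not isinstance(items, list):
            continue
        for item in items:
            if isinstance(item, dict) and 'docurl' in item:
                urls.append(item['docurl'])
    return urls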
def run():
    newslist = getNewList('http://yule.sohu.com/_scroll_newslist/%s/news.inc' % (getToday()))
    for url in newslist:
        try:
            if url.find('picture') > 0:  # skip photo galleries; or url.find('music') > 0
                continue
            r = synonym.getByUrl(url)
            if r is not None:  # None means the URL has not been seen yet
                continue
            news = parseUrl(url)
            text = bdnlp.nplParse(news[1])
            synonym.downloadText(text, news[0] + '/dest.txt', 'utf-8')
            files = news[2]
            files.append(news[0] + '/dest.txt')
            SendMail.mail(SendMail, news[0], news[1] + '\n' + text, files)
        except Exception:
            traceback.print_exc()
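# synonym.getByUrl and synonym.downloadText back the feed runners above and
# below but live in another module. A minimal sketch, assuming getByUrl is a
# seen-URL lookup (None when unseen, per the comments at the call sites) and
# downloadText writes the reworded text to a file -- both signatures are
# inferred, and the flat-file store is hypothetical:
def getByUrl(url):
    # Returns the URL if already processed, else None.
    if not os.path.exists('seen_urls.txt'):
        return None
    with open('seen_urls.txt', encoding='utf-8') as f:
        seen = f.read().splitlines()
    return url if url in seen else None

def downloadText(text, path, encoding):
    d = os.path.dirname(path)
    if d:
        os.makedirs(d, exist_ok=True)
    with open(path, 'w', encoding=encoding) as f:
        f.write(text)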
def run(self):
    #pageNo = input('Enter page number: ')
    pageNo = '1'
    if not pageNo:
        pageNo = 'index_1.html'
    else:
        pageNo = 'index_' + pageNo + '.html'
    response = requests.get(self.url + pageNo, headers=self.headers)
    if response.status_code != 200:
        print('Request failed, bad URL: ' + self.url + pageNo)
        return False
    print('Requesting: ' + self.url + pageNo)
    self.download(self.url + pageNo, pageNo)
    # Parse the index page
    soup = BeautifulSoup(self.html, 'html.parser')
    urls = soup.select('.list_b_info.right')
    for item in urls:
        dir = item.h2.a['title']
        url_c = item.h2.a['href']
        if not os.path.exists(dir):
            os.makedirs(dir)
        self.download(url_c, dir + '/' + item.h2.a['title'] + '.html')
        # Parse the list page
        soup_c = BeautifulSoup(self.html, 'html.parser', from_encoding="gb18030")
        urls_c = soup_c.select('.article_body p a')
        count = 0
        for item_c in urls_c:
            if item_c.span:
                count += 1
                url_t = item_c['href']
                name = item_c.get_text()
                self.download(url_t, dir + '/' + str(count) + self.replaceName(name) + '.html')
                soup_s = BeautifulSoup(self.html, 'html.parser', from_encoding="gb18030")
                articetext = soup_s.select('.article_left.border')[0].get_text()
                matchFlag = re.search('辅导员|化学', articetext)  # keywords: "counselor|chemistry"
                if matchFlag:
                    SendMail.mail(SendMail, name, url_t + '\n\t' + articetext)
                else:
                    print('Article "' + name + '" did not match')
def run(cat):
    try:
        newslist = getNewList('http://ent.cri.cn/roll/' + cat)
        for url in newslist:
            try:
                if url.find('picture') > 0:  # skip photo galleries
                    continue
                r = synonym.getByUrl(url)
                if r is not None:  # None means the URL has not been seen yet
                    continue
                news = parseUrl(url)
                text = bdnlp.nplParse(news[1])
                synonym.downloadText(text, news[0] + '/dest.txt', 'utf-8')
                files = news[2]
                files.append(news[0] + '/dest.txt')
                SendMail.mail(SendMail, news[0], news[1] + '\n' + text, files)
            except Exception:
                traceback.print_exc()
    except Exception:
        # Log instead of silently swallowing list-level failures.
        traceback.print_exc()
def run(self):
    for target in self.targets:
        self.url_addr = self.url + target
        response = requests.get(self.url_addr, headers=self.headers)
        if response.status_code != 200:
            print('Request failed, bad URL: ' + self.url_addr)
            return False
        response.encoding = 'utf-8'
        self.html = response.text
        soup = BeautifulSoup(self.html, 'html.parser')
        urls = soup.select('.main ul li')
        print(len(urls))
        for item in urls:
            pushtime = item.span.get_text()
            title = item.a.get_text()
            url_c = item.a['href']
            seen = self.getOne(pushtime, target)  # query the store once instead of twice
            print(seen)
            if seen > 0:
                print(title + ' already sent')
                continue
            self.saveOne(pushtime, target, title, url_c, '')
            response = requests.get(url_c, headers=self.headers)
            response.encoding = 'utf-8'
            self.html = response.text
            # Parse the article page
            soup_c = BeautifulSoup(self.html, 'html.parser')
            articetext = soup_c.select('.main')[0].get_text()  # article body
            matchFlag = re.search(u'辅导员|化学|长垣', articetext)  # keywords: "counselor|chemistry|Changyuan"
            if matchFlag:
                print(pushtime + '|' + title + ': matched')
                SendMail.mail(SendMail(), title, url_c + '\n\t' + articetext)
            else:
                print('Article: ' + title + ' did not match')
def getKorNews():
    now = int(time.time())
    timeArray = time.localtime(now)
    Ymd = time.strftime('%Y-%m-%d', timeArray)
    if Ymd not in os.listdir():
        os.mkdir(Ymd)
    base_url = 'https://entertain.naver.com'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.143 Safari/537.36'
    }
    result = getKorHtml(base_url + '/ranking', headers)
    soup = result[1]
    newslist = soup.select('#ranking_news li')
    news_url = []
    for news in newslist:
        href = news.find('a')
        if href:
            href = news.find('a').get('href')
            news_url.append(base_url + href)
    for new_url in news_url:
        print(new_url)
        try:
            result = getKorHtml(new_url, headers)
            soup = result[1]
            title = soup.find('h2', class_='end_tit')
            if not title:
                continue
            title = title.get_text().strip().replace('\n', '')  # article title
            text = soup.find('div', id="articeBody")
            if not text:
                continue
            result = bdfy.translateOther(title, 'kor', 'zh')
            print(result)
            title_dst = result['trans_result'][0].get('dst')
            srcText = text.get_text().strip().replace('\n', '')
            dstText = ''  # body translation disabled; see the chunked sketch below
            img_url = text.find('img')
            imgpath = None
            if img_url:
                img_url = img_url.get('src')
                img_name = title_dst + '.jpg'
                imgpath = os.path.join(Ymd, img_name)
                downloadImg(img_url, imgpath)
            SendMail.mail(
                SendMail, title_dst,
                title + '|' + srcText + '\n' + title_dst + '|' + dstText,
                imgpath)
        except Exception:
            traceback.print_exc()
            print('Failed to parse:', new_url)
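# The disabled body-translation block in getKorNews chunked srcText into
# 1000-character pieces because the translation API rejects long inputs, but
# its tail-slice arithmetic (len(srcText)/1000 * 1000) breaks under Python 3
# float division. A corrected sketch of the same idea, assuming
# bdfy.translateOther keeps the signature used above:
def translateChunked(srcText, src_lang='kor', dst_lang='zh', limit=1000):
    dstText = ''
    for start in range(0, len(srcText), limit):
        chunk = srcText[start:start + limit]
        result = bdfy.translateOther(chunk, src_lang, dst_lang)
        dstText += result['trans_result'][0].get('dst')
    return dstText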
def getHollywoodNews():
    now = int(time.time())
    timeArray = time.localtime(now)
    Ymd = time.strftime('%Y-%m-%d', timeArray)
    if Ymd not in os.listdir():
        os.mkdir(Ymd)
    base_url = 'https://www.hollywoodreporter.com'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.143 Safari/537.36'
    }
    topics = [
        '/topic/movies', '/topic/tv', '/topic/entertainment-industry',
        '/topic/technology'
    ]
    for topic in topics:
        result = getHtml(base_url + topic, headers)
        soup = result[1]
        newslist = soup.findAll('article')
        news_url = []
        for news in newslist:
            href = news.find('a')
            if href:
                href = news.find('a', class_='topic-card__link').get('href')
                # str.find, not str.index: index() raises ValueError on relative URLs.
                if href.find('http') < 0:
                    href = base_url + href
                news_url.append(href)
        for new_url in news_url:
            print(new_url)
            try:
                result = getHtml(new_url, headers)
                soup = result[1]
                title = soup.find('h1', class_='article__headline')
                if not title:
                    continue
                title = title.get_text().replace('\n', '')  # headline
                deck = soup.find('h2', class_='article__deck')
                # Guard before get_text(): the original assigned the fallback string
                # first and then called get_text() on it, raising AttributeError.
                deck = deck.get_text().replace('\n', '') if deck else 'no deck'  # subheading
                text = soup.find('div', class_='article__body')
                if not text:
                    continue
                result = bdfy.translate(title)
                title_dst = result['trans_result'][0].get('dst')
                print('source', title, 'translation', title_dst)
                result = bdfy.translate(deck)
                deck_dst = result['trans_result'][0].get('dst')
                print('source', deck, 'translation', deck_dst)
                srcText = ''
                dstText = ''  # per-paragraph body translation disabled
                for p in text.select('p'):
                    srcText += p.get_text().replace('\n', '')
                figure = soup.find('figure')
                img_url = figure.find('img') if figure else None
                imgpath = None
                if img_url:
                    img_url = img_url.get('src')
                    img_name = new_url[new_url.rfind('/') + 1:] + '.jpg'
                    imgpath = os.path.join(Ymd, img_name)
                    downloadImg(img_url, imgpath)
                SendMail.mail(
                    SendMail, title_dst,
                    title + '|' + deck + '|' + srcText + '\n' +
                    title_dst + '|' + deck_dst + '|' + dstText,
                    imgpath)
            except Exception:
                traceback.print_exc()
                print('Failed to parse:', new_url)
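# bdfy.translate / bdfy.translateOther wrap the Baidu Fanyi HTTP API in another
# module. A minimal sketch, assuming the standard general-translation endpoint
# with its MD5(appid + q + salt + key) signing scheme; BAIDU_APPID / BAIDU_KEY
# are hypothetical placeholder names. The response shape matches the
# result['trans_result'][0].get('dst') access used throughout this file:
import hashlib
import random

BAIDU_APPID = 'your-appid'   # hypothetical placeholder, configured elsewhere
BAIDU_KEY = 'your-secret'    # hypothetical placeholder, configured elsewhere

def translateOther(q, from_lang, to_lang):
    salt = str(random.randint(32768, 65536))
    sign = hashlib.md5((BAIDU_APPID + q + salt + BAIDU_KEY).encode('utf-8')).hexdigest()
    resp = requests.get('https://fanyi-api.baidu.com/api/trans/vip/translate',
                        params={'q': q, 'from': from_lang, 'to': to_lang,
                                'appid': BAIDU_APPID, 'salt': salt, 'sign': sign},
                        timeout=30)
    return resp.json()

def translate(q):
    # Auto-detect variant used for the English feeds above.
    return translateOther(q, 'auto', 'zh')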