class GuguPipeline(object):
    """Flag items whose lyrics match GUGU_PATTERN and mail the matches."""

    def __init__(self, mail_to):
        self.mailer = MailSender()
        self.mail_to = mail_to
        if mail_to:
            log.msg('Emails will be sent to %s' % mail_to, level=logging.INFO)

    @classmethod
    def from_settings(cls, settings):
        """Alternate constructor reading the recipient from settings."""
        return cls(settings['GUGU_PIPELINE_MAIL_TO'])

    def process_item(self, item, spider):
        # Mark the item and mail it only when the pattern is found.
        matched = re.search(GUGU_PATTERN, item['lyrics'])
        item['match'] = 'true' if matched else 'false'
        if matched:
            self.send_email(item)
        return item

    def send_email(self, item):
        """Mail a matched item; no-op when no recipient is configured."""
        if not self.mail_to:
            return
        subject = "Found a match: {artist} - {title}".format(**item)
        body = """URL: {url}
{lyrics}
""".format(**item)
        self.mailer.send(to=[self.mail_to], subject=subject, body=body)
def test_send_attach(self):
    """Send a mail with one attachment in debug mode and verify the captured
    envelope, multipart structure, text part and attachment content."""
    attach = BytesIO()
    attach.write(b'content')
    attach.seek(0)
    # attachs entries are (name, mimetype, file-like) triples.
    attachs = [('attachment', 'text/plain', attach)]
    mailsender = MailSender(debug=True)
    mailsender.send(to=['*****@*****.**'], subject='subject', body='body',
                    attachs=attachs, _callback=self._catch_mail_sent)
    assert self.catched_msg
    self.assertEqual(self.catched_msg['to'], ['*****@*****.**'])
    self.assertEqual(self.catched_msg['subject'], 'subject')
    self.assertEqual(self.catched_msg['body'], 'body')
    msg = self.catched_msg['msg']
    self.assertEqual(msg['to'], '*****@*****.**')
    self.assertEqual(msg['subject'], 'subject')
    payload = msg.get_payload()
    # With attachments the message is multipart: [text part, attachment].
    assert isinstance(payload, list)
    self.assertEqual(len(payload), 2)
    text, attach = payload
    self.assertEqual(text.get_payload(decode=True), b'body')
    self.assertEqual(text.get_charset(), Charset('us-ascii'))
    self.assertEqual(attach.get_payload(decode=True), b'content')
def stats_spider_closed(self, spider, spider_stats):
    """Mail global and per-spider stats when a spider closes."""
    mail = MailSender()
    body = "Global stats\n\n"
    # NOTE(review): `stats` is not defined in this method — presumably a
    # module-level stats collector; verify it is actually in scope.
    body += "\n".join("%-50s : %s" % i for i in stats.get_stats().items())
    body += "\n\n%s stats\n\n" % spider.name
    body += "\n".join("%-50s : %s" % i for i in spider_stats.items())
    mail.send(self.recipients, "Scrapy stats for: %s" % spider.name, body)
def test_send_attach_utf8(self):
    """Send non-ASCII subject/body plus a UTF-8 attachment and verify the
    charset is applied to both the container and the parts."""
    subject = u'sübjèçt'
    body = u'bödÿ-àéïöñß'
    attach = BytesIO()
    attach.write(body.encode('utf-8'))
    attach.seek(0)
    attachs = [('attachment', 'text/plain', attach)]
    mailsender = MailSender(debug=True)
    mailsender.send(to=['*****@*****.**'], subject=subject, body=body,
                    attachs=attachs, charset='utf-8', _callback=self._catch_mail_sent)
    assert self.catched_msg
    self.assertEqual(self.catched_msg['subject'], subject)
    self.assertEqual(self.catched_msg['body'], body)
    msg = self.catched_msg['msg']
    self.assertEqual(msg['subject'], subject)
    self.assertEqual(msg.get_charset(), Charset('utf-8'))
    # The charset must propagate to the multipart container's header.
    self.assertEqual(msg.get('Content-Type'), 'multipart/mixed; charset="utf-8"')
    payload = msg.get_payload()
    assert isinstance(payload, list)
    self.assertEqual(len(payload), 2)
    text, attach = payload
    self.assertEqual(text.get_payload(decode=True).decode('utf-8'), body)
    self.assertEqual(text.get_charset(), Charset('utf-8'))
    self.assertEqual(attach.get_payload(decode=True).decode('utf-8'), body)
def test_send_html(self):
    """The html mimetype must be propagated to the generated message."""
    sender = MailSender(debug=True)
    sender.send(
        to=['*****@*****.**'],
        subject='subject',
        body='<p>body</p>',
        mimetype='text/html',
        _callback=self._catch_mail_sent,
    )
    captured = self.catched_msg['msg']
    self.assertEqual(captured.get_payload(), '<p>body</p>')
    self.assertEqual(captured.get('Content-Type'), 'text/html')
def test_send_attach(self):
    """Send a mail with one attachment (crawler-backed sender, Python 2
    StringIO variant) and verify the captured message."""
    attach = StringIO()
    attach.write('content')
    attach.seek(0)
    # attachs entries are (name, mimetype, file-like) triples.
    attachs = [('attachment', 'text/plain', attach)]
    mailsender = MailSender(debug=True, crawler=self.crawler)
    mailsender.send(to=['*****@*****.**'], subject='subject', body='body',
                    attachs=attachs)
    assert self.catched_msg
    self.assertEqual(self.catched_msg['to'], ['*****@*****.**'])
    self.assertEqual(self.catched_msg['subject'], 'subject')
    self.assertEqual(self.catched_msg['body'], 'body')
    msg = self.catched_msg['msg']
    self.assertEqual(msg['to'], '*****@*****.**')
    self.assertEqual(msg['subject'], 'subject')
    payload = msg.get_payload()
    # Message with attachment is multipart: [text part, attachment].
    assert isinstance(payload, list)
    self.assertEqual(len(payload), 2)
    text, attach = payload
    self.assertEqual(text.get_payload(decode=True), 'body')
    self.assertEqual(attach.get_payload(decode=True), 'content')
class SpiderOpenCloseLogging(object):
    """Extension that mails a start notification when a spider opens and a
    sales report (after persisting figures to MySQL) when it closes."""

    def __init__(self):
        dispatcher.connect(self.spider_opened, signal=signals.spider_opened)
        dispatcher.connect(self.spider_closed, signal=signals.spider_closed)
        self.mailer = MailSender()
        # SMTP credentials are assigned directly on the instance instead of
        # being passed to the constructor.
        self.mailer.smtphost = "smtp.sina.cn"
        self.mailer.smtpuser = "******"
        self.mailer.smtppass = "******"
        self.mailer.mailfrom = "*****@*****.**"

    def spider_opened(self, spider):
        log.msg("opened spider %s" % spider.name)
        self.mailer.send(to=["*****@*****.**"], subject="scrapy running", body="scrapy is start")

    def spider_closed(self, spider):
        # Persist sales figures and mail the report only when a shop was
        # actually crawled (spider.domain set).
        if spider.domain:
            param = (spider.sales_num, spider.money, spider.queue_id)
            spider.cur.execute("update admin_queue set sales=%s , money=%s where id=%s", param)
            spider.conn.commit()
            spider.cur.close()
            spider.conn.close()
            mail_content = str(spider.shop_name) + "\n"
            mail_content += "30天销量:" + str(spider.sales_num) + " \n30天成交额:" + str(spider.money) + "\n店铺地址:" + str(spider.domain) + "\n"
            mail_content += "---------------------------------------\n"
            mail_content += spider.shopinfo_str
            mail_title = str(spider.shop_name) + ' 数据报告'
            self.mailer.send(to=[str(spider.mailto)], subject=mail_title, body=mail_content)
        # NOTE(review): collapsed source is ambiguous — this log call is
        # assumed to run for every spider, not only when domain is set.
        log.msg("closed spider %s" % spider.name)
def close(spider, reason):
    """Mail the accumulated content (if any) when the spider closes, then
    delegate to the spider's own `closed` hook."""
    if spider.email_content.strip():
        sender = MailSender(
            mailfrom="*****@*****.**",
            smtphost="smtp.gmail.com",
            smtpport=587,
            smtpuser="******",
            smtppass="******",
        )
        sender.send(
            to=["*****@*****.**", "*****@*****.**"],
            cc=["*****@*****.**"],
            subject="[Movies Here] " + spider.keyword + " is coming!!!",
            body=spider.email_content,
        )
    closed = getattr(spider, 'closed', None)
    return closed(reason) if callable(closed) else None
def spider_closed(self, spider):
    """Email a completion notice for the Thrillophilia crawler run."""
    sender = MailSender(
        mailfrom="*****@*****.**",
        smtphost="smtp.gmail.com",
        smtpport=587,
        smtpuser="******",
        smtppass="******",
    )
    sender.send(
        to=["*****@*****.**"],
        subject="Test mail : Report",
        body="Run completed for Thrillophilia Crawler ",
        cc=["*****@*****.**", "*****@*****.**"],
    )
def get_email(source_name):
    """Email a completion notice naming the crawled source."""
    sender = MailSender(
        mailfrom="*****@*****.**",
        smtphost="smtp.gmail.com",
        smtpport=587,
        smtpuser="******",
        smtppass="******",
    )
    sender.send(
        to=["*****@*****.**"],
        subject="Test mail : Report",
        body="Run completed for %s " % source_name,
        cc=["*****@*****.**", "*****@*****.**"],
    )
def send_mail(subject, body):
    """Send a UTF-8 encoded mail using addresses from project settings."""
    sender = MailSender(
        smtphost=settings.MAIL_HOST,
        mailfrom=settings.MAIL_FROM,
        smtpuser=settings.MAIL_USER,
        smtppass=settings.MAIL_PASS,
        smtpport=25,
    )
    recipients = settings.MAIL_TO
    sender.send(to=recipients,
                subject=subject.encode('utf-8'),
                body=body.encode('utf-8'))
def spider_error(failure):
    """Send errors email."""
    sender_addr = RYANAIR_SETTINGS['FROM_EMAIL']
    recipient = RYANAIR_SETTINGS['FAILURE_EMAIL']
    MailSender(mailfrom=sender_addr).send(
        to=[recipient],
        subject="Ryanair flights error",
        body=failure.getErrorMessage(),
    )
def login(self, response):
    """Log in to haiguan.info: load saved cookies, post the credentials to
    obtain a token, solve the captcha manually, confirm the login, and send
    a success-notification email.

    Fixes: narrowed the two bare ``except:`` clauses (which swallowed even
    KeyboardInterrupt) and removed a redundant ``f.close()`` inside a
    ``with`` block.
    """
    session = requests.session()
    session.cookies = cookielib.LWPCookieJar(filename="cookies.txt")
    try:
        session.cookies.load(ignore_discard=True)
    except (IOError, OSError):  # cookie file missing or unreadable
        print("cookie未能加载")
    account = 'deya201'
    password = '******'
    captcha = response.css("#imgcode::attr(src)").extract_first()
    captcha_url = parse.urljoin(response.url, captcha)
    post_url = 'http://www.haiguan.info/ajaxpro/SCEC.HaiguanInfo.Login,SCEC.HaiguanInfo.ashx'
    post_data = '{"loginStr":"' + account + '/' + password + '"}'
    self.headers['X-AjaxPro-Method'] = 'Encode'
    response = session.post(post_url, data=post_data, headers=self.headers)
    session.cookies.save()
    token = str(response.content, encoding='utf-8').split('"')[1]
    # Fetch the captcha image and show it for manual solving.
    t = session.get(captcha_url)
    with open('captcha.jpg', 'wb') as f:
        f.write(t.content)
    from PIL import Image
    try:
        im = Image.open('captcha.jpg')
        im.show()
        im.close()
    except Exception:  # best-effort preview only; display failures are fine
        pass
    code = input("输入验证码\n>")
    post_data = '{"loginStr":"' + token + '","code":"' + code + '"}'
    self.headers['X-AjaxPro-Method'] = 'CheckLogin'
    # Confirm the login with token + captcha code.
    response = session.post(post_url, data=post_data, headers=self.headers)
    session.cookies.save()
    # Send a notification email via scrapy's MailSender.
    from scrapy.mail import MailSender
    # MailSender.from_settings(settings) failed here for unknown reasons,
    # so the sender is configured explicitly.
    mailer = MailSender(
        smtphost="smtp.163.com",   # SMTP server
        mailfrom="*****@*****.**", # sender address
        smtpuser="******",         # account
        smtppass="******",         # SMTP authorization code, not the login password
        smtpport=25                # port
    )
    body = """
发送的邮件内容 :
海关信息网登陆成功啦,赶紧打开你的pycharm查看查看查看!!!!
"""
    subject = '爬虫邮件测试测试'
    # Overly short bodies risk being classified as spam.
    mailer.send(to=["*****@*****.**"], subject=subject, body=body)
def close_spider(self, spider):
    """On spider close: dump collected link items to links.json, then write
    an ERRORS/REPORT file pair (optionally emailing the report) when any
    item failed, or a CLEAN marker when everything passed."""
    info = self.info.pop(spider.name)
    if info is not None:
        outdir = spider.outdir
        outpath = os.path.join(outdir, "links.json")
        items = info['items']
        with open(outpath, 'w') as f:
            f.write(json.dumps([dict(i) for i in items]))
        # An item counts as an error when the fetch failed, validation
        # failed, or any header check failed.
        errors = [
            i for i in items
            if i['status'] != 200 or i['validation_error'] or i['header_errors']]
        if errors:
            with open(os.path.join(outdir, 'ERRORS'), 'w') as f:
                f.write(json.dumps([dict(i) for i in errors]))
            message = []
            for i in errors:
                item_message = ["===\nURL: {0}\n\n".format(i['url'])]
                status = i['status']
                if status != 200:
                    item_message.append(
                        "Failed retrieval with status: {0}\n".format(
                            status))
                if i['validation_error']:
                    item_message.append("Failed validation.\n\n")
                header_errors = i['header_errors']
                if header_errors:
                    item_message.append(
                        ("Failed header checks with the following "
                         "errors:\n{0}\n").format(
                            "\n".join(header_errors)))
                # Include only items that accumulated at least one failure
                # line beyond the URL header.
                if len(item_message) > 1:
                    message += item_message
            message.append("\nSee %s for details of validation errors." % outdir)
            email_body = "".join(message)
            with open(os.path.join(outdir, 'REPORT'), 'w') as f:
                f.write(email_body)
            send_to = spider.send_to
            if send_to is not None:
                sender = MailSender(mailfrom="*****@*****.**")
                sender.send([send_to], "Smoketest failure", email_body)
        else:
            with open(os.path.join(outdir, 'CLEAN'), 'w') as f:
                f.write("yes\n")
def close(self, reason):
    """Log the close reason and send a fixed test mail.

    Fixes: ``smtpport = 25,`` had a trailing comma, silently binding a
    1-tuple ``(25,)`` instead of the int 25; the Python 2 print statement
    is now a single-argument print() call (same output on 2 and 3).
    """
    self.logger.info(reason)
    mailfrom = '*****@*****.**'
    smtphost = 'smtp.163.com'
    smtpport = 25  # was `25,` — an accidental tuple
    smtpuser = '******'
    smtppass = '******'
    smtpssl = True  # NOTE(review): smtpssl/smtpport are never passed to MailSender below
    mailer = MailSender(mailfrom=mailfrom, smtphost=smtphost,
                        smtpuser=smtpuser, smtppass=smtppass)
    # Alternative: mailer = MailSender.from_settings(settings.MAIL)
    mailer.send(to=['*****@*****.**'],
                subject='Send Email Test by Scrapy MailSender!',
                body='Holle world!')
    print(settings.MAIL['MAIL_USER'])
class MailSender():
    """Thin wrapper around scrapy's MailSender (aliased ScrapyMailSender),
    preconfigured for a sina SMTP account, that sends one Email object.

    Fixes: Python 2 print statements converted to single-argument print()
    calls, which produce identical output on Python 2 and 3.
    """

    def __init__(self, email):
        # email is expected to expose .email_to, .subject and .content.
        self.mail_from = '*****@*****.**'
        self.mail_user = '******'
        self.mail_pass = '******'
        self.mail_host = 'smtp.sina.com'
        self.email = email
        self.mail_sender = ScrapyMailSender(smtphost=self.mail_host,
                                            mailfrom=self.mail_from,
                                            smtpuser=self.mail_user,
                                            smtppass=self.mail_pass)

    def send(self):
        """Send the wrapped email, echoing recipient and send() result."""
        print(self.email.email_to)
        print(self.mail_sender.send(to=self.email.email_to,
                                    subject=self.email.subject,
                                    body=self.email.content))
        print('已发送!')
def wrapper(*args, **kwargs):
    """Retry a pymongo operation on AutoReconnect, emailing an alert and
    waiting between attempts; returns the operation's result on success."""
    max_attempts = settings.getint("MAX_MONGO_RECONNECT_ATTEMPTS", MAX_AUTO_RECONNECT_ATTEMPTS)
    mail = MailSender()
    for attempt in xrange(max_attempts):
        try:
            return mongo_op_func(*args, **kwargs)
        except AutoReconnect as e:
            wait_t = 1 + attempt  # linear back-off (1s, 2s, ...) — the original comment incorrectly said exponential
            log.msg("PyMongo auto-reconnecting... %s. Waiting %.1f seconds." % (str(e), wait_t), log.INFO)
            mail.send(to=[settings.get('MAIL_TO')], subject='PyMongo auto-reconnecting....', \
                body="%s\n%s" % (e, traceback.format_exc()))
            time.sleep(wait_t)
def spider_opened(self, spider):
    """Mail a launch notification with bot name, cache dir and hostname."""
    mailer = MailSender(mailfrom="*****@*****.**")
    project_settings = get_project_settings()
    host = socket.gethostname()
    body = '''-Crawl name: {0}\n-Cache directory: {1}\n-Hostname : {2} \n Crawler_name: Amani BOUYAHIA'''.format(
        project_settings.get('BOT_NAME'),
        project_settings.get('HTTPCACHE_DIR'),
        host,
    )
    mailer.send(to=["*****@*****.**"],
                subject="The crawl of %s is %s " % (spider.name, "launched"),
                body=body)
def send_email(self, to=None, cc=None, subject="爬虫运行异常", body="", attachs=None):
    """Send an alert mail; when no recipients are given, look up the
    superuser's address as a fallback.

    Fixes: the mutable default arguments (``[]``) are shared across calls
    in Python; replaced with None sentinels.
    """
    to = [] if to is None else to
    cc = [] if cc is None else cc
    attachs = [] if attachs is None else attachs
    if len(to) == 0:
        root_user = User.objects.filter(is_superuser=1)
        if len(root_user) == 0:
            raise Exception("root账户不存在, 请添加root账户和root账户的邮箱")
        root_user_email = root_user[0].email
        if root_user_email is None or root_user_email == "":
            raise Exception("root账户没有配置邮箱, 请添加root账户的邮箱")
        # NOTE(review): the fallback address is appended to
        # self.email_receiver, yet the (still empty) `to` list is what gets
        # passed to mailer.send below — this looks like a bug; confirm intent.
        self.email_receiver.append(root_user_email)
    mailer = MailSender()
    mailer.send(to=to, cc=cc, subject=subject.encode("utf-8"),
                body=body.encode("utf-8"), attachs=attachs)
def test_send_html(self):
    """The html mimetype must be propagated to the generated message."""
    sender = MailSender(debug=True)
    kwargs = dict(
        to=["*****@*****.**"],
        subject="subject",
        body="<p>body</p>",
        mimetype="text/html",
        _callback=self._catch_mail_sent,
    )
    sender.send(**kwargs)
    captured = self.catched_msg["msg"]
    self.assertEqual(captured.get_payload(), "<p>body</p>")
    self.assertEqual(captured.get("Content-Type"), "text/html")
class MemoryDebugger(object):
    """Scrapy extension that reports memory-debugging figures (gc garbage
    count, optional libxml2 leaks) when the engine stops, via log and —
    when MEMDEBUG_NOTIFY recipients are configured — email."""

    def __init__(self):
        # libxml2 is optional; its leak tracking is skipped when missing.
        try:
            import libxml2
            self.libxml2 = libxml2
        except ImportError:
            self.libxml2 = None
        if not settings.getbool('MEMDEBUG_ENABLED'):
            raise NotConfigured
        self.mail = MailSender()
        self.rcpts = settings.getlist('MEMDEBUG_NOTIFY')
        dispatcher.connect(self.engine_started, signals.engine_started)
        dispatcher.connect(self.engine_stopped, signals.engine_stopped)

    def engine_started(self):
        if self.libxml2:
            self.libxml2.debugMemory(1)  # enable libxml2 memory accounting

    def engine_stopped(self):
        figures = self.collect_figures()
        report = self.create_report(figures)
        self.log_or_send_report(report)

    def collect_figures(self):
        """Return a list of (label, value, unit) tuples for the report."""
        gc.collect()
        figures = []
        figures.append(("Objects in gc.garbage", len(gc.garbage), ""))
        if self.libxml2:
            self.libxml2.cleanupParser()
            figures.append(("libxml2 memory leak", self.libxml2.debugMemory(1), "bytes"))
        return figures

    def create_report(self, figures):
        """Format figures as aligned text, appending live refs if tracked."""
        s = ""
        s += "SCRAPY MEMORY DEBUGGER RESULTS\n\n"
        for f in figures:
            s += "%-30s : %d %s\n" % f
        if settings.getbool('TRACK_REFS'):
            s += os.linesep
            s += format_live_refs()
        return s

    def log_or_send_report(self, report):
        # Email only when recipients are configured; always log.
        if self.rcpts:
            self.mail.send(self.rcpts, "Scrapy Memory Debugger results at %s" % \
                socket.gethostname(), report)
        log.msg(report)
def closed(self, reason):
    """Send a notification email when the crawl finishes."""
    mailer = MailSender(
        smtphost="smtp.163.com",   # SMTP server
        mailfrom="*****@*****.**", # sender address
        smtpuser="******",         # account name
        smtppass="******",         # SMTP authorization code, not the login password
        smtpport=25                # port
    )
    subject = u'测试发送---标题'
    body = u"测试发送---"
    # Very short bodies risk being classified as spam by the provider.
    mailer.send(to=["*****@*****.**", "*****@*****.**"],
                subject=subject, body=body)
def close_spider(self, spider): self.exporter.finish_exporting() # usinf gmail to send mail mailer = MailSender(smtphost="smtp.gmail.com", mailfrom='', smtpuser="", smtppass="", smtpport=587) myFile = open("jobs.csv", "r") self.file.close() mailer.send(to=["*****@*****.**"], subject="Scrapy mail", body="Did you receive this, oh!", attachs=(("twors", "text/plain", myFile), ))
def engine_closed(self):
    """Mail a success or failure notice depending on whether the corpus
    directory exists when the engine shuts down."""
    corpus_dir = os.path.join(settings.get('DATA_PATH'), 'Top')
    mailer = MailSender()
    success = os.path.isdir(corpus_dir)
    if success:
        content = "Crawling ended succesfully at %s." % time.asctime()
    else:
        content = "Crawling ended abnormally at %s" % time.asctime()
    mailer.send(
        to=['*****@*****.**', '*****@*****.**', '*****@*****.**'],
        subject="The training corpus has been downloaded" if success else "Error crawling",
        body=content,
    )
def parse(self, response):
    """Extract the hit count from the page header, persist it to MySQL, and
    mail an alert when the expected request metadata is missing.

    Fixes: legacy ``except KeyError, e`` syntax replaced with the ``as``
    form (valid on Python 2.6+ and 3); regex now a raw string.
    """
    try:
        db_id_url = response.meta['db_id_url']
        select = Selector(response)
        find_str = select.xpath('//*[@id="header_top_bar"]/span/text()').extract()[0]
        # Concatenate every digit run found in the header text.
        result = int(''.join(re.findall(r'\d+', find_str)))
        cur = self.conn.cursor()
        cur.execute(self.update_sql % (result, db_id_url))
        self.conn.commit()
        log.msg("success to parse : %s and result is %s" % (response.url, find_str),
                level=log.INFO)
    except KeyError as e:
        log.msg("fail to prase url : %s" % response.url, level=log.ERROR)
        mailer = MailSender(smtphost="smtp.163.com", mailfrom="*****@*****.**",
                            smtpuser="******", smtppass="******", smtpport=25)
        mailer.send(to=["*****@*****.**"], subject="Some subject", body=response.url)
def test_send(self):
    """A plain debug-mode send captures envelope and message correctly."""
    sender = MailSender(debug=True)
    sender.send(to=['*****@*****.**'], subject='subject', body='body')
    assert self.catched_msg
    for key, expected in (('to', ['*****@*****.**']),
                          ('subject', 'subject'),
                          ('body', 'body')):
        self.assertEqual(self.catched_msg[key], expected)
    captured = self.catched_msg['msg']
    self.assertEqual(captured['to'], '*****@*****.**')
    self.assertEqual(captured['subject'], 'subject')
    self.assertEqual(captured.get_payload(), 'body')
def spider_closed(self, spider, reason):
    """Mail a stats summary, including what percentage of the expected item
    count ("pige") was actually scraped."""
    mailer = MailSender()
    expected_total = 1333  # pige: expected number of items
    stats = spider.crawler.stats.get_stats()
    scraped = stats.get('item_scraped_count')
    percent = scraped * 100 / expected_total
    report = pprint.pformat(stats)
    report = (spider.name + " is " + reason + "\n\n"
              + "Le comptage a atteint " + str(percent) + "%\n"
              + "Summary stats from Scrapy spider: \n\n" + report)
    mailer.send(to=["*****@*****.**"],
                subject="The crawl of %s is %s " % (spider.name, reason),
                body=report)
def test_send(self):
    """A debug-mode send with a callback captures envelope and message."""
    sender = MailSender(debug=True)
    sender.send(to=['*****@*****.**'], subject='subject', body='body',
                _callback=self._catch_mail_sent)
    assert self.catched_msg
    for key, expected in (('to', ['*****@*****.**']),
                          ('subject', 'subject'),
                          ('body', 'body')):
        self.assertEqual(self.catched_msg[key], expected)
    captured = self.catched_msg['msg']
    self.assertEqual(captured['to'], '*****@*****.**')
    self.assertEqual(captured['subject'], 'subject')
    self.assertEqual(captured.get_payload(), 'body')
def send_mail(self, flat_uri):
    """Mail a notification about a promising flat via Gmail SMTPS.

    Fixes: ``to`` must be a list of addresses per MailSender.send(); a bare
    string was being passed.

    Only for gmail smtps servers; see
    https://www.quora.com/What-is-SMTP-Host and http://www.smtp-gmail.com/
    (TLS vs SSL). You will probably have to enable "Access for less secure
    apps" on the gmail host for this to work.
    """
    mailer = MailSender(smtphost='smtp.gmail.com', mailfrom='scrapy_bot',
                        smtpuser=self.user_mail, smtppass=self.user_pass,
                        smtpssl=True, smtpport=465)
    mailer.send(
        to=[self.user_mail],
        subject='detektiran potencijalan stan',
        body='robot je prepoznao sljedeci stan kao potencijalan:\n' + flat_uri)
def test_send(self):
    """A debug-mode send also sets the default text/plain content type."""
    sender = MailSender(debug=True)
    sender.send(to=['*****@*****.**'], subject='subject', body='body',
                _callback=self._catch_mail_sent)
    assert self.catched_msg
    for key, expected in (('to', ['*****@*****.**']),
                          ('subject', 'subject'),
                          ('body', 'body')):
        self.assertEqual(self.catched_msg[key], expected)
    captured = self.catched_msg['msg']
    self.assertEqual(captured['to'], '*****@*****.**')
    self.assertEqual(captured['subject'], 'subject')
    self.assertEqual(captured.get_payload(), 'body')
    self.assertEqual(captured.get('Content-Type'), 'text/plain')
def spider_closed(self, spider, reason):
    """Mail a stats summary, then finish exporting and close the file."""
    mailer = MailSender()
    expected_total = 1324  # pige: expected number of items
    stats = spider.crawler.stats.get_stats()
    scraped = stats.get('item_scraped_count')
    percent = scraped * 100 / expected_total
    body = ("Summary stats from Scrapy spider: \n\n"
            + "Finish reason : " + reason + "\n"
            + "Item scraped count : " + str(scraped) + "\n"
            + "Le comptage a atteint " + str(percent) + "%\n")
    mailer.send(to=["*****@*****.**"],
                subject="The crawl of %s is %s " % (spider.name, reason),
                body=body)
    self.exporter.finish_exporting()
    out_file = self.files.pop(spider)
    out_file.close()
def parse(self, response):
    """Send a test mail configured from the spider's settings and return
    the resulting Deferred.

    Fixes: the trailing ``print("end")`` after ``return`` was unreachable
    dead code and has been removed; superseded commented-out construction
    variants were pruned.
    """
    print("url:", response.url)
    # Explicit construction from spider settings.
    # Alternative: mailer = MailSender.from_settings(self.settings)
    mailer = MailSender(mailfrom=self.settings['MAIL_FROM'],
                        smtphost=self.settings['MAIL_HOST'],
                        smtpport=self.settings['MAIL_PORT'],
                        smtpuser=self.settings['MAIL_USER'],
                        smtppass=self.settings['MAIL_PASS'],
                        smtptls=self.settings['MAIL_TLS'],
                        smtpssl=self.settings['MAIL_SSL'])
    return mailer.send(to=["*****@*****.**"], subject="title test", body="text test")
def spider_closed(self, spider, reason):
    """Mail a stats summary (with coverage percentage) to three recipients."""
    mailer = MailSender(mailfrom="*****@*****.**")
    expected_total = 52792  # pige: expected number of items
    stats = spider.crawler.stats.get_stats()
    scraped = stats.get('item_scraped_count')
    percent = scraped * 100 / expected_total
    body = ("Summary stats from Scrapy spider: \n\n"
            + "Finish reason : " + reason + "\n"
            + "Item scraped count : " + str(scraped) + "\n"
            + "Le comptage a atteint " + str(percent) + "%\n")
    recipients = [
        "*****@*****.**",
        "*****@*****.**",
        "*****@*****.**",
    ]
    mailer.send(to=recipients,
                subject="The crawl of %s is %s " % (spider.name, reason),
                body=body)
def close_spider(self, spider):
    """Mail the day's tender documents (zip attachment when present, a
    "no data" notice otherwise) and return the send() Deferred.

    Fixes: removed a leftover ``print(type(os.path.basename(file)))``
    debug statement.
    """
    mailer = MailSender(smtphost=STMPHOST, mailfrom=EMAIL_FROM, smtppass=STMPPASS,
                        smtpuser=EMAIL_FROM, smtpport=STMPPORT, smtptls=True)
    # Subject covers either a single day or a date range.
    if spider.start_time == spider.end_time:
        subject = '(' + spider.end_time + ')招标文件,及时查收'
    else:
        subject = '(' + spider.start_time + '--' + spider.end_time + ')招标文件,及时查收'
    file = spider.zip_path
    if os.path.isfile(file):
        # NOTE(review): this file handle is never explicitly closed;
        # MailSender reads it while building the message.
        attachs = [(os.path.basename(file), EMAIL_ATTACH_MIME, open(file, "rb"))]
        body = '招标邮件,及时查收'.encode('utf-8')
    else:
        body = '今日无数据'.encode('utf-8')
        attachs = ()
    return mailer.send(to=EMAIL_TO, subject=subject, body=body,
                       cc=["*****@*****.**"], attachs=attachs,
                       mimetype="text/plain", charset='utf-8')
def close_spider(self, spider):
    """Mail an item-count summary when the spider closes."""
    mailer = MailSender(
        smtphost="smtp.163.com",   # SMTP server
        mailfrom="*****@*****.**", # sender address
        smtpuser="******",         # account name
        smtppass="******",         # SMTP authorization code
        smtpport=25                # port
    )
    send_time = datetime.now().replace(microsecond=0).isoformat(' ')
    mail_body = send_time + str(self.count) + u"""
items processed successfully!
"""
    mail_subject = u'scraped.'
    mailer.send(to=["*****@*****.**", "*****@*****.**"],
                subject=mail_subject,
                body=mail_body)
def test_send_utf8(self):
    """Non-ASCII subject/body with an explicit utf-8 charset survive the
    round trip and set the expected charset headers."""
    subject = u'sübjèçt'
    body = u'bödÿ-àéïöñß'
    sender = MailSender(debug=True)
    sender.send(to=['*****@*****.**'], subject=subject, body=body,
                charset='utf-8', _callback=self._catch_mail_sent)
    assert self.catched_msg
    self.assertEqual(self.catched_msg['subject'], subject)
    self.assertEqual(self.catched_msg['body'], body)
    captured = self.catched_msg['msg']
    self.assertEqual(captured['subject'], subject)
    self.assertEqual(captured.get_payload(), body)
    self.assertEqual(captured.get_charset(), Charset('utf-8'))
    self.assertEqual(captured.get('Content-Type'),
                     'text/plain; charset="utf-8"')
def close_spider(self, spider):
    """Mail the generated Excel report (or a "no data" notice when the file
    is absent) and return the send() Deferred."""
    mailer = MailSender(smtphost="smtp.163.com", mailfrom="*****@*****.**",
                        smtppass="******", smtpuser="******", smtpport=25,
                        smtptls=True)
    subject = spider.output_excel_filename
    attach_mime = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
    if os.path.isfile(spider.output_excel_filename):
        # NOTE(review): this file handle is never explicitly closed.
        attachs = [(spider.output_excel_filename, attach_mime,
                    open(spider.output_excel_filename, "rb"))]
        body = '招标邮件,及时查收'
    else:
        body = (spider.zh_name + '今日无数据(' + spider.today + ')').encode('utf-8')
        attachs = ()
    #, "*****@*****.**"
    return mailer.send(to=["*****@*****.**"], subject=subject, body=body,
                       cc=["*****@*****.**"], attachs=attachs,
                       mimetype="text/plain", charset='utf-8')
def close_spider(self, spider):
    """Close the output file, then mail links for newly found cars (if any)."""
    self.file.close()
    mailer = MailSender(
        mailfrom=mail_username,
        smtphost="smtp.gmail.com",
        smtpport=465,
        smtpuser=mail_username,
        smtppass=mail_password,
        smtpssl=True
    )
    # Nothing new — nothing to report.
    if not self.new_cars:
        return
    links = '\n'.join(self.new_cars)
    count = len(self.new_cars)
    mailer.send(
        to=[spider.mail_to],
        subject="New cars for you - " + str(count) + " - " + spider.name,
        body=links + "----------------------------\n" + "All cars from this category: " + spider.start_urls[0]
    )
def close_spider(self, spider):
    """Mail a scraping stats report when the spider closes.

    Fixes: removed ``mailer.quit()`` — ``scrapy.mail.MailSender`` has no
    ``quit()`` method (that is smtplib API), so the call raised
    AttributeError after every send.
    """
    subject = 'Image Scraper Report for ' + datetime.date.today().strftime("%m/%d/%y")
    from_email = "*****@*****.**"
    to_email = "*****@*****.**"
    # Assemble a MIME document carrying the pretty-printed crawler stats.
    msg = MIMEMultipart()
    msg['From'] = from_email
    msg['To'] = to_email
    msg['Subject'] = subject
    intro = "Summary stats from Scrapy spider: \n\n"
    body = spider.crawler.stats.get_stats()
    body = pprint.pformat(body)
    body = intro + body
    msg.attach(MIMEText(body, 'plain'))
    mailer = MailSender()
    # The serialized MIME document is sent as the message body.
    text = msg.as_string()
    mailer.send(to=[to_email], subject=subject, body=text)
def send_email(self, mail_body):
    """Mail a contract-error alert and return the Deferred from send()."""
    gmail = MailSender(
        mailfrom="*****@*****.**",
        smtphost="smtp.gmail.com",
        smtpport=587,
        smtpuser="******",
        smtppass="******",
    )
    return gmail.send(to=["*****@*****.**"],
                      subject="StockSpider: Stock Spiders Contract Error",
                      body=mail_body)
def closed(self, reason):
    """Send a notification email when the crawl finishes."""
    from scrapy.mail import MailSender
    # MailSender.from_settings(settings) failed here for unknown reasons,
    # so the sender is configured explicitly.
    mailer = MailSender(
        smtphost="smtp.163.com",         # SMTP server
        mailfrom="***********@163.com",  # sender address
        smtpuser="******",               # account name
        smtppass="******",               # SMTP authorization code, not the login password
        smtpport=25                      # port
    )
    subject = u'发送的邮件标题'
    body = u"""
发送的邮件内容
"""
    mailer.send(to=["****@qq.com", "****@qq.com"],
                subject=subject.encode("utf-8"),
                body=body.encode("utf-8"))
def send_email(self):
    """Mail an error report about accumulated 407/429 responses using the
    SMTP settings from the project configuration."""
    report = ("407 and 429 errors: \n\n"
              + pprint.pformat("During current parsing session received 10 responses with status 407 or 429"))
    cfg = get_project_settings()
    mailer = MailSender(smtphost=cfg.get("SMTP_HOST"),
                        mailfrom=cfg.get("MAIL_FROM"),
                        smtpuser=cfg.get("SMTP_USER"),
                        smtppass=cfg.get("SMTP_PASS"),
                        smtpport=cfg.get("SMTP_PORT"),
                        smtptls=cfg.get("SMTP_TLS"),
                        smtpssl=cfg.get("SMTP_SSL")
                        )
    mailer.send(to=cfg.get("MAIL_RECEIVERS"),
                subject="Booking Scrapy parser. Error report for "
                        + datetime.today().strftime("%d.%m.%Y %H:%M"),
                body=report,
                )
def send_bug_email(err=None, type=0):
    """Mail an HTML alert: a crash report with the error and its class when
    ``type == 0``, otherwise a shutdown notice.

    NOTE(review): the parameter ``type`` shadows the builtin, and the
    mimetype 'text/HTML' (uppercase) is unconventional — confirm the mail
    client accepts it.
    """
    mailer = MailSender(
        smtphost="smtp.163.com",   # SMTP server
        mailfrom="*****@*****.**", # sender address
        smtpuser="******",         # account name
        smtppass="******",         # SMTP authorization code, not the login password
        smtpport=25                # port
    )
    to = ["*****@*****.**", "*****@*****.**"]
    subject = u"啊欧~~,你的程序GG了..."
    # Crash report (type == 0) vs. plain shutdown notice.
    body = """<html>
<body>
<h3><i style='color:#349CFF;'>【Infinity Group: BUG侦测系统】</i></h3>
<p>
<strong>助手小i提醒您</strong> 位于
<font color='green'>
<a href='https://www.aliyun.com/'>阿里云服务器</a>
</font>上基于scrapy的爬虫程序已经GG了,
<font color='red'>请赶快前往抢修BUG!!!</font>
</p>
<h4><font color='red'>TRACEBACK:</font></h4>
<p><font color='red'>%s</font></p>
<p><font color='red'>%s</font></p>
</body>
</html>
""" % (err.__str__(), '出错类型:' + str(
        err.__class__).lstrip('<').rstrip('>')) if type == 0 else """<html>
<body>
<h3><i style='color:#349CFF;'>【Infinity Group: BUG侦测系统】</i></h3>
<p>
<strong>助手小i提醒您</strong> 位于
<font color='green'>
<a href='https://www.aliyun.com/'>阿里云服务器</a>
</font>上基于scrapy的爬虫程序已经关闭了,
<font color='red'>若非管理员正常关闭,请及时前往重新启动!!!</font>
</p>
</body>
</html>
"""
    cc = None
    mailer.send(to=to, subject=subject, body=body, cc=cc, mimetype='text/HTML')  # cc works like a fan-out distribution
def parse(self, response):
    """Scrape DOST job listings and mail a plain-text digest.

    Fixes: the Python 2 print statement is now a single-argument print()
    call (same output on Python 2 and 3).
    """
    logging.basicConfig(filename='scraper.log', level=logging.DEBUG)
    mailer = MailSender(smtphost="smtp.sendgrid.net", mailfrom="scrapy@localhost",
                        smtpuser="******", smtppass="******", smtpport=25)
    job_item = []
    email_body = "DOST \n"
    # Each row carries its link in an onclick="window.location='...'" handler.
    for sel in response.xpath('//div[@id="jg_el_listing_single"]/table/tr[@style]'):  # jobs postings table selector
        item = ScrapejobsItem()
        item['title'] = sel.xpath('td[@class="jg_jobtitle"]/strong/text()').extract()
        item['link'] = sel.xpath('@onclick').extract()
        # NOTE(review): str.strip("window.location=") strips a *character
        # set*, not a prefix — confirm the resulting URLs are intact.
        removed_prefix = ("".join(item['link'])).strip("window.location=")
        stripped_link = removed_prefix.strip("'")
        job_item.append("Job Item: " + "".join(item['title']) + "\n Link: http://dost.gov.ph" + stripped_link)
    for job in job_item:
        email_body = email_body + job + "\n\n"
    print(email_body)
    mailer.send(to=["*****@*****.**"], subject="Scrapy Job", body=email_body)
class RentPipeline(object):
    """Store rental listings in MongoDB and email an alert for each listing
    not seen before.

    Fixes: the Python 2 print statement is now a print() call.
    """
    collection_name = 'house'

    def __init__(self, mongo_uri, mongo_db):
        self.mongo_uri = mongo_uri
        self.mongo_db = mongo_db

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline from crawler settings."""
        return cls(
            mongo_uri=crawler.settings.get("MONGO_URI"),
            mongo_db=crawler.settings.get("MONGO_DATABASE", "items")
        )

    def open_spider(self, spider):
        self.mailer = MailSender(smtphost="smtp.exmail.qq.com",
                                 mailfrom="*****@*****.**",
                                 smtpuser="******", smtppass="******")
        self.client = pymongo.MongoClient(self.mongo_uri)
        self.db = self.client[self.mongo_db]

    def close_spider(self, spider):
        self.client.close()

    def process_item(self, item, spider):
        row = {}
        row['id'] = item['id'][0]
        row['title'] = item['title'][0].strip()
        row['link'] = item['link'][0]
        doc = self.db[self.collection_name].find({"id": row['id']})
        # NOTE(review): cursor.count() and collection.insert() are removed
        # in modern pymongo (use count_documents / insert_one).
        if doc.count() == 0:
            self.mailer.send(to=["*****@*****.**", "*****@*****.**"],
                             subject=u"[新房源]".encode('utf-8') + row['title'].encode('utf-8'),
                             body=u"[链接地址]".encode('utf-8') + row['link'])
            self.db[self.collection_name].insert(row)
        else:
            print("has exist", row['id'])
        return item
def log(self, spider):
    """Log crawl/scrape rates and mail an alert when the downloader
    exception rate exceeds the configured threshold."""
    item_count = self.stats.get_value('item_scraped_count', 0)
    page_count = self.stats.get_value('response_received_count', 0)
    exc_count = self.stats.get_value('downloader/exception_count', 0)
    item_rate = (item_count - self.itemsprev) * self.multiplier
    page_rate = (page_count - self.pagesprev) * self.multiplier
    exc_rate = (exc_count - self.exception_countprev) * self.multiplier
    self.pagesprev, self.itemsprev = page_count, item_count
    summary = ("Crawled " + str(page_count) + " pages (at " + str(page_rate)
               + " pages/min), scraped " + str(item_count) + " items (at "
               + str(item_rate) + " items/min)")
    logging.info(summary)
    if exc_rate > self.error_threshold:
        mailer = MailSender(smtphost=self.smtphost, mailfrom=self.mailfrom,
                            smtpuser=self.smtpuser, smtppass=self.smtppass)
        mailer.send(to=[self.mailto],
                    subject="Scrapy twitter Error",
                    body="Exception rate has reached to %d" % (exc_rate))
def parse(self, response):
    """Scrape DOST PCIEERD job postings and mail a plain-text digest.

    Fixes: Python 2 print statements converted to print() calls and stray
    trailing semicolons removed; superseded commented-out debug prints
    pruned.
    """
    job_item = []
    mailer = MailSender(smtphost="smtp.sendgrid.net", mailfrom="scrapy@localhost",
                        smtpuser="******", smtppass="******", smtpport=25)
    email_body = "DOST PCIEERD\n"
    for selector in response.xpath("//table/tbody/tr/td/a"):
        item = ScrapejobsItem()
        if (selector.xpath("strong/text()")):
            # Title is split between the node text and a <strong> child.
            item['title'] = selector.xpath("text()").extract() + selector.xpath("strong/text()").extract()
            item['link'] = selector.xpath("@href").extract()
        elif (selector.xpath("text()")):
            item['title'] = selector.xpath("text()").extract()
            item['link'] = selector.xpath("@href").extract()
        else:
            print("\n")
        try:
            job_item.append("Job Title: " + "".join(item['title']).encode('utf-8') + "\n Link: http://pcieerd.dost.gov.ph" + "".join(item['link']).encode('utf-8'))
        except KeyError as e:
            # anchor had neither title form above — item fields unset
            print(e)
        except UnicodeEncodeError as e:
            # swallow console-encoding issues when printing
            print(e)
    for item in job_item:
        email_body = email_body + "\n" + item
    mailer.send(to=["*****@*****.**"], subject="DOST PCIEERD Scraped Jobs", body=email_body)
def spider_closed(self, spider):
    """Mail crawl-progress statistics computed from the results database
    and return the send() Deferred."""
    mailer = MailSender(mailfrom="*****@*****.**", smtphost="smtp.gmail.com",
                        smtpport=587, smtpuser="******", smtppass="******")
    # Completed vs. queued URL counts.
    self.cur.execute("SELECT COUNT(*) FROM Results")
    crawled = self.cur.fetchone()
    self.cur.execute("SELECT COUNT(*) FROM RequestUrls")
    totalUrl = self.cur.fetchone()
    toBeCrawled = totalUrl[0] - crawled[0]
    emailBody = ("Crawled: " + str(crawled[0])
                 + "\nTo be crawled: " + str(toBeCrawled)
                 + "\nProgress: " + str(float(crawled[0]) / totalUrl[0]))
    return mailer.send(to=["*****@*****.**"], subject="Test", body=emailBody)
class KuwoScrapyMailWriter():
    """Collects scraped items and, if enough NEW items were found, sends
    an HTML summary mail to a list of addresses via Scrapy's MailSender.

    Python 2 code (``except Exception,e`` syntax, unicode literals).

    NOTE(review): the list/str attributes below are CLASS-level and
    mutable; ``push`` appends to ``new_list``/``item_list`` through
    ``self``, so all instances share the same lists -- confirm only one
    writer instance is ever created per process.
    """
    spider = None          # the spider whose run is being reported
    num_new = 0            # count of items flagged as newly crawled
    new_list = []          # per-item "crawled" flags, parallel to item_list
    item_list = []         # all pushed items
    addr_list = []         # recipient email addresses
    mail_sender = None     # MailSender configured from MAIL_* constants
    mail_content = ''      # assembled HTML body
    spider_inware = None   # dict with at least 'basic_desc' and 'm_info'

    def __init__(self, spider, spider_inware, addr_list):
        # MAIL_HOST / MAIL_FROM / MAIL_USER / MAIL_PASS come from module
        # scope (not visible in this chunk).
        self.mail_sender = MailSender(smtphost = MAIL_HOST, mailfrom = MAIL_FROM,
                                      smtpuser = MAIL_USER, smtppass = MAIL_PASS)
        self.spider = spider
        self.addr_list = addr_list
        self.spider_inware = spider_inware

    def push(self, item, crawled):
        """Record one item; ``crawled`` truthy means it is new."""
        if crawled:
            self.num_new += 1
        self.new_list.append(crawled)
        self.item_list.append(item)

    def write_mail(self):
        """Build the HTML mail body into ``self.mail_content``."""
        lines = []
        head = "<h1>%s</h1><hr/><ol>" % self.spider_inware['basic_desc']
        tail = "</ol><hr/>%s" % self.spider_inware['m_info']
        for item in self.item_list:
            mark = ''
            # basic_source_info holds JSON text; pretty-print it inline.
            info = json.loads(item['basic_source_info'])
            info_str = '<span>' + json.dumps(info, indent = 2) + '</span>'
            # len(lines) tracks the current item's index in new_list.
            if self.new_list[len(lines)]:
                mark = '########'  # visually flag new items
            line = '<li>' + mark
            if item['basic_source_artist']:
                line += '%(basic_source_artist)s - %(basic_source_name)s ' % item
            else:
                line += '%(basic_source_name)s ' % item
            if info:
                line += info_str
            line += '</li>'
            # Decode from the MySQL charset to unicode (Python 2 str.decode).
            line = line.decode(KUWO_SCRAPY_MYSQL_CS)
            lines.append(line)
        html_head = '<div>'
        html_tail = '</div>'
        if self.spider.start_urls:
            # Append a numbered link for each non-empty start URL.
            index = 1
            for url in self.spider.start_urls:
                if url:
                    html_tail += u'<a href="%s" target="_blank">连接%d</a> ' % (url, index)
                    index += 1
        self.mail_content = head + '\n'.join(lines) + tail
        log.msg("Write mail : \n %s" % self.mail_content, log.DEBUG)
        self.mail_content = html_head + self.mail_content + html_tail

    def send_mails(self):
        """Send the summary mail if thresholds and config allow.

        Skips sending when fewer than MIN_ALARM_LIMIT new items were
        found, when spider_inware is missing, or when there are no
        recipients. Send errors are logged, never raised.
        """
        log.msg("Sending mail :: \n num_new : %d \n spider_inware : %s \n addr_list : %s"
                % (self.num_new, json.dumps(self.spider_inware, indent = 2),
                   json.dumps(self.addr_list, indent = 2)), log.DEBUG )
        if (self.num_new < MIN_ALARM_LIMIT) or (not self.spider_inware) or (len(self.addr_list) < 1):
            return
        self.write_mail()
        try:
            # Only used for the log line below; the actual recipients are
            # passed as the raw list.
            addr = "<%s>" % ('>,<'.join(self.addr_list))
            log.msg("Sending mail to %s" % (addr), log.INFO)
            self.mail_sender.send(to = self.addr_list,
                                  subject = self.spider_inware['basic_desc'],
                                  body = self.mail_content.encode('utf-8','ignore'),
                                  mimetype = 'text/HTML;charset="utf-8"')
        except Exception,e:
            log.msg("Sendding mail error : %s" %(e), log.ERROR)
class MemoryUsage(BaseMiddleware):
    """Middleware that polls the process's virtual memory (via /proc)
    once a minute and (a) records stats, (b) emails a warning at the
    warning threshold, (c) emails and stops the crawler at the hard
    limit.

    Configuration comes through the project's Field descriptors read
    from ``crawler.metas`` (framework not visible in this chunk).
    """
    memusage_enable = BooleanField(default=False)       # master switch
    memusage_notify_mail = ListField(default=[])        # alert recipients
    memusage_limit_mb = IntegerField(default=0)         # hard limit, MB (0 = off)
    memusage_warning_mb = IntegerField(default=0)       # warning level, MB (0 = off)
    memusage_report = BooleanField(default=False)       # declared but unused below

    def __init__(self, crawler):
        super(MemoryUsage, self).__init__(crawler.metas)
        # Disable cleanly if switched off or /proc is unavailable.
        if not self.memusage_enable.to_value():
            raise NotConfigured
        if not procfs_supported():
            raise NotConfigured
        self.crawler = crawler
        self.warned = False  # ensures the warning mail fires at most once
        self.notify_mails = self.memusage_notify_mail.to_value()
        # Thresholds are configured in MB; keep them in bytes internally.
        self.limit = self.memusage_limit_mb.to_value() * 1024 * 1024
        self.warning = self.memusage_warning_mb.to_value() * 1024 * 1024
        self.report = self.memusage_report.to_value()
        self.mail = MailSender(self.metas)
        dispatcher.connect(self.engine_started, signal=signals.engine_started)
        dispatcher.connect(self.engine_stopped, signal=signals.engine_stopped)

    @classmethod
    def from_crawler(cls, crawler):
        """Standard Scrapy extension constructor hook."""
        return cls(crawler)

    def get_virtual_size(self):
        # VmSize = total virtual memory of this process, read from /proc.
        return get_vmvalue_from_procfs('VmSize')

    def engine_started(self):
        """Record startup usage and start the per-minute polling tasks."""
        stats.set_value('memusage/startup', self.get_virtual_size())
        self.tasks = []
        tsk = task.LoopingCall(self.update)
        self.tasks.append(tsk)
        tsk.start(60.0, now=True)
        if self.limit:
            tsk = task.LoopingCall(self._check_limit)
            self.tasks.append(tsk)
            tsk.start(60.0, now=True)
        if self.warning:
            tsk = task.LoopingCall(self._check_warning)
            self.tasks.append(tsk)
            tsk.start(60.0, now=True)

    def engine_stopped(self):
        """Stop all polling tasks that are still running."""
        for tsk in self.tasks:
            if tsk.running:
                tsk.stop()

    def update(self):
        # Track the high-water mark of memory usage.
        stats.max_value('memusage/max', self.get_virtual_size())

    def _check_limit(self):
        """If over the hard limit: record stats, optionally mail, stop crawler."""
        if self.get_virtual_size() > self.limit:
            stats.set_value('memusage/limit_reached', 1)
            mem = self.limit / 1024 / 1024  # back to MB for messages
            log.msg("Memory usage exceeded %dM. Shutting down Scrapy..."
                    % mem, level=log.ERROR)
            if self.notify_mails:
                subj = "%s terminated: memory usage exceeded %dM at %s" % \
                    (self.crawler.settings['BOT_NAME'], mem, socket.gethostname())
                self._send_report(self.notify_mails, subj)
                stats.set_value('memusage/limit_notified', 1)
            self.crawler.stop()

    def _check_warning(self):
        """Mail a one-time warning when usage crosses the soft threshold."""
        if self.warned:  # warn only once
            return
        if self.get_virtual_size() > self.warning:
            stats.set_value('memusage/warning_reached', 1)
            mem = self.warning / 1024 / 1024
            log.msg("Memory usage reached %dM" % mem, level=log.WARNING)
            if self.notify_mails:
                subj = "%s warning: memory usage reached %dM at %s" % \
                    (self.crawler.settings['BOT_NAME'], mem, socket.gethostname())
                self._send_report(self.notify_mails, subj)
                stats.set_value('memusage/warning_notified', 1)
            self.warned = True

    def _send_report(self, rcpts, subject):
        """send notification mail with some additional useful info"""
        s = "Memory usage at engine startup : %dM\r\n" % (stats.get_value('memusage/startup') / 1024 / 1024)
        s += "Maximum memory usage           : %dM\r\n" % (stats.get_value('memusage/max') / 1024 / 1024)
        s += "Current memory usage           : %dM\r\n" % (self.get_virtual_size() / 1024 / 1024)
        s += "ENGINE STATUS ------------------------------------------------------- \r\n"
        s += "\r\n"
        s += pformat(get_engine_status())
        s += "\r\n"
        self.mail.send(rcpts, subject, s)
def test_send_single_values_to_and_cc(self):
    """``send()`` should accept plain strings (not just lists) for the
    ``to`` and ``cc`` arguments and normalize them internally.

    Previously this test only verified that no exception was raised; it
    now also asserts the mail was captured with a normalized recipient
    list, matching the assertion style of the sibling tests.
    """
    mailsender = MailSender(debug=True)
    mailsender.send(to='*****@*****.**', subject='subject', body='body',
                    cc='*****@*****.**', _callback=self._catch_mail_sent)
    assert self.catched_msg
    # MailSender converts the single-string 'to' into a list before
    # invoking the callback.
    self.assertEqual(self.catched_msg['to'], ['*****@*****.**'])
    self.assertEqual(self.catched_msg['subject'], 'subject')
    self.assertEqual(self.catched_msg['body'], 'body')
class MemoryUsage(object):
    """Scrapy extension: polls process memory (VmRSS from /proc) every
    minute, records stats on the crawler, mails a one-time warning at
    the soft threshold, and closes spiders / stops the crawler at the
    hard limit.
    """

    def __init__(self, crawler):
        # Disable cleanly when switched off or /proc is unavailable.
        if not crawler.settings.getbool('MEMUSAGE_ENABLED'):
            raise NotConfigured
        if not procfs_supported():
            raise NotConfigured
        self.crawler = crawler
        self.warned = False  # ensures the warning mail fires at most once
        self.notify_mails = crawler.settings.getlist('MEMUSAGE_NOTIFY_MAIL')
        # Settings are in MB; keep the thresholds in bytes internally.
        self.limit = crawler.settings.getint('MEMUSAGE_LIMIT_MB')*1024*1024
        self.warning = crawler.settings.getint('MEMUSAGE_WARNING_MB')*1024*1024
        self.report = crawler.settings.getbool('MEMUSAGE_REPORT')  # read but unused below
        self.mail = MailSender()
        crawler.signals.connect(self.engine_started, signal=signals.engine_started)
        crawler.signals.connect(self.engine_stopped, signal=signals.engine_stopped)

    @classmethod
    def from_crawler(cls, crawler):
        """Standard Scrapy extension constructor hook."""
        return cls(crawler)

    def get_virtual_size(self):
        # VmRSS = resident set size of this process, read from /proc.
        return get_vmvalue_from_procfs('VmRSS')

    def engine_started(self):
        """Record startup usage and start the per-minute polling tasks."""
        self.crawler.stats.set_value('memusage/startup', self.get_virtual_size())
        self.tasks = []
        tsk = task.LoopingCall(self.update)
        self.tasks.append(tsk)
        tsk.start(60.0, now=True)
        if self.limit:
            tsk = task.LoopingCall(self._check_limit)
            self.tasks.append(tsk)
            tsk.start(60.0, now=True)
        if self.warning:
            tsk = task.LoopingCall(self._check_warning)
            self.tasks.append(tsk)
            tsk.start(60.0, now=True)

    def engine_stopped(self):
        """Stop all polling tasks that are still running."""
        for tsk in self.tasks:
            if tsk.running:
                tsk.stop()

    def update(self):
        # Track the high-water mark of memory usage.
        self.crawler.stats.max_value('memusage/max', self.get_virtual_size())

    def _check_limit(self):
        """At the hard limit: record stats, optionally mail, then shut down.

        Prefers closing each open spider with reason 'memusage_exceeded'
        (so normal close handlers run); falls back to stopping the whole
        crawler when no spiders are open.
        """
        if self.get_virtual_size() > self.limit:
            self.crawler.stats.set_value('memusage/limit_reached', 1)
            mem = self.limit/1024/1024  # back to MB for messages
            log.msg("Memory usage exceeded %dM. Shutting down Scrapy..."
                    % mem, level=log.ERROR)
            if self.notify_mails:
                subj = "%s terminated: memory usage exceeded %dM at %s" % \
                    (self.crawler.settings['BOT_NAME'], mem, socket.gethostname())
                self._send_report(self.notify_mails, subj)
                self.crawler.stats.set_value('memusage/limit_notified', 1)
            open_spiders = self.crawler.engine.open_spiders
            if open_spiders:
                for spider in open_spiders:
                    self.crawler.engine.close_spider(spider, 'memusage_exceeded')
            else:
                self.crawler.stop()

    def _check_warning(self):
        """Mail a one-time warning when usage crosses the soft threshold."""
        if self.warned:  # warn only once
            return
        if self.get_virtual_size() > self.warning:
            self.crawler.stats.set_value('memusage/warning_reached', 1)
            mem = self.warning/1024/1024
            log.msg("Memory usage reached %dM" % mem, level=log.WARNING)
            if self.notify_mails:
                subj = "%s warning: memory usage reached %dM at %s" % \
                    (self.crawler.settings['BOT_NAME'], mem, socket.gethostname())
                self._send_report(self.notify_mails, subj)
                self.crawler.stats.set_value('memusage/warning_notified', 1)
            self.warned = True

    def _send_report(self, rcpts, subject):
        """send notification mail with some additional useful info"""
        stats = self.crawler.stats
        s = "Memory usage at engine startup : %dM\r\n" % (stats.get_value('memusage/startup')/1024/1024)
        s += "Maximum memory usage           : %dM\r\n" % (stats.get_value('memusage/max')/1024/1024)
        s += "Current memory usage           : %dM\r\n" % (self.get_virtual_size()/1024/1024)
        s += "ENGINE STATUS ------------------------------------------------------- \r\n"
        s += "\r\n"
        s += pformat(get_engine_status(self.crawler.engine))
        s += "\r\n"
        self.mail.send(rcpts, subject, s)
from scrapy.mail import MailSender

# Build a sender with default settings and fire off a single message,
# with one primary recipient and one CC.
sender = MailSender()
sender.send(
    to=["*****@*****.**"],
    subject="Some subject",
    body="Some body",
    cc=['*****@*****.**'],
)
class MemoryUsage(object):
    """Memory-usage monitor (older, global-singleton style): polls VmSize
    from /proc every minute, records stats via the module-level ``stats``
    object, mails a one-time warning at the soft threshold, and stops the
    crawl at the hard limit.

    Relies on module-level ``settings``, ``stats``, ``log``, ``dispatcher``
    and ``crawler`` objects that are not visible in this chunk.
    """

    def __init__(self):
        # Disable cleanly when switched off or /proc is unavailable.
        if not settings.getbool("MEMUSAGE_ENABLED"):
            raise NotConfigured
        if not procfs_supported():
            raise NotConfigured
        self.warned = False  # ensures the warning mail fires at most once
        self.notify_mails = settings.getlist("MEMUSAGE_NOTIFY")
        # Settings are in MB; keep the thresholds in bytes internally.
        self.limit = settings.getint("MEMUSAGE_LIMIT_MB") * 1024 * 1024
        self.warning = settings.getint("MEMUSAGE_WARNING_MB") * 1024 * 1024
        self.report = settings.getbool("MEMUSAGE_REPORT")  # read but unused below
        self.mail = MailSender()
        dispatcher.connect(self.engine_started, signal=signals.engine_started)
        dispatcher.connect(self.engine_stopped, signal=signals.engine_stopped)

    def get_virtual_size(self):
        # VmSize = total virtual memory of this process, read from /proc.
        return get_vmvalue_from_procfs("VmSize")

    def engine_started(self):
        """Record startup usage and start the per-minute polling tasks."""
        stats.set_value("memusage/startup", self.get_virtual_size())
        self.tasks = []
        tsk = task.LoopingCall(self.update)
        self.tasks.append(tsk)
        tsk.start(60.0, now=True)
        if self.limit:
            tsk = task.LoopingCall(self._check_limit)
            self.tasks.append(tsk)
            tsk.start(60.0, now=True)
        if self.warning:
            tsk = task.LoopingCall(self._check_warning)
            self.tasks.append(tsk)
            tsk.start(60.0, now=True)

    def engine_stopped(self):
        """Stop all polling tasks that are still running."""
        for tsk in self.tasks:
            if tsk.running:
                tsk.stop()

    def update(self):
        # Track the high-water mark of memory usage.
        stats.max_value("memusage/max", self.get_virtual_size())

    def _check_limit(self):
        """At the hard limit: record stats, optionally mail, stop the crawl."""
        if self.get_virtual_size() > self.limit:
            stats.set_value("memusage/limit_reached", 1)
            mem = self.limit / 1024 / 1024  # back to MB for messages
            log.msg("Memory usage exceeded %dM. Shutting down Scrapy..."
                    % mem, level=log.ERROR)
            if self.notify_mails:
                subj = "%s terminated: memory usage exceeded %dM at %s" % (
                    settings["BOT_NAME"],
                    mem,
                    socket.gethostname(),
                )
                self._send_report(self.notify_mails, subj)
                stats.set_value("memusage/limit_notified", 1)
            # NOTE(review): 'crawler' is not defined anywhere in this chunk
            # and is not an attribute of self -- presumably a module-level
            # global; verify it exists, otherwise this raises NameError at
            # the worst possible moment (out-of-memory shutdown).
            crawler.stop()

    def _check_warning(self):
        """Mail a one-time warning when usage crosses the soft threshold."""
        if self.warned:  # warn only once
            return
        if self.get_virtual_size() > self.warning:
            stats.set_value("memusage/warning_reached", 1)
            mem = self.warning / 1024 / 1024
            log.msg("Memory usage reached %dM" % mem, level=log.WARNING)
            if self.notify_mails:
                subj = "%s warning: memory usage reached %dM at %s" % (settings["BOT_NAME"], mem, socket.gethostname())
                self._send_report(self.notify_mails, subj)
                stats.set_value("memusage/warning_notified", 1)
            self.warned = True

    def _send_report(self, rcpts, subject):
        """send notification mail with some additional useful info"""
        s = "Memory usage at engine startup : %dM\r\n" % (stats.get_value("memusage/startup") / 1024 / 1024)
        s += "Maximum memory usage           : %dM\r\n" % (stats.get_value("memusage/max") / 1024 / 1024)
        s += "Current memory usage           : %dM\r\n" % (self.get_virtual_size() / 1024 / 1024)
        s += "ENGINE STATUS ------------------------------------------------------- \r\n"
        s += "\r\n"
        s += pformat(get_engine_status())
        s += "\r\n"
        self.mail.send(rcpts, subject, s)
def close_spider(self, spider):
    """On spider close, mail the accumulated items (``self.items``) as a
    pretty-printed plain-text report."""
    report = pprint.pformat(self.items)
    sender = MailSender()
    sender.send(
        to=["*****@*****.**"],
        subject="PowderValley New In-Stock",
        body=report,
    )