def FetchProxies(self):
    """Fetch the proxy-list page from xicidaili.com, parse host:port pairs
    from the first table, enqueue one validation task per proxy, and return
    the list of "host:port" strings."""
    print 'start to fetch html page'
    # Fake a browser User-Agent; the site may reject the default urllib2 one.
    user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
    headers = {'User-Agent': user_agent}
    req = urllib2.Request('http://www.xicidaili.com/wn/', headers=headers)
    response = urllib2.urlopen(req)
    html = response.read()
    #print html
    print 'start to analysis html page'
    soup = BeautifulSoup(html, 'html5lib')
    tbody = soup.find_all('tbody')
    # First <tbody> holds the proxy table; each <tr> is one proxy row.
    proxylistitems = tbody[0].find_all('tr')
    proxies = []
    print 'start to validate proxys, count is', len(proxylistitems)
    for proxylistitem in proxylistitems:
        itemtexts = proxylistitem.find_all('td')
        # Skip header/malformed rows without the address and port cells.
        if len(itemtexts) < 4:
            continue
        address = itemtexts[2].string.strip()
        port = itemtexts[3].string.strip()
        postdata = '{0}:{1}'.format(address, port)
        # Each proxy is validated asynchronously by a task-queue worker.
        add_task('ValidateProxyTaskQueue',
                 'http://1.fetchproxy.applinzi.com/task/validateproxy',
                 postdata)
        proxies += [postdata]
    return proxies
def broadcast(self, msg, type): print msg, type #listener_manager.broadcast(json.dumps({'msg':msg,'type':type}),self.listeners) from sae.taskqueue import add_task rkey = msg_box.gen_key() msg_box.set_msg(rkey, json.dumps({'type': type, 'msg': msg})) add_task('msg_queue', '/logboard/broadcast/', rkey)
def get(self, request, *args, **kwargs):
    """Enqueue one scan task for every valid subscription."""
    url = reverse('subscribe_scan')
    for sub in Subscribe.objects.filter(is_valid=True):
        payload = {
            "site": sub.site,
            "id": sub.id,
            "keywords": sub.keywords,
        }
        add_task('subscribe_q', url, payload=payload)
    return self.render_to_response({'code': 0, 'msg': 'ok'})
def broadcast(self, msg, type): print msg, type # listener_manager.broadcast(json.dumps({'msg':msg,'type':type}),self.listeners) from sae.taskqueue import add_task rkey = msg_box.gen_key() msg_box.set_msg(rkey, json.dumps({"type": type, "msg": msg})) add_task("msg_queue", "/logboard/broadcast/", rkey)
def post(self):
    """Create a new article from form data.

    Writes a JSON response: status 200 on success, 500 when required
    fields are missing or storage fails.
    """
    self.set_header('Content-Type','application/json')
    rspd = {'status': 201, 'msg':'ok'}
    try:
        # Map checkbox string values to integer flags.
        tf = {'true':1,'false':0}
        timestamp = int(time())
        post_dic = {
            'category': self.get_argument("cat"),
            'title': self.get_argument("tit"),
            'content': self.get_argument("con"),
            # Normalize full-width commas in the tag list.
            'tags': self.get_argument("tag",'').replace(u',',','),
            'closecomment': self.get_argument("clo",'0'),
            'password': self.get_argument("password",''),
            'add_time': timestamp,
            'edit_time': timestamp,
            'archive': genArchive(),
        }
        if post_dic['tags']:
            # Deduplicate tags and drop empty entries.
            tagslist = set([x.strip() for x in post_dic['tags'].split(',')])
            try:
                tagslist.remove('')
            except:
                pass
            if tagslist:
                post_dic['tags'] = ','.join(tagslist)
        post_dic['closecomment'] = tf[post_dic['closecomment'].lower()]
    except:
        # cat/tit/con are required; get_argument raises when absent.
        rspd['status'] = 500
        rspd['msg'] = '错误: 注意必填的三项'
        self.write(json.dumps(rspd))
        return
    postid = Article.add_new_article(post_dic)
    if postid:
        # Initialize the page-view counter for the new post.
        keyname = 'pv_%s' % (str(postid))
        set_count(keyname,0,0)
        Category.add_postid_to_cat(post_dic['category'], str(postid))
        Archive.add_postid_to_archive(genArchive(), str(postid))
        increment('Totalblog')
        if post_dic['tags']:
            Tag.add_postid_to_tags(post_dic['tags'].split(','), str(postid))
        rspd['status'] = 200
        rspd['msg'] = '完成: 你已经成功添加了一篇文章 <a href="/t/%s" target="_blank">查看</a>' % str(postid)
        clear_cache_by_pathlist(['/', 'cat:%s' % quoted_string(post_dic['category'])])
        # Ping XML-RPC services only in production.
        if not debug:
            add_task('default', '/task/pingrpctask')
        self.write(json.dumps(rspd))
        return
    else:
        rspd['status'] = 500
        rspd['msg'] = '错误: 未知错误,请尝试重新提交'
        self.write(json.dumps(rspd))
        return
def post(self):
    """Create a new article from form data and respond with JSON
    (status 200 ok, 500 on missing fields or storage failure)."""
    self.set_header('Content-Type', 'application/json')
    rspd = {'status': 201, 'msg': 'ok'}
    try:
        # Map checkbox string values to integer flags.
        tf = {'true': 1, 'false': 0}
        timestamp = int(time())
        post_dic = {
            'category': self.get_argument("cat"),
            'title': self.get_argument("tit"),
            'content': self.get_argument("con"),
            # Normalize full-width commas in the tag list.
            'tags': self.get_argument("tag", '').replace(u',', ','),
            'closecomment': self.get_argument("clo", '0'),
            'password': self.get_argument("password", ''),
            'add_time': timestamp,
            'edit_time': timestamp,
        }
        if post_dic['tags']:
            # Deduplicate tags and drop empty entries.
            tagslist = set(
                [x.strip() for x in post_dic['tags'].split(',')])
            try:
                tagslist.remove('')
            except:
                pass
            if tagslist:
                post_dic['tags'] = ','.join(tagslist)
        post_dic['closecomment'] = tf[post_dic['closecomment'].lower()]
    except:
        # cat/tit/con are required; get_argument raises when absent.
        rspd['status'] = 500
        rspd['msg'] = '错误: 注意必填的三项'
        self.write(json.dumps(rspd))
        return
    postid = Article.add_new_article(post_dic)
    if postid:
        Category.add_postid_to_cat(post_dic['category'], str(postid))
        if post_dic['tags']:
            Tag.add_postid_to_tags(post_dic['tags'].split(','), str(postid))
        rspd['status'] = 200
        rspd['msg'] = '完成: 你已经成功添加了一篇文章 <a href="/t/%s" target="_blank">查看</a>' % str(postid)
        clear_cache_by_pathlist(
            ['/', 'cat:%s' % quoted_string(post_dic['category'])])
        # Ping XML-RPC services only in production.
        if not debug:
            add_task('default', '/task/pingrpctask')
        self.write(json.dumps(rspd))
        return
    else:
        rspd['status'] = 500
        rspd['msg'] = '错误: 未知错误,请尝试重新提交'
        self.write(json.dumps(rspd))
        return
def get(self, request, *args, **kwargs):
    """Schedule a water-check task for every user who owns fish."""
    queue = getattr(settings, 'FEEDFISH_TASKQUEUE', 'fish')
    for user in User.objects.all():
        if has_fish(user):
            url = reverse('fish_check4water',
                          kwargs={'user': user.username})
            add_task(queue, url)
    return self.render_to_response({'code': 0, 'msg': 'ok'})
def get(self, request, *args, **kwargs):
    """Fan out one scan task per active subscription."""
    for item in Subscribe.objects.filter(is_valid=True):
        add_task(
            'subscribe_q',
            reverse('subscribe_scan'),
            payload={
                "site": item.site,
                "id": item.id,
                "keywords": item.keywords,
            },
        )
    return self.render_to_response({'code': 0, 'msg': 'ok'})
def on_exportlog_saved(sender, instance, created, **kwargs): if not created: return if 'SERVER_SOFTWARE' in os.environ: from sae.taskqueue import add_task add_task('export', '/v1/task/export/', payload='eid=%d' % instance.id) return print "local env: do real export task"
def QueryAllProxy(self):
    """Load every stored proxy row and enqueue removal-validation tasks
    in batches of 10.

    Returns:
        The total number of proxy rows found.

    Fix: the MySQL cursor and connection were never closed (resource
    leak); both are now released in ``finally`` blocks.
    """
    conn = MySQLdb.connect(host=sae.const.MYSQL_HOST,
                           user=sae.const.MYSQL_USER,
                           passwd=sae.const.MYSQL_PASS,
                           db=sae.const.MYSQL_DB,
                           port=int(sae.const.MYSQL_PORT),
                           charset="utf8")
    try:
        cursor = conn.cursor()
        try:
            cursor.execute("select * from app_proxys")
            queryret = cursor.fetchall()
        finally:
            cursor.close()
    finally:
        conn.close()
    # Enqueue rows in chunks of 10 so each task payload stays small.
    for i in range(0, len(queryret), 10):
        batch = map(getres, queryret[i:i + 10])
        add_task('ValidateProxyTaskQueue',
                 'http://1.fetchproxy.applinzi.com/task/removeproxy',
                 json.dumps(batch))
    return len(queryret)
def trigger(queue_name, url_name, payload=None, *args, **kwargs):
    """Enqueue a task on one of the ten 'chat0'..'chat9' queues, chosen
    at random.

    Args:
        queue_name: kept for interface compatibility (unused here, as in
            the original).
        url_name: Django URL name resolved via ``reverse``.
        payload: dict (url-encoded before enqueueing) or pre-encoded
            string; defaults to an empty payload.

    Fixes: the default ``payload={}`` was a shared mutable default
    argument — replaced with a ``None`` sentinel.  The redundant
    ``not isinstance(payload, (str, unicode))`` guard was dropped: a
    dict can never be a string, so ``isinstance(payload, dict)`` alone
    is equivalent.
    """
    if payload is None:
        payload = {}
    if isinstance(payload, dict):
        payload = urlencode(payload)
    add_task(
        'chat' + str(randint(0, 9)),
        reverse(url_name),
        payload,
        *args, **kwargs
    )
def post(self):
    """Create a new article from form data and respond with JSON
    (status 200 ok, 500 on missing fields or storage failure)."""
    self.set_header("Content-Type", "application/json")
    rspd = {"status": 201, "msg": "ok"}
    try:
        # Map checkbox string values to integer flags.
        tf = {"true": 1, "false": 0}
        timestamp = int(time())
        post_dic = {
            "category": self.get_argument("cat"),
            "title": self.get_argument("tit"),
            "content": self.get_argument("con"),
            # Normalize full-width commas in the tag list.
            "tags": self.get_argument("tag", "").replace(u",", ","),
            "closecomment": self.get_argument("clo", "0"),
            "password": self.get_argument("password", ""),
            "add_time": timestamp,
            "edit_time": timestamp,
        }
        if post_dic["tags"]:
            # Deduplicate tags and drop empty entries.
            tagslist = set([x.strip() for x in post_dic["tags"].split(",")])
            try:
                tagslist.remove("")
            except:
                pass
            if tagslist:
                post_dic["tags"] = ",".join(tagslist)
        post_dic["closecomment"] = tf[post_dic["closecomment"].lower()]
    except:
        # cat/tit/con are required; get_argument raises when absent.
        rspd["status"] = 500
        rspd["msg"] = "错误: 注意必填的三项"
        self.write(json.dumps(rspd))
        return
    postid = Article.add_new_article(post_dic)
    if postid:
        Category.add_postid_to_cat(post_dic["category"], str(postid))
        if post_dic["tags"]:
            Tag.add_postid_to_tags(post_dic["tags"].split(","), str(postid))
        rspd["status"] = 200
        rspd["msg"] = '完成: 你已经成功添加了一篇文章 <a href="/t/%s" target="_blank">查看</a>' % str(postid)
        clear_cache_by_pathlist(["/", "cat:%s" % quoted_string(post_dic["category"])])
        # Ping XML-RPC services only in production.
        if not debug:
            add_task("default", "/task/pingrpctask")
        self.write(json.dumps(rspd))
        return
    else:
        rspd["status"] = 500
        rspd["msg"] = "错误: 未知错误,请尝试重新提交"
        self.write(json.dumps(rspd))
        return
def on_exportlog_saved(sender, instance, created, **kwargs): if not created: return if 'SERVER_SOFTWARE' in os.environ: from sae.taskqueue import add_task add_task( 'export', '/v1/task/export/', payload='eid=%d' % instance.id) return print "local env: do real export task"
def on_url(self, msg):
    """Handle an incoming URL message: persist it and, on success,
    enqueue a fetch task; reply with a text confirmation either way."""
    id, result = dba.msg_text_insert(msg)
    if id > 0:
        text = u"谢谢投递!"
        fetch_arg = u"{}-{}".format(str(id), msg.content)
        add_task("FetchJobQueue", "/task/fetch", fetch_arg)
    else:
        text = u"数据库操作不幸失败鸟!"
    msgs = [{"title": text, "content": ""}]
    return 0, self.msg_builder.to_text(msgs, msg.from_user, msg.to_user,
                                       int(time.time()))
def post(self):
    """Create ('add') or update ('edit') a post from form arguments and
    respond with a JSON status."""
    rspd = {"status": 200, "msg": "OK"}
    try:
        # Map checkbox string values to integer flags.
        tf = {'true': 1, 'false': 0}
        act = self.get_argument("act", '').encode('utf-8')
        post_dic = {
            'category_id': self.get_argument("category_id", '-').encode('utf-8'),
            'user_id': self.get_secure_cookie("user_id"),
            'title': self.get_argument("title").encode('utf-8'),
            'digest': '-',
            'content': self.get_argument("content").encode('utf-8'),
            'image_url': '-',
            'tags': ','.join(self.get_arguments("tag")),
            'allow_comment': tf[self.get_argument("clo", 'false')],
            'top': tf[self.get_argument("top", 'false')],
            'password': self.get_argument("password", '').encode('utf-8'),
            'salt': '-',
        }
    except:
        # title/content are required; get_argument raises when absent.
        rspd['status'] = 500
        rspd['msg'] = "用户名、邮箱均为必填项!"
        self.write(json.dumps(rspd))
        return
    if act == 'add':
        Posts.create(post_dic)
    elif act == 'edit':
        # NOTE(review): int('') raises if post_id is absent — presumably
        # 'edit' requests always carry post_id; confirm at call sites.
        post_dic['post_id'] = int(self.get_argument("post_id", ""))
        Posts.update(post_dic)
    clear_cache_by_pathlist(['/'])
    # Ping XML-RPC services only in production.
    if not debug:
        add_task('default', '/task/pingrpctask')
    self.set_header("Content-Type", "application/json")
    rspd['msg'] = "成功保存文章!"
    self.write(json.dumps(rspd))
def get(self, request, *args, **kwargs):
    """Called from crontab: enqueue a feed-check task for each user
    that owns fish.

    ``kwargs['time']`` says which of the day's (up to three) runs this
    is; it defaults to 0.
    """
    run_no = kwargs.setdefault('time', 0)
    queue = getattr(settings, 'FEEDFISH_TASKQUEUE', 'fish')
    for user in User.objects.all():
        if not has_fish(user):
            continue
        url = reverse('fish_check4feed',
                      kwargs={'time': run_no, 'user': user.username})
        add_task(queue, url)
    return self.render_to_response({'code': 0, 'msg': 'ok'})
def get(self, request, *args, **kwargs):
    """Scan the object's HTML content for <img> src URLs and queue one
    upload task per image, each with a random start delay."""
    instance = self.get_object()
    bucket = getattr(settings, 'STORAGE_BUCKET_NAME')
    found = re.findall(r' src="(.*?)" ', instance.content)
    task_link = reverse('weixinmp.image_upload', args=(instance.pk,))
    for raw_url in found:
        # Keep only the part of the path after the bucket name.
        name = urlparse(raw_url).path.split(bucket)[-1]
        body = urlencode({'name': name, 'raw_url': raw_url})
        # Spread tasks over ~5 minutes to avoid hammering the endpoint.
        add_task('weixin', task_link, payload=body,
                 delay=random.randrange(298))
    return self.render_to_response({'msg': found})
def get(self, request, *args, **kwargs):
    """Queue a re-upload task for every image referenced in this
    instance's content, with randomized delays."""
    instance = self.get_object()
    bucket_name = getattr(settings, 'STORAGE_BUCKET_NAME')
    image_urls = re.findall(r' src="(.*?)" ', instance.content)
    for image_url in image_urls:
        file_name = urlparse(image_url).path.split(bucket_name)[-1]
        link = reverse('weixinmp.image_upload', args=(instance.pk,))
        add_task(
            'weixin',
            link,
            payload=urlencode({'name': file_name, 'raw_url': image_url}),
            delay=random.randrange(298),
        )
    return self.render_to_response({'msg': image_urls})
def save(self, *args, **kwargs):
    """Fill in digest/cover defaults and persist; a first-time save with
    sync enabled enqueues the weixin tasks, and every first-time save
    records a timeline event."""
    created = not self.id
    if not self.digest:
        self.digest = self.get_digest()
    if not self.cover_img:
        # Fall back to the configured default cover image.
        self.cover_img = getattr(settings, 'WEIXIN_DEFAULT_COVER')
        self.fileid = getattr(settings, 'WEIXIN_DEFAULT_COVER_ID')
    super(WeixinMp, self).save(*args, **kwargs)
    if created and self.sync:
        from sae.taskqueue import add_task
        add_task('weixin', reverse('weixinmp.upload', args=(self.pk,)),
                 delay=300)
        add_task('weixin',
                 reverse('weixinmp.image_collect', args=(self.pk,)),
                 delay=180)
    if created:
        Event(user=self.user, content_object=self).save()
def save(self, *args, **kwargs):
    """Auto-fill digest and cover defaults before saving; first-time
    saves with sync enabled enqueue weixin tasks, and all first-time
    saves add a timeline event."""
    is_new = not self.id
    if not self.digest:
        self.digest = self.get_digest()
    if not self.cover_img:
        self.cover_img = getattr(settings, 'WEIXIN_DEFAULT_COVER')
        self.fileid = getattr(settings, 'WEIXIN_DEFAULT_COVER_ID')
    super(WeixinMp, self).save(*args, **kwargs)
    if self.sync and is_new:
        from sae.taskqueue import add_task
        # Upload waits longer than image collection so images exist first.
        for view_name, wait in (('weixinmp.upload', 300),
                                ('weixinmp.image_collect', 180)):
            add_task('weixin', reverse(view_name, args=(self.pk,)),
                     delay=wait)
    if is_new:
        event = Event(user=self.user, content_object=self)
        event.save()
def save(self, *args, **kwargs):
    """Sanitize the permalink, save, then: best-effort pingback for
    public notes outside DEBUG, and, for newly-created notes, a timeline
    event plus a search-index update task."""
    # Replace any non-word character so the link is URL-safe.
    self.meta_link = re.sub(r'\W', '-', self.meta_link)
    created = not self.id
    super(Note, self).save(*args, **kwargs)
    # Change since 2014/04/02 . Signal was canceled.
    if not self.is_private and not settings.DEBUG:
        # Best effort: pingback failures must never block saving.
        try:
            bd_pingback.pingback(self)
        except Exception:
            pass
    # Add timeline update.
    if created:
        event = Event(user=self.user, content_object=self)
        event.save()
        # Add task to taskqueue for search indexes update.
        # NOTE(review): placement inside `if created:` reconstructed from
        # the flattened source — confirm the index task should not also
        # run for edits of existing notes.
        from sae.taskqueue import add_task
        add_task('task1', '/backends/updateindex/')
def POST(self): news = pickle.loads(web.data()) #TODO: too dangerous here try: if 'content' not in news: page = PageContentParser(news['link']) news['content'] = page.getMainContent() # if hasattr(page, 'getTitle'): if not news.get('title', None): news['title'] = page.getTitle() news['title'] = getattr(page, 'getTitlePrefix', lambda: '')() + news['title'] if 'pubDate' not in news: content = getattr(news['content'], 'get_text', lambda : news['content'])() news['pubDate'] = dateSniffer(content) or datetime.today() # on Jan 29, 2013 to add school-notices in howareyou notice = {'title': news['title'], 'date': news['pubDate'], 'link': news['link'], 'dept': news['tbln']} add_task('web_content_fetcher', '/notice', str(notice)) self.db.insert(news.pop('tbln'), **news) except Exception, e: if const.isLocal: traceback.print_exc() os._exit(1) raise
def add_event_task(event, data):
    """Serialize *data* as JSON and enqueue it on the shared 'queue' at
    /task/<event>/; returns whatever ``add_task`` returns."""
    payload = json.dumps(data)
    uri = '/task/{0}/'.format(event)
    logging.debug('add %s task %s' % (event, data))
    return add_task('queue', uri, payload)
def get(self):
    """Enqueue one ping task per configured XML-RPC endpoint, then echo
    the current timestamp."""
    for idx, _ in enumerate(XML_RPC_ENDPOINTS):
        add_task('default', '%s/task/pingrpc/%d' % (BASE_URL, idx))
    self.write(str(time()))
def post(self):
    """Create a new article (content utf-8 encoded when MARKDOWN is on),
    update category/archive/tag indexes and counters, clear caches, and
    respond with a JSON status."""
    self.set_header('Content-Type', 'application/json')
    rspd = {'status': 201, 'msg': 'ok'}
    try:
        # Map checkbox string values to integer flags.
        tf = {'true': 1, 'false': 0}
        timestamp = int(time())
        content = self.get_argument("con")
        if getAttr('MARKDOWN'):
            #content = markdown.markdown(parse_text(content))
            content = content.encode("utf-8")
        post_dic = {
            'category': self.get_argument("cat"),
            'title': self.get_argument("tit"),
            'content': content,
            # Normalize full-width commas in the tag list.
            'tags': self.get_argument("tag", '').replace(u',', ','),
            'closecomment': self.get_argument("clo", '0'),
            'password': self.get_argument("password", ''),
            'add_time': timestamp,
            'edit_time': timestamp,
            'archive': genArchive(),
        }
        # The KV-store backend needs an explicit comment counter field.
        if MYSQL_TO_KVDB_SUPPORT:
            post_dic['comment_num'] = '0'
        if post_dic['tags']:
            # Deduplicate tags and drop empty entries.
            tagslist = set(
                [x.strip() for x in post_dic['tags'].split(',')])
            try:
                tagslist.remove('')
            except:
                pass
            if tagslist:
                post_dic['tags'] = ','.join(tagslist)
        post_dic['closecomment'] = tf[post_dic['closecomment'].lower()]
    except:
        # cat/tit/con are required; get_argument raises when absent.
        rspd['status'] = 500
        rspd['msg'] = '错误: 注意必填的三项'
        self.write(json.dumps(rspd))
        return
    postid = Article.add_new_article(post_dic)
    if postid:
        # Initialize the page-view counter for the new post.
        keyname = 'pv_%s' % (str(postid))
        set_count(keyname, 0, 0)
        Category.add_postid_to_cat(post_dic['category'], str(postid))
        Archive.add_postid_to_archive(genArchive(), str(postid))
        increment('Totalblog')
        if post_dic['tags']:
            Tag.add_postid_to_tags(post_dic['tags'].split(','), str(postid))
        rspd['status'] = 200
        rspd['msg'] = '完成: 你已经成功添加了一篇文章 <a href="/t/%s" target="_blank">查看</a>' % str(postid)
        #clear_cache_by_pathlist(['/', 'cat:%s' % quoted_string(post_dic['category']), 'post_list_index',])
        clear_all_cache()
        #yobin 20160921
        # Ping XML-RPC services only in production.
        if not debug:
            add_task('default', '/task/pingrpctask')
        self.write(json.dumps(rspd))
        return
    else:
        rspd['status'] = 500
        rspd['msg'] = '错误: 未知错误,请尝试重新提交'
        self.write(json.dumps(rspd))
        return
def get(self, page=None):
    """Crawl a jandan.net picture page (the latest page when *page* is
    None), store new pictures in the DB, and queue one image-download
    task per inserted row.

    When called without *page*, the current page number is tracked in
    memcache and, on rollover, the previous page is re-queued.
    """
    if page is not None:
        url = "http://jandan.net/pic/page-%s" % page
    else:
        url = "http://jandan.net/pic"
    req = urllib2.Request(url)
    # Fake a desktop-browser User-Agent so the site serves the full page.
    req.add_header(
        "User-Agent",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_4) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.47 Safari/536.11",
    )
    response = urllib2.urlopen(req)
    the_page = response.read()
    soup = BeautifulSoup(the_page)
    if page is None:
        # Rendered like "[123]" — strip the surrounding brackets.
        current_page = soup.find("span", {"class": "current-comment-page"})
        current_page = current_page.text[1:-1]
        if current_page:
            current_page = int(current_page)
            # No cached page number yet: just record the current one
            # (crawling every page was deliberately disabled below).
            if not self.mc.get("jandan_current_page"):
                # for i in range(1, current_page+1):
                #     add_task('fetch', '/fetch_jandan/%s' % i)
                self.mc.set("jandan_current_page", current_page)
                return
            # Page rolled over: re-queue the previous page too.
            elif int(self.mc.get("jandan_current_page")) < current_page:
                add_task("fetch", "/fetch_jandan/%s" % (current_page - 1))
                self.mc.set("jandan_current_page", current_page)
    else:
        current_page = page
    pics = []
    for comment in list(soup.findAll("li", attrs={"id": re.compile("^comment")})):
        images = list(comment.findAll("img"))
        # Comments with fewer than two images are not picture posts.
        if len(images) < 2:
            continue
        pic = {}
        unique_id = comment["id"]
        pic["unique_id"] = hashlib.sha1(unique_id).hexdigest()
        pic["url"] = images[1]["src"]
        # Collect the comment text with inline images stripped out.
        text = []
        for p in list(comment.findAll("p")):
            for img in list(p.findAll("img")):
                img.extract()
            text.append(p.text)
        pic["desc"] = "".join(text)
        pic["add_time"] = time.time()
        pic["source_url"] = "http://jandan.net/pic/page-%s#%s" % (current_page, unique_id)
        # NOTE(review): SQL built by string interpolation — injection risk
        # if desc/unique_id ever contain quotes; should use placeholders.
        old_pic = self.db.get("SELECT * FROM pics WHERE unique_id='%s'" % pic["unique_id"])
        # Skip pictures we already stored.
        if old_pic:
            continue
        pics.append(pic)
    # Insert oldest-first so DB order matches page order.
    pics.reverse()
    for pic in pics:
        sql = (
            """INSERT INTO pics (unique_id, url, width, height, source_url, `from`, `desc`, add_time) VALUES('%(unique_id)s','%(url)s', '0', '0', '%(source_url)s', 'jandan','%(desc)s', '%(add_time)s')"""
            % pic
        )
        row_id = self.db.execute(sql)
        if row_id:
            payload = "id=%(id)s&url=%(url)s" % {"id": row_id, "url": pic["url"]}
            add_task("download_image", "/download_image", payload)
    soup = None
    self.write("fetched %s images" % len(pics))
def post(self):
    """Create a new article (content utf-8 encoded when MARKDOWN is on),
    update category/archive/tag indexes and counters, clear caches, and
    respond with a JSON status."""
    self.set_header("Content-Type", "application/json")
    rspd = {"status": 201, "msg": "ok"}
    try:
        # Map checkbox string values to integer flags.
        tf = {"true": 1, "false": 0}
        timestamp = int(time())
        content = self.get_argument("con")
        if getAttr("MARKDOWN"):
            # content = markdown.markdown(parse_text(content))
            content = content.encode("utf-8")
        post_dic = {
            "category": self.get_argument("cat"),
            "title": self.get_argument("tit"),
            "content": content,
            # Normalize full-width commas in the tag list.
            "tags": self.get_argument("tag", "").replace(u",", ","),
            "closecomment": self.get_argument("clo", "0"),
            "password": self.get_argument("password", ""),
            "add_time": timestamp,
            "edit_time": timestamp,
            "archive": genArchive(),
        }
        # The KV-store backend needs an explicit comment counter field.
        if MYSQL_TO_KVDB_SUPPORT:
            post_dic["comment_num"] = "0"
        if post_dic["tags"]:
            # Deduplicate tags and drop empty entries.
            tagslist = set([x.strip() for x in post_dic["tags"].split(",")])
            try:
                tagslist.remove("")
            except:
                pass
            if tagslist:
                post_dic["tags"] = ",".join(tagslist)
        post_dic["closecomment"] = tf[post_dic["closecomment"].lower()]
    except:
        # cat/tit/con are required; get_argument raises when absent.
        rspd["status"] = 500
        rspd["msg"] = "错误: 注意必填的三项"
        self.write(json.dumps(rspd))
        return
    postid = Article.add_new_article(post_dic)
    if postid:
        # Initialize the page-view counter for the new post.
        keyname = "pv_%s" % (str(postid))
        set_count(keyname, 0, 0)
        Category.add_postid_to_cat(post_dic["category"], str(postid))
        Archive.add_postid_to_archive(genArchive(), str(postid))
        increment("Totalblog")
        if post_dic["tags"]:
            Tag.add_postid_to_tags(post_dic["tags"].split(","), str(postid))
        rspd["status"] = 200
        rspd["msg"] = '完成: 你已经成功添加了一篇文章 <a href="/t/%s" target="_blank">查看</a>' % str(postid)
        # clear_cache_by_pathlist(['/', 'cat:%s' % quoted_string(post_dic['category']), 'post_list_index',])
        clear_all_cache()
        # yobin 20160921
        # Ping XML-RPC services only in production.
        if not debug:
            add_task("default", "/task/pingrpctask")
        self.write(json.dumps(rspd))
        return
    else:
        rspd["status"] = 500
        rspd["msg"] = "错误: 未知错误,请尝试重新提交"
        self.write(json.dumps(rspd))
        return