def get_media(self):
    """Respond with the selected article's media, or an error if none exists.

    Caches the article under ":selected" and advances the session state.
    """
    redis_conn = RedisClient()
    current_state = redis_conn.get(self._id + ":state")
    if current_state == "start":
        # Session never progressed past start; ask the user to pick a topic.
        self.error_response(
            "Sorry, what kind of news would you like to hear about?")
    article = self.extract_article()
    if not article:
        self.error_response(
            "Sorry I don't know what article you are talking about")
    elif not article['multimedia']:
        self.error_response(
            "Sorry I don't have images for that article")
    else:
        redis_conn.set(self._id + ":selected", article)
        redis_conn.set(self._id + ":state", "selected")
        media_state = Media(self.name)
        self.payload = {"read": media_state.get_phrase()}  # usable url for html
        self.pushlater = {"type": "media", "value": article['multimedia']}
        self.finish_response()
def start(self):
    """Begin a fresh session: wipe per-user keys, then fetch headlines."""
    store = RedisClient()
    # Clear any state left over from a previous session for this user.
    for suffix in (":state", ":articles", ":selected"):
        store.set(self._id + suffix, None)
    NYTimes.get_headlines(self.respond_start)
def post(self):
    """Register a user's display name from the JSON body; echo it back.

    Responds with status 500 in the body when "name" is missing.
    """
    body = tornado.escape.json_decode(self.request.body)
    user_id = body["id"]
    if "name" in body:
        user_name = body["name"]
        RedisClient().set(user_id + ":name", user_name)
        response = {"id": user_id, "name": user_name, "status": 200}
    else:
        # No name supplied -> signal the error inside the JSON payload.
        response = {"status": 500}
    self.write(tornado.escape.json_encode(response))
class WeiboCookiesGenerator(object):
    def __init__(self, website='weibo'):
        """
        Initialise the Redis-backed stores used by this generator.
        :param website: site name used to namespace the Redis databases
        """
        self.website = website
        self.cookies_db = RedisClient('cookies', self.website)
        self.accounts_db = RedisClient('accounts', self.website)
        self.uid_db = RedisClient('uid', self.website)

    def run(self):
        """
        For every stored account that has no cached cookies yet, simulate a
        login to obtain its cookies and uid, and persist both in Redis.
        :return: None
        """
        accounts_usernames = self.accounts_db.usernames()
        cookies_usernames = self.cookies_db.usernames()
        for username in accounts_usernames:
            # FIX: idiomatic membership test (was `not username in ...`).
            if username not in cookies_usernames:
                password = self.accounts_db.get(username)
                print('正在生成Cookies', '账号', username, '密码', password)
                result = self.new_cookies(username, password)
                # Login succeeded
                if result.get('status') == 1:
                    cookies = result.get('content')  # cookies is a dict
                    uid = result.get('uid')
                    print('成功获取到Cookies', cookies, '成功获取到uid', uid)
                    if self.cookies_db.set(username, json.dumps(cookies)):
                        print('成功保存Cookies')
                    if self.uid_db.set(username, uid):
                        print('成功保存uid')
                # Login failed
                elif result.get('status') == 0:
                    print(result.get('content'))
        else:
            # for-else: runs once the loop finishes without `break`,
            # i.e. every account has been processed.
            print('所有账号都已经成功获取Cookies')

    def new_cookies(self, username, password):
        """
        Generate cookies by simulating a login.
        :param username: account username
        :param password: account password
        :return: dict with status/content (and uid on success)
        """
        return WeiboCookies(username, password).main()
def post(self):
    """Store the user's name keyed by their id and confirm via JSON."""
    data = tornado.escape.json_decode(self.request.body)
    user_id = data["id"]
    if "name" not in data:
        # Caller omitted the name field; report an in-body error status.
        result = {"status": 500}
    else:
        name = data["name"]
        store = RedisClient()
        store.set(user_id + ":name", name)
        result = {"id": user_id, "name": name, "status": 200}
    self.write(tornado.escape.json_encode(result))
def save_headline(self):
    """Confirm saving the referenced article and advance the session state."""
    store = RedisClient()
    if store.get(self._id + ":state") == "start":
        # Session is still at the start; we have nothing to save yet.
        self.error_response("Sorry, I didn't get that.")
    article = self.extract_article()
    if not article:
        self.error_response(
            "I'm sorry. I don't know which article you want me to save.")
    else:
        self.payload = {"read": SaveConfirmation().get_phrase()}
        # Do some other stuff to actually save the article somewhere
        store.set(self._id + ":selected", article)
        store.set(self._id + ":state", "selected")
        self.finish_response()
def save_headline(self):
    """Acknowledge a save request for the article the user referenced."""
    redis_conn = RedisClient()
    current = redis_conn.get(self._id + ":state")
    if current == "start":
        self.error_response("Sorry, I didn't get that.")
    article = self.extract_article()
    if article:
        confirm = SaveConfirmation()
        self.payload = {"read": confirm.get_phrase()}
        # Do some other stuff to actually save the article somewhere
        redis_conn.set(self._id + ":selected", article)
        redis_conn.set(self._id + ":state", "selected")
        self.finish_response()
    else:
        self.error_response(
            "I'm sorry. I don't know which article you want me to save.")
def respond_start(self, articles):
    """Build the opening response, filtering by the user's stored keywords.

    :param articles: list of article dicts returned by the NYTimes API
    """
    # BUG FIX: was `if not article:` — `article` is undefined here
    # (the parameter is `articles`), so an empty payload raised NameError.
    if not articles:
        self.error_response("API error")
    r = RedisClient()
    keywords = r.get(self._id + ":keywords")
    if keywords:
        # Narrow the headline list to the user's keywords.
        keyword_articles = NYTimes.check_keywords(articles, keywords)
        hasUpdate = len(keywords) > 0
        articles = keyword_articles
    else:
        hasUpdate = False
    start_state = UpdatesOrNoUpdates(hasUpdate, self.name)
    self.payload = {"read": start_state.get_phrase()}
    # State transition: cache the (possibly filtered) articles.
    r.set(self._id + ":articles", articles)
    r.set(self._id + ":state", "start")
    self.finish_response()
def store_proxies(proxies):
    """Persist a reachable proxies mapping in Redis, skipping duplicates."""
    client = RedisClient(name='certificate_proxies')
    stamp = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
    if not connect_check(proxies):
        # Proxy could not reach the probe site; log and bail out.
        err_logger.error(
            str(stamp) + ' Can not connect baidu.com -- proxies: ' + str(proxies))
        return
    proxies = json.dumps(proxies)
    if client.exist(proxies):
        info_logger.info(
            str(stamp) + ' Already exist proxies: ' + str(proxies))
    else:
        client.set(proxies, 1)
        message = str(stamp) + ' New proxies: ' + str(proxies)
        info_logger.info(message)
        print(message)
def get_summary(self):
    """Read back the selected article's snippet and offer a Kindle send.

    Caches the article under ":selected" and advances the session state.
    """
    r = RedisClient()
    state = r.get(self._id + ":state")
    if state == "start":
        self.error_response(
            "Sorry, what kind of news would you like to hear about?")
    article = self.extract_article()
    if article:
        r.set(self._id + ":selected", article)
        r.set(self._id + ":state", "selected")
        # FIX: removed leftover Python 2 debug `print` statements that
        # dumped the raw article dict to stdout (and are a syntax error
        # under Python 3).
        self.payload = {
            "read": "%s. Would you like me to send the full article to your kindle?" % article["snippet"]
        }
        self.finish_response()
    else:
        self.error_response(
            "Sorry I don't know what article you are talking about")
def get_uniform_proxy(name, url):
    """Poll the internal proxy API until a new, working proxy is cached.

    Skips proxies in regions R1/R2, verifies reachability against baidu.com,
    and stores the first previously-unseen working proxy in Redis.

    :param name: spider/task name sent to the proxy API
    :param url: target url sent to the proxy API
    """
    proxy_url = 'http://166.188.20.55:3000/api/proxy/web'
    headers = {
        'Content-Type': 'application/json',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/'
                      '537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'
    }
    data = {'name': name, 'url': url}
    while True:
        try:
            resp = requests.post(url=proxy_url, headers=headers,
                                 data=json.dumps(data)).json()
            region = resp['proxy']['region']
            if region == 'R1' or region == 'R2':
                # Excluded regions -- ask for another proxy.
                continue
            conn = RedisClient(name='certificate_proxies')
            ip = resp['proxy']['ip']
            port = '30000'
            proxies = {
                'http': 'http://%s:%s' % (ip, port),
                'https': 'http://%s:%s' % (ip, port),
            }
            ping_url = 'http://www.baidu.com'
            status_code = requests.get(ping_url, proxies=proxies,
                                       timeout=(3.1, 15)).status_code
            if status_code == 200:
                p = json.dumps(proxies)
                if not conn.exist(p):
                    conn.set(p, 1)
                    # conn.lpush(p)
                    print('New proxies: ', proxies)
                    break
                else:
                    print('already exist proxies: ', p)
            else:
                print('connection error...')
        except requests.RequestException:
            # BUG FIX: a timeout or connection failure previously raised out
            # of the loop and killed the caller; treat it like any other bad
            # proxy and keep retrying.
            print('connection error...')
def respond_start(self, articles):
    """Compose the session-start phrase from headlines and stored keywords.

    :param articles: list of article dicts returned by the NYTimes API
    """
    # BUG FIX: original tested `not article`, an undefined name — the
    # parameter is `articles` — so the empty-payload path raised NameError.
    if not articles:
        self.error_response("API error")
    r = RedisClient()
    keywords = r.get(self._id + ":keywords")
    if keywords:
        keyword_articles = NYTimes.check_keywords(articles, keywords)
        hasUpdate = len(keywords) > 0
        articles = keyword_articles
    else:
        hasUpdate = False
    start_state = UpdatesOrNoUpdates(hasUpdate, self.name)
    self.payload = {"read": start_state.get_phrase()}
    # Cache the (possibly keyword-filtered) articles and mark the session.
    r.set(self._id + ":articles", articles)
    r.set(self._id + ":state", "start")
    self.finish_response()
def get_media(self):
    """Push the images attached to the article the user referenced."""
    r = RedisClient()
    state = r.get(self._id + ":state")
    if state == "start":
        self.error_response(
            "Sorry, what kind of news would you like to hear about?")
    article = self.extract_article()
    if article:
        media = article['multimedia']
        if media:
            r.set(self._id + ":selected", article)
            r.set(self._id + ":state", "selected")
            phrase = Media(self.name).get_phrase()  # usable url for html
            self.payload = {"read": phrase}
            self.pushlater = {"type": "media", "value": media}
            self.finish_response()
        else:
            self.error_response("Sorry I don't have images for that article")
    else:
        self.error_response("Sorry I don't know what article you are talking about")
def respond_get_headlines(self, payload):
    """Turn raw headline payloads into a spoken phrase and push session state."""
    if not payload:
        self.error_response("API error")
    headline_texts = map(lambda x: x["headline"], payload)
    sentence_headlines, topic_headlines, article_order = \
        NLPParser.parse_headlines(headline_texts)
    # Slim each article down to the fields the client needs.
    article_values = [
        {"headline": a["headline"], "url": a["url"], "snippet": a["snippet"]}
        for a in payload
    ]
    self.pushlater = {"type": "article", "value": article_values}
    found = FoundHeadlines(sentence_headlines, topic_headlines, self.name)
    self.payload = {"read": found.get_phrase()}
    # State transition
    store = RedisClient()
    store.set(self._id + ":selected", None)
    store.set(self._id + ":articles", map(lambda x: payload[x], article_order))
    store.set(self._id + ":state", "headlines")
    self.finish_response()
def download_proxies():
    """Fetch static proxies from xdaili, verify each, and cache new ones."""
    url = ('http://www.xdaili.cn/ipagent/privateProxy/applyStaticProxy'
           '?count=1&spiderId=8407fdf0311d4eb7b30f9e39699795e5&returnType=2')
    # The provider rotates at fixed seconds; pause briefly on the boundary.
    if datetime.datetime.now().second in (0, 24, 48):
        time.sleep(1)
    conn = RedisClient(name='certificate_proxies')
    content = requests.get(url, timeout=(3.1, 15)).json()
    result = content['RESULT']
    if result == '提取太频繁,请按规定频率提取!':
        # Provider throttled us; log the message verbatim and give up.
        err_logger.error(result)
        return
    for proxy in content['RESULT']:
        ip = proxy['ip']
        port = proxy['port']
        candidate = {
            'http': 'http://%s:%s' % (ip, port),
            'https': 'http://%s:%s' % (ip, port),
        }
        ping_url = 'http://www.baidu.com'
        status_code = requests.get(ping_url, timeout=(3.1, 15),
                                   proxies=candidate).status_code
        if status_code == 200:
            serialized = json.dumps(candidate)
            now = time.strftime('%Y-%m-%d %H:%M:%S',
                                time.localtime(time.time()))
            if not conn.exist(serialized):
                conn.set(serialized, 1)
                conn.lpush(serialized)
                now = time.strftime('%Y-%m-%d %H:%M:%S',
                                    time.localtime(time.time()))
                print(now, ' New proxies: ', serialized)
            else:
                print(now, ' already exist proxies: ', serialized)
def respond_get_headlines(self, payload):
    """Parse fetched headlines, queue article data, and record the new state."""
    if not payload:
        self.error_response("API error")
    parsed = NLPParser.parse_headlines(map(lambda x: x["headline"], payload))
    sentence_headlines, topic_headlines, article_order = parsed
    article_values = []
    for entry in payload:
        # Only the fields the client renders are pushed later.
        article_values.append({
            "headline": entry["headline"],
            "url": entry["url"],
            "snippet": entry["snippet"],
        })
    self.pushlater = {"type": "article", "value": article_values}
    phrase = FoundHeadlines(sentence_headlines, topic_headlines,
                            self.name).get_phrase()
    self.payload = {"read": phrase}
    ordered_articles = map(lambda x: payload[x], article_order)
    # State transition
    r = RedisClient()
    r.set(self._id + ":selected", None)
    r.set(self._id + ":articles", ordered_articles)
    r.set(self._id + ":state", "headlines")
    self.finish_response()
def get_summary(self):
    """Speak the selected article's snippet and offer to send it to Kindle.

    Caches the article under ":selected" and advances the session state.
    """
    r = RedisClient()
    state = r.get(self._id + ":state")
    if state == "start":
        self.error_response(
            "Sorry, what kind of news would you like to hear about?")
    article = self.extract_article()
    if article:
        r.set(self._id + ":selected", article)
        r.set(self._id + ":state", "selected")
        # FIX: dropped Python 2-only debug `print` statements that leaked
        # the raw article dict to stdout and break under Python 3.
        self.payload = {
            "read": "%s. Would you like me to send the full article to your kindle?" % article["snippet"]
        }
        self.finish_response()
    else:
        self.error_response(
            "Sorry I don't know what article you are talking about")