def getSubTopics(topic_id):
    offset = 0
    while True:
        form_data = {
            'method': 'next',
            'params': '{"topic_id": %s, "offset": %s, "hash_id": ""}' % (topic_id, offset)
        }
        try:
            response = requests.post(url=subTopic_url, data=form_data,
                                     headers=requestHeader, proxies=rand_proxy())
            datas = response.content.decode('utf-8')
            jr = json.loads(datas)
            # join the string array in 'msg' into a single HTML string
            body = ''.join(jr['msg'])
            items = subTopic_p.findall(body)
            if len(items) == 0:
                break
            for item in items:
                yield item
            offset += 20
        except Exception as e:
            # A 400 means the request was malformed, i.e. the payload the
            # client sent did not follow the API's rules.
            logger.error(e)
            logger.info('args -> topic_id: {0}, offset: {1}'.format(topic_id, offset))

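# Usage sketch for getSubTopics (assumption: subTopic_p, defined elsewhere,
# captures (sub_topic_id, sub_topic_name) pairs, so each yielded item is a
# tuple; the topic id below is a made-up example):
#
#   for sub_tid, sub_name in getSubTopics('19776749'):
#       logger.info('sub topic %s -> %s' % (sub_tid, sub_name))
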
def get_coin_price(self, symbol):
    self.ws_connect()
    self.socketData = None
    threading.Thread(target=self.socket_recv, args=(self,)).start()
    i = 0
    while not self.socketData:
        time.sleep(0.1)
        i += 1
        if i == 150:
            # no data after 15 seconds: probe the connection with a ping
            self.ping = True
            try:
                self.ws.send(b"ping")
                logger.info("ping.........")
            except Exception as e:
                logger.info("ping exception, {}".format(e))
            time.sleep(1)
            break
    if self.ping and self.socketData != 'pong':
        logger.warning("ping failed, reconnect!")
        self.ping = False
        self.ws.close()
        self.get_coin_price(symbol)
        return
    res = None
    try:
        res = json.loads(self.socketData)
    except Exception as e:
        logger.error("{} : {}".format(self.socketData, e))
    if res and res.get("data") is not None:
        data = res.get("data")[0]
        price_info = self.priceInfo[symbol]
        # depth levels arrive as strings; convert every field to float
        price_info["asks"] = [[float(d) for d in level] for level in data["asks"]]
        price_info["bids"] = [[float(d) for d in level] for level in data["bids"]]

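# socket_recv is started in a thread above but not shown here. Note the
# thread is created with args=(self,), so the target receives the instance
# explicitly; a minimal sketch under the assumption that it reads one frame,
# inflates it, and publishes the text through socketData:
#
#   @staticmethod
#   def socket_recv(inst):
#       try:
#           inst.socketData = inst.inflate(inst.ws.recv())
#       except Exception as e:
#           logger.error("socket_recv error: {}".format(e))
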
def write_log(text=""): s = open('log.txt').read() mm = str(from_time_stamp())[0:7] if s.find(mm) != -1: f = open(r'log.txt', 'w') f.write(text + "\n" + s) f.close() else: f = open(r'log.txt', 'a') f.writelines("\n") f.close() # write old logs old_f = open( str(from_time_stamp(int(time.time()) - 86400 * 10))[0:7] + '.txt', 'w') old_f.writelines(open('log.txt').readlines()[::-1]) # write count config.read("config.ini") symbols = json.loads(config.get("trade", "symbol")) for symbol in symbols: cfg_field = symbol + "-stat" sum_count = 0 try: sum_count = sum(json.loads(config.get(cfg_field, "count"))) except Exception as err: logger.error("Error: write_log,{}".format(err)) old_f.writelines(symbol + " [" + str(sum_count) + "]") old_f.close() f = open(r'log.txt', 'w') f.write(text) f.close()
def questions_per_topic(topic_id, header, rQ):
    for page in range(1, 51):
        topic_url = 'https://www.zhihu.com/topic/%s/top-answers?page=%d' % (topic_id, page)
        proxy = rand_proxy()
        user_agent = random.choice(agents)
        header.update({'User-Agent': user_agent})
        try:
            html = requests.get(topic_url, headers=header,
                                proxies=proxy).content.decode('utf-8')
        except Exception as e:
            logger.error('exception url: %s' % topic_url)
            logger.error(e)
            continue
        # Check the vote count of the first answer on this page; if it is
        # below 1000, skip this page and the rest.
        first_vote = max_vote_p.search(html)
        if first_vote:
            max_vote = first_vote.group(1)
            if int(max_vote) < 1000:
                break
        tree = etree.HTML(html)
        questions = tree.xpath('//div[@class="feed-main"]//a[@class="question_link"]')
        for q in questions:
            rQ.enqueue(per_question, q.attrib['href'])

def common_login(self):
    try:
        response = self.session.get(signURL).content.decode('utf-8')
        # extract the _xsrf token from the login page
        xsrf_p = '<input type="hidden" name="_xsrf" value="([0-9a-z]*)"/>'
        result = re.search(xsrf_p, response)
        if result:
            _xsrf = result.group(1)
        else:
            logger.info('xsrf not found')
            return False
        # fetch the captcha image and ask the user to type it in
        captcha = self.session.get(captchaURL % (time.time() * 1000)).content
        with open(self.captchaFile, 'wb') as output:
            output.write(captcha)
        #subprocess.call(self.captchaFile, shell=True)  # optionally open the image
        captcha = input('input captcha:')
        # log in
        form_data = {
            '_xsrf': _xsrf,
            'email': self.email,
            'password': self.password,
            'remember_me': True,
            'captcha': captcha
        }
        self.requestHeader.update({
            'X-Requested-With': 'XMLHttpRequest',
            'X-Xsrftoken': _xsrf
        })
        self.session.headers = self.requestHeader
        response = self.session.post(url=loginURL, data=form_data)
        if response.status_code == 200:
            logger.info(response.text)
        # check whether the login actually succeeded
        response = self.session.get(authTestURL)
        if response.status_code == 200:
            # persist the authenticated cookies
            self.cookie = self.session.cookies.get_dict()
            logger.info('Zhihu account login succeeded')
            os.remove(self.captchaFile)
            with open(self.cookieFile, 'w') as output:
                cookies = self.session.cookies.get_dict()
                json.dump(cookies, output)
            logger.info("cookie file written to the working directory")
    except Exception as e:
        logger.info('Zhihu account login failed')
        logger.error(e)
    finally:
        self.session.close()
        logger.info('session closed')

def rand_proxy():
    rdb = redis.Redis()
    proxy_size = rdb.llen('valid_proxy')
    if proxy_size == 0:
        logger.error('no available proxies')
        raise RuntimeError('no available proxies')
    randint = random.randint(0, proxy_size - 1)
    ip_port = rdb.lrange('valid_proxy', randint, randint)[0].decode('utf-8')
    # e.g. {'http': 'http://127.0.0.1:8088'}
    proxy = {'http': 'http://{0}'.format(ip_port)}
    return proxy

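# Usage sketch: the returned dict is exactly the shape requests expects for
# its proxies argument, so it can be passed straight through:
#
#   html = requests.get('https://www.zhihu.com', headers=requestHeader,
#                       proxies=rand_proxy()).content.decode('utf-8')
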
def ws_connect(self):
    if self.ws is None or not self.ws.connected:
        try:
            self.ws = create_connection("wss://real.okex.com:10442/ws/v3", timeout=5)
            logger.info('websocket connected!')
            # subscribe to the 5-level spot depth channel for this pair
            pair = self.SYMBOL_T.upper().replace("_", "-")
            sub_param = {"op": "subscribe", "args": ["spot/depth5:{}".format(pair)]}
            sub_str = json.dumps(sub_param)
            self.ws.send(sub_str)
            result = self.inflate(self.ws.recv())
            logger.info("{} subscribe:{}".format(pair, result))
        except Exception as e:
            logger.error('connect ws error [{}], retry...'.format(e))
            time.sleep(2)
            self.ws_connect()

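# The inflate helper used by ws_connect is defined elsewhere in the repo; a
# minimal stand-in sketch, assuming the OKEx v3 feed compresses frames with
# raw deflate (no zlib header), which is why wbits is negative:
def inflate_sketch(data):
    import zlib
    decompressor = zlib.decompressobj(-zlib.MAX_WBITS)  # raw deflate stream
    inflated = decompressor.decompress(data)
    inflated += decompressor.flush()
    return inflated.decode('utf-8')
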
def get_klines(cls, symbol, period, size):
    result = {}
    granularity = granularityDict[period]
    end_s = int("%0.0f" % datetime.datetime.utcnow().timestamp())
    start_s = end_s - granularity * size
    start = datetime.datetime.fromtimestamp(start_s).strftime("%Y-%m-%dT%H:%M:%S.000Z")
    end = datetime.datetime.fromtimestamp(end_s).strftime("%Y-%m-%dT%H:%M:%S.000Z")
    try:
        result = spotAPI.get_kline(symbol, start, end, granularity)
    except Exception as e:
        logger.error("***klines:%s" % e)
    if isinstance(result, list):
        return list(map(cls.get_line_data, result))
    else:
        # the API returned an error payload; retry until we get a list
        return cls.get_klines(symbol, period, size)

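# get_klines depends on a module-level granularityDict defined elsewhere; a
# hypothetical sketch of its shape (the period labels are assumptions, the
# values are bar lengths in seconds as the OKEx v3 candles endpoint expects):
#
#   granularityDict = {'1min': 60, '5min': 300, '15min': 900,
#                      '1hour': 3600, '1day': 86400}
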
def get_account_info(self):
    logger.info('-----------------------------------spot account info--------------------------------------------')
    try:
        accounts = ['USDT', self.BALANCE_T.upper()]
        for symbol in accounts:
            t_account = spotAPI.get_coin_account_info(symbol)
            if t_account.get('currency') == symbol:
                logger.info("%s: balance %s available %s frozen %s" %
                            (symbol, t_account["balance"],
                             t_account["available"], t_account["frozen"]))
            else:
                logger.warning("getAccountInfo failed, try again!")
                self.get_account_info()
    except Exception as err:
        logger.error(err)
        self.get_account_info()

def makePool(self, ConnNum):
    print("==================== Creating database connection pool... ====================")
    startTime = time.time()
    retry = 0
    while True:
        try:
            self.pool = PooledDB(pymssql, ConnNum,
                                 host=self.host, user=self.user,
                                 password=self.password, database=self.db,
                                 charset="utf8")
            break
        except Exception as e:
            logger.error("failed to connect to the database")
            retry += 1
            logger.info("retry %s: recreating the database connection pool..." % retry)
    print("<<<<< elapsed: " + str(int(time.time() - startTime)) + "s connections: " + str(ConnNum) + " >>>>>")
    print("==================== Database connection pool created! ====================")

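# Usage sketch (the surrounding class and its constructor arguments are
# assumptions): DBUtils pools hand out connections via pool.connection(),
# and closing a pooled connection returns it to the pool:
#
#   db.makePool(10)
#   conn = db.pool.connection()
#   cur = conn.cursor()
#   cur.execute("SELECT 1")
#   cur.close()
#   conn.close()  # returned to the pool, not actually closed
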
def crawl_proxies(self):
    for m in dir(ProxyGetter):
        crawl_method_r = re.match(r'freeProxy\d+', m)
        if not crawl_method_r:
            continue
        crawl_method = crawl_method_r.group()
        logger.info('running %s' % crawl_method)
        # catch generator exceptions
        try:
            for proxy in getattr(ProxyGetter, crawl_method)():
                if proxy:
                    # LREM with count 0 removes every element equal to the
                    # value, so re-pushing keeps the queue free of duplicates.
                    self.db.lrem(RAW_PROXY_QUEUE, num=0, value=proxy)
                    self.db.lpush(RAW_PROXY_QUEUE, proxy)
        except Exception as e:
            logger.error(e)
            continue

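# crawl_proxies discovers fetchers purely by name, so adding a proxy source
# is just another method on ProxyGetter matching freeProxy\d+; a hypothetical
# sketch (the URL and regex below are made up):
#
#   @staticmethod
#   def freeProxy99():
#       html = requests.get('http://example-proxy-list.test').text
#       for ip_port in re.findall(r'\d+\.\d+\.\d+\.\d+:\d+', html):
#           yield ip_port
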
def questions_per_page(topic_id, page, header):
    question_url = 'https://www.zhihu.com/topic/{0}/questions?page={1}'.format(topic_id, page)
    user_agent = random.choice(agents)
    header.update({'User-Agent': user_agent})
    html = requests.get(question_url, headers=header,
                        proxies=rand_proxy()).content.decode('utf-8')
    questions = re.findall(question_p, html)
    for q in questions:
        try:
            mongo_conn().questions.insert_one({
                'qid': q[1],
                'stid': topic_id,
                'href': q[0],
                'name': q[2]
            })
        except DuplicateKeyError as e:
            logger.error(e)
            logger.info("topic_id: {0}, href: {1} exists".format(topic_id, q[0]))

def SELECT(self, query, param=()):
    """
    Execute a SELECT statement.
    :param query: SQL statement (str)
    :param param: query parameters (tuple)
    :return: result rows as a tuple, or None on failure
    """
    conn = self.conn
    cur = conn.cursor()
    result = None
    try:
        startTime = time.time()
        cur.execute(query, param)
        result = cur.fetchall()
        logger.info(query + "," + str(param) + " Execute time:" + str(time.time() - startTime))
    except Exception as e:
        logger.error("Error: unable to fetch data with sql query: " + query + "," + str(param))
        logger.error(traceback.format_exc())
    cur.close()
    return result

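# Usage sketch (table and columns are hypothetical; pymssql uses %s
# placeholders, and a failed query comes back as None rather than raising):
#
#   rows = db.SELECT("SELECT id, name FROM users WHERE id = %s", (42,))
#   if rows is not None:
#       for row in rows:
#           logger.info(row)
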
def make_order(cls, my_order_info):
    logger.info('-----------------------------------------spot order----------------------------------------------')
    result = {}
    try:
        result = spotAPI.take_order(my_order_info.orderType, my_order_info.symbol, 2,
                                    my_order_info.price, my_order_info.amount)
    except Exception as e:
        logger.error("***trade:%s" % e)
    if result is not None and result.get('result'):
        logger.info("Order {} {} {} {} {} {}".format(
            result['order_id'], my_order_info.symbol, my_order_info.orderType,
            my_order_info.price, my_order_info.amount, from_time_stamp()))
        return result['order_id']
    else:
        logger.error("order failed!{} {} {} {} {}".format(
            my_order_info.symbol, my_order_info.orderType, my_order_info.price,
            my_order_info.amount, round(my_order_info.price * my_order_info.amount, 3)))
        return -1

def send_email(content, _subtype='plain', _subject="bitcoinrobot"): # 第三方 SMTP 服务 mail_host = "smtp.gmail.com" # 设置服务器 mail_user = "******" # 用户名 mail_pass = "******" # 口令 message = MIMEText(content, _subtype, 'utf-8') message['From'] = Header(mail_user) message['To'] = Header(",".join(receivers)) message['Subject'] = Header(_subject) try: server = smtplib.SMTP_SSL(mail_host, 465) server.ehlo() server.login(mail_user, mail_pass) server.sendmail(mail_user, receivers, message.as_string()) server.close() logger.info("邮件发送成功") return True except smtplib.SMTPException as err: logger.error("Error: 邮件发送失败,{}".format(err)) return False
def UPDATE(self, query, param=()):
    """
    Execute an UPDATE, DELETE, or INSERT statement.
    :param query: SQL statement (str)
    :param param: query parameters (tuple)
    :return: bool, True on success
    """
    conn = self.conn
    cur = conn.cursor()
    result = True
    try:
        startTime = time.time()
        cur.execute(query, param)
        conn.commit()
        logger.info(query + "," + str(param) + " Execute time:" + str(time.time() - startTime))
    except Exception as e:
        logger.error("Error: unable to execute sql query: " + query + "," + str(param))
        logger.error(traceback.format_exc())
        conn.rollback()
        result = False
    cur.close()
    return result

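# Usage sketch (hypothetical table): commit and rollback are handled inside,
# so the caller only checks the boolean result:
#
#   ok = db.UPDATE("UPDATE users SET name = %s WHERE id = %s", ('bob', 42))
#   if not ok:
#       logger.warning('update rolled back')
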
def check_order_status(self, my_order_info, wait_count=0):
    order_id = my_order_info.orderId
    order_result = {}
    try:
        order_result = spotAPI.get_order_info(my_order_info.orderId, my_order_info.symbol)
    except Exception as e:
        logger.error("***orderinfo:%s" % e)
    if order_result is not None and order_result.get('order_id') == my_order_info.orderId:
        order = order_result
        order_id = order["order_id"]
        status = order["status"]
        filled_size = float(order["filled_size"])
        if filled_size > 0:
            my_order_info.set_deal_amount(filled_size)
            my_order_info.set_avg_price(float(order["filled_notional"]) / filled_size)
        if status == self.CANCELLED_STATUS:
            logger.info("order {} canceled".format(order_id))
        elif status == 'open':
            if wait_count == self.TRADE_WAIT_COUNT:
                logger.info("timeout no deal")
            else:
                logger.info("no deal")
        elif status == 'part_filled':
            if wait_count == self.TRADE_WAIT_COUNT:
                logger.info("timeout part deal {}".format(my_order_info.dealAmount))
            else:
                logger.info("part deal {}".format(my_order_info.dealAmount))
        elif status == self.FILLED_STATUS:
            logger.info("order {} filled".format(order_id))
        elif status == 'canceling':
            logger.info("order {} canceling".format(order_id))
        elif status == 'ordering':
            logger.info("order {} ordering".format(order_id))
        return status
    else:
        logger.warning("order {} checkOrderStatus failed, try again.".format(order_id))
        return self.check_order_status(my_order_info, wait_count)

def top_answers(topic_id, page, header):
    question_url = 'https://www.zhihu.com/topic/{0}/top-answers?page={1}'.format(topic_id, page)
    proxy = rand_proxy()
    user_agent = random.choice(agents)
    header.update({'User-Agent': user_agent})
    try:
        html = requests.get(question_url, headers=header,
                            proxies=proxy).content.decode('utf-8')
    except Exception as e:
        logger.error('exception url: %s' % question_url)
        logger.error(e)
        top_answers(topic_id, page, header)
        return  # html is unbound after a failed request; stop after the retry
    # Check the vote count of the first answer on this page; if it is below
    # 1000, skip the rest of the page.
    first_vote = max_vote_p.search(html)
    if first_vote:
        max_vote = first_vote.group(1)
        if int(max_vote) < 1000:
            logger.info('ignore %s, max_vote:%s' % (question_url, max_vote))
            return
    answers = re.findall(top_answer_p, html)
    if len(answers) == 0:
        logger.error('{0} answers not found, proxy: {1}'.format(question_url, proxy))
        return
    logger.info('{0} found answer {1}'.format(question_url, len(answers)))
    for a in answers:
        qid, aid, href = a[1], a[2], a[0]
        try:
            mongo_conn().answers.insert_one({
                'topic': topic_id,
                'question': qid,
                'answer': aid,
                'href': href
            })
        except DuplicateKeyError as e:
            return

def max_page(topic_id, header):
    question_url = 'https://www.zhihu.com/topic/{0}/top-answers'.format(topic_id)
    err = 0
    while True:
        user_agent = random.choice(agents)
        proxy = rand_proxy()
        ip = proxy['http'].split(':')[1][2:]
        header.update({'User-Agent': user_agent})
        try:
            response = requests.get(question_url, headers=header, proxies=proxy)
        except Exception as e:
            logger.error(e)
            logger.error(topic_id)
            continue
        logger.info('visit: %s' % question_url)
        if response.status_code != 200:
            logger.error('{0} ERROR'.format(question_url))
            logger.error(header)
            return
        html = response.content.decode('utf-8')
        html_tree = etree.HTML(html)
        page_numbers = html_tree.xpath('//div[@class="zm-invite-pager"]/span/a/text()')
        try:
            # pager span texts look like: prev 1 2 3 ... 13801 next,
            # so the second-to-last entry is the highest page number
            return page_numbers[-2]
        except Exception as e:
            # the literal below is matched against the page content, so it
            # stays in Chinese ("abnormal traffic detected for this account/IP")
            if html.find('系统检测到您的帐号或IP存在异常流量') > -1:
                logger.error(
                    'Zhihu flagged abnormal traffic for this account/IP, proxy: {0}, user-agent: {1}'.format(
                        proxy, user_agent))
                if err == 5:
                    break
                err += 1
                continue
            logger.error(e)
            logger.error('topic_id: {0}'.format(topic_id))
            return 1
    return 1

def decorate(*args, **kwargs):
    try:
        return func(*args, **kwargs)
    except Exception as e:
        logger.error(e)

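# decorate is the inner closure of an exception-swallowing decorator; the
# enclosing factory is not shown here. A minimal sketch of what it likely
# looks like (the outer name catch_exception is an assumption):
#
#   import functools
#
#   def catch_exception(func):
#       @functools.wraps(func)  # preserve the wrapped function's metadata
#       def decorate(*args, **kwargs):
#           try:
#               return func(*args, **kwargs)
#           except Exception as e:
#               logger.error(e)
#       return decorate
#
#   @catch_exception
#   def risky():
#       ...
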
def per_question(q_href):
    time.sleep(random.randint(1, 8))
    q_url = 'https://www.zhihu.com%s' % q_href
    proxy = rand_proxy()
    user_agent = random.choice(agents)
    header = requestHeader
    header.update({'User-Agent': user_agent})
    try:
        response = requests.get(q_url, headers=header, proxies=proxy).content
        html = response.decode('utf-8')
    except Exception as e:
        logger.error('exception url: %s' % q_url)
        logger.error(e)
        per_question(q_href)
        return  # html is unbound after a failed request; stop after the retry
    tree = etree.HTML(html)
    tags = tree.xpath('//div[@class="Popover"]/text()')
    question_a = tree.xpath('//title[@data-react-helmet="true"]/text()')
    if question_a:
        title = question_a[0].replace(' - 知乎', '')
        if '安全验证' == title:  # security-check page: the proxy is burned
            logger.error('proxy error, {0}'.format(proxy))
            raise Exception
        logger.info(title)
    else:
        logger.error('%s title not found' % q_url)
        if '你正在使用的浏览器版本过低' in html:  # "your browser version is too old"
            logger.info(user_agent)
            per_question(q_href)
            return  # title is unbound on this path; stop after the retry
        else:
            raise Exception
    topics = tree.xpath('//a[@class="TopicLink"]')
    sub_topic = mongo_conn().sub_topic
    for t in topics:
        # e.g. https://www.zhihu.com/topic/19552832
        tid = t.attrib['href'].split('/')[-1]
        name = t.xpath('.//text()')[0]
        try:
            sub_topic.insert_one({'sub_tid': tid, 'sub_name': name})
        except DuplicateKeyError as e:
            continue
    items = tree.xpath('//div[@class="ContentItem AnswerItem"]')
    for i in items:
        # vote text looks like "1792 人赞同了该回答" ("1792 people upvoted this answer")
        vote_text = i.xpath('.//span[@class="Voters"]/button/text()')
        if len(vote_text) == 0:
            logger.info('%s no votes' % q_url)
            break
        vote_num = re.match(r'\d+', vote_text[0]).group()
        if int(vote_num) >= 800:
            href = i.xpath('.//meta[@itemprop="url"]')[1].attrib['content']
            answer = i.xpath('.//span[@class="RichText CopyrightRichText-richText"]')[0]
            s = etree.tostring(answer).decode('utf-8')
            body = html2text.html2text(s.replace('<br>', ''))
            try:
                mongo_conn().top_answers.insert_one({
                    'title': title,
                    'answer': body,
                    'href': href,
                    'vote': vote_num
                })
            except DuplicateKeyError as e:
                continue