Example #1
def getSubTopics(topic_id):
    offset = 0

    while True:
        form_data = {'method': 'next', 'params': '{"topic_id": %s, "offset": %s, "hash_id": ""}' % (topic_id, offset)}
        try:
            response = requests.post(url=subTopic_url, data=form_data, headers=requestHeader, proxies=rand_proxy())
            datas = response.content.decode('utf-8')
            jr = json.loads(datas)
            # join the string array in 'msg' into a single HTML string
            body = ''.join(jr['msg'])
            items = subTopic_p.findall(body)
            if len(items) == 0:
                break

            for item in items:
                yield item

            offset += 20
        except Exception as e:
            # A 400 means the request was malformed, i.e. the data the
            # client sent did not follow the rules the server expects.
            logger.error(e)
            logger.info('args -> topic_id: {0}, offset: {1}'.format(topic_id, offset))
            break  # stop here rather than retrying the same offset forever
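Because getSubTopics is a generator that pages through results 20 at a time, a caller drains it with a plain loop. A minimal usage sketch, assuming the module-level subTopic_url, requestHeader and subTopic_p from this listing are in scope (the topic id is an illustrative placeholder):

# hypothetical usage: iterate over all sub-topics of one topic
for item in getSubTopics('19776749'):
    logger.info('sub topic: {0}'.format(item))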
Example #2
def get_coin_price(self, symbol):
    self.ws_connect()
    self.socketData = None
    # socket_recv is a bound method and already carries self;
    # passing self again in args would raise a TypeError
    threading.Thread(target=self.socket_recv).start()
    i = 0
    while not self.socketData:
        time.sleep(0.1)
        i += 1
        if i == 150:
            self.ping = True
            try:
                self.ws.send(b"ping")
                logger.info("ping.........")
            except Exception as e:
                logger.info("ping exception,{}".format(e))
            time.sleep(1)
            break
    if self.ping and self.socketData != 'pong':
        logger.warning("ping failed,reconnect!")
        self.ping = False
        self.ws.close()
        self.get_coin_price(symbol)
        return
    res = None
    try:
        res = json.loads(self.socketData)
    except Exception as e:
        logger.error("{} : {}".format(self.socketData, e))
    if res and res.get("data") is not None:
        data = res.get("data")[0]
        price_info = self.priceInfo[symbol]
        # order-book rows arrive as strings; convert every field to float
        price_info["asks"] = [[float(d) for d in x] for x in data["asks"]]
        price_info["bids"] = [[float(d) for d in x] for x in data["bids"]]
Example #3
def write_log(text=""):
    with open('log.txt') as f:
        s = f.read()
    # "YYYY-MM" prefix of the current timestamp
    mm = str(from_time_stamp())[0:7]
    if s.find(mm) != -1:
        # still the same month: prepend the new entry
        with open('log.txt', 'w') as f:
            f.write(text + "\n" + s)
    else:
        # month rolled over: archive the old entries first
        with open('log.txt', 'a') as f:
            f.writelines("\n")
        # the archive file is named after the month of ten days ago
        with open(str(from_time_stamp(int(time.time()) - 86400 * 10))[0:7] + '.txt', 'w') as old_f:
            with open('log.txt') as f:
                old_f.writelines(f.readlines()[::-1])
            # append per-symbol trade counts to the archive
            config.read("config.ini")
            symbols = json.loads(config.get("trade", "symbol"))
            for symbol in symbols:
                cfg_field = symbol + "-stat"
                sum_count = 0
                try:
                    sum_count = sum(json.loads(config.get(cfg_field, "count")))
                except Exception as err:
                    logger.error("Error: write_log,{}".format(err))
                old_f.writelines(symbol + " [" + str(sum_count) + "]")
        # start the new month's log with just the new entry
        with open('log.txt', 'w') as f:
            f.write(text)
Example #4
def questions_per_topic(topic_id, header, rQ):
    for page in range(1, 51):
        topic_url = 'https://www.zhihu.com/topic/%s/top-answers?page=%d' % (
            topic_id, page)
        proxy = rand_proxy()
        user_agent = random.choice(agents)
        header.update({'User-Agent': user_agent})
        try:
            html = requests.get(topic_url, headers=header,
                                proxies=proxy).content.decode('utf-8')
        except Exception as e:
            logger.error('exception url: %s' % topic_url)
            logger.error(e)
            continue

        # Check the vote count of the first question on this page;
        # if it is below 1000, skip the remaining pages.
        first_vote = max_vote_p.search(html)
        if first_vote:
            max_vote = first_vote.group(1)
            if int(max_vote) < 1000:
                break

        tree = etree.HTML(html)
        questions = tree.xpath(
            '//div[@class="feed-main"]//a[@class="question_link"]')
        for q in questions:
            rQ.enqueue(per_question, q.attrib['href'])
Example #5
def common_login(self):
    try:
        response = self.session.get(signURL).content.decode('utf-8')
        # extract the _xsrf token from the sign-in page
        xsrf_p = '<input type="hidden" name="_xsrf" value="([0-9a-z]*)"/>'
        result = re.search(xsrf_p, response)
        if result:
            _xsrf = result.group(1)
        else:
            logger.info('xsrf not found')
            return False

        captcha = self.session.get(captchaURL %
                                   (time.time() * 1000)).content
        with open(self.captchaFile, 'wb') as output:
            output.write(captcha)

        captcha = input('input captcha:')

        # login
        form_data = {
            '_xsrf': _xsrf,
            'email': self.email,
            'password': self.password,
            'remember_me': True,
            'captcha': captcha
        }
        self.requestHeader.update({
            'X-Requested-With': 'XMLHttpRequest',
            'X-Xsrftoken': _xsrf
        })
        self.session.headers = self.requestHeader
        response = self.session.post(url=loginURL, data=form_data)
        if response.status_code == 200:
            logger.info(response.text)
            # check whether the login actually succeeded
            response = self.session.get(authTestURL)
            if response.status_code == 200:
                # save the authenticated cookies
                self.cookie = self.session.cookies.get_dict()
                logger.info('Zhihu account login succeeded')
                os.remove(self.captchaFile)

                with open(self.cookieFile, 'w') as output:
                    cookies = self.session.cookies.get_dict()
                    json.dump(cookies, output)
                    logger.info('cookie file written to the current directory')
                return True

    except Exception as e:
        logger.info('Zhihu account login failed')
        logger.error(e)
    finally:
        self.session.close()
        logger.info('session closed')
    return False
Example #6
def rand_proxy():
    rdb = redis.Redis()
    proxy_size = rdb.llen('valid_proxy')
    if proxy_size == 0:
        logger.error('no available proxies')
        raise RuntimeError('no available proxies')

    # pick a random entry from the Redis list of validated proxies
    idx = random.randint(0, proxy_size - 1)
    ip_port = rdb.lrange('valid_proxy', idx, idx)[0].decode('utf-8')
    proxy = {'http': 'http://{0}'.format(ip_port)}
    # e.g. {'http': 'http://127.0.0.1:8088'}
    return proxy
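Since rand_proxy returns a requests-style proxies dict, it plugs straight into requests calls. A minimal usage sketch (httpbin.org is just an illustrative target, not one from the original code):

# hypothetical usage: route one request through a random proxy from the pool
response = requests.get('http://httpbin.org/ip', proxies=rand_proxy(), timeout=10)
print(response.text)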
Example #7
def ws_connect(self):
    if self.ws is None or not self.ws.connected:
        try:
            self.ws = create_connection("wss://real.okex.com:10442/ws/v3", timeout=5)
            logger.info('websocket connected!')
            # subscribe to the 5-level spot depth channel for this pair
            pair = self.SYMBOL_T.upper().replace("_", "-")
            sub_param = {"op": "subscribe", "args": ["spot/depth5:{}".format(pair)]}
            sub_str = json.dumps(sub_param)
            self.ws.send(sub_str)
            result = self.inflate(self.ws.recv())
            logger.info("{} subscribe:{}".format(pair, result))
        except Exception as e:
            # retry indefinitely until the connection comes up
            logger.error('connect ws error[{}],retry...'.format(e))
            time.sleep(2)
            self.ws_connect()
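ws_connect calls a self.inflate helper that is not shown in this listing. OKEx's v3 websocket sends raw-deflate-compressed frames, so a plausible sketch of the missing helper (an assumption, not the repo's actual code) is:

import zlib

def inflate(self, data):
    # hypothetical helper: a negative wbits value tells zlib to expect
    # a raw deflate stream with no zlib header
    decompressor = zlib.decompressobj(-zlib.MAX_WBITS)
    return decompressor.decompress(data) + decompressor.flush()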
Example #8
def get_klines(cls, symbol, period, size):
    result = {}
    granularity = granularityDict[period]
    # build an ISO-8601 UTC window of `size` candles ending now;
    # time.time() is already a UTC epoch, whereas the original
    # utcnow().timestamp() mixed naive UTC with the local timezone
    end_s = int(time.time())
    start_s = end_s - granularity * size
    start = datetime.datetime.utcfromtimestamp(start_s).strftime("%Y-%m-%dT%H:%M:%S.000Z")
    end = datetime.datetime.utcfromtimestamp(end_s).strftime("%Y-%m-%dT%H:%M:%S.000Z")
    try:
        result = spotAPI.get_kline(symbol, start, end, granularity)
    except Exception as e:
        logger.error("***klines:%s" % e)
    if isinstance(result, list):
        return list(map(cls.get_line_data, result))
    else:
        # anything other than a list is treated as a transient failure and retried
        return cls.get_klines(symbol, period, size)
Example #9
def get_account_info(self):
    logger.info('-----------------------------------spot account info--------------------------------------------')
    try:
        accounts = ['USDT', self.BALANCE_T.upper()]
        for symbol in accounts:
            t_account = spotAPI.get_coin_account_info(symbol)
            if t_account.get('currency') == symbol:
                # the original logged "available" twice; the first field should be the balance
                logger.info("%s:balance %s available %s frozen %s" % (symbol, t_account["balance"],
                                                                      t_account["available"],
                                                                      t_account["frozen"]))
            else:
                logger.warning("getAccountInfo Fail,Try again!")
                self.get_account_info()
    except Exception as err:
        logger.error(err)
        self.get_account_info()
Example #10
def makePool(self, ConnNum):
    print("==================== Creating database connection pool... ==============================")
    startTime = time.time()
    retry = 0
    while True:
        try:
            self.pool = PooledDB(
                pymssql,
                ConnNum,
                host=self.host, user=self.user, password=self.password, database=self.db, charset="utf8")
            break
        except Exception as e:
            logger.error("Failed to connect to the database")
            retry += 1
            logger.info("Retrying to create the connection pool, attempt %s..." % retry)
    print("<<<<< Created in: " + str(int(time.time() - startTime)) + "s Connections: " + str(ConnNum) + " >>>>>")
    print("==================== Database connection pool created! ==============================")
Example #11
def crawl_proxies(self):
    for m in dir(ProxyGetter):
        crawl_method_r = re.match(r'freeProxy\d+', m)
        if not crawl_method_r:
            continue
        crawl_method = crawl_method_r.group()
        logger.info('running %s' % crawl_method)
        # catch generator exceptions
        try:
            for proxy in getattr(ProxyGetter, crawl_method)():
                if proxy:
                    # LREM list 0 "value" removes all elements equal to value,
                    # so removing then re-pushing deduplicates the queue
                    self.db.lrem(RAW_PROXY_QUEUE, num=0, value=proxy)
                    self.db.lpush(RAW_PROXY_QUEUE, proxy)
        except Exception as e:
            logger.error(e)
            continue
Example #12
def questions_per_page(topic_id, page, header):
    question_url = 'https://www.zhihu.com/topic/{0}/questions?page={1}'.format(
        topic_id, page)
    user_agent = random.choice(agents)
    header.update({'User-Agent': user_agent})
    html = requests.get(question_url, headers=header,
                        proxies=rand_proxy()).content.decode('utf-8')
    questions = re.findall(question_p, html)
    for q in questions:
        try:
            mongo_conn().questions.insert_one({
                'qid': q[1],
                'stid': topic_id,
                'href': q[0],
                'name': q[2]
            })
        except DuplicateKeyError as e:
            logger.error(e)
            logger.info("topic_id: {0}, href: {1} exists".format(
                topic_id, q[0]))
Example #13
def SELECT(self, query, param=()):
    """
    Execute a SELECT statement.
    :param query: str    the SQL query
    :param param: tuple  query parameters
    :return: result: tuple or None
    """
    conn = self.conn
    cur = conn.cursor()
    result = None
    try:
        startTime = time.time()
        cur.execute(query, param)
        result = cur.fetchall()
        logger.info(query + "," + str(param) + " Execute time:" + str(time.time() - startTime))
    except Exception as e:
        logger.error("Error: unable to fetch data with sql query: " + query + "," + str(param))
        logger.error(traceback.format_exc())
    cur.close()
    return result
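A minimal usage sketch of the wrapper, assuming a db instance of this class and an illustrative users table (pymssql uses the %s parameter style):

# hypothetical usage: parameters travel separately, never interpolated into the SQL string
rows = db.SELECT("SELECT id, name FROM users WHERE age > %s", (18,))
if rows is not None:
    for row_id, name in rows:
        print(row_id, name)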
Example #14
def make_order(cls, my_order_info):
    logger.info('-----------------------------------------spot order----------------------------------------------')
    result = {}
    try:
        result = spotAPI.take_order(my_order_info.orderType, my_order_info.symbol, 2, my_order_info.price,
                                    my_order_info.amount)
    except Exception as e:
        logger.error("***trade:%s" % e)
    if result is not None and result.get('result'):
        logger.info(
            "Order {} {} {} {} {} {}".format(result['order_id'], my_order_info.symbol, my_order_info.orderType,
                                             my_order_info.price, my_order_info.amount,
                                             from_time_stamp()))
        return result['order_id']
    else:
        logger.error(
            "order failed!{} {} {} {} {}".format(my_order_info.symbol, my_order_info.orderType, my_order_info.price,
                                                 my_order_info.amount,
                                                 round(my_order_info.price * my_order_info.amount, 3)))
        return -1
Example #15
def send_email(content, _subtype='plain', _subject="bitcoinrobot"):
    # third-party SMTP service
    mail_host = "smtp.gmail.com"  # SMTP server
    mail_user = "******"  # username
    mail_pass = "******"  # password

    message = MIMEText(content, _subtype, 'utf-8')
    message['From'] = Header(mail_user)
    message['To'] = Header(",".join(receivers))
    message['Subject'] = Header(_subject)
    try:
        server = smtplib.SMTP_SSL(mail_host, 465)
        server.ehlo()
        server.login(mail_user, mail_pass)
        server.sendmail(mail_user, receivers, message.as_string())
        server.close()
        logger.info("email sent successfully")
        return True
    except smtplib.SMTPException as err:
        logger.error("Error: failed to send email,{}".format(err))
        return False
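Since send_email reports success as a boolean, a caller can react to delivery failures. A minimal usage sketch (receivers is the module-level recipient list the function already references):

# hypothetical usage: plain-text alert mail
if not send_email("BTC price crossed the threshold", _subject="price alert"):
    logger.warning("alert mail not delivered")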
Example #16
def UPDATE(self, query, param=()):
    """
    Execute UPDATE, DELETE and INSERT statements.
    :param query: str    the SQL statement
    :param param: tuple  statement parameters
    :return: bool
    """
    conn = self.conn
    cur = conn.cursor()
    result = True
    try:
        startTime = time.time()
        cur.execute(query, param)
        conn.commit()
        logger.info(query + "," + str(param) + " Execute time:" + str(time.time() - startTime))
    except Exception as e:
        logger.error("Error: unable to execute sql query: " + query + "," + str(param))
        logger.error(traceback.format_exc())
        conn.rollback()
        result = False
    cur.close()
    return result
Example #17
def check_order_status(self, my_order_info, wait_count=0):
    order_id = my_order_info.orderId
    order_result = {}
    try:
        order_result = spotAPI.get_order_info(my_order_info.orderId, my_order_info.symbol)
    except Exception as e:
        logger.error("***orderinfo:%s" % e)
    if order_result is not None and order_result.get('order_id') == my_order_info.orderId:
        order = order_result
        order_id = order["order_id"]
        status = order["status"]
        filled_size = float(order["filled_size"])
        if filled_size > 0:
            my_order_info.set_deal_amount(filled_size)
            my_order_info.set_avg_price(float(order["filled_notional"]) / filled_size)
        if status == self.CANCELLED_STATUS:
            logger.info("order {} canceled".format(order_id))
        elif status == 'open':
            if wait_count == self.TRADE_WAIT_COUNT:
                logger.info("timeout no deal")
            else:
                logger.info("no deal")
        elif status == 'part_filled':
            if wait_count == self.TRADE_WAIT_COUNT:
                logger.info("timeout part deal {}".format(my_order_info.dealAmount))
            else:
                logger.info("part deal {}".format(my_order_info.dealAmount))
        elif status == self.FILLED_STATUS:
            logger.info("order {} filled".format(order_id))
        elif status == 'canceling':
            logger.info("order {} canceling".format(order_id))
        elif status == 'ordering':
            logger.info("order {} ordering".format(order_id))
        return status
    else:
        logger.warning("order {} checkOrderStatus failed,try again.".format(order_id))
        return self.check_order_status(my_order_info, wait_count)
Example #18
def top_answers(topic_id, page, header):
    question_url = 'https://www.zhihu.com/topic/{0}/top-answers?page={1}'.format(
        topic_id, page)
    proxy = rand_proxy()
    user_agent = random.choice(agents)
    header.update({'User-Agent': user_agent})
    try:
        html = requests.get(question_url, headers=header,
                            proxies=proxy).content.decode('utf-8')
    except Exception as e:
        logger.error('exception url: %s' % question_url)
        logger.error(e)
        # retry and return, otherwise `html` would be undefined below
        return top_answers(topic_id, page, header)

    # Check the vote count of the first question on this page;
    # if it is below 1000, skip this page.
    first_vote = max_vote_p.search(html)
    if first_vote:
        max_vote = first_vote.group(1)
        if int(max_vote) < 1000:
            logger.info('ignore %s, max_vote:%s' % (question_url, max_vote))
            return

    answers = re.findall(top_answer_p, html)
    if len(answers) == 0:
        logger.error('{0} answers not found, proxy: {1}'.format(
            question_url, proxy))

        return
    logger.info('{0} found answer {1}'.format(question_url, len(answers)))
    for a in answers:
        href, qid, aid = a[0], a[1], a[2]
        try:
            mongo_conn().answers.insert_one({
                'topic': topic_id,
                'question': qid,
                'answer': aid,
                'href': href
            })
        except DuplicateKeyError:
            # a duplicate means this page was already scraped
            return
Example #19
def max_page(topic_id, header):
    question_url = 'https://www.zhihu.com/topic/{0}/top-answers'.format(
        topic_id)
    err = 0
    while True:
        user_agent = random.choice(agents)
        proxy = rand_proxy()
        header.update({'User-Agent': user_agent})
        try:
            response = requests.get(question_url,
                                    headers=header,
                                    proxies=proxy)
        except Exception as e:
            logger.error(e)
            logger.error(topic_id)
            continue

        logger.info('visit: %s' % question_url)
        if response.status_code != 200:
            logger.error('{0} ERROR'.format(question_url))
            logger.error(header)
            return
        html = response.content.decode('utf-8')
        html_tree = etree.HTML(html)
        page_numbers = html_tree.xpath(
            '//div[@class="zm-invite-pager"]/span/a/text()')
        try:
            # span.text looks like: previous 1 2 3 ... 13801 next,
            # so the second-to-last entry is the highest page number
            return page_numbers[-2]
        except Exception as e:
            # Zhihu's anti-crawler page: "abnormal traffic detected from your account or IP"
            if html.find('系统检测到您的帐号或IP存在异常流量') > -1:
                logger.error(
                    'abnormal traffic detected, proxy: {0}, user-agent: {1}'.format(
                        proxy, user_agent))
                if err == 5:
                    break
                err += 1
                continue

            logger.error(e)
            logger.error('topic_id: {0}'.format(topic_id))
            return 1
Example #20
def log_exceptions(func):
    # hypothetical outer wrapper; only the inner function appeared in the original listing
    @functools.wraps(func)  # requires `import functools`
    def decorate(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except Exception as e:
            logger.error(e)
    return decorate
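A minimal usage sketch of the wrapper above (the name log_exceptions is an assumption, not one from the original source):

@log_exceptions
def risky_division(a, b):
    return a / b

risky_division(1, 0)  # the ZeroDivisionError is logged and None is returned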
Example #21
def per_question(q_href):
    time.sleep(random.randint(1, 8))
    q_url = 'https://www.zhihu.com%s' % q_href
    proxy = rand_proxy()
    user_agent = random.choice(agents)
    header = requestHeader
    header.update({'User-Agent': user_agent})
    try:
        response = requests.get(q_url, headers=header, proxies=proxy).content
        html = response.decode('utf-8')
    except Exception as e:
        logger.error('exception url: %s' % q_url)
        logger.error(e)
        # retry and return, otherwise `html` would be undefined below
        return per_question(q_href)

    tree = etree.HTML(html)

    # the <title> tag carries the question title, suffixed with " - 知乎"
    question_a = tree.xpath('//title[@data-react-helmet="true"]/text()')
    if question_a:
        title = question_a[0].replace(' - 知乎', '')
        # "安全验证" ("security check") means the proxy was flagged
        if '安全验证' == title:
            logger.error('proxy error, {0}'.format(proxy))
            raise Exception

        logger.info(title)
    else:
        logger.error('%s title not found' % q_url)
        # "你正在使用的浏览器版本过低" means the User-Agent was rejected as outdated
        if '你正在使用的浏览器版本过低' in html:
            logger.info(user_agent)
            return per_question(q_href)
        else:
            raise Exception

    topics = tree.xpath('//a[@class="TopicLink"]')
    sub_topic = mongo_conn().sub_topic
    for t in topics:
        # e.g. https://www.zhihu.com/topic/19552832
        tid = t.attrib['href'].split('/')[-1]
        name = t.xpath('.//text()')[0]
        try:
            sub_topic.insert_one({'sub_tid': tid, 'sub_name': name})
        except DuplicateKeyError:
            continue

    items = tree.xpath('//div[@class="ContentItem AnswerItem"]')
    for i in items:
        # vote text looks like "1792 人赞同了该回答" ("1792 people upvoted this answer")
        vote_text = i.xpath('.//span[@class="Voters"]/button/text()')
        if len(vote_text) == 0:
            logger.info('%s no votes' % q_url)
            break

        vote_num = re.match(r'\d+', vote_text[0]).group()
        if int(vote_num) >= 800:
            href = i.xpath('.//meta[@itemprop="url"]')[1].attrib['content']
            answer = i.xpath(
                './/span[@class="RichText CopyrightRichText-richText"]')[0]
            s = etree.tostring(answer).decode('utf-8')
            body = html2text.html2text(s.replace('<br>', ''))

            try:
                mongo_conn().top_answers.insert_one({
                    'title': title,
                    'answer': body,
                    'href': href,
                    'vote': vote_num
                })
            except DuplicateKeyError:
                continue