コード例 #1
0
    def get_futunn_live(self):
        """Fetch the futunn live-news feed and store new entries in MongoDB.

        Only entries strictly newer than the last stored live timestamp are
        kept.

        :return: ``(1, 'ok')`` regardless of per-request failures, which are
                 logged and swallowed so the crawl keeps going.
        """
        lasttime = DateUtil.string_toDatetime(self.mongodbutil_live.getLastLivetime())

        # NOTE(review): range(0, -1, -1) yields only page 0 -- presumably a
        # deliberately narrowed crawl window; widen it to fetch more pages.
        for page in range(0, -1, -1):
            # Millisecond timestamp used as a cache-busting query parameter.
            cache_buster = int(1000 * time.mktime(time.localtime())) + page
            # Fixed: the original URL was missing the '&' after page={0} and
            # never substituted the cache-buster into the '_' parameter.
            url = 'https://news.futunn.com/main/live-list?page={0}&page_size=50&_={1}'.format(page, cache_buster)

            logger.info("address current url {0}...".format(url))

            # Renamed from 'list'/'arr': 'list' shadowed the builtin.
            new_items = []
            header = {
                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'}
            res = None
            try:
                res = requests.get(url, headers=header, timeout=60)
                res.raise_for_status()
                if res.status_code == 200:
                    js = json.loads(res.text)

                    for elem in js['data']['list']:
                        item_time = DateUtil.string_toDatetime(elem['time'])
                        # Keep only entries newer than the last one stored.
                        if item_time > lasttime:
                            new_items.append(elem)
                            logger.info(elem)

                    if len(new_items) > 0:
                        self.mongodbutil_live.insertItems(new_items)
                        logger.info("store items to mongodb ...")
                    else:
                        logger.info("still have no new live message")

            # Timeouts first: in the original, 'except Exception' came before
            # the requests-specific handlers, making them unreachable.
            except requests.exceptions.Timeout as err:
                logger.warning(err)
            except Exception as err:
                logger.warning(err)
            finally:
                # res stays None when requests.get itself raised.
                if res is not None:
                    res.close()

        return 1, 'ok'
コード例 #2
0
    def get_individual_companyinfo(self, market, code):
        """Fetch company info for one stock from the futunn finance API.

        :param market: market label, e.g. 'HK' or 'US' (lower-cased in URLs).
        :param code: stock code (upper-cased in URLs).
        :return: ``(ret_code, ret_data)`` -- ``(0, '')`` on success,
                 ``(-1, err)`` on failure. The fetched record (tagged with
                 market/code) is appended to ``self.itemArray``.
        """
        ret_code = -1
        ret_data = ''
        self.itemArray = []

        url = "https://finance.futunn.com/api/finance/company-info?code={0}&label={1}".format(code.upper(), market.lower())

        res = None
        try:
            header = {
                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36',
                'Accept': 'application/json,text/javascript,*.*;q=0.01',
                'Origin': 'https://www.futunn.com',
                # Fixed: code.upper was passed as a bound method, not called,
                # which embedded '<built-in method upper ...>' in the Referer.
                'Referer': 'https://www.futunn.com/quote/stock-info?m={0}&code={1}&type=finance_analyse'.format(
                    market.lower(), code.upper())
            }
            res = requests.get(url, headers=header)
            if res.encoding == 'ISO-8859-1':
                res.encoding = 'gbk'
            res.raise_for_status()

            if res.status_code == 200:
                js = json.loads(res.text)

                obj = js['data']
                if len(obj) != 0:
                    # Tag the record so MongoDB rows are self-describing.
                    obj['market'] = market
                    obj['code'] = code
                    self.itemArray.append(obj)

                ret_code = 0
                ret_data = ''
        # Timeouts first: in the original, 'except Exception' came before the
        # requests-specific handlers, making them unreachable.
        except requests.exceptions.Timeout as err:
            logger.warning(err)
            ret_code = -1
            ret_data = err
        except Exception as err:
            logger.warning(err)
            ret_code = -1
            ret_data = err
        finally:
            # res stays None when requests.get itself raised.
            if res is not None:
                res.close()
        return ret_code, ret_data
コード例 #3
0
def job_info_appender(*_args):
    """Run one crawl pass over futu news (live info + futunn live feed).

    Expects ``_args[0]`` to be a ``(store, arr, futunews)`` tuple. The loop
    body executes at most once (it ends with ``break``); the ``while`` only
    skips the pass entirely if ``is_closing`` was set before entry. On a
    completed pass it re-schedules itself via ``sched.add_job``.
    """
    global is_closing
    global working
    global timerid

    working = True

    store = _args[0][0]
    arr = _args[0][1]
    futunews = _args[0][2]

    logger.info('start crawl current futu news...')

    while not is_closing:
        # Re-check the shutdown flag in case it flipped between the loop test
        # and the body (it is set asynchronously by a signal handler).
        if is_closing:
            break

        begin = time.time()

        logger.info('Current Time:{}, info'.format(datetime.datetime.now()))

        # Pass 1: scrape the live-info page and flush new items to MongoDB.
        try:
            ret_code, ret_data = futunews.get_live_info()
            items = futunews.get_item_array()
            if len(items) > 0:
                futunews.mongodbutil.insertItems(items)
                logger.info("store items to mongodb ...")
            else:
                logger.info("all items exists")
        except Exception as err:
            # Back off briefly before the next step so failures don't hammer the site.
            time.sleep(4 * random.random())
            logger.warning(err)


        # Pass 2: the live-list JSON endpoint (stores its own results internally).
        try:
            ret_code, ret_data = futunews.get_futunn_live()

        except Exception as err:
            time.sleep(4 * random.random())
            logger.warning(err)



        if is_closing is True:
            break

        working = False
        if not is_closing:
            # Jittered interval to avoid a fixed request cadence.
            sched.add_job(scheduled_job2, 'interval', seconds=random.randint(30,50), id=timerid)

        end = time.time()
        logger.info("fetching for one  period , cost time: {}".format((end - begin)))

        # Single-shot: the next run is driven by the scheduler, not this loop.
        break
コード例 #4
0
ファイル: sinanews_service.py プロジェクト: github4n/hsstock
    def get_chn_page(self, market, code, page):
        """Scrape one page of mainland-China (SH/SZ) news for *code* from sina.

        Parses the ``.datelist > ul`` block, fetches each article body via
        :meth:`get_content` and collects results into ``self.itemArray``.

        :param market: market label (unused here, kept for signature parity).
        :param code: stock code stored with each item.
        :param page: 1-based page number.
        :return: ``(-1, '')`` when the page has fewer than two entries
                 (end of pagination), otherwise ``(page + 1, '')``.
        """
        self.itemArray = []
        url = self.generate_page_url(market, code, page)
        logger.info('fetch url: {}'.format(url))
        res = None
        try:
            res = requests.get(url, timeout=60, headers={'Content-type':'text/html;charset=gb2312'})
            if res.encoding == 'ISO-8859-1':
                res.encoding = 'gbk'
            html = res.text
            res.raise_for_status()
            if res.status_code == 200:
                contentSoup = bs4.BeautifulSoup(html, 'lxml')
                # Slice off the '[<ul ...>' / '</ul>]' wrapper around the list markup.
                strList = str(contentSoup.select('.datelist > ul'))[10:-12]
                elems = strList.split("<br/>")
                if len(elems) < 2:
                    return -1, ''
                for elem in elems:
                    if elem == '':
                        continue
                    # Renamed from 'json': the dict shadowed the json module.
                    item = {}
                    elem = elem.lstrip()
                    parts = elem.split('<a href="')
                    item['code'] = code
                    # Entry text starts with 'YYYY-MM-DD HH:MM'; append seconds.
                    item['date'] = parts[0].rstrip() + ":00"
                    parts1 = parts[1].split('" target="_blank">')
                    item['href'] = parts1[0]
                    item['year'] = 'real'
                    parts2 = parts1[1].split('</a>')
                    item['title'] = parts2[0]
                    logger.info("date:{},title:{}".format(item['date'], item['title']))
                    ret, content = self.get_content(item['href'], "utf-8")

                    if ret == 0:
                        item['content'] = content
                        self.itemArray.append(item)
        except Exception as err:
            # Errors are logged and swallowed so the crawl advances to the
            # next page (the original's bare 'except:' was unreachable).
            logger.warning(err)
        finally:
            # res stays None when requests.get itself raised.
            if res is not None:
                res.close()
        return page + 1, ''
コード例 #5
0
    def get_content(self, url, enco):
        """Download one article page and extract the text of its '.inner' element.

        :param url: article URL; skipped when already stored in MongoDB.
        :param enco: encoding to force on the response, e.g. 'utf-8'.
        :return: ``(ret, content)`` where ``ret`` is 0 on success, -1 on
                 failure or missing element, -2 when the URL already exists.
        """
        content = ''
        ret = -1

        if self.mongodbutil.urlIsExist(url):
            logger.info('This url:{} has existed'.format(url))
            return -2, content

        header = {
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'}
        res = None
        try:
            res = requests.get(url, headers=header, timeout=60)
            res.encoding = enco
            res.raise_for_status()
            if res.status_code == 200:
                soup = bs4.BeautifulSoup(res.text, 'lxml')
                elems = soup.select('.inner')
                if len(elems) > 0:
                    content = elems[0].getText()
                    ret = 0

        # Timeouts first: in the original, 'except Exception' came before the
        # requests-specific handlers, making them (and the bare 'except:')
        # unreachable dead code.
        except requests.exceptions.Timeout as err:
            logger.warning(err)
        except Exception as err:
            logger.warning(err)
        finally:
            # res stays None when requests.get itself raised.
            if res is not None:
                res.close()
        return ret, content
コード例 #6
0
def job_calendar_appender(*_args):
    """Run one crawl pass over the futunn financial calendar.

    Expects ``_args[0]`` to be a ``(store, arr, futunews)`` tuple. The loop
    body executes at most once (it ends with ``break``); the ``while`` only
    skips the pass entirely if ``is_closing`` was set before entry.
    """
    global is_closing
    global working
    global timerid


    store = _args[0][0]
    arr = _args[0][1]
    futunews = _args[0][2]
    working = True
    logger.info('start crawl current calendar ...')

    while not is_closing:
        # Re-check the shutdown flag in case it flipped between the loop test
        # and the body (it is set asynchronously by a signal handler).
        if is_closing:
            break

        begin = time.time()

        logger.info('Current Time:{}, info'.format(datetime.datetime.now()))

        try:
            # get_calendars stores fetched events in MongoDB internally.
            ret_code, ret_data = futunews.get_calendars()

        except Exception as err:
            # Brief jittered back-off so a failure doesn't hammer the site.
            time.sleep(4 * random.random())
            logger.warning(err)

        if is_closing is True:
            break


        end = time.time()
        logger.info("fetching calendar for one  period , cost time: {}".format((end - begin)))

        # Single-shot pass: exit after one iteration.
        break

    working = False
コード例 #7
0
    def get_calendars(self):
        """Fetch futunn calendar event lists and store them in MongoDB.

        Covers six event feeds: HK/US/A-share IPOs, earnings reports,
        ex-dividend dates, finance events, economic data and market-closure
        notices (the URL-encoded ``event_type`` filters).

        :return: ``(1, 'ok')``; per-URL failures are logged and skipped.
        """
        urls = [
            'https://news.futunn.com/new-calendar/events-list?begin_time={0}&end_time=2037-12-31&event_type=%5B%22%E6%B8%AF%E8%82%A1%E6%96%B0%E8%82%A1%22%2C%22%E7%BE%8E%E8%82%A1%E6%96%B0%E8%82%A1%22%2C%22A%E8%82%A1%E6%96%B0%E8%82%A1%22%5D&stock_type=&_={1}',
            'https://news.futunn.com/new-calendar/events-list?begin_time={0}&end_time=2037-12-31&event_type=["港股财报"%2C"美股财报"%2C"A股财报"]&stock_type=&_={1}',
            'https://news.futunn.com/new-calendar/events-list?begin_time={0}&end_time=2037-12-31&event_type=["港股除权除息"%2C"美股除权除息"%2C"A股除权除息"]&stock_type=&_={1}',
            'https://news.futunn.com/new-calendar/events-list?begin_time={0}&end_time=2037-12-31&event_type=["财经事件"]&stock_type=&_={1}',
            'https://news.futunn.com/new-calendar/events-list?begin_time={0}&end_time=2037-12-31&event_type=["经济数据"]&stock_type=&_={1}',
            'https://news.futunn.com/new-calendar/events-list?begin_time={0}&end_time=2037-12-31&event_type=["休市提醒"]&stock_type=&_={1}'
        ]

        for idx in range(len(urls)):
            # Millisecond timestamp + idx acts as a cache-busting parameter.
            url = urls[idx].format(DateUtil.getTodayStr(), int(1000 * time.mktime(time.localtime())) + idx)
            logger.info("address current url {0}...".format(url))

            arr = []
            header = {
                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'}
            res = None
            try:
                res = requests.get(url, headers=header, timeout=60)
                res.raise_for_status()
                if res.status_code == 200:
                    js = json.loads(res.text)

                    # Candidate unique key for de-duplication downstream:
                    # (market_type, event_type, event_time).
                    for elem in js['data']['list']:
                        arr.append(elem)

                    if len(arr) > 0:
                        self.mongodbutil_calendar.insertItems(arr)
                        logger.info("store items to mongodb ...")
                    else:
                        logger.info("still have no calendar live message")

            # Timeouts first: in the original, 'except Exception' came before
            # the requests-specific handlers, making them unreachable.
            except requests.exceptions.Timeout as err:
                logger.warning(err)
            except Exception as err:
                logger.warning(err)
            finally:
                # res stays None when requests.get itself raised.
                if res is not None:
                    res.close()

        return 1, 'ok'
コード例 #8
0
    def get_futunn_news(self):
        """Crawl futunn market-news articles by sequential article id.

        URLs already present in MongoDB are skipped. Parsed articles are
        buffered in ``self.itemArray`` and flushed to MongoDB in batches of
        more than 50.

        :return: ``(1, 'ok')``; per-article failures are logged and skipped.
        """
        # NOTE(review): the id range is hard-coded -- presumably a manually
        # chosen crawl window; adjust as new article ids appear.
        for article_id in range(94471, 94480):
            url = 'https://news.futunn.com/market/{0}?src=3'.format(article_id)

            if self.mongodbutil.urlIsExist(url):
                logger.info('This url:{} has existed'.format(url))
                continue

            # Renamed from 'json': the dict shadowed the json module.
            item = {}
            header = {
                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'}
            res = None
            try:
                res = requests.get(url, headers=header, timeout=60)
                res.raise_for_status()
                if res.status_code == 200:
                    soup = bs4.BeautifulSoup(res.text, 'lxml')
                    elems = soup.select('.inner')
                    item['content'] = elems[0].getText()
                    elems = soup.select('.news-title > h1')
                    item['title'] = elems[0].getText()
                    elems = soup.select('.news-title > .timeBar')

                    # Timestamp starts at the first '2' (a 2xxx year) and is
                    # 16 chars long: 'YYYY-MM-DD HH:MM'.
                    pos = elems[0].getText().strip().find('2')
                    item['date'] = elems[0].getText().strip()[pos:pos+16]
                    item['href'] = url
                    item['code'] = ' '
                    item['year'] = DateUtil.string_toDatetime2(item['date']).year
                    item['sourcefrom'] = 'futunn'
                    self.itemArray.append(item)

                    # Flush in batches to bound memory and DB round-trips.
                    if len(self.get_item_array()) > 50:
                        self.mongodbutil.insertItems(self.get_item_array())
                        logger.info("store items to mongodb ...")
                        self.clear_item_array()

            # Timeouts first: in the original, 'except Exception' came before
            # the requests-specific handlers, making them unreachable.
            except requests.exceptions.Timeout as err:
                logger.warning(err)
            except Exception as err:
                logger.warning(err)
            finally:
                # res stays None when requests.get itself raised.
                if res is not None:
                    res.close()

        return 1, 'ok'
コード例 #9
0
    def get_live_info(self):
        """Scrape the live-news list page at ``self.url`` and collect entries
        newer than a fixed cutoff into ``self.itemArray``.

        :return: ``(ret_code, ret_data)`` -- ``(0, '')`` once at least one
                 entry was processed, ``(-1, err)`` on failure.
        """
        ret_code = -1
        ret_data = ''
        self.itemArray = []

        # Fixed cutoff: only entries at or after this moment are processed.
        lasttime = DateUtil.string_toDatetime2('2019-05-01 09:00')

        res = None
        try:
            res = requests.get(self.url)
            if res.encoding == 'ISO-8859-1':
                res.encoding = 'gbk'
            html = res.text
            res.raise_for_status()
            if res.status_code == 200:
                contentSoup = bs4.BeautifulSoup(html, 'lxml')
                elems = contentSoup.find_all('a', class_='news-link')

                for elem in elems:
                    # Renamed from 'json': the dict shadowed the json module.
                    item = {}
                    item['code'] = ' '

                    newstime = elem.select('span')
                    # Renamed from 'time': the original local shadowed the
                    # time module, breaking time.sleep() in the handlers below.
                    time_text = newstime[len(newstime) - 1].getText()
                    item['date'] = DateUtil.string_toDatetime2(time_text)

                    # Skip stale entries; advance the cutoff as we go.
                    if item['date'] < lasttime:
                        continue
                    lasttime = item['date']

                    h3 = elem.select('h3')
                    item['title'] = h3[len(h3) - 1].getText()

                    logger.info("date:{},title:{}".format(item['date'], item['title']))
                    item['href'] = elem.attrs['href']
                    item['year'] = item['date'].year
                    item['sourcefrom'] = 'futunn'
                    ret, content = self.get_content(item['href'], 'utf-8')

                    if ret == 0:
                        item['content'] = content
                        self.itemArray.append(item)
                    ret_code = 0
                    ret_data = ''
        # Timeouts first: in the original, 'except Exception' came before the
        # requests-specific handlers, making them unreachable.
        except requests.exceptions.Timeout as err:
            logger.warning(err)
            ret_code = -1
            ret_data = err
        except Exception as err:
            logger.warning(err)
            ret_code = -1
            ret_data = err
        finally:
            # res stays None when requests.get itself raised.
            if res is not None:
                res.close()
        return ret_code, ret_data
コード例 #10
0
    def get_individual_news(self, market, code):
        """Scrape the futunn stock-news list page for one stock.

        :param market: market label, lower-cased into the URL.
        :param code: stock code, upper-cased into the URL.
        :return: ``(ret_code, ret_data)`` -- ``(0, '')`` on success,
                 ``(-1, err)`` on failure. Items land in ``self.itemArray``.
        """
        ret_code = -1
        ret_data = ''
        self.itemArray = []

        url = "https://www.futunn.com/quote/stock-news?m={0}&code={1}".format(market.lower(), code.upper())

        res = None
        try:
            header = {
                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36',
                'Accept': 'application/json,text/javascript,*.*;q=0.01',
                'Origin': 'https://www.futunn.com',
                # Fixed: code.upper was passed as a bound method, not called,
                # which embedded '<built-in method upper ...>' in the Referer.
                'Referer': 'https://www.futunn.com/quote/stock-info?m={0}&code={1}&type=finance_analyse'.format(market.lower(), code.upper())
            }
            res = requests.get(url, headers=header)
            if res.encoding == 'ISO-8859-1':
                res.encoding = 'gbk'
            html = res.text
            res.raise_for_status()
            if res.status_code == 200:
                contentSoup = bs4.BeautifulSoup(html, 'lxml')

                elems = contentSoup.select('.ulList02 >  ul > li')

                for elem in elems:
                    # Renamed from 'json': the dict shadowed the json module.
                    item = {}
                    item['code'] = code
                    item['market'] = market
                    item['title'] = elem.select('.txt01')[0].getText()
                    item['href'] = elem.select('.txt01 > a')[0]['href']
                    # The date text carries a 3-char prefix before the timestamp.
                    item['date'] = DateUtil.string_toDatetime2(elem.select('.bar01')[0].getText().strip()[3:])
                    item['year'] = item['date'].year
                    item['sourcefrom'] = 'futunn'

                    ret, content = self.get_content(item['href'], 'utf-8')

                    if ret == 0:
                        item['content'] = content
                        self.itemArray.append(item)

                ret_code = 0
                ret_data = ''
        # Timeouts first: in the original, 'except Exception' came before the
        # requests-specific handlers, making them unreachable.
        except requests.exceptions.Timeout as err:
            logger.warning(err)
            ret_code = -1
            ret_data = err
        except Exception as err:
            logger.warning(err)
            ret_code = -1
            ret_data = err
        finally:
            # res stays None when requests.get itself raised.
            if res is not None:
                res.close()
        return ret_code, ret_data
コード例 #11
0
ファイル: sinanews_service.py プロジェクト: github4n/hsstock
    def get_page(self, market, code, url):
        """Scrape one sina news list page and collect article entries.

        :param market: market prefix, e.g. 'US', 'HK', 'SH', 'SZ'.
        :param code: stock code stored with each item.
        :param url: full page URL to fetch.
        :return: ``(ret_code, ret_data)`` -- ``(0, '')`` once at least one
                 entry was processed, ``(-1, err)`` on failure. Items land in
                 ``self.itemArray``.
        """
        ret_code = -1
        ret_data = ''
        self.itemArray = []

        res = None
        try:
            res = requests.get(url, timeout=60, headers={
                'Content-type': 'text/html;charset=gb2312'
            })
            if res.encoding == 'ISO-8859-1':
                res.encoding = 'gbk'
            html = res.text
            res.raise_for_status()
            if res.status_code == 200:
                contentSoup = bs4.BeautifulSoup(html, 'lxml')
                elems = contentSoup.select('#js_ggzx > li,.li_point > ul > li,.col02_22 > ul > li')
                for elem in elems:
                    # Renamed from 'json': the dict shadowed the json module.
                    item = {}
                    item['code'] = code
                    # Skip US rows whose 5th raw char is a newline -- presumably
                    # placeholder/separator rows in the US list markup.
                    temp = elem.__str__()[4:5]
                    if (temp == '\n') and market == 'US':
                        continue
                    ele = elem.select('span')
                    # Date text is wrapped in one-char delimiters; strip them.
                    item['date'] = DateUtil.format_date(ele[0].getText()[1:-1])
                    ele = elem.select('a')
                    item['title'] = ele[len(ele) - 1].getText()
                    logger.info("date:{},title:{}".format(item['date'], item['title']))
                    item['href'] = ele[len(ele) - 1].attrs['href']
                    item['year'] = 'guess'
                    ret, content = self.get_content(item['href'], 'utf-8')

                    if ret == 0:
                        item['content'] = content
                        self.itemArray.append(item)
                    ret_code = 0
                    ret_data = ''
        # Timeouts first: in the original, 'except Exception' came before the
        # requests-specific handlers, making them unreachable.
        except requests.exceptions.Timeout as err:
            logger.warning(err)
            ret_code = -1
            ret_data = err
        except Exception as err:
            logger.warning(err)
            ret_code = -1
            ret_data = err
        finally:
            # res stays None when requests.get itself raised.
            if res is not None:
                res.close()
        return ret_code, ret_data
コード例 #12
0
ファイル: sinanews_service.py プロジェクト: github4n/hsstock
    def get_us_page(self, market, code, page, type):
        """Scrape one page of US news for *code* from sina.

        :param market: market label (kept for signature parity with siblings).
        :param code: stock code stored with each item.
        :param page: 1-based page number.
        :param type: list-type flag appended to the URL, '1' or '2' (the
                     parameter name shadows the builtin but is kept for
                     backward compatibility).
        :return: ``(next_page, type)``. At ``page >= 100`` the crawl rolls
                 over: ``(1, '2')`` when still on type '1', ``(-1, '2')`` to
                 terminate otherwise.
        """
        self.itemArray = []
        url = self.generate_page_url(market, code, page)
        url = url + type
        logger.info('fetch url: {}'.format(url))
        res = None
        try:
            res = requests.get(url, timeout=60, headers={'Content-type': 'text/html;charset=gb2312'})
            if res.encoding == 'ISO-8859-1':
                res.encoding = 'gbk'
            html = res.text
            res.raise_for_status()
            if res.status_code == 200:
                contentSoup = bs4.BeautifulSoup(html, 'lxml')
                elems = contentSoup.select('.xb_news > ul > li')
                # Hard page cap: switch from list type '1' to '2', then stop.
                if page >= 100:
                    # Idiomatic equality instead of type.__eq__("1").
                    if type == "1":
                        return 1, '2'
                    else:
                        return -1, '2'
                for elem in elems:
                    # Renamed from 'json': the dict shadowed the json module.
                    item = {}
                    item['code'] = code
                    ele = elem.select('span')
                    if len(ele) == 0:
                        continue
                    item['date'] = DateUtil.format_date_us_history(ele[0].getText())
                    ele = elem.select('a')
                    item['title'] = ele[len(ele) - 1].getText()
                    logger.info("date:{},title:{}".format(item['date'], item['title']))
                    item['href'] = ele[len(ele) - 1].attrs['href']
                    item['year'] = 'real'
                    ret, content = self.get_content(item['href'], "utf-8")

                    if ret == 0:
                        item['content'] = content
                        self.itemArray.append(item)
        # Timeouts first: in the original, 'except Exception' came before the
        # requests-specific handlers, making them unreachable.
        except requests.exceptions.Timeout as err:
            logger.warning(err)
        except Exception as err:
            logger.warning(err)
        finally:
            # res stays None when requests.get itself raised.
            if res is not None:
                res.close()
        return page + 1, type
コード例 #13
0
def job_once_global(*_args):
    """One-shot crawl over the full stock list, paging each code to exhaustion.

    Expects ``_args[0]`` to be a ``(store, arr, sinanewshistory)`` tuple,
    where ``arr`` is a list of codes like 'SH.600996'. Dispatches each code
    to the HK/US/CHN page scraper by its market prefix, inserting fetched
    items into MongoDB page by page. Calls ``signal_int_handler`` at the end
    to trigger shutdown.
    """
    global is_closing

    store = _args[0][0]
    arr = _args[0][1]
    sinanewshistory = _args[0][2]

    while not is_closing:
        begin = time.time()
        ret_arr = arr

        total = len(ret_arr)
        curr = 0
        for code in ret_arr:
            curr += 1

            logger.info(
                "current fetching entry progress {}/{} code:{} ".format(
                    curr, total, code))
            # Hard-coded resume checkpoint -- presumably skips codes already
            # crawled in a previous run; TODO confirm/parameterize.
            if curr < 10398:
                continue

            # Codes look like 'US.AAPL': 2-char market, dot, then the symbol.
            market = code[0:2]
            symbol = code[3:]

            # if code != 'SH.600996':
            #     logger.info("current fetching entry progress {}/{} code:{} ".format(curr, total, code))
            #     continue

            sinanewshistory.clear_item_array()
            logger.info('Current Time:{}, code:{}, market:{}'.format(
                datetime.datetime.now(), symbol, market))

            # Page through this code until a scraper returns page == -1
            # (exhausted) or an error forces page = -1 below.
            page = 1
            type = '1'
            while page != -1 or (page > 0 and page < 2):
                if is_closing:
                    break
                try:
                    if market == 'HK':
                        page, _ = sinanewshistory.get_hk_page(
                            market, symbol, page)
                    if market == 'US':
                        # US pages carry an extra list-type flag that the
                        # scraper rolls over from '1' to '2'.
                        page, type = sinanewshistory.get_us_page(
                            market, symbol, page, type)
                    if market == 'SZ' or market == 'SH':
                        page, _ = sinanewshistory.get_chn_page(
                            market, symbol, page)

                    items = sinanewshistory.get_item_array()
                    if len(items) > 0:
                        sinanewshistory.mongodbutil.insertItems(items)
                        time.sleep(random.random())
                        logger.info("store items to mongodb ...")
                    else:
                        # No new items means everything on this page already
                        # exists: stop paging this code.
                        logger.info("all items exists")
                        page = -1
                except Exception as err:
                    # Back off briefly and abandon this code on error.
                    time.sleep(4 * random.random())
                    logger.warning('my err:{}'.format(err))
                    page = -1

            if is_closing is True:
                break

        end = time.time()
        logger.info("fetching for one  period , cost time: {}".format(
            (end - begin)))

        # One full pass done: request shutdown and exit the loop.
        signal_int_handler(0, 0)
        break
コード例 #14
0
def job_once_individuals(*_args):
    """Run a single crawl pass that fetches individual-stock news for every code.

    Expects ``_args[0]`` to be a ``(store, code_list, futunews)`` tuple, where
    ``code_list`` holds strings like ``'HK.00700'`` (2-char market prefix, a
    separator, then the symbol) and ``futunews`` is the crawler object.

    Side effects: sets the module-level ``working`` flag for the duration of
    the pass, honours the module-level ``is_closing`` shutdown flag between
    codes, and inserts any newly crawled items into MongoDB via the crawler's
    ``mongodbutil`` helper.

    :return: None
    """
    global is_closing
    global working

    working = True

    # Unpack the single positional tuple; ``store`` is currently unused but
    # kept to preserve the caller's argument contract.
    _store = _args[0][0]
    codes = _args[0][1]
    futunews = _args[0][2]

    logger.info('start crawl current news...')

    while not is_closing:
        begin = time.time()

        total = len(codes)
        for curr, code in enumerate(codes, start=1):
            if is_closing:
                break

            logger.info("current fetching individuals progress {}/{} code:{} ".format(curr, total, code))

            # 'HK.00700' -> market='HK', symbol='00700' (skip the separator).
            market = code[0:2]
            symbol = code[3:]

            logger.info('Current Time:{}, code:{}, market:{}'.format(datetime.datetime.now(), symbol, market))

            try:
                futunews.get_individual_news(market, symbol)
                items = futunews.get_item_array()
                if items:
                    futunews.mongodbutil.insertItems(items)
                    logger.info("store items to mongodb news ...")
                else:
                    logger.info("all news items exists")
            except Exception as err:
                # Best-effort crawl: back off briefly on any fetch/store
                # failure, log it, and continue with the next code.
                time.sleep(4 * random.random())
                logger.warning(err)

        working = False

        end = time.time()
        logger.info("fetching for one  period , cost time: {}".format((end - begin)))

        # Single pass only: the surrounding while exists solely for the
        # early-shutdown check, so always exit after one iteration.
        break
コード例 #15 (Code example #15)
0
def job_appender(*_args):
    '''
    Run a single crawl pass over all stock codes with the Sina news crawler.

    Expects _args[0] to be a (store, code_list, sinanews) tuple; codes look
    like 'SH.600996' (2-char market prefix + separator + symbol). Sets the
    module-level ``working`` flag while running, honours the ``is_closing``
    shutdown flag between codes, and re-registers the scheduled job when the
    pass finishes without a shutdown request.

    :return:
    '''
    global is_closing
    global working
    global timerid

    working = True

    # Unpack the single positional tuple; ``store`` appears unused here.
    store = _args[0][0]
    arr = _args[0][1]
    sinanews = _args[0][2]

    logger.info('start crawl current news...')

    while not is_closing:
        begin = time.time()
        ret_arr = arr

        total = len(ret_arr)
        curr = 0
        for code in ret_arr:
            if is_closing:
                break

            curr += 1

            logger.info(
                "current fetching entry progress {}/{} code:{} ".format(
                    curr, total, code))
            # Hard-coded resume offset: skip codes already processed in a
            # previous run. NOTE(review): presumably adjusted by hand between
            # runs — confirm before reuse.
            if curr < 3061:
                continue

            # 'SH.600996' -> market='SH', symbol='600996'.
            market = code[0:2]
            symbol = code[3:]
            url = sinanews.generate_url(market, symbol)

            logger.info('Current Time:{}, code:{}, market:{}'.format(
                datetime.datetime.now(), symbol, market))

            try:
                sinanews.get_page(market, symbol, url)
                items = sinanews.get_item_array()
                if len(items) > 0:
                    sinanews.mongodbutil.insertItems(items)
                    logger.info("store items to mongodb ...")
                else:
                    logger.info("all items exists")
            except Exception as err:
                # Best-effort: back off briefly on failure and continue.
                time.sleep(4 * random.random())
                logger.warning(err)

            if is_closing is True:
                break

        working = False
        # Re-arm the interval job only on a normal (non-shutdown) finish.
        if not is_closing:
            sched.add_job(scheduled_job, 'interval', seconds=1, id=timerid)

        end = time.time()
        logger.info("fetching for one  period , cost time: {}".format(
            (end - begin)))

        break