Code Example #1
def scheduled_job2():
    logger.info('scheduled_job2..')
    if not working:
        sched.remove_job(timerid)
        catch_lastest_news()  # near-to-live news
    else:
        logger.info('previous timer is still working')
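The scheduled_job* and job_* snippets in this listing share module-level state that none of them define. Below is a minimal sketch of that wiring, assuming APScheduler's BackgroundScheduler; the scheduler class, logger setup, and timerid value are assumptions inferred from the sched.add_job/sched.remove_job calls, not taken from the project.

import logging
from apscheduler.schedulers.background import BackgroundScheduler

logger = logging.getLogger(__name__)

sched = BackgroundScheduler()  # assumed scheduler class
sched.start()

working = False       # True while a crawl job is running
is_closing = False    # flipped by the signal handlers to request shutdown
timerid = 'news-timer'  # hypothetical job id

# re-arm pattern used by the jobs below:
# sched.add_job(scheduled_job, 'interval', seconds=1, id=timerid)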
Code Example #2
File: migration.py Project: hsstock/hsstock
def deal_1m_old():
    storeservice = MysqlService()

    from_table = 230
    to_table = 235
    migrate_num = 5

    try:
        for index in range(0, 30):
            logger.info('addressing {0} ....'.format(index))
            # step 1: fetch a batch of codes from the source shard
            sql = 'select distinct(code) from ft_1M_{0} limit {1}'.format(
                from_table, migrate_num)
            result = storeservice.executeSql(sql)
            codes = tuple([code[0] for code in result.cursor._result.rows])
            print(codes)

            # step 2: copy the rows into the new shard
            # note: tuple formatting breaks for a single code, e.g. ('0700.HK',)
            sql = 'insert into ft_1M_{0} select * from ft_1M_{1} where code in {2}'.format(
                to_table, from_table, codes)
            logger.info(sql)
            result = storeservice.executeSql(sql)

            # step 3: update sys_sharding.tindex so lookups hit the new shard
            sql = "update sys_sharding set tindex = {0} where code in {1} and dtype = 'hk_1m'".format(
                to_table, codes)
            logger.info(sql)
            result = storeservice.executeSql(sql)

            # step 4: drop the migrated rows from the source shard
            sql = 'delete from ft_1M_{0} where code in {1}'.format(
                from_table, codes)
            logger.info(sql)
            result = storeservice.executeSql(sql)

    except Exception as err:
        logger.error(err)
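Formatting the Python tuple codes straight into the SQL (where code in {2}) breaks for a single code, because ('0700.HK',) keeps its trailing comma, and it leaves the statement open to injection. Below is a sketch of the same IN clause built with DB-API placeholders instead; conn is a hypothetical DB-API connection (e.g. from pymysql), since the project's storeservice.executeSql wrapper is not shown.

from_table = 230
codes = ['0700.HK']  # a one-element list no longer produces broken SQL
placeholders = ', '.join(['%s'] * len(codes))
sql = 'delete from ft_1M_{0} where code in ({1})'.format(from_table, placeholders)
with conn.cursor() as cursor:   # conn: hypothetical DB-API connection
    cursor.execute(sql, codes)  # the driver quotes each code safely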
Code Example #3
def scheduled_job():
    logger.info('scheduled_job..')
    if not working:
        sched.remove_job(timerid)
        catch_lastest_urls()
    else:
        logger.info('previous timer is still working')
Code Example #4
def job_info_appender(*_args):
    '''Crawl the current Futu news feed once, then re-arm the interval timer.'''
    global is_closing
    global working
    global timerid

    working = True

    store = _args[0][0]
    arr = _args[0][1]
    futunews = _args[0][2]

    logger.info('start crawl current futu news...')

    # runs at most once; the loop only provides early-exit points via break
    while not is_closing:
        begin = time.time()

        logger.info('Current Time:{}, info'.format(datetime.datetime.now()))

        try:
            ret_code, ret_data = futunews.get_live_info()
            items = futunews.get_item_array()
            if len(items) > 0:
                futunews.mongodbutil.insertItems(items)
                logger.info("store items to mongodb ...")
            else:
                logger.info("all items exist")
        except Exception as err:
            time.sleep(4 * random.random())
            logger.warning(err)

        try:
            ret_code, ret_data = futunews.get_futunn_live()
        except Exception as err:
            time.sleep(4 * random.random())
            logger.warning(err)

        if is_closing:
            break

        working = False
        if not is_closing:
            sched.add_job(scheduled_job2, 'interval',
                          seconds=random.randint(30, 50), id=timerid)

        end = time.time()
        logger.info("fetching for one period, cost time: {}".format(end - begin))

        break
Code Example #5
def scheduled_job():
    logger.info('scheduled_job..')
    if not working:
        sched.remove_job(timerid)
        catch_lastest_news()  # near-to-live news
        #catch_futu_individuals()  # one-off
        #catch_futunn_news_byapi()  # one-off
        #job_catch_calendar()  # catch calendar, scheduled
    else:
        logger.info('previous timer is still working')
Code Example #6
    def get_futunn_live(self):

        lasttime = DateUtil.string_toDatetime(self.mongodbutil_live.getLastLivetime())

        # range(0, -1, -1) yields only page 0; widen the range to fetch more pages
        for i in range(0, -1, -1):
            p = int(1000 * time.mktime(time.localtime())) + i  # cache-busting timestamp
            url = 'https://news.futunn.com/main/live-list?page={0}&page_size=50&_={1}'.format(i, p)

            logger.info("address current url {0}...".format(url))

            arr = []
            header = {
                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'}
            res = None
            try:
                res = requests.get(url, headers=header, timeout=60)
                res.raise_for_status()
                if res.status_code == 200:
                    js = json.loads(res.text)

                    # keep only items newer than the last stored live message
                    for elem in js['data']['list']:
                        itemTime = DateUtil.string_toDatetime(elem['time'])
                        if itemTime > lasttime:
                            arr.append(elem)
                            logger.info(elem)

                    if len(arr) > 0:
                        self.mongodbutil_live.insertItems(arr)
                        logger.info("store items to mongodb ...")
                    else:
                        logger.info("still no new live message")

            # specific timeout handlers must precede the generic Exception
            # handler, otherwise they are unreachable
            except requests.exceptions.Timeout as err:
                logger.warning(err)
            except Exception as err:
                logger.warning(err)
            finally:
                if res is not None:  # requests.get may raise before res is bound
                    res.close()

        return 1, 'ok'
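The original handler order in this method (and several of the snippets below) listed except Exception before the requests timeout handlers; Python tries except clauses top to bottom, so the narrower handlers were unreachable. A standalone demonstration, not project code:

import requests

try:
    raise requests.exceptions.ReadTimeout('simulated read timeout')
except requests.exceptions.Timeout as err:
    print('timeout branch:', err)   # runs: ReadTimeout subclasses Timeout
except Exception as err:
    print('generic branch:', err)   # would shadow the branch above if listed first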
Code Example #7
File: sinanews_service.py Project: github4n/hsstock
    def get_chn_page(self, market, code, page):
        self.itemArray = []
        url = self.generate_page_url(market, code, page)
        logger.info('fetch url: {}'.format(url))
        res = None
        try:
            res = requests.get(url, timeout=60, headers={'Content-type': 'text/html;charset=gb2312'})
            if res.encoding == 'ISO-8859-1':
                res.encoding = 'gbk'
            html = res.text
            res.raise_for_status()
            if res.status_code == 200:
                contentSoup = bs4.BeautifulSoup(html, 'lxml')
                # strip the '[<ul ...>' / '</ul>]' wrapper, then split the rows
                strList = str(contentSoup.select('.datelist > ul'))[10:-12]
                elems = strList.split("<br/>")
                if len(elems) < 2:
                    return -1, ''
                for elem in elems:
                    if elem == '':
                        continue
                    item = {}  # renamed from 'json' to avoid shadowing the json module
                    elem = elem.lstrip()
                    parts = elem.split('<a href="')
                    item['code'] = code
                    item['date'] = parts[0].rstrip() + ":00"
                    s = item['date']
                    parts1 = parts[1].split('" target="_blank">')
                    item['href'] = parts1[0]
                    item['year'] = 'real'
                    parts2 = parts1[1].split('</a>')
                    item['title'] = parts2[0]
                    logger.info("date:{},title:{}".format(s, item['title']))
                    ret, content = self.get_content(item['href'], "utf-8")
                    if ret == 0:
                        item['content'] = content
                        self.itemArray.append(item)
        except Exception as err:
            logger.warning(err)
        finally:
            if res is not None:  # requests.get may raise before res is bound
                res.close()
        return page + 1, ''
Code Example #8
File: migration.py Project: hsstock/hsstock
def counter_statistics():

    storeservice = MysqlService()

    try:
        for index in range(1, 250):
            logger.info('addressing {0} ....'.format(index))
            # count the rows in each ft_1M_<n> shard table
            sql = 'select count(code) from ft_1M_{0}'.format(index)
            result = storeservice.executeSql(sql)
            print(result.cursor._result.rows[0])

    except Exception as err:
        logger.error(err)
Code Example #9
    def get_content(self, url, enco):
        content = ''
        ret = -1

        urlExist = self.mongodbutil.urlIsExist(url)
        if urlExist:
            logger.info('This url:{} has existed'.format(url))
            return -2, content

        header = {
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'}
        res = None
        try:
            res = requests.get(url, headers=header, timeout=60)
            res.encoding = enco
            res.raise_for_status()
            if res.status_code == 200:
                soup = bs4.BeautifulSoup(res.text, 'lxml')
                elems = soup.select('.inner')
                if len(elems) > 0:
                    content = elems[0].getText()
                    ret = 0

        # specific timeout handlers must precede the generic Exception
        # handler, otherwise they are unreachable
        except requests.exceptions.Timeout as err:
            logger.warning(err)
        except Exception as err:
            logger.warning(err)
        finally:
            if res is not None:  # requests.get may raise before res is bound
                res.close()
        return ret, content
Code Example #10
def job_calendar_appender(*_args):
    '''Fetch the Futu event calendars once and store them.'''
    global is_closing
    global working
    global timerid

    store = _args[0][0]
    arr = _args[0][1]
    futunews = _args[0][2]
    working = True
    logger.info('start crawl current calendar ...')

    # runs at most once; the loop only provides early-exit points via break
    while not is_closing:
        begin = time.time()

        logger.info('Current Time:{}, info'.format(datetime.datetime.now()))

        try:
            ret_code, ret_data = futunews.get_calendars()
        except Exception as err:
            time.sleep(4 * random.random())
            logger.warning(err)

        if is_closing:
            break

        end = time.time()
        logger.info("fetching calendar for one period, cost time: {}".format(end - begin))

        break

    working = False
Code Example #11
def try_exit():
    global is_closing
    if is_closing:
        # clean up here
        logger.info('exit success')
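try_exit polls the is_closing flag that the signal handlers set, but the snippet omits both the loop that invokes it and the actual shutdown call behind "clean up here". A sketch of the usual wiring, assuming a Tornado IOLoop; the use of Tornado here is an assumption, as the examples only show the flag and the handlers.

import signal
import tornado.ioloop

signal.signal(signal.SIGINT, signal_int_handler)
signal.signal(signal.SIGTERM, signal_term_handler)

# poll is_closing every 100 ms; try_exit's "clean up here" step would call
# tornado.ioloop.IOLoop.current().stop() to end the loop (assumed, not shown)
tornado.ioloop.PeriodicCallback(try_exit, 100).start()
tornado.ioloop.IOLoop.current().start()  # blocks until stop()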
Code Example #12
    def get_futunn_news(self):

        # article ids to fetch; adjust the range as needed
        for i in range(94471, 94480):
            url = 'https://news.futunn.com/market/{0}?src=3'.format(i)

            urlExist = self.mongodbutil.urlIsExist(url)
            if urlExist:
                logger.info('This url:{} has existed'.format(url))
                continue

            item = {}  # renamed from 'json' to avoid shadowing the json module
            header = {
                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'}
            res = None
            try:
                res = requests.get(url, headers=header, timeout=60)
                res.raise_for_status()
                if res.status_code == 200:
                    soup = bs4.BeautifulSoup(res.text, 'lxml')
                    elems = soup.select('.inner')
                    item['content'] = elems[0].getText()
                    elems = soup.select('.news-title > h1')
                    item['title'] = elems[0].getText()
                    elems = soup.select('.news-title > .timeBar')

                    # the date starts at the first '2' (e.g. '2019-...') in the time bar
                    pos = elems[0].getText().strip().find('2')
                    item['date'] = elems[0].getText().strip()[pos:pos + 16]
                    item['href'] = url
                    item['code'] = ' '
                    item['year'] = DateUtil.string_toDatetime2(item['date']).year
                    item['sourcefrom'] = 'futunn'
                    self.itemArray.append(item)

                    # flush to mongodb in batches of 50
                    if len(self.get_item_array()) > 50:
                        self.mongodbutil.insertItems(self.get_item_array())
                        logger.info("store items to mongodb ...")
                        self.clear_item_array()

            # specific timeout handlers must precede the generic Exception
            # handler, otherwise they are unreachable
            except requests.exceptions.Timeout as err:
                logger.warning(err)
            except Exception as err:
                logger.warning(err)
            finally:
                if res is not None:  # requests.get may raise before res is bound
                    res.close()

        return 1, 'ok'
Code Example #13
    def get_live_info(self):

        ret_code = -1
        ret_data = ''
        self.itemArray = []

        lasttime = DateUtil.string_toDatetime2('2019-05-01 09:00')

        res = None
        try:
            res = requests.get(self.url, timeout=60)
            if res.encoding == 'ISO-8859-1':
                res.encoding = 'gbk'
            html = res.text
            res.raise_for_status()
            if res.status_code == 200:
                contentSoup = bs4.BeautifulSoup(html, 'lxml')
                elems = contentSoup.find_all('a', class_='news-link')

                for elem in elems:
                    item = {}  # renamed from 'json' to avoid shadowing the json module
                    item['code'] = ' '

                    newstime = elem.select('span')
                    # renamed from 'time' to avoid shadowing the time module
                    time_str = newstime[len(newstime) - 1].getText()
                    item['date'] = DateUtil.string_toDatetime2(time_str)
                    s = item['date']

                    # skip items older than the newest one seen so far
                    if s < lasttime:
                        continue
                    lasttime = s

                    h3 = elem.select('h3')
                    item['title'] = h3[len(h3) - 1].getText()

                    logger.info("date:{},title:{}".format(s, item['title']))
                    item['href'] = elem.attrs['href']
                    item['year'] = item['date'].year
                    item['sourcefrom'] = 'futunn'
                    ret, content = self.get_content(item['href'], 'utf-8')

                    if ret == 0:
                        item['content'] = content
                        self.itemArray.append(item)
                    ret_code = 0
                    ret_data = ''
        # specific timeout handlers must precede the generic Exception
        # handler, otherwise they are unreachable
        except requests.exceptions.Timeout as err:
            logger.warning(err)
            ret_code = -1
            ret_data = err
        except Exception as err:
            logger.warning(err)
            ret_code = -1
            ret_data = err
        finally:
            if res is not None:  # requests.get may raise before res is bound
                res.close()
        return ret_code, ret_data
Code Example #14
File: sinanews_service.py Project: github4n/hsstock
    def get_page(self, market, code, url):

        ret_code = -1
        ret_data = ''
        self.itemArray = []

        res = None
        try:
            res = requests.get(url, timeout=60, headers={
                'Content-type': 'text/html;charset=gb2312'
            })
            if res.encoding == 'ISO-8859-1':
                res.encoding = 'gbk'
            html = res.text
            res.raise_for_status()
            if res.status_code == 200:
                contentSoup = bs4.BeautifulSoup(html, 'lxml')
                elems = contentSoup.select('#js_ggzx > li,.li_point > ul > li,.col02_22 > ul > li')
                for elem in elems:
                    item = {}  # renamed from 'json' to avoid shadowing the json module
                    item['code'] = code
                    temp = str(elem)[4:5]
                    if (temp == '\n') and market == 'US':
                        continue
                    ele = elem.select('span')
                    item['date'] = DateUtil.format_date(ele[0].getText()[1:-1])
                    s = item['date']
                    ele = elem.select('a')
                    item['title'] = ele[len(ele) - 1].getText()
                    logger.info("date:{},title:{}".format(s, item['title']))
                    item['href'] = ele[len(ele) - 1].attrs['href']
                    item['year'] = 'guess'
                    ret, content = self.get_content(item['href'], 'utf-8')

                    if ret == 0:
                        item['content'] = content
                        self.itemArray.append(item)
                    ret_code = 0
                    ret_data = ''
        # specific timeout handlers must precede the generic Exception
        # handler, otherwise they are unreachable
        except requests.exceptions.Timeout as err:
            logger.warning(err)
            ret_code = -1
            ret_data = err
        except Exception as err:
            logger.warning(err)
            ret_code = -1
            ret_data = err
        finally:
            if res is not None:  # requests.get may raise before res is bound
                res.close()
        return ret_code, ret_data
Code Example #15
def job_appender(*_args):
    '''Crawl current news for every code in arr, then re-arm the interval timer.'''
    global is_closing
    global working
    global timerid

    working = True

    store = _args[0][0]
    arr = _args[0][1]
    sinanews = _args[0][2]

    logger.info('start crawl current news...')

    # runs at most once; the loop only provides early-exit points via break
    while not is_closing:
        begin = time.time()
        ret_arr = arr

        total = len(ret_arr)
        curr = 0
        for code in ret_arr:
            if is_closing:
                break

            curr += 1

            logger.info(
                "current fetching entry progress {}/{} code:{} ".format(
                    curr, total, code))
            if curr < 3061:  # resume offset from a previous interrupted run
                continue

            market = code[0:2]
            symbol = code[3:]
            url = sinanews.generate_url(market, symbol)

            logger.info('Current Time:{}, code:{}, market:{}'.format(
                datetime.datetime.now(), symbol, market))

            try:
                sinanews.get_page(market, symbol, url)
                items = sinanews.get_item_array()
                if len(items) > 0:
                    sinanews.mongodbutil.insertItems(items)
                    logger.info("store items to mongodb ...")
                else:
                    logger.info("all items exist")
            except Exception as err:
                time.sleep(4 * random.random())
                logger.warning(err)

            if is_closing:
                break

        working = False
        if not is_closing:
            sched.add_job(scheduled_job, 'interval', seconds=1, id=timerid)

        end = time.time()
        logger.info("fetching for one period, cost time: {}".format(
            (end - begin)))

        break
Code Example #16
File: sinanews_service.py Project: github4n/hsstock
    def get_us_page(self, market, code, page, type):
        """
        :param market: market prefix, e.g. 'US'
        :param code: stock code
        :param page: page number to fetch
        :param type: news list type, '1' or '2'
        :return: (page_number, type); page_number -1 means no more pages
        """
        self.itemArray = []
        url = self.generate_page_url(market, code, page)
        url = url + type
        logger.info('fetch url: {}'.format(url))
        res = None
        try:
            res = requests.get(url, timeout=60, headers={'Content-type': 'text/html;charset=gb2312'})
            if res.encoding == 'ISO-8859-1':
                res.encoding = 'gbk'
            html = res.text
            res.raise_for_status()
            if res.status_code == 200:
                contentSoup = bs4.BeautifulSoup(html, 'lxml')
                elems = contentSoup.select('.xb_news > ul > li')
                # cap history at 100 pages: switch from type '1' to type '2',
                # then stop once type '2' is exhausted as well
                if page >= 100:
                    if type == "1":
                        return 1, '2'
                    else:
                        return -1, '2'
                for elem in elems:
                    item = {}  # renamed from 'json' to avoid shadowing the json module
                    item['code'] = code
                    ele = elem.select('span')
                    if len(ele) == 0:
                        continue
                    item['date'] = DateUtil.format_date_us_history(ele[0].getText())
                    s = item['date']
                    ele = elem.select('a')
                    item['title'] = ele[len(ele) - 1].getText()
                    logger.info("date:{},title:{}".format(s, item['title']))
                    item['href'] = ele[len(ele) - 1].attrs['href']
                    item['year'] = 'real'
                    ret, content = self.get_content(item['href'], "utf-8")

                    if ret == 0:
                        item['content'] = content
                        self.itemArray.append(item)
        # specific timeout handlers must precede the generic Exception
        # handler, otherwise they are unreachable
        except requests.exceptions.Timeout as err:
            logger.warning(err)
        except Exception as err:
            logger.warning(err)
        finally:
            if res is not None:  # requests.get may raise before res is bound
                res.close()
        return page + 1, type
Code Example #17
    def get_calendars(self):

        urls = [
            'https://news.futunn.com/new-calendar/events-list?begin_time={0}&end_time=2037-12-31&event_type=%5B%22%E6%B8%AF%E8%82%A1%E6%96%B0%E8%82%A1%22%2C%22%E7%BE%8E%E8%82%A1%E6%96%B0%E8%82%A1%22%2C%22A%E8%82%A1%E6%96%B0%E8%82%A1%22%5D&stock_type=&_={1}',
            'https://news.futunn.com/new-calendar/events-list?begin_time={0}&end_time=2037-12-31&event_type=["港股财报"%2C"美股财报"%2C"A股财报"]&stock_type=&_={1}',
            'https://news.futunn.com/new-calendar/events-list?begin_time={0}&end_time=2037-12-31&event_type=["港股除权除息"%2C"美股除权除息"%2C"A股除权除息"]&stock_type=&_={1}',
            'https://news.futunn.com/new-calendar/events-list?begin_time={0}&end_time=2037-12-31&event_type=["财经事件"]&stock_type=&_={1}',
            'https://news.futunn.com/new-calendar/events-list?begin_time={0}&end_time=2037-12-31&event_type=["经济数据"]&stock_type=&_={1}',
            'https://news.futunn.com/new-calendar/events-list?begin_time={0}&end_time=2037-12-31&event_type=["休市提醒"]&stock_type=&_={1}'
        ]

        for idx in range(0, len(urls)):
            # {0} is today's date, {1} a millisecond cache-busting timestamp
            url = urls[idx].format(DateUtil.getTodayStr(), int(1000 * time.mktime(time.localtime())) + idx)
            logger.info("address current url {0}...".format(url))

            arr = []
            header = {
                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'}
            res = None
            try:
                res = requests.get(url, headers=header, timeout=60)
                res.raise_for_status()
                if res.status_code == 200:
                    js = json.loads(res.text)

                    # each elem looks like {'event_type': '港股新股', 'market_type': 'HK',
                    # 'event_text': '...', 'event_time': '2019-05-05 00:00:00', 'total': 2};
                    # (market_type, event_type, event_time) is the unique key used to
                    # drop duplicates downstream
                    for elem in js['data']['list']:
                        arr.append(elem)

                    if len(arr) > 0:
                        self.mongodbutil_calendar.insertItems(arr)
                        logger.info("store items to mongodb ...")
                    else:
                        logger.info("still no new calendar events")

            # specific timeout handlers must precede the generic Exception
            # handler, otherwise they are unreachable
            except requests.exceptions.Timeout as err:
                logger.warning(err)
            except Exception as err:
                logger.warning(err)
            finally:
                if res is not None:  # requests.get may raise before res is bound
                    res.close()

        return 1, 'ok'
Code Example #18
def signal_term_handler(*args):
    global is_closing
    logger.info('killed, exiting...')
    is_closing = True
Code Example #19
def signal_int_handler(signum, frame):
    global is_closing
    logger.info('exiting...')
    is_closing = True
Code Example #20
def scheduled_job3():
    logger.info('scheduled_job3..')
    if not working:
        catch_futu_individuals()  # one-off
    else:
        logger.info('previous timer is still working')
Code Example #21
def job_once_individuals(*_args):
    '''One-off crawl of per-stock (individual) news for every code in arr.'''
    global is_closing
    global working
    global timerid

    working = True

    store = _args[0][0]
    arr = _args[0][1]
    futunews = _args[0][2]

    logger.info('start crawl current news...')

    # runs at most once; the loop only provides early-exit points via break
    while not is_closing:
        begin = time.time()
        ret_arr = arr

        total = len(ret_arr)
        curr = 0
        for code in ret_arr:
            if is_closing:
                break

            curr += 1

            logger.info("current fetching individuals progress {}/{} code:{} ".format(curr, total, code))
            if curr < 0:  # resume offset; raise it to skip already-processed codes
                continue

            market = code[0:2]
            symbol = code[3:]

            logger.info('Current Time:{}, code:{}, market:{}'.format(datetime.datetime.now(), symbol, market))

            try:
                futunews.get_individual_news(market, symbol)
                items = futunews.get_item_array()
                if len(items) > 0:
                    futunews.mongodbutil.insertItems(items)
                    logger.info("store items to mongodb news ...")
                else:
                    logger.info("all news items exist")
            except Exception as err:
                time.sleep(4 * random.random())
                logger.warning(err)

            # disabled variants of the same try/insert pattern cover
            # get_individual_balancesheet, get_individual_cashflow,
            # get_individual_income, get_individual_companyinfo and
            # get_individual_dividend, each writing through its matching
            # helper (mongodbutil_balancesheet, mongodbutil_cash,
            # mongodbutil_income, mongodbutil_companyinfo, mongodbutil_dividend)

            if is_closing:
                break

        working = False

        end = time.time()
        logger.info("fetching for one period, cost time: {}".format(end - begin))

        break
Code Example #22
File: sina_remedy.py Project: github4n/hsstock
if __name__ == '__main__':
    AppConfig.get_config()
    mongodbutil = MongodbUtil(AppConfig.mongodb_ip, AppConfig.mongodb_port,
                              AppConfig.mongodb_collection)
    connection = mongodbutil.collection

    logger.info('Starting time: {}'.format(datetime.datetime.now()))

    # strip the '.ct_hqimg' style block that sina pages embed between two
    # runs of eight newlines at the start of the stored content
    for s in connection.find({}):
        content = str(s['content'])
        if content.startswith('\n\n\n\n\n\n\n\n'):
            content = content.partition('\n\n\n\n\n\n\n\n')
            if len(content) == 3:
                content = content[2].partition('\n\n\n\n\n\n\n\n')
                if len(content) == 3 and content[0].startswith('.ct_hqimg'):
                    content = content[2]
                    # update() is deprecated in recent pymongo; update_one
                    # performs the same single-document $set
                    connection.update_one({"_id": s['_id']},
                                          {"$set": {"content": content}})
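The remedy script leans on str.partition, which always returns a (head, separator, tail) 3-tuple, with empty strings in the separator slot when no match is found; the length checks therefore always pass, and the real guard is the startswith test. A quick illustration:

sep = '\n\n\n\n\n\n\n\n'
text = sep + '.ct_hqimg {...}' + sep + 'real article body'
head, _, tail = text.partition(sep)
print(repr(head))                      # '' because text starts with sep
style, _, body = tail.partition(sep)
print(style.startswith('.ct_hqimg'))   # True: the style block is stripped
print(body)                            # 'real article body'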
Code Example #23
def job_once_global(*_args):
    '''One-off crawl of historical news pages for every code in arr.'''
    global is_closing

    store = _args[0][0]
    arr = _args[0][1]
    sinanewshistory = _args[0][2]

    while not is_closing:
        begin = time.time()
        ret_arr = arr

        total = len(ret_arr)
        curr = 0
        for code in ret_arr:
            curr += 1

            logger.info(
                "current fetching entry progress {}/{} code:{} ".format(
                    curr, total, code))
            if curr < 10398:  # resume offset from a previous interrupted run
                continue

            market = code[0:2]
            symbol = code[3:]

            sinanewshistory.clear_item_array()
            logger.info('Current Time:{}, code:{}, market:{}'.format(
                datetime.datetime.now(), symbol, market))

            page = 1
            type = '1'
            # the *_page helpers return -1 as the page number when finished
            while page != -1:
                if is_closing:
                    break
                try:
                    if market == 'HK':
                        page, _ = sinanewshistory.get_hk_page(
                            market, symbol, page)
                    if market == 'US':
                        page, type = sinanewshistory.get_us_page(
                            market, symbol, page, type)
                    if market == 'SZ' or market == 'SH':
                        page, _ = sinanewshistory.get_chn_page(
                            market, symbol, page)

                    items = sinanewshistory.get_item_array()
                    if len(items) > 0:
                        sinanewshistory.mongodbutil.insertItems(items)
                        time.sleep(random.random())
                        logger.info("store items to mongodb ...")
                    else:
                        logger.info("all items exist")
                        page = -1
                except Exception as err:
                    time.sleep(4 * random.random())
                    logger.warning('my err:{}'.format(err))
                    page = -1

            if is_closing:
                break

        end = time.time()
        logger.info("fetching for one period, cost time: {}".format(
            (end - begin)))

        signal_int_handler(0, 0)
        break