def get_futunn_live(self):
    """Fetch the futunn live-news list and store entries newer than the last saved one.

    The newest stored timestamp is read from ``self.mongodbutil_live``; every
    item of the response whose ``time`` is later than it is handed to
    ``self.mongodbutil_live.insertItems``. Request and parsing errors are
    logged as warnings and never propagate to the caller.

    :return: always the tuple ``(1, 'ok')``
    """
    lasttime = DateUtil.string_toDatetime(self.mongodbutil_live.getLastLivetime())
    header = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'}

    # range(0, -1, -1) yields only 0, so exactly one page (page 0) is requested.
    for i in range(0, -1, -1):
        # Millisecond timestamp sent as the "_" cache-busting query parameter.
        p = int(1000 * time.mktime(time.localtime())) + i
        url = 'https://news.futunn.com/main/live-list?page={0}&page_size=50&_={1}'.format(i, p)
        logger.info("address current url {0}...".format(url))

        arr = []
        # Pre-bind so the finally clause is safe when requests.get() itself raises.
        res = None
        try:
            res = requests.get(url, headers=header, timeout=60)
            res.raise_for_status()
            if res.status_code == 200:
                items = json.loads(res.text)['data']['list']
                for elem in items:
                    if DateUtil.string_toDatetime(elem['time']) > lasttime:
                        arr.append(elem)
                        logger.info(elem)
                if arr:
                    self.mongodbutil_live.insertItems(arr)
                    logger.info("store items to mongodb ...")
                else:
                    logger.info("still have no new live message")
        except Exception as err:
            # Covers timeouts, HTTP errors and malformed payloads alike; this
            # job is best-effort, so the failure is only logged.
            logger.warning(err)
        finally:
            if res is not None:
                res.close()
    return 1, 'ok'
def append_one_stock(service, code, dtype, listing_date):
    """Fetch history k-line data for one stock, from its last stored date up to today.

    The date range is walked in windows produced by ``DateUtil.getNextHalfYear``
    (called with ``ndays=360``); after each window the stored last date for
    ``(code, dtype)`` is advanced when the fetch reports one. The loop stops
    early once the module-level ``is_closing`` flag is set.

    :param service: object providing ``storeservice`` and ``get_hisft_klinetory_data``
    :param code: stock code; its first three characters are dropped before it
        is passed to ``get_hisft_klinetory_data`` (presumably a market prefix —
        TODO confirm)
    :param dtype: key under which the last fetched date is looked up and updated
    :param listing_date: date compared against the last stored date to decide
        where fetching starts
    :return: None
    """
    today_str = DateUtil.getTodayStr()
    # Fixed fallback start used when nothing has been stored yet.
    last_fetchdate = DateUtil.string_toDate('2018-08-02')

    ld, _tindex = service.storeservice.find_lastdate_and_tindex(code, dtype)
    lastdate = last_fetchdate if ld is None else ld
    if lastdate is not None and lastdate.date() > listing_date:
        # Resume on the day after the last stored one.
        start = DateUtil.getDatetimeFutureStr(lastdate.date(), 1)
    else:
        start = DateUtil.date_toString(last_fetchdate)

    windows = DateUtil.getNextHalfYear(DateUtil.string_toDate(start),
                                       DateUtil.string_toDate(today_str),
                                       ndays=360)
    for end in windows:
        if is_closing is True:
            break
        b2 = time.time()
        days = DateUtil.diff(start, end)
        lastest_date = service.get_hisft_klinetory_data(code[3:], start, days)
        if lastest_date is not None:
            service.storeservice.update_lastdate(
                code, dtype,
                DateUtil.string_toDatetime(DateUtil.datetime_toString(lastest_date)))
            logging.info('********I')
        e2 = time.time()
        logging.info(
            "fetching {} dtype {} listing_date: {} start: {} end:{} cost time {}"
            .format(code, dtype, listing_date, start, end, e2 - b2))
        # The next window begins the day after this one ended.
        start = DateUtil.getDatetimeFutureStr(DateUtil.string_toDate(end), 1)
def append_one_stock(worker, code, dtype, ktype, listing_date):
    """Fetch history k-lines of one type for one stock, from its last stored date up to today.

    The date range is walked in windows produced by ``DateUtil.getNextHalfYear``;
    after each window the stored last date for ``(code, dtype)`` is advanced
    when the fetch reports one. The loop stops early once the module-level
    ``is_closing`` flag is set.

    :param worker: object providing ``storeservice`` and ``get_history_kline``
    :param code: stock code passed to the store and to ``get_history_kline``
    :param dtype: key under which the last fetched date is looked up and updated
    :param ktype: k-line type forwarded to ``get_history_kline``
    :param listing_date: date compared against the last stored date to decide
        where fetching starts
    :return: None
    """
    today_str = DateUtil.getTodayStr()
    # Fixed fallback start used when nothing has been stored yet.
    last_fetchdate = DateUtil.string_toDate('2018-08-02')

    ld, tindex = worker.storeservice.find_lastdate_and_tindex(code, dtype)
    lastdate = last_fetchdate if ld is None else ld
    if lastdate is not None and lastdate.date() > listing_date:
        # Resume on the day after the last stored one.
        start = DateUtil.getDatetimeFutureStr(lastdate.date(), 1)
    else:
        start = DateUtil.date_toString(last_fetchdate)

    windows = DateUtil.getNextHalfYear(DateUtil.string_toDate(start),
                                       DateUtil.string_toDate(today_str))
    for end in windows:
        if is_closing is True:
            break
        b2 = time.time()
        _, _, lastest_date = worker.get_history_kline(code, tindex, start, end, ktype)
        if lastest_date is not None:
            worker.storeservice.update_lastdate(
                code, dtype, DateUtil.string_toDatetime(lastest_date))
        e2 = time.time()
        logging.info(
            "fetching {} dtype {} ktype {} listing_date: {} start: {} end:{} cost time {}"
            .format(code, dtype, ktype, listing_date, start, end, e2 - b2))
        # The next window begins the day after this one ended.
        start = DateUtil.getDatetimeFutureStr(DateUtil.string_toDate(end), 1)
def _m5_append_fetch_pass(worker, code, listing_date, dtype, ktype, log_template,
                          today_str, last_fetchdate, convert_lastdate):
    """Fetch one k-line type for one stock, window by window, up to ``today_str``.

    :param worker: object providing ``storeservice`` and ``get_history_kline``
    :param code: stock code
    :param listing_date: date compared against the last stored date
    :param dtype: key used with ``find_lastdate2`` / ``update_lastdate``
    :param ktype: k-line type forwarded to ``get_history_kline``
    :param log_template: format string taking code, listing date, start, end, cost
    :param today_str: end of the overall range, as a date string
    :param last_fetchdate: fallback start when nothing has been stored yet
    :param convert_lastdate: when true, the fetched last date is passed through
        ``DateUtil.string_toDatetime`` before being stored
    """
    ld = worker.storeservice.find_lastdate2(code, dtype)
    lastdate = last_fetchdate if ld is None else ld
    if lastdate is not None and lastdate.date() > listing_date:
        # Resume on the day after the last stored one.
        start = DateUtil.getDatetimeFutureStr(lastdate.date(), 1)
    else:
        start = DateUtil.date_toString(last_fetchdate)

    windows = DateUtil.getNextHalfYear(DateUtil.string_toDate(start),
                                       DateUtil.string_toDate(today_str))
    for end in windows:
        if is_closing is True:
            break
        t0 = time.time()
        _, _, lastest_date = worker.get_history_kline(code, start, end, ktype=ktype)
        if lastest_date is not None:
            if convert_lastdate:
                lastest_date = DateUtil.string_toDatetime(lastest_date)
            worker.storeservice.update_lastdate(code, dtype, lastest_date)
        t1 = time.time()
        logging.info(log_template.format(code, listing_date, start, end, t1 - t0))
        # The next window begins the day after this one ended.
        start = DateUtil.getDatetimeFutureStr(DateUtil.string_toDate(end), 1)


def job_once_global_m5_append(*_args):
    """Thread job (low-frequency data interface): append daily and 5-minute k-lines once.

    ``_args[0]`` carries the work description: ``_args[0][0]`` is the worker and
    ``_args[0][1]`` an iterable of ``(code, listing_date)`` pairs. For every
    pair the daily pass (stored under ``'hk'``) runs first, then the 5-minute
    pass (stored under ``'hk_5m'``). Nothing is done when the module-level
    ``is_closing`` flag is already set, and the run stops after the current
    stock once it becomes set.

    :return: None
    """
    worker = _args[0][0]
    arr = _args[0][1]
    if is_closing:
        return

    begin = time.time()
    today_str = DateUtil.getTodayStr()
    # Fixed fallback start used when nothing has been stored yet.
    last_fetchdate = DateUtil.string_toDate('2018-08-02')
    total = len(arr)
    # 1-based position to resume from; 1 means no stock is skipped.
    resume_from = 1

    for curr, (code, listing_date) in enumerate(arr, start=1):
        logging.info("current fetching progress {}/{} code:{} ".format(curr, total, code))
        if curr < resume_from:
            continue

        # Timer covers both passes for this stock.
        code_begin = time.time()

        # KLType.K_DAY
        _m5_append_fetch_pass(
            worker, code, listing_date, 'hk', KLType.K_DAY,
            "fetching {} K_DAY listing_date: {} start: {} end:{} cost time {}",
            today_str, last_fetchdate, True)

        # KLType.K_5M
        # NOTE(review): unlike the daily pass, the 5-minute pass stores the
        # fetched last date without DateUtil.string_toDatetime — confirm that
        # this difference is intended.
        _m5_append_fetch_pass(
            worker, code, listing_date, 'hk_5m', KLType.K_5M,
            "fetching {} K_5M_LINE listing_date:{} start: {} end:{} cost time {}",
            today_str, last_fetchdate, False)

        code_end = time.time()
        logging.info("position {} fetching {} const time {}".format(curr, code, code_end - code_begin))
        if is_closing is True:
            break

    finish = time.time()
    logging.info("fetching for one period , cost time: {}".format((finish - begin)))
def _m5_append_mt_fetch_pass(worker, code, listing_date, dtype, ktype, log_template,
                             today_str, last_fetchdate, convert_lastdate):
    """Fetch one k-line type for one stock, window by window, up to ``today_str``.

    :param worker: object providing ``storeservice`` and ``get_history_kline``
    :param code: stock code
    :param listing_date: date compared against the last stored date
    :param dtype: key used with ``find_lastdate2`` / ``update_lastdate``
    :param ktype: k-line type forwarded to ``get_history_kline``
    :param log_template: format string taking code, listing date, start, end, cost
    :param today_str: end of the overall range, as a date string
    :param last_fetchdate: fallback start when nothing has been stored yet
    :param convert_lastdate: when true, the fetched last date is passed through
        ``DateUtil.string_toDatetime`` before being stored
    """
    ld = worker.storeservice.find_lastdate2(code, dtype)
    lastdate = last_fetchdate if ld is None else ld
    if lastdate is not None and lastdate.date() > listing_date:
        # Resume on the day after the last stored one.
        start = DateUtil.getDatetimeFutureStr(lastdate.date(), 1)
    else:
        start = DateUtil.date_toString(last_fetchdate)

    windows = DateUtil.getNextHalfYear(DateUtil.string_toDate(start),
                                       DateUtil.string_toDate(today_str))
    for end in windows:
        if is_closing is True:
            break
        t0 = time.time()
        _, _, lastest_date = worker.get_history_kline(code, start, end, ktype=ktype)
        if lastest_date is not None:
            if convert_lastdate:
                lastest_date = DateUtil.string_toDatetime(lastest_date)
            worker.storeservice.update_lastdate(code, dtype, lastest_date)
        t1 = time.time()
        logging.info(log_template.format(code, listing_date, start, end, t1 - t0))
        # The next window begins the day after this one ended.
        start = DateUtil.getDatetimeFutureStr(DateUtil.string_toDate(end), 1)


def job_once_global_m5_append_multithread(*_args):
    """Thread job (low-frequency data interface): append daily and 5-minute k-lines once.

    ``_args[0]`` carries the work description: ``_args[0][0]`` is the worker and
    ``_args[0][1]`` an iterable of ``(code, listing_date)`` pairs. For every
    pair the daily pass (stored under ``'hk'``) runs first, then the 5-minute
    pass (stored under ``'hk_5m'``). Nothing is done when the module-level
    ``is_closing`` flag is already set, and the run stops after the current
    stock once it becomes set.

    :return: None
    """
    worker = _args[0][0]
    arr = _args[0][1]
    if is_closing:
        return

    begin = time.time()
    today_str = DateUtil.getTodayStr()
    # Fixed fallback start used when nothing has been stored yet.
    last_fetchdate = DateUtil.string_toDate('2018-08-02')
    total = len(arr)
    # 1-based position to resume from; 1 means no stock is skipped.
    resume_from = 1

    for curr, (code, listing_date) in enumerate(arr, start=1):
        # Author's record of which position ranges went to which table:
        # 1  - (1~2998, inclusive)
        # 2  - (2999~15918, end exclusive)
        # 3  - (15918~18986, end exclusive)
        # 4  - (18986~19430, end exclusive) default InnoDB
        # 5  - (19430~21898, end exclusive) MyISAM engine, ft_history_kline_5
        # 6  - (21898~24768, end exclusive) MyISAM engine, ft_history_kline_6
        # 7  - (24768~26347, end exclusive) MyISAM engine, ft_history_kline_7
        # 8  - (26347~27096, end exclusive) MyISAM engine, ft_history_kline_8,
        #      triggered by docker upgrade
        # 9  - (27096~28123, end exclusive) MyISAM engine, ft_history_kline_9
        # 10 - (28123~31918) MyISAM engine, ft_history_kline_10
        # ft_history_kline table acts as the mrg_myisam table
        logging.info("current fetching progress {}/{} ".format(curr, total))
        if curr < resume_from:
            continue

        # Timer covers both passes for this stock.
        code_begin = time.time()

        # KLType.K_DAY
        _m5_append_mt_fetch_pass(
            worker, code, listing_date, 'hk', KLType.K_DAY,
            "fetching {} K_DAY listing_date: {} start: {} end:{} cost time {}",
            today_str, last_fetchdate, True)

        # KLType.K_5M
        # NOTE(review): unlike the daily pass, the 5-minute pass stores the
        # fetched last date without DateUtil.string_toDatetime — confirm that
        # this difference is intended.
        _m5_append_mt_fetch_pass(
            worker, code, listing_date, 'hk_5m', KLType.K_5M,
            "fetching {} K_5M_LINE listing_date:{} start: {} end:{} cost time {}",
            today_str, last_fetchdate, False)

        code_end = time.time()
        logging.info("position {} fetching {} const time {}".format(curr, code, code_end - code_begin))
        if is_closing is True:
            break

    finish = time.time()
    logging.info("fetching for one period , cost time: {}".format((finish - begin)))
# content = content.partition('\n\n\n\n\n\n\n\n') # if content.__len__() == 3: # content = content[2].partition('\n\n\n\n\n\n\n\n') # if content.__len__() ==3 and content[0].startswith('.ct_hqimg'): # content = content[2] # connection.update({"_id": s['_id']}, {"$set": {"content": content}}) isDateCount = 0 for s in connection.find({}): url = str(s['href']) if isinstance(s['date'], str): date = None strDate = s['date'] try: strDate = strDate.replace(' </a>','') date = DateUtil.string_toDatetime(strDate) except: print('2') if date is not None: connection.update_one({"_id": s['_id']}, {"$set": {"date": date}}) else: try: date = DateUtil.string_toDatetime2(strDate) if date is not None: connection.update_one({"_id": s['_id']}, {"$set": {"date": date}}) else: connection.delete_one({"_id": s['_id']}) except:
def get_page(self, market, code, url):
    """Download a news list page and collect its entries into ``self.itemArray``.

    ``self.itemArray`` is reset first. Every ``<li>`` matched by the list
    selectors becomes a dict with the keys ``code``, ``date``, ``title``,
    ``href``, ``year`` and ``content``; an entry is kept only when
    ``self.get_content`` returns status 0 for its link.

    :param market: market identifier; for ``'US'`` an entry is skipped when the
        fifth character of its markup is a newline
    :param code: stock code stored on every collected entry
    :param url: address of the list page
    :return: ``(0, '')`` after a status-200 page was processed, ``(-1, err)``
        with the caught exception on failure, or ``(-1, '')`` for any other
        non-error status
    """
    ret_code = -1
    ret_data = ''
    self.itemArray = []
    # Pre-bind so the finally clause is safe when requests.get() itself raises.
    res = None
    try:
        res = requests.get(url, timeout=60, headers={
            'Content-type': 'text/html;charset=gb2312'
        })
        res.raise_for_status()
        # ISO-8859-1 is what requests reports when the server names no charset
        # for text content; decode such pages as GBK instead.
        if res.encoding == 'ISO-8859-1':
            res.encoding = 'gbk'
        html = res.text
        if res.status_code == 200:
            soup = bs4.BeautifulSoup(html, 'lxml')
            entries = soup.select('#js_ggzx > li,.li_point > ul > li,.col02_22 > ul > li')
            for elem in entries:
                if market == 'US' and str(elem)[4:5] == '\n':
                    continue

                item = {'code': code}
                spans = elem.select('span')
                # Drop the first and last character around the date text.
                s = DateUtil.format_date(spans[0].getText()[1:-1])
                item['date'] = DateUtil.string_toDatetime(s.replace(' </a>', ''))

                # The last anchor of the entry carries the title and the link.
                link = elem.select('a')[-1]
                item['title'] = link.getText()
                logger.info("date:{},title:{}".format(s, item['title']))
                item['href'] = link.attrs['href']
                item['year'] = 'guess'

                ret, content = self.get_content(item['href'], 'utf-8')
                if ret == 0:
                    item['content'] = content
                    self.itemArray.append(item)
            ret_code = 0
            ret_data = ''
    except Exception as err:
        # Covers timeouts, HTTP errors and parsing problems alike; the caller
        # receives the exception object as ret_data.
        logger.warning(err)
        ret_code = -1
        ret_data = err
    finally:
        if res is not None:
            res.close()
    return ret_code, ret_data