def get_futunn_news(self):
    """Crawl futunn market-news pages by sequential id and buffer them for MongoDB.

    Iterates over a hard-coded id range, skips URLs already recorded in
    MongoDB, parses title/content/date out of each page, and appends a record
    to ``self.itemArray``.  Buffered records are flushed to MongoDB in batches
    of more than 50; any remainder (<= 50 items) is left in ``self.itemArray``
    for the caller to flush.

    Returns:
        tuple[int, str]: always ``(1, 'ok')``; per-page failures are only logged.
    """
    # Loop-invariant request header — build once, not per iteration.
    header = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'}
    for i in range(94471, 94480, 1):
        url = 'https://news.futunn.com/market/{0}?src=3'.format(i)
        if self.mongodbutil.urlIsExist(url):
            logger.info('This url:{} has existed'.format(url))
            continue
        item = {}  # renamed from `json` to stop shadowing the stdlib module name
        res = None  # pre-bind so the finally clause cannot hit an unbound name
        try:
            res = requests.get(url, headers=header, timeout=60)
            res.raise_for_status()
            if res.status_code == 200:
                soup = bs4.BeautifulSoup(res.text, 'lxml')
                elems = soup.select('.inner')
                item['content'] = elems[0].getText()
                elems = soup.select('.news-title > h1')
                item['title'] = elems[0].getText()
                elems = soup.select('.news-title > .timeBar')
                # The date substring starts at the first '2' of the year
                # (e.g. '2019-...') and spans 16 chars: 'YYYY-MM-DD HH:MM'.
                pos = elems[0].getText().strip().find('2')
                item['date'] = elems[0].getText().strip()[pos:pos + 16]
                item['href'] = url
                item['code'] = ' '
                item['year'] = DateUtil.string_toDatetime2(item['date']).year
                item['sourcefrom'] = 'futunn'
                self.itemArray.append(item)
                if len(self.get_item_array()) > 50:
                    self.mongodbutil.insertItems(self.get_item_array())
                    logger.info("store items to mongodb ...")
                    self.clear_item_array()
        # Specific timeout handlers must precede the generic Exception handler;
        # in the original they came after `except Exception` and were unreachable.
        except (requests.exceptions.ConnectTimeout,
                requests.exceptions.ReadTimeout,
                requests.exceptions.Timeout) as err:
            logger.warning(err)
        except Exception as err:
            # Best-effort crawl: log and move on to the next id.  The original
            # trailing bare `except` (which could swallow KeyboardInterrupt)
            # was removed deliberately so BaseException now propagates.
            logger.warning(err)
        finally:
            if res is not None:
                res.close()
    return 1, 'ok'
def get_individual_news(self, market, code):
    """Fetch the news list for one stock from futunn and fill ``self.itemArray``.

    Resets ``self.itemArray``, requests the stock-news page for
    ``market``/``code``, parses each list entry (title, href, date, year), and
    pulls the full article body via ``self.get_content``.

    Args:
        market: market identifier, lower-cased into the URL (e.g. 'hk', 'us').
        code:   stock code, upper-cased into the URL.

    Returns:
        tuple[int, object]: ``(0, '')`` on success, ``(-1, err)`` on failure.
    """
    ret_code = -1
    ret_data = ''
    self.itemArray = []
    url = "https://www.futunn.com/quote/stock-news?m={0}&code={1}".format(market.lower(), code.upper())
    res = None  # pre-bind so the finally clause cannot hit an unbound name
    try:
        header = {
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36',
            'Accept': 'application/json,text/javascript,*.*;q=0.01',
            'Origin': 'https://www.futunn.com',
            # BUG FIX: the original passed the bound method `code.upper`
            # (rendering '<built-in method upper...>' into the Referer);
            # it must be called: `code.upper()`.
            'Referer': 'https://www.futunn.com/quote/stock-info?m={0}&code={1}&type=finance_analyse'.format(market.lower(), code.upper())
        }
        res = requests.get(url, headers=header)
        if res.encoding == 'ISO-8859-1':
            # Server mislabels the charset; the page is actually GBK.
            res.encoding = 'gbk'
        html = res.text
        res.raise_for_status()
        if res.status_code == 200:
            contentSoup = bs4.BeautifulSoup(html, 'lxml')
            elems = contentSoup.select('.ulList02 > ul > li')
            for elem in elems:
                item = {}  # renamed from `json` to stop shadowing the stdlib module
                item['code'] = code
                item['market'] = market
                item['title'] = elem.select('.txt01')[0].getText()
                item['href'] = elem.select('.txt01 > a')[0]['href']
                # Skip the first 3 chars of the time bar (a label prefix) before parsing.
                item['date'] = DateUtil.string_toDatetime2(elem.select('.bar01')[0].getText().strip()[3:])
                item['year'] = item['date'].year
                item['sourcefrom'] = 'futunn'
                ret, content = self.get_content(item['href'], 'utf-8')
                if ret == 0:
                    item['content'] = content
                    self.itemArray.append(item)
            ret_code = 0
            ret_data = ''
    # Specific timeout handlers must precede the generic Exception handler;
    # in the original they came after `except Exception` and were unreachable.
    except (requests.exceptions.ConnectTimeout,
            requests.exceptions.ReadTimeout,
            requests.exceptions.Timeout) as err:
        logger.warning(err)
        ret_code = -1
        ret_data = err
    except Exception as err:
        # The original trailing bare `except` (which could swallow
        # KeyboardInterrupt) was removed; BaseException now propagates.
        logger.warning(err)
        ret_code = -1
        ret_data = err
    finally:
        if res is not None:
            res.close()
    return ret_code, ret_data
def get_live_info(self):
    """Scrape the live news feed at ``self.url`` into ``self.itemArray``.

    Walks every ``a.news-link`` element, keeps only entries at or after the
    most recent timestamp seen so far (starting from 2019-05-01 09:00), and
    fetches each article body via ``self.get_content``.

    Returns:
        tuple[int, object]: ``(0, '')`` on success, ``(-1, err)`` on failure.
    """
    ret_code = -1
    ret_data = ''
    self.itemArray = []
    lasttime = DateUtil.string_toDatetime2('2019-05-01 09:00')
    res = None  # pre-bind so the finally clause cannot hit an unbound name
    try:
        res = requests.get(self.url)
        if res.encoding == 'ISO-8859-1':
            # Server mislabels the charset; the page is actually GBK.
            res.encoding = 'gbk'
        html = res.text
        res.raise_for_status()
        if res.status_code == 200:
            contentSoup = bs4.BeautifulSoup(html, 'lxml')
            elems = contentSoup.find_all('a', class_='news-link')
            for elem in elems:
                item = {}  # renamed from `json` to stop shadowing the stdlib module
                item['code'] = ' '
                spans = elem.select('span')
                # BUG FIX: the original bound this to a local named `time`,
                # shadowing the `time` module used by the error handlers.
                news_time = spans[len(spans) - 1].getText()
                item['date'] = DateUtil.string_toDatetime2(news_time)
                s = item['date']
                if s < lasttime:
                    # Older than the newest entry already seen — skip it.
                    continue
                lasttime = s
                h3 = elem.select('h3')
                item['title'] = h3[len(h3) - 1].getText()
                logger.info("date:{},title:{}".format(s, item['title']))
                item['href'] = elem.attrs['href']
                item['year'] = item['date'].year
                item['sourcefrom'] = 'futunn'
                ret, content = self.get_content(item['href'], 'utf-8')
                if ret == 0:
                    item['content'] = content
                    self.itemArray.append(item)
            ret_code = 0
            ret_data = ''
    # Specific timeout handlers must precede the generic Exception handler;
    # in the original they came after `except Exception` and were unreachable.
    except (requests.exceptions.ConnectTimeout,
            requests.exceptions.ReadTimeout,
            requests.exceptions.Timeout) as err:
        logger.warning(err)
        ret_code = -1
        ret_data = err
    except Exception as err:
        # The original trailing bare `except` (which could swallow
        # KeyboardInterrupt) was removed; BaseException now propagates.
        logger.warning(err)
        ret_code = -1
        ret_data = err
    finally:
        if res is not None:
            res.close()
    return ret_code, ret_data
# Fragment of a MongoDB date-cleanup pass (the enclosing loop over documents
# is outside this view): for each document `s` whose `date` field is still a
# raw string, parse it into a real datetime and write it back; documents whose
# date cannot be parsed by either parser are deleted.
# NOTE(review): `s`, `connection`, `isDateCount`, and `DateUtil` come from the
# surrounding (unseen) scope — confirm against the full file.
url = str(s['href'])
if isinstance(s['date'], str):
    date = None
    strDate = s['date']
    try:
        # Strip a stray anchor-tag remnant left over from scraping.
        strDate = strDate.replace(' </a>','')
        date = DateUtil.string_toDatetime(strDate)
    except:
        # Parse failure with format 1 — fall through to the second parser.
        print('2')
    if date is not None:
        connection.update_one({"_id": s['_id']}, {"$set": {"date": date}})
    else:
        try:
            # Fallback: alternate date format parser.
            date = DateUtil.string_toDatetime2(strDate)
            if date is not None:
                connection.update_one({"_id": s['_id']}, {"$set": {"date": date}})
            else:
                # Unparseable by both formats: drop the document.
                connection.delete_one({"_id": s['_id']})
        except:
            print('3')
else:
    # `date` is already a datetime — just count progress, printing a marker
    # every 10000 documents.
    isDateCount = isDateCount+1
    if isDateCount > 10000:
        print(isDateCount)
        isDateCount = 0