def get_latest_news(self, top=5, show_content=True):
    """Pull the latest real-time news and store the not-yet-seen rows.

    The rows are fetched with ``ts.get_latest_news``, ordered newest first,
    cut off at the first row whose time is not newer than
    ``AppConfig.latest_news_pulltime``, and the remaining rows are written
    to the ``ts2_latest_news`` table through ``self.storeservice``.

    Args:
        top: int, number of latest items to request (default 5). It is
            overridden to 80 when more than 1000 seconds have passed since
            ``AppConfig.latest_news_pulltime``.
        show_content: bool, whether to request the news body (default True).

    Columns of the fetched data (per the original description):
        classify: news category
        title: news title
        time: publish time
        url: news link
        content: news body (present when ``show_content`` is True)

    Returns:
        None. ``IOError`` and ``OperationalError`` are logged, not raised.
    """
    try:
        # After a long gap since the last pull, request a bigger batch.
        if time.time() - AppConfig.latest_news_pulltime > 1000:
            top = 80
        df = ts.get_latest_news(top, show_content)
        if df is None:
            logging.info('df is None')
            return

        table = 'ts2_latest_news'

        # sort_values() returns a new frame, so the result must be assigned.
        # The index is reset so that row positions and labels agree after
        # the reordering.
        df = df.sort_values(by="time", ascending=False).reset_index(drop=True)

        latest_pulltime = None
        dropindex = -1
        for i, raw_time in enumerate(df['time']):
            pulltime = DateUtil.string_toTimestamp(DateUtil.format_date(raw_time))
            if i == 0:
                # Rows are newest first, so row 0 carries the newest time.
                latest_pulltime = pulltime
            if pulltime <= AppConfig.latest_news_pulltime:
                # This row and everything after it is not newer than the
                # last recorded pull.
                dropindex = i
                print('dropindex:', dropindex)
                break

        if dropindex != -1:
            # Positional slice: keep only the rows before the cut-off.
            df = df.iloc[:dropindex]

        if len(df) > 0:
            self.storeservice.insert_many(table, df)
            AppConfig.write_news_pulltime(latest_pulltime, True)
    except (IOError, OperationalError) as err:
        logging.error("OS|error: {0}".format(err))
def get_page(self, market, code, url):
    """Fetch a news-list page and collect its entries into ``self.itemArray``.

    ``self.itemArray`` is reset to an empty list on every call. Each list
    item matched on the page becomes a dict with the keys ``code``,
    ``date``, ``title``, ``href``, ``year`` and ``content``; an entry is
    appended only when ``self.get_content`` reports success (``ret == 0``)
    for its link.

    Args:
        market: market identifier; for ``'US'`` some list items are skipped
            (see the comment in the loop).
        code: value stored under the ``code`` key of every collected entry.
        url: address of the page to download.

    Returns:
        tuple: ``(ret_code, ret_data)``. ``(0, '')`` when the page was
        answered with HTTP 200 and processed; ``(-1, err)`` with the caught
        exception when the download or the parsing failed; ``(-1, '')``
        when the response was successful but its status was not 200.
    """
    ret_code = -1
    ret_data = ''
    self.itemArray = []
    # Bound before the try so that the cleanup in ``finally`` is safe even
    # when requests.get() itself raises and no response object exists.
    res = None
    try:
        res = requests.get(url, timeout=60, headers={
            'Content-type': 'text/html;charset=gb2312'
        })
        # requests reports ISO-8859-1 when the server gives no charset;
        # the code then decodes the page as gbk instead.
        if res.encoding == 'ISO-8859-1':
            res.encoding = 'gbk'
        html = res.text
        res.raise_for_status()
        if res.status_code == 200:
            contentSoup = bs4.BeautifulSoup(html, 'lxml')
            elems = contentSoup.select(
                '#js_ggzx > li,.li_point > ul > li,.col02_22 > ul > li')
            for elem in elems:
                item = {}
                item['code'] = code
                # Skip US items whose markup has a newline as its fifth
                # character (presumably right after the opening <li> tag -
                # TODO confirm against the page layout).
                if str(elem)[4:5] == '\n' and market == 'US':
                    continue
                spans = elem.select('span')
                # [1:-1] drops the first and last character of the span
                # text (presumably surrounding brackets - TODO confirm).
                item['date'] = DateUtil.format_date(spans[0].getText()[1:-1])
                # The last <a> of the item supplies both title and link.
                link = elem.select('a')[-1]
                item['title'] = link.getText()
                logger.info("date:{},title:{}".format(item['date'], item['title']))
                item['href'] = link.attrs['href']
                item['year'] = 'guess'
                ret, content = self.get_content(item['href'], 'utf-8')
                if ret == 0:
                    item['content'] = content
                    self.itemArray.append(item)
            ret_code = 0
            ret_data = ''
    except Exception as err:
        # Covers the requests timeout errors too (they derive from
        # Exception), so no separate handlers are needed.
        # KeyboardInterrupt and SystemExit are deliberately not caught.
        logger.warning(err)
        ret_code = -1
        ret_data = err
    finally:
        if res is not None:
            res.close()
    return ret_code, ret_data