예제 #1
0
    def get_latest_news(self, top=5, show_content=True):
        '''
        Fetch the latest news through ``ts.get_latest_news`` and store the
        rows that are newer than ``AppConfig.latest_news_pulltime``.

        Parameters:
            top: int, number of latest items to request, default 5. It is
                overridden to 80 when more than 1000 seconds have elapsed
                since ``AppConfig.latest_news_pulltime``.
            show_content: boolean, whether to request the news content,
                default True.

        Columns of the fetched data:
            classify: news category
            title: news title
            time: publish time
            url: news link
            content: news content (present when show_content is True)

        Returns:
            None. New rows are written to the ``ts2_latest_news`` table via
            ``self.storeservice.insert_many`` and the newest publish time is
            recorded with ``AppConfig.write_news_pulltime``. IOError and
            OperationalError are logged and swallowed.
        '''
        try:
            last_pulltime = AppConfig.latest_news_pulltime
            # After a long gap request a bigger batch so nothing is missed.
            if time.time() - last_pulltime > 1000:
                top = 80
            df = ts.get_latest_news(top, show_content)
            if df is None:
                logging.info('df is None')
                return
            if len(df) == 0:
                return

            # Parse every publish time once, in row order.
            pulltimes = [
                DateUtil.string_toTimestamp(DateUtil.format_date(raw_time))
                for raw_time in df['time']
            ]
            # Keep only rows published after the previous pull. A boolean
            # mask is positional, so the result does not depend on the row
            # order or on the index labels of the returned frame.
            is_new = [pulltime > last_pulltime for pulltime in pulltimes]
            fresh = df.loc[is_new]
            logging.debug('dropped %d already stored news rows',
                          len(df) - len(fresh))

            if len(fresh) > 0:
                self.storeservice.insert_many('ts2_latest_news', fresh)
                # At least one row is newer, so the maximum is the newest
                # stored publish time.
                AppConfig.write_news_pulltime(max(pulltimes), True)
        except (IOError, OperationalError) as err:
            logging.error("OS|error: {0}".format(err))
예제 #2
0
    def get_page(self, market, code, url):
        '''
        Download a news list page and collect its entries into
        ``self.itemArray``.

        ``self.itemArray`` is reset on every call. For each matched list
        element a dict with the keys ``code``, ``date``, ``title``, ``href``,
        ``year`` and ``content`` is built; it is appended only when
        ``self.get_content`` reports success (0) for the entry's link.

        Parameters:
            market: market identifier; when it equals 'US', list elements
                whose markup has a newline right after the opening tag are
                skipped.
            code: value stored under the ``code`` key of every item.
            url: address of the list page to download.

        Returns:
            tuple (ret_code, ret_data):
                (0, '') when at least one list element was processed;
                (-1, '') when the response was not 200 or no element was
                processed;
                (-1, err) when an exception ``err`` was raised.
        '''
        ret_code = -1
        ret_data = ''
        self.itemArray = []
        # Bound before the try so the cleanup below is safe even when the
        # request itself fails and no response object exists.
        res = None

        try:
            res = requests.get(url, timeout=60, headers={
                'Content-type': 'text/html;charset=gb2312'
            })
            # requests falls back to ISO-8859-1 when the server declares no
            # charset; decode as gbk in that case.
            if res.encoding == 'ISO-8859-1':
                res.encoding = 'gbk'
            res.raise_for_status()
            if res.status_code == 200:
                html = res.text
                contentSoup = bs4.BeautifulSoup(html, 'lxml')
                elems = contentSoup.select(
                    '#js_ggzx > li,.li_point > ul > li,.col02_22 > ul > li')
                for elem in elems:
                    # Character right after the opening "<li>" tag.
                    first_char = str(elem)[4:5]
                    if first_char == '\n' and market == 'US':
                        continue

                    item = {'code': code}
                    spans = elem.select('span')
                    # Strip the first and last character around the date text.
                    item['date'] = DateUtil.format_date(
                        spans[0].getText()[1:-1])
                    # The last anchor of the entry carries title and link.
                    link = elem.select('a')[-1]
                    item['title'] = link.getText()
                    logger.info("date:{},title:{}".format(
                        item['date'], item['title']))
                    item['href'] = link.attrs['href']
                    item['year'] = 'guess'

                    ret, content = self.get_content(item['href'], 'utf-8')
                    if ret == 0:
                        item['content'] = content
                        self.itemArray.append(item)
                    ret_code = 0
                    ret_data = ''
        except Exception as err:
            # Covers requests timeouts and connection errors as well as
            # parsing errors. KeyboardInterrupt and SystemExit are left to
            # propagate.
            logger.warning(err)
            ret_code = -1
            ret_data = err
        finally:
            if res is not None:
                res.close()
        return ret_code, ret_data