예제 #1
0
    def get_futunn_news(self):

        for i in range(94471,94480,1):
            url = 'https://news.futunn.com/market/{0}?src=3'.format(i)

            urlExist = self.mongodbutil.urlIsExist(url)
            if urlExist:
                logger.info('This url:{} has existed'.format(url))
                continue

            json = {}
            header = {
                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'}
            try:
                res = requests.get(url, headers=header, timeout=60)
                res.raise_for_status()
                if res.status_code == 200:
                    soup = bs4.BeautifulSoup(res.text, 'lxml')
                    elems = soup.select('.inner')
                    json['content']  = elems[0].getText()
                    elems = soup.select('.news-title > h1')
                    json['title'] = elems[0].getText()
                    elems = soup.select('.news-title > .timeBar')

                    pos = elems[0].getText().strip().find('2')
                    json['date'] = elems[0].getText().strip()[pos:pos+16]
                    json['href'] = url
                    json['code'] = ' '
                    json['year'] = DateUtil.string_toDatetime2(json['date']).year
                    json['sourcefrom'] = 'futunn'
                    self.itemArray.append(json)

                    if len(self.get_item_array()) > 50:
                        self.mongodbutil.insertItems(self.get_item_array())
                        logger.info("store items to mongodb ...")
                        self.clear_item_array()


            except Exception as err:
                #time.sleep(4 * random.random())
                logger.warning(err)
            except requests.exceptions.ConnectTimeout as err:
                logger.warning(err)
                ret_code = -1
                ret_data = err
            except requests.exceptions.ReadTimeout as err:
                logger.warning(err)
                ret_code = -1
                ret_data = err
            except requests.exceptions.Timeout as err:
                logger.warning(err)
                ret_code = -1
                ret_data = err
            except:
                logger.warning('Unfortunitely -- An Unknow Error Happened, Please wait 3 seconds')
                time.sleep(random.random())
                ret_code = -1
                ret_data = ''
            finally:
                res.close()

        return 1, 'ok'
예제 #2
0
    def get_individual_news(self,market, code):
        ret_code = -1
        ret_data = ''
        self.itemArray = []

        url = "https://www.futunn.com/quote/stock-news?m={0}&code={1}".format(market.lower(),code.upper())

        try:
            header = {
                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36',
                'Accept': 'application/json,text/javascript,*.*;q=0.01',
                'Origin': 'https://www.futunn.com',
                'Referer': 'https://www.futunn.com/quote/stock-info?m={0}&code={1}&type=finance_analyse'.format(market.lower(),code.upper)
            }
            res = requests.get(url, headers=header)
            if res.encoding == 'ISO-8859-1':
                res.encoding = 'gbk'
            html = res.text  # .encode(res.encoding)
            res.raise_for_status()
            if res.status_code == 200:
                contentSoup = bs4.BeautifulSoup(html, 'lxml')

                elems = contentSoup.select('.ulList02 >  ul > li')

                for elem in elems:
                    json = {}
                    json['code'] = code
                    json['market']  = market
                    json['title'] = elem.select('.txt01')[0].getText()
                    json['href'] = elem.select('.txt01 > a')[0]['href']
                    json['date'] = DateUtil.string_toDatetime2(elem.select('.bar01')[0].getText().strip()[3:])
                    json['year'] = json['date'].year
                    json['sourcefrom'] = 'futunn'

                    ret, content = self.get_content(json['href'],'utf-8')

                    # if ret != -1:
                    #     time.sleep(4 * random.random())

                    if ret == 0:
                        json['content'] = content
                        self.itemArray.append(json)


                ret_code = 0
                ret_data = ''
        except Exception as err:
            # time.sleep(4 * random.random())
            logger.warning(err)
            ret_code = -1
            ret_data = err
        except requests.exceptions.ConnectTimeout as err:
            logger.warning(err)
            ret_code = -1
            ret_data = err
        except requests.exceptions.ReadTimeout as err:
            logger.warning(err)
            ret_code = -1
            ret_data = err
        except requests.exceptions.Timeout as err:
            logger.warning(err)
            ret_code = -1
            ret_data = err
        except:
            logger.warning('Unfortunitely -- An Unknow Error Happened, Please wait 3 seconds')
            time.sleep(random.random())
            ret_code = -1
            ret_data = ''
        finally:
            res.close()
        return ret_code, ret_data
예제 #3
0
    def get_live_info(self):

        ret_code = -1
        ret_data = ''
        self.itemArray = []

        lasttime = DateUtil.string_toDatetime2('2019-05-01 09:00')

        try:
            res = requests.get(self.url)
            if res.encoding == 'ISO-8859-1':
                res.encoding = 'gbk'
            html = res.text  # .encode(res.encoding)
            res.raise_for_status()
            if res.status_code == 200 :
                    contentSoup = bs4.BeautifulSoup(html, 'lxml')
                    elems = contentSoup.find_all('a', class_='news-link')

                    for elem in elems:
                        json = {}
                        json['code'] = ' '


                        newstime = elem.select('span')
                        time = newstime[len(newstime) - 1].getText()
                        json['date'] = DateUtil.string_toDatetime2(time)
                        s = json['date']

                        if s < lasttime :
                            continue
                        else:
                            lasttime = s

                        h3 = elem.select('h3')
                        json['title'] = h3[len(h3) - 1].getText()

                        logger.info("date:{},title:{}".format(s, json['title']))
                        json['href'] = elem.attrs['href']
                        json['year'] = json['date'].year
                        json['sourcefrom'] = 'futunn'
                        ret,content = self.get_content(json['href'],'utf-8')
                        # if ret != -1 :
                        #     time.sleep(4 * random.random())

                        if ret == 0 :
                            json['content'] = content
                            self.itemArray.append(json)
                        ret_code = 0
                        ret_data = ''
        except Exception as err:
            #time.sleep(4 * random.random())
            logger.warning(err)
            ret_code = -1
            ret_data = err
        except requests.exceptions.ConnectTimeout as err:
            logger.warning(err)
            ret_code = -1
            ret_data = err
        except requests.exceptions.ReadTimeout as err:
            logger.warning(err)
            ret_code = -1
            ret_data = err
        except requests.exceptions.Timeout as err:
            logger.warning(err)
            ret_code = -1
            ret_data = err
        except:
            logger.warning('Unfortunitely -- An Unknow Error Happened, Please wait 3 seconds')
            time.sleep(random.random())
            ret_code = -1
            ret_data = ''
        finally:
            res.close()
        return ret_code,ret_data
예제 #4
0
        url = str(s['href'])
        if isinstance(s['date'], str):
            date = None
            strDate = s['date']
            try:
                strDate = strDate.replace('  </a>','')
                date = DateUtil.string_toDatetime(strDate)
            except:
                print('2')


            if date is not None:
                connection.update_one({"_id": s['_id']}, {"$set": {"date": date}})
            else:
                try:
                    date = DateUtil.string_toDatetime2(strDate)

                    if date is not None:
                        connection.update_one({"_id": s['_id']}, {"$set": {"date": date}})
                    else:
                        connection.delete_one({"_id": s['_id']})
                except:
                    print('3')


        else:
            isDateCount = isDateCount+1
            if isDateCount > 10000:
                print(isDateCount)
                isDateCount = 0