Esempio n. 1
0
 def parse(self, response):
     """Fetch the newest tweet from the configured timeline and yield it as a notice item.

     Pulls a single tweet via the Twitter API, prefers the retweet's full text
     when present, shifts the UTC timestamp to UTC+8, and emits a
     SecondBaseNoticeItem.
     """
     auth = tweepy.OAuthHandler(self.consumer_key, self.consumer_secret)
     auth.set_access_token(self.access_token, self.access_token_secret)
     api = tweepy.API(auth)
     # Renamed from `list` to avoid shadowing the builtin.
     timeline = api.user_timeline(screen_name=self.tweetName, count=1, tweet_mode="extended", exclude_replies="true",
                                  include_rts=1)
     jsonData = timeline[0]._json
     try:
         # Retweets carry the untruncated text under 'retweeted_status'.
         content = jsonData['retweeted_status']['full_text']
     except KeyError:  # plain tweet, not a retweet (was a bare except)
         content = jsonData['full_text']
     title = content[0:15] + "..."
     created_at = jsonData['created_at'].replace("+0000 ", "")
     time_tuple = time.strptime(created_at, "%a %b %d %H:%M:%S %Y")
     ms = time.mktime(time_tuple) + 8 * 60 * 60  # shift to UTC+8
     dateTime = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(ms))
     Id = jsonData['id_str']
     url = "https://twitter.com/" + str(jsonData['user']['screen_name']) + "/status/" + str(Id)
     item = SecondBaseNoticeItem()
     item['name'] = 'poloniex'
     item['resource'] = 'poloniex.com'
     item['url'] = url
     item['title'] = title
     item['main'] = content
     item['time'] = dateTime
     # Bug fix: `item` was passed as a lazy %-argument with no placeholder,
     # which makes the logging call fail to format when the record is emitted.
     logging.log(logging.INFO, '[Poloniex] Get item: %s', item)
     yield item
Esempio n. 2
0
 def parse(self, response):
     """Scrape the newest Bithumb cafe post and yield it as a notice item."""
     listing = BeautifulSoup(response.text, 'html.parser')
     wrapper = listing.find('div', attrs={"id": "primary-fullwidth"})
     url = wrapper.article.div.a.get("href")  # article url
     head = {
         'User-Agent':
         "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:61.0) Gecko/20100101 Firefox/61.0"
     }
     detail_page = requests.get(url=url, headers=head, timeout=20)
     detail = BeautifulSoup(detail_page.text, 'html.parser')
     left = detail.find('div', attrs={"id": "primary-left"})
     title = left.h3.get_text().strip()  # title
     dateTime = left.find("li", attrs={"class": "posted-date"}).get_text().strip()  # date
     content = left.find("div", attrs={"class": "entry-content"}).get_text().strip()  # body
     item = SecondBaseNoticeItem()
     item['name'] = 'Bithumb'
     item['resource'] = 'bithumb.cafe'
     item['url'] = url
     item['time'] = dateTime
     item['title'] = title
     item['main'] = content
     yield item
Esempio n. 3
0
 def parse(self, response):
     """Scrape the newest Bcex announcement and yield it as a notice item."""
     # Reuse the Set-Cookie value from the seed response for follow-up requests.
     cookie = str(response.headers['Set-Cookie'], encoding="utf-8")
     print(cookie)
     Head = {
         'Cookie': cookie.replace("domain=allcoin.ca,", ""),
         'User-Agent': self.UserAgent,
     }
     listing_page = requests.get(self.url, headers=Head, verify=False)
     listing = BeautifulSoup(listing_page.text, 'html.parser')
     latest = listing.find('li', attrs={"class": "hideli"})
     url = self.mainUrl + latest.a.get("href")  # article url
     detail_page = requests.get(url, headers=Head, verify=False)
     detail = BeautifulSoup(detail_page.text, 'html.parser')
     box = detail.find('div', attrs={"class": "newsarea_box"})
     title = box.h2.get_text().strip()  # title
     dateTime = box.p.get_text().strip()  # date
     content = box.find("div", attrs={"class": "paragraph"}).get_text().strip()
     item = SecondBaseNoticeItem()
     item['name'] = 'Bcex'
     item['resource'] = 'bcex.top'
     item['url'] = url
     item['time'] = dateTime
     item['title'] = title
     item['main'] = content
     yield item
Esempio n. 4
0
    def parse_item(self, response):
        """Yield notice items for the two newest BigOne announcements.

        The list page supplies the article links; each article page is
        fetched separately for title, body and timestamp.
        """
        doc = pq(response.body.decode('utf8'))
        posts = list(doc('.article-list').items())
        # The original duplicated this entire body for posts[0] and posts[1];
        # loop instead. Debug print() scaffolding was also removed.
        for post in posts[:2]:
            item = SecondBaseNoticeItem()
            item['name'] = 'BigOne'
            item['resource'] = 'big.one'

            item['url'] = self.base_url + post('li a').attr('href')

            doc_detail = pq(requests.get(item['url']).text)
            item['title'] = doc_detail('.article-header h1').text()
            item['main'] = doc_detail('.article-body').text()

            date = doc_detail('.meta-data time').attr('datetime')
            item['time'] = utc2local(date).strftime("%Y-%m-%d %H:%M:%S")

            # Bug fix: `item` needs a %s placeholder, otherwise the logging
            # call fails to format when the record is emitted.
            logging.log(logging.DEBUG, '[BITFINE] Get item: %s', item)
            yield item
Esempio n. 5
0
 def parse(self, response):
     """Yield the newest EXX announcement as a notice item."""
     data = json.loads(response.body.decode('utf8'))
     # Renamed from `id` to avoid shadowing the builtin; 'titelList' (sic)
     # is the key the API actually returns.
     notice_id = data['datas']['titelList'][0]['id']
     url = self.detail_url.format(id=notice_id)  # build once, reuse for fetch and item
     detail_resp = requests.get(url)
     doc = pq(detail_resp.text)
     item = SecondBaseNoticeItem()
     item['name'] = 'exx'
     item['resource'] = 'exx.com'
     item['url'] = url
     title = doc('#blog h2')
     # The time node embeds a label before ':'; keep only the value part.
     item['time'] = doc('#blog .time').text().split(':', 1)[-1]
     title.remove('p')  # drop the embedded <p> before reading the heading text
     item['title'] = title.text()
     item['main'] = doc('#blog .blog-info').text()
     yield item
Esempio n. 6
0
    def parse(self, response):
        """Yield the newest gate.io announcement as a notice item."""
        listing = pq(response.body.decode('utf8'))
        newest_link = list(listing('#lcontentnews a').items())[0]
        href = newest_link.attr('href')
        detail_resp = requests.get(self.base_url + href,
                                   headers=self.headers)
        detail = pq(detail_resp.text)

        item = SecondBaseNoticeItem()
        item['name'] = 'gate'
        item['resource'] = 'gateio.io'
        item['url'] = self.base_url + href
        item['title'] = detail('.dtl-title').text()
        item['time'] = detail('.new-dtl-info span').text()
        # Strip inline styles, the share widget and list navigation from the body.
        body = detail('.dtl-content')
        body.remove('style').remove('#snsshare').remove('ul')
        item['main'] = body.text()
        yield item
Esempio n. 7
0
    def parse_notice(self, response):
        """Yield the newest OKEx announcement as a notice item."""
        listing = pq(response.body.decode('utf8'))
        newest = list(listing('.article-list li').items())[0]
        detail_url = newest('a').attr('href')
        # NOTE(review): pq() receives a URL string here — presumably pyquery
        # fetches the page itself; confirm against the pyquery version in use.
        notice_detail = pq(self.base_url + detail_url)

        item = SecondBaseNoticeItem()
        item['name'] = 'okex'
        item['resource'] = 'okex.com'
        item['url'] = self.base_url + detail_url
        published = notice_detail('.meta-data time').attr('datetime')
        item['time'] = utc2local(published).strftime("%Y-%m-%d %H:%M:%S")
        item['title'] = notice_detail('.article-title').text().replace('\n', '').replace('\'', '')
        item['main'] = notice_detail('.article-body').text()
        yield item
Esempio n. 8
0
 def parse(self, response):
     """Yield the newest Digifinex announcement as a notice item."""
     listing = BeautifulSoup(response.text, 'html.parser')
     news_list = listing.find('ul', attrs={"id": "newList"})
     url = self.mainUrl + news_list.li.a.get("href")  # article url
     detail_page = requests.get(url, verify=False)
     detail = BeautifulSoup(detail_page.text, 'html.parser')
     product = detail.find('div', attrs={"id": "product"})
     title = product.h2.get_text().strip()  # title
     dateTime = product.p.get_text().strip()  # date
     content = product.find("div", attrs={"class": "paragraph"}).get_text().strip()  # body
     item = SecondBaseNoticeItem()
     item['name'] = 'Digifinex'
     item['resource'] = 'digifinex.com'
     item['url'] = url
     item['time'] = dateTime
     item['title'] = title
     item['main'] = content
     yield item
Esempio n. 9
0
    def title_parse(self, response):
        """Parse the Bilaxy announcement list, fetch the newest article's
        detail JSON, and yield it as a notice item.

        Response text is json containing:
            {
                code: 200,
                data: [ { id: 111, title: ..., enTitle: ..., top: false,
                          createTime: 1528563029000 }, ... ],
                totalCount: 46, page: 0, msg: null, id: null,
            }
        :param response:
        :return:
        """
        l_last = json.loads(response.text)['data'][0]
        tid = l_last['id']
        item = SecondBaseNoticeItem()
        item['name'] = 'Bilaxy'
        item['resource'] = 'bilaxy.com'
        item['url'] = 'https://bilaxy.com/news/detail?id=%s' % tid
        real_url = 'https://bilaxy.com/api/v1/articleDetail?id=%s' % tid
        res_detail = requests.get(real_url).text
        detail = json.loads(res_detail)['data']['content']

        # The detail payload carries both the title and this id's content.
        item['title'] = detail['title']
        content = detail['content'].replace('<br />',
                                            '').replace('\t', '').replace(
                                                '&nbsp;', '')
        # Strip all remaining HTML tags from the body.
        item['main'] = re.compile('<[^>]+>').sub("", content)
        # createTime is in milliseconds; integer-divide like the sibling parsers.
        x = time.localtime(detail['createTime'] // 1000)
        str_time = time.strftime('%Y-%m-%d %H:%M:%S', x)
        item['time'] = str_time
        # Bug fix: `item` needs a %s placeholder, otherwise the logging call
        # fails to format when the record is emitted.
        logging.log(logging.DEBUG, '[BILAXY] Get item: %s', item)
        yield item
Esempio n. 10
0
 def detail_parse(self, response):
     """Parse the BKEX notice list JSON and yield the newest notice as an item.

     Response text is json containing:
        { msg: null, code: 0,
          data: { totalCount: 23,
                  list: [ { id: 99, title: ..., content: "<table>...",
                            createTime: 1529566192338, status: 1, ... },
                          ... ] } }
     :param response:
     :return:
     """
     l_last = json.loads(response.text)['data']['list'][0]
     tid = l_last['id']
     item = SecondBaseNoticeItem()
     item['name'] = 'bkex'
     item['resource'] = 'bkex.com'
     item['url'] = 'https://www.bkex.com/#/notice/detail/%s' % tid
     item['title'] = l_last['title']
     content = l_last['content'].replace('\t', '').replace('&nbsp;', '')
     # Strip all HTML tags from the notice body.
     item['main'] = re.compile('<[^>]+>').sub("", content)
     x = time.localtime(l_last['createTime'] // 1000)  # ms -> s
     str_time = time.strftime('%Y-%m-%d %H:%M:%S', x)
     item['time'] = str_time
     # Bug fix: `item` needs a %s placeholder, otherwise the logging call
     # fails to format when the record is emitted.
     logging.log(logging.DEBUG, '[BKEX] Get item: %s', item)
     yield item
Esempio n. 11
0
    def parse_item(self, response):
        """Yield the newest Coinw announcement as a notice item."""
        doc = pq(response.body.decode('utf8'))
        posts = doc('main div.news-list').items()
        post = list(posts)[0]

        item = SecondBaseNoticeItem()
        item['name'] = 'Coinw'
        item['resource'] = 'coinw.me'

        item['url'] = self.base_url + post('a.link-1').attr('href')

        headers = {
            "Accept":
            "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
            "Accept-Encoding":
            "gzip, deflate, br",
            "Accept-Language":
            "zh-CN,zh;q=0.9",
            "Cache-Control":
            "max-age=0",
            "Connection":
            "keep-alive",
            "Cookie":
            "__cdnuid=f8fb4bffaf1f60209e71c09fe9bcfc55; Hm_lvt_525b7a4b6599566fc46ec53565d28557=1528446808; JSESSIONID=01C71D91925EAF16C954EEBD5764F891; Hm_lpvt_525b7a4b6599566fc46ec53565d28557=1528450115",
            "Host":
            "www.coinw.me",
            "Upgrade-Insecure-Requests":
            "1",
            "User-Agent":
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36",
        }

        doc_detail = pq(requests.get(item['url'], headers=headers).text)
        item['title'] = doc_detail('.news-title h3').text()
        item['main'] = doc_detail('.news-article').text()
        date = doc_detail('div.news-title.ta-c.mb20 p span').text()
        # Bug fix: raw string — '\d' in a plain literal is an invalid escape
        # (SyntaxWarning in modern Python).
        year, mon, day, hour, mint, second = re.search(
            r'(\d+).*?(\d+).*?(\d+).*?(\d+).*?(\d+).*?(\d+)', date).groups()
        item['time'] = "%s-%s-%s %s:%s:%s" % (year, mon, day, hour, mint,
                                              second)
        # Bug fix: `item` needs a %s placeholder for the lazy logging args.
        logging.log(logging.DEBUG, '[BITFINE] Get item: %s', item)
        yield item
Esempio n. 12
0
    def parse_notice(self, response):
        """Yield the newest CoinEgg announcement as a notice item."""
        response_json = json.loads(response.body.decode('utf8'))

        if not response_json['data']:
            return

        notice = response_json['data'][0]

        item = SecondBaseNoticeItem()
        item['name'] = 'CoinEgg'
        item['resource'] = 'coinegg.com'
        item['title'] = notice['title']
        item['url'] = self.base_url + notice['category'] + str(notice['id'])

        doc_detail = pq(requests.get(item['url']).text)
        item['main'] = doc_detail('.gonggao-con').text()
        date = doc_detail('div.gonggao p.p2').text()
        # Bug fix: raw string — '\d' in a plain literal is an invalid escape
        # (SyntaxWarning in modern Python).
        year, mon, day, hour, mint, second = re.search(r'(\d+).*?(\d+).*?(\d+).*?(\d+).*?(\d+).*?(\d+)', date).groups()
        item['time'] = "%s-%s-%s %s:%s:%s" % (year, mon, day, hour, mint, second)
        yield item
Esempio n. 13
0
    def parse_item(self, response):
        """Yield the newest TOPBTC announcement as a notice item."""
        doc = pq(response.body.decode('utf8'))
        posts = doc('.panel-body .span6 a').items()
        post = list(posts)[0]
        item = SecondBaseNoticeItem()
        item['name'] = 'TOPBTC'
        item['resource'] = 'topbtc.com'

        item['url'] = self.base_url + post.attr('href')

        date = post('#ctime').text()
        # Bug fix: raw string — '\d' in a plain literal is an invalid escape
        # (SyntaxWarning in modern Python).
        year, mon, day = re.search(r'(\d+).*?(\d+).*?(\d+)', date).groups()
        item['time'] = "%s-%s-%s" % (year, mon, day)
        headers = {"Accept-Language": "zh-CN,zh;q=0.9"}
        doc_detail = pq(requests.get(item['url'], headers=headers).text)
        item['title'] = doc_detail('.span12 .panel .panel-heading span').text()
        item['main'] = doc_detail('.newsbody').text()

        # Bug fix: `item` needs a %s placeholder for the lazy logging args.
        logging.log(logging.DEBUG, '[BITFINE] Get item: %s', item)
        yield item
Esempio n. 14
0
    def parse_item(self, response):
        """Yield the newest CEX.COM announcement as a notice item."""
        doc = pq(response.body.decode('utf8'))
        posts = doc('.table').items()
        post = list(posts)[0]
        item = SecondBaseNoticeItem()
        item['name'] = 'CEX.COM'
        item['resource'] = 'cex.plus'

        item['url'] = self.base_url + post('td a.abs-hover').attr('href')

        date = post('.date').text()
        # Bug fix: raw string — '\d' in a plain literal is an invalid escape
        # (SyntaxWarning in modern Python).
        year, mon, day, hour, minit, second = re.search(
            r'(\d+).*?(\d+).*?(\d+).*?(\d+).*?(\d+).*?(\d+)', date).groups()
        item['time'] = "%s-%s-%s %s:%s:%s" % (year, mon, day, hour, minit, second)

        doc_detail = pq(requests.get(item['url']).text)
        item['title'] = doc_detail('.detail h1').text()
        item['main'] = doc_detail('.txt').text()

        # Bug fix: `item` needs a %s placeholder for the lazy logging args.
        logging.log(logging.DEBUG, '[BITFINE] Get item: %s', item)
        yield item
Esempio n. 15
0
    def parse_item(self, response):
        """Yield the newest ZB announcement as a notice item."""
        doc = pq(response.body.decode('utf8'))
        posts = doc('.cbp_tmtimeline li').items()
        post = list(posts)[0]
        item = SecondBaseNoticeItem()
        item['name'] = 'ZB'
        item['resource'] = 'zb.com'

        item['url'] = self.base_url + post('.envor-post header h3 a').attr(
            'href')

        item['title'] = post('.envor-post header h3 a').text()
        headers = {
            "Accept":
            "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
            "Accept-Encoding":
            "gzip, deflate, br",
            "Accept-Language":
            "zh-CN,zh;q=0.9",
            "Connection":
            "keep-alive",
            "Host":
            "www.bitkk.com",
            "Upgrade-Insecure-Requests":
            "1",
            "User-Agent":
            "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36",
        }
        doc_detail = pq(requests.get(item['url'], headers=headers).text)
        item['main'] = doc_detail('.page-content').text()

        # Detail page shows only date + hour:minute; seconds are fixed to 00.
        date = doc_detail('p.align-center span').text()
        print(date)
        year, mon, day, hour, minit = re.search(
            r'(\d+).*?(\d+).*?(\d+).*?(\d+).*?(\d+)', date).groups()

        item['time'] = "%s-%s-%s %s:%s:00" % (year, mon, day, hour, minit)

        # Bug fix: `item` needs a %s placeholder for the lazy logging args.
        logging.log(logging.DEBUG, '[BITFINE] Get item: %s', item)
        yield item
Esempio n. 16
0
 def parse(self, response):
     """Yield the newest Oex announcement as a notice item."""
     listing = BeautifulSoup(response.text, 'html.parser')
     news = listing.find('div', attrs={"id": "indexnewsList"})
     url = self.base_url + news.p.a.get("href")  # article url
     detail_page = requests.get(url, verify=False, timeout=20)
     detail = BeautifulSoup(detail_page.text, 'html.parser')
     body = detail.find('div', attrs={"class": "article-leftbg"})
     title = body.h2.get_text().strip()  # title
     info = body.find("div", attrs={"class": "article-info"})
     dateTime = info.span.get_text().strip().split(": ")[1]  # date, after the label
     content = body.find("div", attrs={"class": "article-content"}).get_text().strip()  # body text
     item = SecondBaseNoticeItem()
     item['name'] = 'Oex'
     item['resource'] = 'oex.top'
     item['url'] = url
     item['time'] = dateTime
     item['title'] = title
     item['main'] = content
     yield item
Esempio n. 17
0
 def parse(self, response):
     """Yield the newest Hitbtc blog post as a notice item."""
     listing = BeautifulSoup(response.text, 'html.parser')
     preview = listing.find('article')
     url = preview.h2.a.get("href")  # article url
     detail_page = requests.get(url, headers=self.Head, verify=False, timeout=20)
     detail = BeautifulSoup(detail_page.text, 'html.parser')
     post = detail.find('article')
     title = post.h1.get_text().strip()  # title
     dateTime = post.time.get_text().strip()  # date
     content = post.find("div", attrs={"class": "entry-content"}).get_text().strip()  # body
     item = SecondBaseNoticeItem()
     item['name'] = 'Hitbtc'
     item['resource'] = 'hitbtc.com'
     item['url'] = url
     item['time'] = dateTime
     item['title'] = title
     item['main'] = content
     yield item
Esempio n. 18
0
    def parse_notice(self, response):
        """Yield the newest Upbit notice as a notice item."""
        response_json = json.loads(response.body.decode('utf8'))

        if not response_json['success']:
            return

        notice = response_json['data']['list'][0]
        item = SecondBaseNoticeItem()
        item['name'] = 'Upbit'
        item['resource'] = 'upbit.com'
        item['title'] = notice['title']
        item['url'] = self.base_url + str(notice['id'])
        item['time'] = han2zhong(notice['updated_at']).strftime("%Y-%m-%d %H:%M:%S")
        response_str = requests.get(self.info_url + str(notice['id']), verify=False)
        content = json.loads(response_str.content.decode('utf-8'))
        if not content['success']:
            return
        # Drop anchor tags (opening and closing) from the body, keeping their text.
        body = content['data']['body']
        stripped = re.sub(r"<a.*?>", '', body)
        stripped = re.sub(r"</a>", '', stripped)
        item['main'] = stripped
        yield item
Esempio n. 19
0
 def parse(self, response):
     """Yield the newest Coinex announcement as a notice item."""
     listing = BeautifulSoup(response.text, 'html.parser')
     link = listing.find('a', attrs={"class": "msgLink"})
     url = self.mainUrl + link.get("href")  # article url
     detail_page = requests.get(url, headers=self.Head, verify=False, timeout=20)
     detail = BeautifulSoup(detail_page.text, 'html.parser')
     container = detail.find('div', attrs={"class": "msgContainer"})
     title = container.h3.get_text().strip()  # title
     dateTime = container.find("p", attrs={"class": "msgTime"}).get_text().strip()  # date
     content = container.find("div", attrs={"class": "via-article"}).get_text().strip()  # body
     item = SecondBaseNoticeItem()
     item['name'] = 'Coinex'
     item['resource'] = 'coinex.com'
     item['url'] = url
     item['time'] = dateTime
     item['title'] = title
     item['main'] = content
     yield item
Esempio n. 20
0
 def parse(self, response):
     """Yield the newest Coinbase blog post as a notice item."""
     soup = BeautifulSoup(response.text, 'html.parser')
     div = soup.find('div', attrs={"class": "u-paddingTop30"})
     url = div.a.get("href")  # article url
     dateTime = div.time.get("datetime").strip()  # publish date
     html = requests.get(url, verify=False, timeout=20)
     soup = BeautifulSoup(html.text, 'html.parser')
     div = soup.find('div', attrs={"class": "postArticle-content"})
     title = div.h1.get_text().strip()  # title
     # Idiom fix: join paragraph texts in one pass instead of quadratic
     # string concatenation in a loop.
     content = "".join(
         c.get_text().strip() for c in div.findAll("p", attrs={"class": "graf--p"}))
     item = SecondBaseNoticeItem()
     item['name'] = 'Coinbase'
     item['resource'] = 'coinbase.com'
     item['url'] = url
     item['time'] = dateTime
     item['title'] = title
     item['main'] = content
     yield item
Esempio n. 21
0
    def parse(self, response):
        """Yield the newest Binance announcement as a notice item.

        Listing announcements (title contains '上线'/'上市') are additionally
        POSTed to the remote filter API with the parsed coin name and
        go-live time.
        """
        doc = pq(response.body.decode('utf8'))
        notice = list(doc('.article-list li').items())[0]
        detail_url = notice('a').attr('href')
        notice_detail = pq(self.base_url + detail_url)

        item = SecondBaseNoticeItem()
        item['name'] = 'binance'
        item['resource'] = 'binance.com'
        item['url'] = self.base_url + detail_url
        date = notice_detail('.meta-data time').attr('datetime')
        print(date)

        item['time'] = utc2local(date).strftime("%Y-%m-%d %H:%M:%S")
        title = notice_detail('.article-title').text().replace('\n', '').replace('\'', '')
        item['title'] = title
        details = notice_detail('.article-body').text()
        item['main'] = details
        ls = ['上线', '上市']
        for l in ls:
            if l in title:
                # Bug fix: raw string ('\d' in a plain literal warns in
                # modern Python).
                sTime = re.search(r'将于(\d+)年(\d+)月(\d+)日(.*?)(\d+):(\d+)(.*?)上线(.*?),', details)
                if sTime is None:
                    # Bug fix: the original raised AttributeError when the
                    # announcement wording didn't match, losing the item.
                    break
                coinName = sTime.group(8)
                coinTime = "%s-%s-%s %s-%s-00" % (sTime.group(1), sTime.group(2), sTime.group(3), sTime.group(5), sTime.group(6))
                data = {
                    "shop": 'binance',
                    "coinName": coinName,
                    "dateTime": coinTime,
                    "content": details,
                }
                a = requests.post(
                    'http://47.75.122.224/filterApi.php?insertTweet=True', data=data
                )
                print(a.text)
                break
        yield item
Esempio n. 22
0
 def detail_parse(self, response):
     """Parse the Btcdo notice list JSON and yield the newest notice as an item.

     Response text is json containing:
        [ { "id": 100114, "createdAt": 1529136845456,
            "updatedAt": 1529171287662, "columnId": 5, "languageId": 1,
            "beginTime": 1529107200000, "endTime": 1556668800000,
            "sortsId": 9945, "userId": 100020,
            "content": "<p style=...>...", "title": "...", "status": 1 } ]
     :param response:
     :return:
     """
     l_last = json.loads(response.text)[0]
     tid = l_last['id']
     item = SecondBaseNoticeItem()
     item['name'] = 'btcdo'
     item['resource'] = 'btcdo.com'
     item['url'] = 'https://www.btcdo.com/index/notice/noticeDetail?id=%s' % tid
     item['title'] = l_last['title']
     content = l_last['content'].replace('\t', '').replace('&nbsp;', '')
     # Strip all HTML tags from the notice body.
     item['main'] = re.compile('<[^>]+>').sub("", content)
     x = time.localtime(l_last['beginTime'] // 1000)  # ms -> s
     str_time = time.strftime('%Y-%m-%d %H:%M:%S', x)
     item['time'] = str_time
     # Bug fix: `item` needs a %s placeholder, otherwise the logging call
     # fails to format when the record is emitted.
     logging.log(logging.DEBUG, '[BTCDO] Get item: %s', item)
     yield item
Esempio n. 23
0
    def parse_notice(self, response):
        """Yield two HADAX notice items: the newest pinned and the newest
        unpinned announcement.

        The original duplicated the entire body for the pinned/unpinned
        cases; this version loops over both. Debug prints were removed.
        """
        response_json = json.loads(response.body.decode('utf8'))

        if not response_json['success']:
            return

        notices = response_json['data']['items']

        # One pass for pinned (topNotice) notices, one for the rest.
        for want_top in (True, False):
            matching = [n for n in notices if bool(n['topNotice']) == want_top]
            if not matching:
                # Robustness fix: the original raised IndexError on an
                # empty sub-list.
                continue
            notice = matching[0]

            item = SecondBaseNoticeItem()
            item['name'] = 'HADAX'
            item['resource'] = 'hadax.com'
            item['title'] = notice['title']
            item['url'] = self.base_url + str(notice['id'])
            timestamp = int(notice['created'] / 1000)  # ms -> s
            item['time'] = time.strftime('%Y-%m-%d %H:%M:%S',
                                         time.localtime(timestamp))

            response_str = requests.get(self.info_url + str(notice['id']))
            content = json.loads(response_str.content.decode('utf-8'))
            if not content['success']:
                # Robustness fix: the original returned here, silently
                # dropping the second item as well; skip just this one.
                continue
            item['main'] = pq(content['data']['content']).text()
            yield item
Esempio n. 24
0
    def parse_notice(self, response):
        """Yield two Huobi notice items: the newest pinned and the newest
        unpinned announcement.

        Listing announcements (title contains '上线'/'全球首发') are
        additionally POSTed to the remote filter API with the parsed coin
        name and go-live time. The original duplicated the entire body for
        the pinned/unpinned cases; this version loops over both and moves
        the filter-API reporting into a private helper. Debug prints were
        removed.
        """
        response_json = json.loads(response.body.decode('utf8'))
        if not response_json['success']:
            return

        notices = response_json['data']['items']

        # One pass for pinned (topNotice) notices, one for the rest.
        for want_top in (True, False):
            matching = [n for n in notices if bool(n['topNotice']) == want_top]
            if not matching:
                # Robustness fix: the original raised IndexError on an
                # empty sub-list.
                continue
            notice = matching[0]

            item = SecondBaseNoticeItem()
            item['name'] = 'huobipro'
            item['resource'] = 'huobipro.com'
            item['title'] = notice['title']
            item['url'] = self.base_url + str(notice['id'])
            timestamp = int(notice['created'] / 1000)  # ms -> s
            item['time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(timestamp))

            response_str = requests.get(self.notice_detail_url + str(notice['id']))
            content = json.loads(response_str.content.decode('utf-8'))
            if not content['success']:
                # Robustness fix: the original returned here, silently
                # dropping the second item as well; skip just this one.
                continue
            details = pq(content['data']['content']).text()
            item['main'] = details

            self._report_listing(notice['title'], details)
            yield item

    def _report_listing(self, title, details):
        """POST a listing announcement ('上线'/'全球首发') to the filter API.

        No-op when the title is not a listing announcement or the wording
        doesn't match the expected pattern.
        """
        for keyword in ('上线', '全球首发'):
            if keyword in title:
                c = title.split(keyword)
                sTime = re.search(r'(\d+)月(\d+)日(\d+):(\d+)', c[0])
                if sTime is None:
                    # Robustness fix: the original raised AttributeError here.
                    return
                coinName = c[1]
                coinTime = "2018-%s-%s %s-%s-00" % (sTime.group(1), sTime.group(2), sTime.group(3), sTime.group(4))
                data = {
                    "shop": 'huobipro',
                    "coinName": coinName,
                    "dateTime": coinTime,
                    "content": details,
                }
                requests.post(
                    'http://47.75.122.224/filterApi.php?insertTweet=True', data=data
                )
                return