def parse(self, response):
    # Authenticate against the Twitter API and pull the most recent tweet.
    auth = tweepy.OAuthHandler(self.consumer_key, self.consumer_secret)
    auth.set_access_token(self.access_token, self.access_token_secret)
    api = tweepy.API(auth)
    timeline = api.user_timeline(screen_name=self.tweetName, count=1,
                                 tweet_mode="extended",
                                 exclude_replies=True, include_rts=1)
    jsonData = timeline[0]._json
    try:
        # Retweets keep the full text on the original status.
        content = jsonData['retweeted_status']['full_text']
    except KeyError:
        content = jsonData['full_text']
    title = content[0:15] + "..."
    # created_at looks like "Wed Jun 20 13:08:45 +0000 2018":
    # drop the zone marker, parse, then shift to UTC+8.
    created_at = jsonData['created_at'].replace("+0000 ", "")
    time_tuple = time.strptime(created_at, "%a %b %d %H:%M:%S %Y")
    ts = time.mktime(time_tuple) + 8 * 60 * 60
    dateTime = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(ts))
    tweet_id = jsonData['id_str']
    url = "https://twitter.com/" + jsonData['user']['screen_name'] + "/status/" + tweet_id
    item = SecondBaseNoticeItem()
    item['name'] = 'poloniex'
    item['resource'] = 'poloniex.com'
    item['url'] = url
    item['title'] = title
    item['main'] = content
    item['time'] = dateTime
    logging.info('[Poloniex] Get item: %s', item)
    yield item
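# Every spider in this section fills a SecondBaseNoticeItem, which is defined
# elsewhere in the project. A minimal sketch consistent with the six fields
# used here (where exactly it lives in the items module is an assumption):

import scrapy

class SecondBaseNoticeItem(scrapy.Item):
    name = scrapy.Field()      # exchange name, e.g. 'poloniex'
    resource = scrapy.Field()  # source domain, e.g. 'poloniex.com'
    url = scrapy.Field()       # announcement URL
    title = scrapy.Field()     # announcement title
    main = scrapy.Field()      # announcement body text
    time = scrapy.Field()      # formatted as 'YYYY-MM-DD HH:MM:SS'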
def parse(self, response):
    soup = BeautifulSoup(response.text, 'html.parser')
    div = soup.find('div', attrs={"id": "primary-fullwidth"})
    url = div.article.div.a.get("href")  # article URL
    head = {
        'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:61.0) "
                      "Gecko/20100101 Firefox/61.0"
    }
    html = requests.get(url=url, headers=head, timeout=20)
    soup = BeautifulSoup(html.text, 'html.parser')
    div = soup.find('div', attrs={"id": "primary-left"})
    title = div.h3.get_text().strip()  # title
    dateTime = div.find("li", attrs={"class": "posted-date"}).get_text().strip()  # date
    content = div.find("div", attrs={"class": "entry-content"}).get_text().strip()  # body
    item = SecondBaseNoticeItem()
    item['name'] = 'Bithumb'
    item['resource'] = 'bithumb.cafe'
    item['url'] = url
    item['time'] = dateTime
    item['title'] = title
    item['main'] = content
    yield item
def parse(self, response):
    # The site sets a session cookie on the listing response; reuse it for
    # the follow-up requests, dropping the foreign domain attribute.
    cookie = str(response.headers['Set-Cookie'], encoding="utf-8")
    head = {
        'Cookie': cookie.replace("domain=allcoin.ca,", ""),
        'User-Agent': self.UserAgent,
    }
    html = requests.get(self.url, headers=head, verify=False)
    soup = BeautifulSoup(html.text, 'html.parser')
    li = soup.find('li', attrs={"class": "hideli"})
    url = self.mainUrl + li.a.get("href")  # article URL
    html = requests.get(url, headers=head, verify=False)
    soup = BeautifulSoup(html.text, 'html.parser')
    div = soup.find('div', attrs={"class": "newsarea_box"})
    title = div.h2.get_text().strip()  # title
    dateTime = div.p.get_text().strip()  # date
    content = div.find("div", attrs={"class": "paragraph"}).get_text().strip()  # body
    item = SecondBaseNoticeItem()
    item['name'] = 'Bcex'
    item['resource'] = 'bcex.top'
    item['url'] = url
    item['time'] = dateTime
    item['title'] = title
    item['main'] = content
    yield item
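# Several of these spiders call requests with verify=False, which makes
# urllib3 emit an InsecureRequestWarning on every request. If the log noise
# matters, it can be silenced once at import time:

import urllib3

urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)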
def parse_item(self, response):
    doc = pq(response.body.decode('utf8'))
    posts = list(doc('.article-list').items())
    # Emit the two most recent announcements.
    for post in posts[:2]:
        item = SecondBaseNoticeItem()
        item['name'] = 'BigOne'
        item['resource'] = 'big.one'
        item['url'] = self.base_url + post('li a').attr('href')
        doc_detail = pq(requests.get(item['url']).text)
        item['title'] = doc_detail('.article-header h1').text()
        item['main'] = doc_detail('.article-body').text()
        date = doc_detail('.meta-data time').attr('datetime')
        item['time'] = utc2local(date).strftime("%Y-%m-%d %H:%M:%S")
        logging.debug('[BIGONE] Get item: %s', item)
        yield item
def parse(self, response):
    data = json.loads(response.body.decode('utf8'))
    notice_id = data['datas']['titelList'][0]['id']
    detail_resp = requests.get(self.detail_url.format(id=notice_id))
    doc = pq(detail_resp.text)
    item = SecondBaseNoticeItem()
    item['name'] = 'exx'
    item['resource'] = 'exx.com'
    item['url'] = self.detail_url.format(id=notice_id)
    title = doc('#blog h2')
    # Strip the leading label (text before the first colon) from the time node.
    item['time'] = doc('#blog .time').text().split(':', 1)[-1]
    title.remove('p')  # drop the nested <p> before reading the heading text
    item['title'] = title.text()
    item['main'] = doc('#blog .blog-info').text()
    yield item
def parse(self, response):
    doc = pq(response.body.decode('utf8'))
    notice_url = list(doc('#lcontentnews a').items())[0]
    detail_resp = requests.get(self.base_url + notice_url.attr('href'),
                               headers=self.headers)
    doc = pq(detail_resp.text)
    item = SecondBaseNoticeItem()
    item['name'] = 'gate'
    item['resource'] = 'gateio.io'
    item['url'] = self.base_url + notice_url.attr('href')
    item['title'] = doc('.dtl-title').text()
    item['time'] = doc('.new-dtl-info span').text()
    main = doc('.dtl-content')
    main.remove('style').remove('#snsshare').remove('ul')
    item['main'] = main.text()
    yield item
def parse_notice(self, response):
    doc = pq(response.body.decode('utf8'))
    notice = list(doc('.article-list li').items())[0]
    detail_url = notice('a').attr('href')
    # pyquery fetches the detail page itself when handed a URL.
    notice_detail = pq(self.base_url + detail_url)
    item = SecondBaseNoticeItem()
    item['name'] = 'okex'
    item['resource'] = 'okex.com'
    item['url'] = self.base_url + detail_url
    date = notice_detail('.meta-data time').attr('datetime')
    item['time'] = utc2local(date).strftime("%Y-%m-%d %H:%M:%S")
    item['title'] = notice_detail('.article-title').text().replace('\n', '').replace('\'', '')
    item['main'] = notice_detail('.article-body').text()
    yield item
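# utc2local (used by the okex, BigOne, and binance spiders) is defined
# elsewhere in the project. A minimal sketch, assuming the <time datetime="...">
# attribute is an ISO-8601 UTC timestamp such as '2018-06-20T08:00:00Z':

import calendar
from datetime import datetime

def utc2local(date):
    # Parse the UTC string (the exact format is an assumption) ...
    utc = datetime.strptime(date, '%Y-%m-%dT%H:%M:%SZ')
    # ... then shift it by the machine's current UTC offset.
    offset = datetime.fromtimestamp(calendar.timegm(utc.timetuple())) - utc
    return utc + offset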
def parse(self, response):
    soup = BeautifulSoup(response.text, 'html.parser')
    ul = soup.find('ul', attrs={"id": "newList"})
    url = self.mainUrl + ul.li.a.get("href")  # article URL
    html = requests.get(url, verify=False)
    soup = BeautifulSoup(html.text, 'html.parser')
    div = soup.find('div', attrs={"id": "product"})
    title = div.h2.get_text().strip()  # title
    dateTime = div.p.get_text().strip()  # date
    content = div.find("div", attrs={"class": "paragraph"}).get_text().strip()  # body
    item = SecondBaseNoticeItem()
    item['name'] = 'Digifinex'
    item['resource'] = 'digifinex.com'
    item['url'] = url
    item['time'] = dateTime
    item['title'] = title
    item['main'] = content
    yield item
def title_parse(self, response):
    """
    The list response is JSON of the form:

    {
        "code": 200,
        "data": [
            {
                "id": 111,
                "title": "gochain(GO)现已上线币系",
                "enTitle": "gochain(GO) is listed on Bilaxy",
                "top": false,
                "createTime": 1528563029000
            }
        ],
        "totalCount": 46,
        "page": 0,
        "msg": null,
        "id": null
    }
    """
    l_last = json.loads(response.text)['data'][0]
    tid = l_last['id']
    item = SecondBaseNoticeItem()
    item['name'] = 'Bilaxy'
    item['resource'] = 'bilaxy.com'
    item['url'] = 'https://bilaxy.com/news/detail?id=%s' % tid
    real_url = 'https://bilaxy.com/api/v1/articleDetail?id=%s' % tid
    res_detail = requests.get(real_url).text
    # The detail payload carries both the title and the body for this id.
    detail = json.loads(res_detail)['data']['content']
    item['title'] = detail['title']
    content = detail['content'].replace('<br />', '').replace('\t', '').replace(' ', '')
    item['main'] = re.compile('<[^>]+>').sub("", content)
    x = time.localtime(detail['createTime'] / 1000)
    item['time'] = time.strftime('%Y-%m-%d %H:%M:%S', x)
    logging.debug('[BILAXY] Get item: %s', item)
    yield item
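# createTime is a millisecond epoch, hence the division by 1000 before
# time.localtime. With the sample value from the docstring:
#
#   >>> time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(1528563029000 / 1000))
#   '2018-06-09 16:50:29'
#
# (output shown for a UTC machine; the local timezone shifts it).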
def detail_parse(self, response):
    """
    The notice list response is JSON of the form:

    {
        "msg": null,
        "code": 0,
        "data": {
            "totalCount": 23,
            "list": [
                {
                    "id": 99,
                    "userName": null,
                    "language": 1,
                    "title": "关于币客BKEX 6月20日BKK赠送和分红公告",
                    "content": "<table> <tbody> <tr> <td width=\"194\"> <p>昨日平台总成交额</p>...",
                    "sorting": 1,
                    "createTime": 1529566192338,
                    "updateTime": null,
                    "status": 1,
                    "tag": null,
                    "readednum": 0
                },
                ...
            ]
        }
    }
    """
    l_last = json.loads(response.text)['data']['list'][0]
    tid = l_last['id']
    item = SecondBaseNoticeItem()
    item['name'] = 'bkex'
    item['resource'] = 'bkex.com'
    item['url'] = 'https://www.bkex.com/#/notice/detail/%s' % tid
    item['title'] = l_last['title']
    content = l_last['content'].replace('\t', '').replace(' ', '')
    # Strip anything that looks like an HTML tag.
    item['main'] = re.compile('<[^>]+>').sub("", content)
    x = time.localtime(l_last['createTime'] // 1000)
    item['time'] = time.strftime('%Y-%m-%d %H:%M:%S', x)
    logging.debug('[BKEX] Get item: %s', item)
    yield item
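# The regex-based tag stripping above (also used by the Bilaxy and btcdo
# spiders) is a lexical strip, not an HTML parser. A quick illustration on a
# made-up snippet:
#
#   >>> re.compile('<[^>]+>').sub("", '<p>BKK <b>分红</b>公告</p>')
#   'BKK 分红公告'
#
# Entities such as &nbsp; survive the regex, which is why the code also strips
# stray whitespace characters beforehand.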
def parse_item(self, response):
    doc = pq(response.body.decode('utf8'))
    posts = doc('main div.news-list').items()
    post = list(posts)[0]
    item = SecondBaseNoticeItem()
    item['name'] = 'Coinw'
    item['resource'] = 'coinw.me'
    item['url'] = self.base_url + post('a.link-1').attr('href')
    # NB: the request cookie is hard-coded and will eventually expire.
    headers = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
        "Accept-Encoding": "gzip, deflate, br",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Cache-Control": "max-age=0",
        "Connection": "keep-alive",
        "Cookie": "__cdnuid=f8fb4bffaf1f60209e71c09fe9bcfc55; Hm_lvt_525b7a4b6599566fc46ec53565d28557=1528446808; JSESSIONID=01C71D91925EAF16C954EEBD5764F891; Hm_lpvt_525b7a4b6599566fc46ec53565d28557=1528450115",
        "Host": "www.coinw.me",
        "Upgrade-Insecure-Requests": "1",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36",
    }
    doc_detail = pq(requests.get(item['url'], headers=headers).text)
    item['title'] = doc_detail('.news-title h3').text()
    item['main'] = doc_detail('.news-article').text()
    date = doc_detail('div.news-title.ta-c.mb20 p span').text()
    year, mon, day, hour, minute, second = re.search(
        r'(\d+).*?(\d+).*?(\d+).*?(\d+).*?(\d+).*?(\d+)', date).groups()
    item['time'] = "%s-%s-%s %s:%s:%s" % (year, mon, day, hour, minute, second)
    logging.debug('[COINW] Get item: %s', item)
    yield item
def parse_notice(self, response):
    response_json = json.loads(response.body.decode('utf8'))
    if not response_json['data']:
        return
    notice = response_json['data'][0]
    item = SecondBaseNoticeItem()
    item['name'] = 'CoinEgg'
    item['resource'] = 'coinegg.com'
    item['title'] = notice['title']
    item['url'] = self.base_url + notice['category'] + str(notice['id'])
    doc_detail = pq(requests.get(item['url']).text)
    item['main'] = doc_detail('.gonggao-con').text()
    date = doc_detail('div.gonggao p.p2').text()
    year, mon, day, hour, minute, second = re.search(
        r'(\d+).*?(\d+).*?(\d+).*?(\d+).*?(\d+).*?(\d+)', date).groups()
    item['time'] = "%s-%s-%s %s:%s:%s" % (year, mon, day, hour, minute, second)
    yield item
def parse_item(self, response):
    doc = pq(response.body.decode('utf8'))
    posts = doc('.panel-body .span6 a').items()
    post = list(posts)[0]
    item = SecondBaseNoticeItem()
    item['name'] = 'TOPBTC'
    item['resource'] = 'topbtc.com'
    item['url'] = self.base_url + post.attr('href')
    date = post('#ctime').text()
    year, mon, day = re.search(r'(\d+).*?(\d+).*?(\d+)', date).groups()
    item['time'] = "%s-%s-%s" % (year, mon, day)
    headers = {"Accept-Language": "zh-CN,zh;q=0.9"}
    doc_detail = pq(requests.get(item['url'], headers=headers).text)
    item['title'] = doc_detail('.span12 .panel .panel-heading span').text()
    item['main'] = doc_detail('.newsbody').text()
    logging.debug('[TOPBTC] Get item: %s', item)
    yield item
def parse_item(self, response):
    doc = pq(response.body.decode('utf8'))
    posts = doc('.table').items()
    post = list(posts)[0]
    item = SecondBaseNoticeItem()
    item['name'] = 'CEX.COM'
    item['resource'] = 'cex.plus'
    item['url'] = self.base_url + post('td a.abs-hover').attr('href')
    date = post('.date').text()
    year, mon, day, hour, minute, second = re.search(
        r'(\d+).*?(\d+).*?(\d+).*?(\d+).*?(\d+).*?(\d+)', date).groups()
    item['time'] = "%s-%s-%s %s:%s:%s" % (year, mon, day, hour, minute, second)
    doc_detail = pq(requests.get(item['url']).text)
    item['title'] = doc_detail('.detail h1').text()
    item['main'] = doc_detail('.txt').text()
    logging.debug('[CEX] Get item: %s', item)
    yield item
def parse_item(self, response):
    doc = pq(response.body.decode('utf8'))
    posts = doc('.cbp_tmtimeline li').items()
    post = list(posts)[0]
    item = SecondBaseNoticeItem()
    item['name'] = 'ZB'
    item['resource'] = 'zb.com'
    item['url'] = self.base_url + post('.envor-post header h3 a').attr('href')
    item['title'] = post('.envor-post header h3 a').text()
    headers = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
        "Accept-Encoding": "gzip, deflate, br",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Connection": "keep-alive",
        "Host": "www.bitkk.com",
        "Upgrade-Insecure-Requests": "1",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36",
    }
    doc_detail = pq(requests.get(item['url'], headers=headers).text)
    item['main'] = doc_detail('.page-content').text()
    date = doc_detail('p.align-center span').text()
    year, mon, day, hour, minute = re.search(
        r'(\d+).*?(\d+).*?(\d+).*?(\d+).*?(\d+)', date).groups()
    item['time'] = "%s-%s-%s %s:%s:00" % (year, mon, day, hour, minute)
    logging.debug('[ZB] Get item: %s', item)
    yield item
def parse(self, response):
    soup = BeautifulSoup(response.text, 'html.parser')
    div = soup.find('div', attrs={"id": "indexnewsList"})
    url = self.base_url + div.p.a.get("href")  # article URL
    html = requests.get(url, verify=False, timeout=20)
    soup = BeautifulSoup(html.text, 'html.parser')
    div = soup.find('div', attrs={"class": "article-leftbg"})
    title = div.h2.get_text().strip()  # title
    dateTime = div.find("div", attrs={"class": "article-info"}).span.get_text().strip().split(": ")[1]  # date
    content = div.find("div", attrs={"class": "article-content"}).get_text().strip()  # body
    item = SecondBaseNoticeItem()
    item['name'] = 'Oex'
    item['resource'] = 'oex.top'
    item['url'] = url
    item['time'] = dateTime
    item['title'] = title
    item['main'] = content
    yield item
def parse(self, response):
    soup = BeautifulSoup(response.text, 'html.parser')
    article = soup.find('article')
    url = article.h2.a.get("href")  # article URL
    html = requests.get(url, headers=self.Head, verify=False, timeout=20)
    soup = BeautifulSoup(html.text, 'html.parser')
    article = soup.find('article')
    title = article.h1.get_text().strip()  # title
    dateTime = article.time.get_text().strip()  # date
    content = article.find("div", attrs={"class": "entry-content"}).get_text().strip()  # body
    item = SecondBaseNoticeItem()
    item['name'] = 'Hitbtc'
    item['resource'] = 'hitbtc.com'
    item['url'] = url
    item['time'] = dateTime
    item['title'] = title
    item['main'] = content
    yield item
def parse_notice(self, response):
    response_json = json.loads(response.body.decode('utf8'))
    if not response_json['success']:
        return
    notice = response_json['data']['list'][0]
    item = SecondBaseNoticeItem()
    item['name'] = 'Upbit'
    item['resource'] = 'upbit.com'
    item['title'] = notice['title']
    item['url'] = self.base_url + str(notice['id'])
    item['time'] = han2zhong(notice['updated_at']).strftime("%Y-%m-%d %H:%M:%S")
    response_str = requests.get(self.info_url + str(notice['id']), verify=False)
    content = json.loads(response_str.content.decode('utf-8'))
    if not content['success']:
        return
    body = content['data']['body']
    # Strip the anchor tags but keep their link text.
    result = re.sub(r"<a.*?>", '', body)
    result = re.sub(r"</a>", '', result)
    item['main'] = result
    yield item
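# han2zhong is defined elsewhere in the project. Going by the name (han =
# Korean, zhong = Chinese) and Upbit being a Korean exchange, it presumably
# shifts a KST timestamp to China Standard Time. A sketch under those
# assumptions, including the assumed input format:

from datetime import datetime, timedelta

def han2zhong(date):
    # Parse the leading 'YYYY-MM-DDTHH:MM:SS' part (format is an assumption).
    kst = datetime.strptime(date[:19], '%Y-%m-%dT%H:%M:%S')
    # KST (UTC+9) is one hour ahead of CST (UTC+8).
    return kst - timedelta(hours=1)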
def parse(self, response):
    soup = BeautifulSoup(response.text, 'html.parser')
    a = soup.find('a', attrs={"class": "msgLink"})
    url = self.mainUrl + a.get("href")  # article URL
    html = requests.get(url, headers=self.Head, verify=False, timeout=20)
    soup = BeautifulSoup(html.text, 'html.parser')
    div = soup.find('div', attrs={"class": "msgContainer"})
    title = div.h3.get_text().strip()  # title
    dateTime = div.find("p", attrs={"class": "msgTime"}).get_text().strip()  # date
    content = div.find("div", attrs={"class": "via-article"}).get_text().strip()  # body
    item = SecondBaseNoticeItem()
    item['name'] = 'Coinex'
    item['resource'] = 'coinex.com'
    item['url'] = url
    item['time'] = dateTime
    item['title'] = title
    item['main'] = content
    yield item
def parse(self, response):
    soup = BeautifulSoup(response.text, 'html.parser')
    div = soup.find('div', attrs={"class": "u-paddingTop30"})
    url = div.a.get("href")  # article URL, e.g. https://blog.coinbase.com/announcing-...
    dateTime = div.time.get("datetime").strip()  # date
    html = requests.get(url, verify=False, timeout=20)
    soup = BeautifulSoup(html.text, 'html.parser')
    div = soup.find('div', attrs={"class": "postArticle-content"})
    title = div.h1.get_text().strip()  # title
    # Concatenate the article paragraphs into the body text.
    content = ""
    for c in div.findAll("p", attrs={"class": "graf--p"}):
        content = content + c.get_text().strip()
    item = SecondBaseNoticeItem()
    item['name'] = 'Coinbase'
    item['resource'] = 'coinbase.com'
    item['url'] = url
    item['time'] = dateTime
    item['title'] = title
    item['main'] = content
    yield item
def parse(self, response):
    doc = pq(response.body.decode('utf8'))
    notice = list(doc('.article-list li').items())[0]
    detail_url = notice('a').attr('href')
    # pyquery fetches the detail page itself when handed a URL.
    notice_detail = pq(self.base_url + detail_url)
    item = SecondBaseNoticeItem()
    item['name'] = 'binance'
    item['resource'] = 'binance.com'
    item['url'] = self.base_url + detail_url
    date = notice_detail('.meta-data time').attr('datetime')
    item['time'] = utc2local(date).strftime("%Y-%m-%d %H:%M:%S")
    title = notice_detail('.article-title').text().replace('\n', '').replace('\'', '')
    item['title'] = title
    details = notice_detail('.article-body').text()
    item['main'] = details
    # Listing keywords: '上线' (goes live) and '上市' (gets listed).
    for keyword in ('上线', '上市'):
        if keyword in title:
            # Pull the listing date/time and coin name out of the body,
            # then report the listing to the internal filter API.
            sTime = re.search(r'将于(\d+)年(\d+)月(\d+)日(.*?)(\d+):(\d+)(.*?)上线(.*?),', details)
            coinName = sTime.group(8)
            coinTime = "%s-%s-%s %s:%s:00" % (sTime.group(1), sTime.group(2),
                                              sTime.group(3), sTime.group(5),
                                              sTime.group(6))
            requests.post(
                'http://47.75.122.224/filterApi.php?insertTweet=True',
                data={
                    "shop": 'binance',
                    "coinName": coinName,
                    "dateTime": coinTime,
                    "content": details,
                })
            break
    yield item
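# The listing regex is easiest to read against a hypothetical sentence of the
# kind these announcements use (the coin name and wording are made up):
#
#   >>> s = '币安将于2018年06月21日下午15:00上线FOO币,敬请期待'
#   >>> m = re.search(r'将于(\d+)年(\d+)月(\d+)日(.*?)(\d+):(\d+)(.*?)上线(.*?),', s)
#   >>> m.group(1), m.group(2), m.group(3), m.group(5), m.group(6), m.group(8)
#   ('2018', '06', '21', '15', '00', 'FOO币')
#
# Groups 4 and 7 absorb filler text; group 8, everything between '上线' and
# the following comma, is taken as the coin name.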
def detail_parse(self, response):
    """
    The notice list response is JSON of the form:

    [
        {
            "id": 100114,
            "createdAt": 1529136845456,
            "updatedAt": 1529171287662,
            "columnId": 5,
            "languageId": 1,
            "beginTime": 1529107200000,
            "endTime": 1556668800000,
            "sortsId": 9945,
            "userId": 100020,
            "content": "<p style=\"line-height: 2em;\">...",
            "title": "【公告】币为Btcdo已完成更换USDT合约",
            "status": 1
        }
    ]
    """
    l_last = json.loads(response.text)[0]
    tid = l_last['id']
    item = SecondBaseNoticeItem()
    item['name'] = 'btcdo'
    item['resource'] = 'btcdo.com'
    item['url'] = 'https://www.btcdo.com/index/notice/noticeDetail?id=%s' % tid
    item['title'] = l_last['title']
    content = l_last['content'].replace('\t', '').replace(' ', '')
    item['main'] = re.compile('<[^>]+>').sub("", content)
    x = time.localtime(l_last['beginTime'] // 1000)
    item['time'] = time.strftime('%Y-%m-%d %H:%M:%S', x)
    logging.debug('[BTCDO] Get item: %s', item)
    yield item
def parse_notice(self, response):
    response_json = json.loads(response.body.decode('utf8'))
    if not response_json['success']:
        return
    notices = response_json['data']['items']
    top = [d for d in notices if d['topNotice']]
    normal = [d for d in notices if not d['topNotice']]
    # Emit the newest pinned notice, then the newest regular one.
    for notice in (top[0], normal[0]):
        item = SecondBaseNoticeItem()
        item['name'] = 'HADAX'
        item['resource'] = 'hadax.com'
        item['title'] = notice['title']
        item['url'] = self.base_url + str(notice['id'])
        timestamp = int(notice['created'] / 1000)  # millisecond epoch
        item['time'] = time.strftime('%Y-%m-%d %H:%M:%S',
                                     time.localtime(timestamp))
        response_str = requests.get(self.info_url + str(notice['id']))
        content = json.loads(response_str.content.decode('utf-8'))
        if not content['success']:
            return
        item['main'] = pq(content['data']['content']).text()
        yield item
def parse_notice(self, response):
    response_json = json.loads(response.body.decode('utf8'))
    if not response_json['success']:
        return
    notices = response_json['data']['items']
    top = [d for d in notices if d['topNotice']]
    normal = [d for d in notices if not d['topNotice']]
    # Emit the newest pinned notice, then the newest regular one.
    for notice in (top[0], normal[0]):
        item = SecondBaseNoticeItem()
        item['name'] = 'huobipro'
        item['resource'] = 'huobipro.com'
        item['title'] = notice['title']
        item['url'] = self.base_url + str(notice['id'])
        timestamp = int(notice['created'] / 1000)  # millisecond epoch
        item['time'] = time.strftime('%Y-%m-%d %H:%M:%S',
                                     time.localtime(timestamp))
        response_str = requests.get(self.notice_detail_url + str(notice['id']))
        content = json.loads(response_str.content.decode('utf-8'))
        if not content['success']:
            return
        details = pq(content['data']['content']).text()
        item['main'] = details
        # Listing keywords: '上线' (goes live) and '全球首发' (global debut).
        for keyword in ('上线', '全球首发'):
            if keyword in notice['title']:
                # In titles like '...6月21日15:00上线XXX', the listing time
                # precedes the keyword and the coin name follows it.
                parts = notice['title'].split(keyword)
                sTime = re.search(r'(\d+)月(\d+)日(\d+):(\d+)', parts[0])
                coinName = parts[1]
                # NB: the year is hard-coded.
                coinTime = "2018-%s-%s %s:%s:00" % sTime.groups()
                requests.post(
                    'http://47.75.122.224/filterApi.php?insertTweet=True',
                    data={
                        "shop": 'huobipro',
                        "coinName": coinName,
                        "dateTime": coinTime,
                        "content": details,
                    })
                break
        yield item
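# Both the binance and huobipro spiders POST detected listings to the same
# filter endpoint with the same payload. A shared helper would keep that in
# one place; a sketch (the function name is hypothetical):

def report_listing(shop, coin_name, coin_time, content):
    """POST a detected coin listing to the internal filter API."""
    return requests.post(
        'http://47.75.122.224/filterApi.php?insertTweet=True',
        data={
            "shop": shop,
            "coinName": coin_name,
            "dateTime": coin_time,
            "content": content,
        })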