Esempio n. 1
0
 def parse_credit_outlook(self, response):
     """Yield one NormalbankItem per child ``div`` of ``div.result-details``.

     ``link`` is ``BASE_URL`` plus the first anchor's ``href``; ``title`` and
     ``push_date`` are both read from the anchor's ``data-analytics-link``
     attribute; ``text`` is the fixed placeholder ``'tag-end'``.

     Entries without an anchor ``href`` are skipped: concatenating ``None``
     onto ``BASE_URL`` would raise ``TypeError`` and end the generator,
     dropping every remaining entry on the page.
     """
     for entry in response.xpath(".//div[@class='result-details']/div"):
         href = entry.xpath('.//a/@href').get()
         if href is None:
             continue
         analytics_label = entry.xpath(".//a/@data-analytics-link").get()
         article = NormalbankItem()
         article['link'] = BASE_URL + href
         article['title'] = analytics_label
         # NOTE(review): push_date is filled from the same attribute as the
         # title, exactly as before; confirm whether a real date node exists.
         article['push_date'] = analytics_label
         article['text'] = 'tag-end'
         yield article
Esempio n. 2
0
 def parse_report(self, response):
     """Yield a NormalbankItem for each even/odd ``ds-artifact-item`` entry.

     Every field is the first match of its XPath relative to the ``li``
     element, or ``None`` when nothing matches.
     """
     # (item field, XPath relative to the list entry), in assignment order.
     field_paths = (
         ("title", ".//h4/a/text()"),
         ("name", ".//div[@class='content author-info']/span/a/text()"),
         ("text", ".//div[@class='artifact-info hidden-md hidden-lg']/span/a/text()"),
         ("link", ".//span/a/@href"),
     )
     entries = response.xpath(
         "//li[@class='ds-artifact-item even'] | //li[@class='ds-artifact-item odd']")
     for entry in entries:
         item = NormalbankItem()
         for field, path in field_paths:
             item[field] = entry.xpath(path).extract_first()
         yield item
Esempio n. 3
0
 def parse_artificial_intelligence(self, response):
     """Yield one NormalbankItem per ``section`` in the last ``div`` of ``div.outer``.

     ``link`` is ``BASE_URL`` plus the first anchor's ``href`` and ``title``
     is the first ``h3`` text; ``push_date`` and ``text`` are the fixed
     placeholder ``'tag-end'``.

     Sections without an anchor ``href`` are skipped, because
     ``BASE_URL + None`` would raise ``TypeError`` and stop the generator
     before the remaining sections are emitted.
     """
     for section in response.xpath(".//div[@class='outer']/div[last()]/section"):
         href = section.xpath(".//a/@href").get()
         if href is None:
             continue
         article = NormalbankItem()
         article['link'] = BASE_URL + href
         article['push_date'] = 'tag-end'
         article['text'] = 'tag-end'
         article['title'] = section.xpath('.//h3/text()').get()
         yield article
Esempio n. 4
0
 def parse_main(self, response):
     """Yield a NormalbankItem for each authorized research entry in the JSON body.

     The entries are read from ``data.researches`` of the decoded response
     text; those whose ``authorizationType`` is ``'Unauthorized'`` are
     left out.
     """
     payload = json.loads(response.text)
     for research in payload['data']['researches']:
         if research['authorizationType'] != 'Unauthorized':
             article = NormalbankItem()
             article['title'] = research['title']
             article['link'] = BASE_URL + research['url']
             article['text'] = research['synopsis']
             article['push_date'] = research['publishDate']
             yield article
Esempio n. 5
0
 def fitch_headlines(self, response):
     """Yield a NormalbankItem for every entry under ``items`` in the JSON body.

     ``title``, ``push_date`` and ``text`` are copied from the entry's
     ``title``, ``date`` and ``text`` keys; ``link`` is the entry's ``link``
     prefixed with the fitchratings.com origin.
     """
     origin = "https://www.fitchratings.com"
     for entry in json.loads(response.text)['items']:
         article = NormalbankItem()
         article['title'] = entry['title']
         article['push_date'] = entry['date']
         article['link'] = origin + entry['link']
         article['text'] = entry['text']
         yield article
Esempio n. 6
0
 def parse_fsi(self, response):
     """Yield one NormalbankItem per ``tr`` found under any ``tbody``.

     ``push_date`` is the stripped text of the first cell, ``title`` the
     anchor text of the second cell, ``link`` the first ``href`` in the row
     and ``text`` the fixed placeholder ``'tag-end'``. Any field whose XPath
     matches nothing is ``None``.
     """
     for row in response.xpath("//tbody/tr"):
         raw_date = row.xpath("./td[1]/text()").extract_first()
         item = NormalbankItem()
         item["title"] = row.xpath("./td[2]/a/text()").extract_first()
         # A row whose first cell has no text yields None; stripping it
         # unguarded would raise AttributeError and end the generator early.
         item["push_date"] = raw_date.strip() if raw_date is not None else None
         item["link"] = row.xpath(".//a/@href").extract_first()
         item['text'] = 'tag-end'
         yield item
Esempio n. 7
0
 def parse_index(self, response):
     """Pair the ``h4`` and ``p`` children of ``#content-main`` and yield items.

     The n-th heading is matched with the n-th paragraph; pairing stops at
     the shorter of the two lists. The heading supplies ``title`` and
     ``link``, the paragraph's ``span`` supplies ``push_date``, and ``text``
     is the fixed placeholder ``'tag-end'``.
     """
     headings = response.xpath("//*[@id='content-main']/h4")
     paragraphs = response.xpath("//*[@id='content-main']/p")
     for heading, paragraph in zip(headings, paragraphs):
         headline = heading.xpath("./a/text()").extract_first()
         published = paragraph.xpath("./span/text()").extract_first()
         target = heading.xpath(".//@href").extract_first()

         item = NormalbankItem()
         item["title"] = headline
         item["push_date"] = published
         item['link'] = target
         item['text'] = 'tag-end'
         yield item
Esempio n. 8
0
 def spglobal_index2(self, response):
     """Yield a NormalbankItem for each doc in a ``searchg2_<digits>(...)`` payload.

     The response text is expected to end with a call of the form
     ``searchg2_<digits>(<json>)``; the JSON argument is decoded and its
     ``response.docs`` list is iterated. ``text`` is the fixed placeholder
     ``"tag-end"``.

     Yields nothing when the wrapper is not found in the response text,
     instead of raising ``AttributeError`` on a failed regex match.
     """
     # Raw string: "\d" and "\(" are invalid escapes in a normal literal.
     wrapped = re.search(r"searchg2_\d+\((.+)\)$", response.text)
     if wrapped is None:
         return
     for doc in json.loads(wrapped.group(1))['response']['docs']:
         article = NormalbankItem()
         article['title'] = doc['title']
         article['push_date'] = doc['custom_dt_meta_publish_date']
         article['link'] = BASE_URL + doc['custom_s_local_url']
         article['text'] = "tag-end"
         yield article
Esempio n. 9
0
 def parse_index(self, response):
     """Yield one NormalbankItem per ``article`` inside ``div.list-content``.

     ``link``, ``title`` and ``push_date`` are the first match of their
     XPath relative to the article (``None`` when absent); ``text`` is the
     fixed placeholder ``'tag-end'``.
     """
     # (item field, XPath relative to the article), in assignment order.
     extracted_fields = (
         ("link", ".//div[@class='image-wrapper']/a/@href"),
         ("title", ".//h4//text()"),
         ("push_date", ".//time//text()"),
     )
     for node in response.xpath("//div[@class='list-content']/article"):
         article = NormalbankItem()
         for field, path in extracted_fields:
             article[field] = node.xpath(path).extract_first()
         article['text'] = 'tag-end'
         yield article
Esempio n. 10
0
 def fitch_country_risk(self, response):
     """Yield one NormalbankItem per nested ``article`` under ``main``.

     ``title`` and ``link`` come from the ``h2`` anchor (the link is
     prefixed with the fitchsolutions.com origin), ``push_date`` is the last
     text node of a ``p`` child and ``text`` is the first ``li`` text inside
     a ``div`` child.

     Articles whose ``h2`` anchor has no ``href`` are skipped; concatenating
     ``None`` onto the origin would raise ``TypeError`` and end the
     generator before the remaining articles are emitted.
     """
     for node in response.xpath("//main//article/div/article"):
         href = node.xpath("./h2/a/@href").get()
         if href is None:
             continue
         article = NormalbankItem()
         article['title'] = node.xpath("./h2/a/text()").get()
         article['link'] = "https://www.fitchsolutions.com" + href
         article['push_date'] = node.xpath("./p/text()[last()]").get()
         article['text'] = node.xpath("./div//li/text()").get()
         yield article
Esempio n. 11
0
 def parse_discussion(self, response):
     """Yield one NormalbankItem per child ``div`` of the block-list container.

     The container is ``div#main_0_universal_2_divBlockList``. ``link`` is
     ``BASE_URL`` plus the first anchor's ``href``; ``push_date``, ``title``
     and ``text`` are the first ``time`` text, the anchor's ``h3`` text and
     the ``div.description`` text (``None`` when absent).

     Blocks without an anchor ``href`` are skipped so that a single
     link-less block cannot raise ``TypeError`` and drop the rest.
     """
     blocks = response.xpath(
         "//div[@id='main_0_universal_2_divBlockList']/div")
     for block in blocks:
         href = block.xpath(".//a/@href").get()
         if href is None:
             continue
         article = NormalbankItem()
         article['link'] = BASE_URL + href
         article['push_date'] = block.xpath(".//time/text()").get()
         article['title'] = block.xpath(".//a/h3/text()").get()
         article['text'] = block.xpath(
             ".//div[@class='description']/text()").get()
         yield article
Esempio n. 12
0
 def parse_index(self, response):
     """Yield one NormalbankItem per ``tr`` with class ``item even``/``item odd``.

     ``push_date`` is the stripped text of the first cell, ``title`` the
     anchor text in the first ``div`` of the second cell, ``link`` the first
     ``href`` in the row and ``text`` the fixed placeholder ``"tag-end"``.
     Any field whose XPath matches nothing is ``None``.
     """
     rows = response.xpath(
         "//tr[@class='item even'] | //tr[@class='item odd']")
     for row in rows:
         raw_date = row.xpath("./td[1]/text()").extract_first()
         item = NormalbankItem()
         item["title"] = row.xpath("./td[2]/div[1]/a/text()").extract_first()
         # Guard the strip: a first cell without text gives None, and an
         # AttributeError here would end the generator for the whole page.
         item["push_date"] = raw_date.strip() if raw_date is not None else None
         item['link'] = row.xpath(".//a/@href").extract_first()
         item["text"] = "tag-end"
         yield item
Esempio n. 13
0
 def spglobal_index(self, response):
     """Yield one NormalbankItem per ``li`` of the carousel wrapper list.

     ``title`` is the first ``h1`` text, ``link`` is ``BASE_URL`` plus the
     ``href`` of the slide's direct child anchor, ``push_date`` is the text
     of the last ``li`` in ``ul.meta-data`` and ``text`` is the fixed
     placeholder ``"tag-end"``.

     Slides whose direct child anchor has no ``href`` are skipped, because
     ``BASE_URL + None`` would raise ``TypeError`` and stop the generator.
     """
     slides = response.xpath(
         "//div[contains(@class,'carousel__wrapper')]/ul/li")
     for slide in slides:
         href = slide.xpath("./a/@href").get()
         if href is None:
             continue
         article = NormalbankItem()
         article['title'] = slide.xpath(".//h1/text()").get()
         article['link'] = BASE_URL + href
         article['push_date'] = slide.xpath(
             ".//ul[@class ='meta-data']/li[last()]/text()").get()
         article['text'] = "tag-end"
         yield article
Esempio n. 14
0
 def parse_all(self, response):
     """Yield a NormalbankItem for each ``documents`` entry that has a URL.

     The response body is decoded as UTF-8 JSON and its ``documents``
     mapping is iterated. Entries without a ``url`` (missing or ``None``)
     are skipped. ``title`` and ``text`` are taken from the ``cdata!`` key
     of the entry's ``title`` and ``descr`` objects; ``push_date`` is the
     entry's ``lnchdt`` value.
     """
     documents = json.loads(response.body.decode("utf-8"))['documents']
     # Only the values are used, so iterate them directly rather than
     # looking each one up by key repeatedly.
     for doc in documents.values():
         if doc.get('url') is None:
             continue
         item = NormalbankItem()
         item['link'] = doc['url']
         item['title'] = doc['title']['cdata!']
         item['text'] = doc['descr']['cdata!']
         item['push_date'] = doc['lnchdt']
         yield item
Esempio n. 15
0
 def parse_main(self, response):
     """Yield one NormalbankItem per top-level ``body > a`` card except the last.

     The final anchor directly under ``body`` is excluded by the slice.
     ``title``, ``text`` and ``push_date`` are read from the fourth ``div``
     inside the anchor's first ``div``; ``link`` is the anchor's own
     ``href``. Fields whose XPath matches nothing are ``None``.
     """
     cards = response.xpath("//body/a")[0:-1]
     for card in cards:
         # Build a fresh item for every card. Reusing one instance would make
         # all yielded items the same object, so later cards would overwrite
         # the fields of items already handed to the consumer.
         item = NormalbankItem()
         item["title"] = card.xpath(
             "./div[1]/div[4]/h2/text()").extract_first()
         item["text"] = card.xpath("./div[1]/div[4]/p/text()").extract_first()
         item['link'] = card.xpath("./@href").extract_first()
         item["push_date"] = card.xpath(
             "./div[1]/div[4]/div/span[3]/text()").extract_first()
         yield item
Esempio n. 16
0
 def fitch_white_papers(self, response):  # second web page/site; continues passing on to the next site
     """Yield one NormalbankItem per ``article`` inside ``div.content``.

     ``title`` is the first text under ``h2``, ``link`` is the direct child
     anchor's ``href`` prefixed with the fitchsolutions.com origin,
     ``push_date`` is the joined last text node(s) of the ``p`` children and
     ``text`` is the first anchor text inside a ``p``. All values are
     stripped of surrounding whitespace.

     Articles without a direct child anchor ``href`` are skipped, and a
     missing title or text is stored as ``None``; the unguarded
     ``.get().strip()`` chain would raise ``AttributeError`` on the first
     such article and end the generator.
     """
     for node in response.xpath("//div[@class='content']/article"):
         href = node.xpath("./a/@href").get()
         if href is None:
             continue
         title = node.xpath("./h2//text()").get()
         summary = node.xpath("./p/a/text()").get()
         date_parts = node.xpath("./p/text()[last()]").getall()

         article = NormalbankItem()
         article['title'] = title.strip() if title is not None else None
         article['link'] = "https://www.fitchsolutions.com" + href.strip()
         article['push_date'] = "".join(date_parts).strip()
         article['text'] = summary.strip() if summary is not None else None
         yield article
Esempio n. 17
0
 def parse_whitepapers(self, response):
     """Yield one NormalbankItem per ``article`` nested under ``div.columns``.

     ``link`` is ``BASE_URL`` plus the ``href`` of the article's direct
     child anchor; ``push_date`` is the last ``span`` text in
     ``div.caption``; ``title`` and ``text`` are the ``h3`` and ``p`` text
     inside ``div.tout__details`` (``None`` when absent).

     Articles whose direct child anchor has no ``href`` are skipped, because
     ``BASE_URL + None`` would raise ``TypeError`` and stop the generator.
     """
     for node in response.xpath("//div[@class='columns']/div/div/article"):
         href = node.xpath("./a/@href").get()
         if href is None:
             continue
         article = NormalbankItem()
         article['link'] = BASE_URL + href
         article['push_date'] = node.xpath(
             ".//div[@class='caption']/span[last()]/text()").get()
         article['title'] = node.xpath(
             ".//div[@class='tout__details']/h3/text()").get()
         article['text'] = node.xpath(
             ".//div[@class='tout__details']/p/text()").get()
         yield article
Esempio n. 18
0
 def parse_index(self, response):
     """Yield one NormalbankItem per linked ``p`` inside ``div#mdcTS2``.

     Paragraphs without a direct child anchor ``href`` are skipped.
     ``link`` is ``BASE_URL`` plus that ``href``; ``push_date`` and
     ``title`` are read from nested ``font/font`` text nodes of the
     paragraph and of its anchor respectively; ``text`` is the fixed
     placeholder ``'tag-end'``.
     """
     for paragraph in response.xpath("//div[@id='mdcTS2']/p"):
         # Evaluate the href once and reuse it for the guard and the link.
         href = paragraph.xpath("./a/@href").get()
         if href is None:
             continue
         article = NormalbankItem()
         article['push_date'] = paragraph.xpath("./font/font/text()").get()
         article['link'] = BASE_URL + href
         article['title'] = paragraph.xpath("./a/font/font/text()").get()
         article['text'] = 'tag-end'
         yield article