Exemple #1
0
 def parse_info(self, response):
     """Build a JamanetworkItem from an article page and yield it.

     Title, date, abstract, DOI and authors are read from ``<meta>`` tags.
     The ISSN is read from the text under ``//small//span`` and passed
     through ``tranfrom_issn``. ``source`` and ``if_2017`` are only filled
     in when the ISSN equals one of the two values listed below; for any
     other ISSN those two fields are left unset.

     :param response: page response exposing ``xpath()`` and
         ``meta['link']`` (presumably a Scrapy response -- confirm).
     """
     # (issn, source, if_2017) for the journals this parser recognises.
     known_journals = (
         ("1468-2060", "Annals of the rheumatic diseases", 12.35),
         ("1468-3288", "Gut", 17.016),
     )

     record = JamanetworkItem()
     title_nodes = response.xpath('//meta[@property="og:title"]/@content')
     record['title'] = title_nodes.extract_first()
     record['link'] = response.meta['link']

     raw_issn = response.xpath('//small//span//text()').get()
     journal_issn = tranfrom_issn(raw_issn)
     record['issn'] = journal_issn
     for known_issn, journal_name, impact_factor in known_journals:
         if journal_issn == known_issn:
             record['source'] = journal_name
             record['if_2017'] = impact_factor
             break

     record['pub_date'] = response.xpath('//meta[@name="DC.Date"]/@content').get()
     description_parts = response.xpath('//meta[@name="DC.Description"]/@content').extract()
     record['abstract'] = strip_tag(description_parts)
     record['doi'] = response.xpath('//meta[@name="citation_doi"]/@content').get()
     record['authors'] = response.xpath('//meta[@name="citation_author"]/@content').extract()
     # Author affiliations (citation_author_institution meta tags) are not collected.
     record["is_pubmed"] = 0
     yield record
Exemple #2
0
 def parse_info(self, response):
     """Build a JamanetworkItem from a page carrying ``wkhealth_*`` meta tags.

     ISSN "0021-9355" is labelled as The Journal of bone and joint surgery
     (American volume); every other ISSN value, including a missing one,
     is labelled as Annals of surgery.

     :param response: page response exposing ``xpath()`` and
         ``meta['link']`` (presumably a Scrapy response -- confirm).
     """
     entry = JamanetworkItem()
     entry['title'] = response.xpath('//meta[@name="wkhealth_title"]/@content').extract_first()
     entry['link'] = response.meta['link']

     journal_issn = response.xpath('//meta[@name="wkhealth_issn"]/@content').get()
     entry['issn'] = journal_issn
     is_jbjs = journal_issn == "0021-9355"
     entry['source'] = (
         "The Journal of bone and joint surgery. American volume"
         if is_jbjs else "Annals of surgery"
     )
     entry['if_2017'] = 4.583 if is_jbjs else 9.203

     raw_date = response.xpath('//meta[@name="wkhealth_date"]/@content').get()
     entry['pub_date'] = tranfrom_date(raw_date)

     # The abstract comes from the page body, not from a meta tag; fall back
     # to an empty string when strip_tag returns something falsy.
     abstract_nodes = response.xpath('//section[@class="article-abstract"]//div//p').extract()
     entry['abstract'] = strip_tag(abstract_nodes) or ''

     entry['doi'] = response.xpath('//meta[@name="wkhealth_doi"]/@content').get()
     entry['authors'] = response.xpath('//meta[@name="wkhealth_authors"]/@content').extract()
     # Author affiliations (citation_author_institution meta tags) are not collected.
     entry["is_pubmed"] = 0
     yield entry
Exemple #3
0
 def parse_info(self, response):
     """Build a JamanetworkItem for a Lancet-family article and yield it.

     The journal name (``source``) and ``if_2017`` are chosen from the
     page's ``citation_issn`` meta tag. An ISSN that is not listed below
     yields empty strings for both fields.

     :param response: page response exposing ``xpath()`` and
         ``meta['link']`` (presumably a Scrapy response -- confirm).
     :raises IndexError: if the page has no ``citation_issn`` meta tag.
     """
     # (ISSNs, source, if_2017), checked in order; the first match wins.
     # Membership tests are required here: an expression such as
     # `issn == "a" or "b"` is always truthy because "b" is a non-empty
     # string, which would label every article as the first journal.
     journals = (
         (("0140-6736", "1474-547X"), "Lancet", 53.254),
         (("2468-2667",), "The Lancet. Public health", 1.441),
         (("2213-2600", "2213-2619"), "The Lancet. Respiratory medicine", 3.23),
         (("2542-5196",), "The Lancet. Planetary health", ""),
         # NOTE(review): these two ISSNs look like The Lancet Psychiatry's
         # rather than Child & adolescent health's -- confirm the label.
         (("2215-0366", "2215-0374"), "The Lancet. Child & adolescent health", 15.233),
         (("1474-4465", "1474-4422"), "The Lancet. Neurology", 27.138),
         (("1470-2045", "1474-5488"), "The Lancet. Oncology", 36.418),
         (("2213-8587", "2213-8595"), "The lancet. Diabetes & endocrinology", 19.313),
     )

     item = JamanetworkItem()
     item['title'] = strip_tag(
         response.xpath('//meta[@property="og:title"]/@content').extract())
     item['link'] = response.meta['link']

     issn = response.xpath(
         '//meta[@name="citation_issn"]/@content').extract()[0]
     item['issn'] = issn
     source, impact_factor = next(
         ((name, factor) for issns, name, factor in journals if issn in issns),
         ("", ""),  # unknown ISSN: leave both fields blank
     )
     item['source'] = source
     item['if_2017'] = impact_factor

     item['pub_date'] = tranfrom_date(
         response.xpath('//meta[contains(@name,"date")]/@content').get())

     # Prefer the abstract meta tag, then the body paragraphs, then a single
     # space as a placeholder when neither produces anything truthy.
     item['abstract'] = (
         strip_tag(response.xpath('//meta[@name="citation_abstract"]/@content').extract())
         or handler_abstract(strip_tag(response.xpath('//div[@class="section-paragraph"]//text()').extract()))
         or " "
     )
     item['doi'] = response.xpath(
         '//meta[@name="citation_doi"]/@content').get()
     item['authors'] = response.xpath(
         '//meta[@name="citation_author"]/@content').extract()
     # Author affiliations (citation_author_institution meta tags) are not collected.
     item["is_pubmed"] = 0
     yield item
 def parse_info(self, response):
     """Build a JamanetworkItem for a Science immunology article and yield it.

     ``source``, ``issn`` and ``if_2017`` are hard-coded; the remaining
     fields are read from the page's ``<meta>`` tags.

     :param response: page response exposing ``xpath()`` and
         ``meta['link']`` (presumably a Scrapy response -- confirm).
     """
     item = JamanetworkItem()
     item['title'] = strip_tag(response.xpath('//meta[@property="og:title"]/@content').extract())
     item['link'] = response.meta['link']
     item['source'] = "Science immunology"

     pub_date = response.xpath('//meta[@name="DC.Date"]/@content').get()
     if not pub_date:
         published_time = response.xpath('//meta[@property="article:published_time"]/@content').get()
         # Keep only the first 10 characters (presumably the YYYY-MM-DD part
         # of a timestamp). Guard against a missing tag: slicing None would
         # raise TypeError and abort the whole item.
         pub_date = published_time[:10] if published_time else published_time
     item['pub_date'] = pub_date

     item['abstract'] = strip_tag(response.xpath('//meta[@name="og:description"]/@content').extract()) or \
                        strip_tag(response.xpath('//meta[@itemprop="description"]/@content').extract())
     # A single space is the placeholder when no DOI / author tag is present.
     item['doi'] = response.xpath('//meta[@name="citation_doi"]/@content').get() or \
                   response.xpath('//meta[@name="news_doi"]/@content').get() or ' '
     item['authors'] = response.xpath('//meta[@name="citation_author"]/@content').extract() or " "
     # Author affiliations (citation_author_institution meta tags) are not collected.
     item['if_2017'] = 0.001
     item['issn'] = '2470-9468'
     item["is_pubmed"] = 0
     yield item
 def parse_info(self, response):
     """Build a JamanetworkItem for a JAMA internal medicine article and yield it.

     ``source``, ``issn`` and ``if_2017`` are hard-coded. The publication
     date prefers ``citation_online_date`` over ``citation_publication_date``;
     the abstract prefers the ``citation_abstract`` meta tag over the
     paragraph text found in the page body.

     :param response: page response exposing ``xpath()`` and
         ``meta['link']`` (presumably a Scrapy response -- confirm).
     """
     record = JamanetworkItem()
     title_parts = response.xpath('//meta[@property="og:title"]/@content').extract()
     record['title'] = strip_tag(title_parts)
     record['link'] = response.meta['link']
     record['source'] = "JAMA internal medicine"

     published = tranfrom_date(
         response.xpath('//meta[@name="citation_online_date"]/@content').get())
     if not published:
         published = tranfrom_date(
             response.xpath('//meta[@name="citation_publication_date"]/@content').get())
     record['pub_date'] = published

     meta_abstract = response.xpath('//meta[@name="citation_abstract"]/@content').extract_first()
     # The body-text abstract is always computed, even when the meta tag wins.
     body_xpath = ('//div[@class="abstract-content"]//p[@class="para"]//text() | '
                   '//div[@class="article-full-text"]//p[@class="para"]//text()')
     body_abstract = strip_tag(handler_abstract(response.xpath(body_xpath).extract()))
     record['abstract'] = strip_tag(meta_abstract) if meta_abstract else body_abstract

     record['doi'] = response.xpath('//meta[@name="citation_doi"]/@content').get()
     record['authors'] = response.xpath('//meta[@name="citation_author"]/@content').extract()
     # Author affiliations (citation_author_institution meta tags) are not collected.
     record['if_2017'] = 19.989
     record['issn'] = "2168-6114"
     record["is_pubmed"] = 0
     yield record
Exemple #6
0
 def parse_info(self, response):
     """Build a JamanetworkItem for a Diabetes article and yield it.

     ``source``, ``issn`` and ``if_2017`` are hard-coded. The abstract is
     taken from the ``og:description`` meta tag when that yields something
     truthy, otherwise from ``citation_abstract``.

     :param response: page response exposing ``xpath()`` and
         ``meta['link']`` (presumably a Scrapy response -- confirm).
     """
     entry = JamanetworkItem()
     title_parts = response.xpath('//meta[@property="og:title"]/@content').extract()
     entry['title'] = strip_tag(title_parts)
     entry['link'] = response.meta['link']
     entry['source'] = "Diabetes"
     entry['pub_date'] = response.xpath('//meta[@name="DC.Date"]/@content').get()

     summary = strip_tag(
         response.xpath('//meta[@name="og:description"]//@content').extract())
     if not summary:
         citation_parts = response.xpath('//meta[@name="citation_abstract"]//@content').extract()
         summary = strip_tag(handler_abstract(citation_parts))
     entry['abstract'] = summary

     entry['doi'] = response.xpath('//meta[@name="citation_doi"]/@content').get()
     entry['authors'] = response.xpath('//meta[@name="citation_author"]/@content').extract()
     # Author affiliations (citation_author_institution meta tags) are not collected.
     entry['if_2017'] = 7.273
     # Print ISSN is stored; the electronic ISSN is 1939-327X.
     entry['issn'] = '0012-1797'
     entry["is_pubmed"] = 0
     yield entry
Exemple #7
0
 def parse_info(self, response):
     """Build a JamanetworkItem for a BMJ article and yield it.

     ``source`` and ``if_2017`` are hard-coded; the ISSN is the first
     ``citation_issn`` meta value on the page.

     :param response: page response exposing ``xpath()`` and
         ``meta['link']`` (presumably a Scrapy response -- confirm).
     :raises IndexError: if the page has no ``citation_issn`` meta tag.
     """
     record = JamanetworkItem()
     record['title'] = response.xpath('//meta[@name="citation_title"]/@content').extract_first()
     record['link'] = response.meta['link']

     issn_values = response.xpath('//meta[@name="citation_issn"]/@content').extract()
     record['issn'] = issn_values[0]
     record['source'] = "BMJ : British medical journal"
     record['if_2017'] = 23.259

     record['pub_date'] = response.xpath('//meta[@name="article:published_time"]/@content').get()
     # Only the first DC.Description value is handed to strip_tag here.
     description = response.xpath('//meta[@name="DC.Description"]//@content').get()
     record['abstract'] = strip_tag(description)
     record['doi'] = response.xpath('//meta[@name="citation_doi"]/@content').get()
     record['authors'] = response.xpath('//meta[@name="citation_author"]/@content').extract()
     # Author affiliations (citation_author_institution meta tags) are not collected.
     record["is_pubmed"] = 0
     yield record
 def parse_info(self, response):
     """Build a JamanetworkItem for a Cancer discovery article and yield it.

     ``issn``, ``source`` and ``if_2017`` are hard-coded. Title, date,
     abstract and DOI come from the page's ``DC.*`` meta tags; authors come
     from ``citation_author``.

     :param response: page response exposing ``xpath()`` and
         ``meta['link']`` (presumably a Scrapy response -- confirm).
     """
     entry = JamanetworkItem()
     entry['title'] = response.xpath('//meta[@name="DC.Title"]/@content').extract_first()
     entry['link'] = response.meta['link']
     entry['issn'] = "2159-8290"
     entry['source'] = "Cancer discovery"
     entry['if_2017'] = 24.373
     entry['pub_date'] = response.xpath('//meta[@name="DC.Date"]/@content').get()

     description_parts = response.xpath('//meta[@name="DC.Description"]/@content').extract()
     entry['abstract'] = strip_tag(description_parts)
     # The DOI is read from DC.Identifier rather than citation_doi.
     entry['doi'] = response.xpath('//meta[@name="DC.Identifier"]/@content').get()
     entry['authors'] = response.xpath('//meta[@name="citation_author"]/@content').extract()
     # Author affiliations (citation_author_institution meta tags) are not collected.
     entry["is_pubmed"] = 0
     yield entry
Exemple #9
0
 def parse_info(self, response):
     """Build a JamanetworkItem for a Science article and yield it.

     ``issn``, ``if_2017`` and ``source`` are hard-coded; the remaining
     fields are read from the page's ``<meta>`` tags.

     :param response: page response exposing ``xpath()`` and
         ``meta['link']`` (presumably a Scrapy response -- confirm).
     """
     item = JamanetworkItem()
     item['title'] = strip_tag(
         response.xpath('//meta[@property="og:title"]/@content').extract())
     item['link'] = response.meta['link']
     # Electronic ISSN is stored; the print ISSN is 0036-8075.
     item["issn"] = "1095-9203"
     item['if_2017'] = 41.058
     item['source'] = "Science"

     pub_date = response.xpath('//meta[@name="DC.Date"]/@content').get()
     if not pub_date:
         published_time = response.xpath('//meta[@property="article:published_time"]/@content').get()
         # Keep only the first 10 characters (presumably the YYYY-MM-DD part
         # of a timestamp). Guard against a missing tag: slicing None would
         # raise TypeError and abort the whole item.
         pub_date = published_time[:10] if published_time else published_time
     item['pub_date'] = pub_date

     item['abstract'] = strip_tag(response.xpath('//meta[@name="og:description"]/@content').extract()) or \
                        strip_tag(response.xpath('//meta[@itemprop="description"]/@content').extract())
     # Try the DOI sources in order; the result is None when none is present.
     item['doi'] = response.xpath('//meta[@name="citation_doi"]/@content').get() or \
                   response.xpath('//meta[@name="news_doi"]/@content').get() or \
                   response.xpath('//meta[@name="DC.Relation"]/@content').get()
     # A single space is the placeholder when the page lists no authors.
     item['authors'] = response.xpath(
         '//meta[@name="citation_author"]/@content').extract() or " "
     # Author affiliations (citation_author_institution meta tags) are not collected.
     item["is_pubmed"] = 0
     yield item
Exemple #10
0
 def parse_info(self, response):
     """Build a JamanetworkItem for a Clinical cancer research article and yield it.

     ``issn``, ``source`` and ``if_2017`` are hard-coded. Title, date,
     abstract and DOI come from the page's ``DC.*`` meta tags; authors come
     from ``citation_author``.

     :param response: page response exposing ``xpath()`` and
         ``meta['link']`` (presumably a Scrapy response -- confirm).
     """
     journal_name = "Clinical cancer research : an official journal of the American Association for Cancer Research"

     record = JamanetworkItem()
     record['title'] = response.xpath('//meta[@name="DC.Title"]/@content').extract_first()
     record['link'] = response.meta['link']
     # Electronic ISSN is stored; the print ISSN is 1078-0432.
     record['issn'] = '1557-3265'
     record['source'] = journal_name
     record['if_2017'] = 10.199
     record['pub_date'] = response.xpath('//meta[@name="DC.Date"]/@content').get()

     description_parts = response.xpath('//meta[@name="DC.Description"]/@content').extract()
     record['abstract'] = strip_tag(description_parts)
     # The DOI is read from DC.Identifier rather than citation_doi.
     record['doi'] = response.xpath('//meta[@name="DC.Identifier"]/@content').get()
     record['authors'] = response.xpath('//meta[@name="citation_author"]/@content').extract()
     # Author affiliations (citation_author_institution meta tags) are not collected.
     record["is_pubmed"] = 0
     yield record
Exemple #11
0
 def parse_info(self, response):
     """Build a JamanetworkItem for a New England journal of medicine article and yield it.

     ``source``, ``issn`` and ``if_2017`` are hard-coded. The abstract and
     the author list are scraped from the page body rather than from meta
     tags.

     :param response: page response exposing ``xpath()`` and
         ``meta['link']`` (presumably a Scrapy response -- confirm).
     """
     entry = JamanetworkItem()
     entry['title'] = response.xpath('//meta[@name="twitter:title"]/@content').get()
     entry['link'] = response.meta['link']
     entry['source'] = "The New England journal of medicine"

     event_date = response.xpath('//meta[@name="evt-dt"]/@content').get()
     if not event_date:
         event_date = response.xpath('//meta[@name="dc.Date"]/@content').get()
     entry['pub_date'] = event_date

     # Article body section first, then the letter layout, then empty string.
     body_text = strip_tag(
         response.xpath('//section[@class="o-article-body__section"]//text()').extract())
     if not body_text:
         body_text = strip_tag(
             response.xpath('//div[@class="m-letter"]//p[@class="f-body"]//text()').extract())
     entry['abstract'] = body_text or ''

     entry['doi'] = response.xpath('//meta[@name="evt-doiPage"]/@content').get()
     author_xpath = '//header//ul[contains(@class,"m-article-header__authors")]//li//text()'
     entry['authors'] = response.xpath(author_xpath).extract()
     # Author affiliations (citation_author_institution meta tags) are not collected.
     entry['if_2017'] = 79.258
     # Electronic ISSN is hard-coded instead of being scraped from the page;
     # the print ISSN is 0028-4793.
     entry['issn'] = "1533-4406"
     entry["is_pubmed"] = 0
     yield entry
Exemple #12
0
 def parse_info(self, response):
     """Build a JamanetworkItem for a Brain or European heart journal article and yield it.

     The journal is chosen by whether the substring ``'brain'`` occurs in
     the link stored in ``response.meta['link']``; that choice drives both
     ``source`` and ``if_2017``. The ISSN is scraped from the page footer
     and passed through ``tranfrom_issn``.

     :param response: page response exposing ``xpath()`` and
         ``meta['link']`` (presumably a Scrapy response -- confirm).
     """
     record = JamanetworkItem()
     record['title'] = response.xpath('//meta[@property="og:title"]/@content').extract_first()
     page_link = response.meta['link']
     record['link'] = page_link

     is_brain = 'brain' in page_link
     if is_brain:
         record['source'] = "Brain : a journal of neurology"
     else:
         record['source'] = "European heart journal"
     record['pub_date'] = response.xpath('//meta[@property="og:updated_time"]/@content').get()

     # Abstract section first, then the widget paragraphs; in both cases
     # leading/trailing "[", "'" and "]" characters are stripped off.
     summary = strip_tag(
         response.xpath('//section[@class="abstract"]//text()').extract()).strip("[']")
     if not summary:
         summary = strip_tag(
             response.xpath('//div[@class="widget-items"]//p//text()').extract()).strip("[']")
     # A single space is the placeholder when no abstract text was found.
     record['abstract'] = summary or " "

     record['doi'] = response.xpath('//meta[@name="citation_doi"]/@content').get()
     record['authors'] = response.xpath('//meta[@name="citation_author"]/@content').extract()
     # Author affiliations (citation_author_institution meta tags) are not collected.
     record['if_2017'] = 10.84 if is_brain else 23.425
     footer_text = response.xpath('//div[@class="journal-footer-colophon"]//li//text()').get()
     record['issn'] = tranfrom_issn(footer_text)
     record["is_pubmed"] = 0
     yield record